diff --git a/.github/collectivex-source-run.env b/.github/collectivex-source-run.env
new file mode 100644
index 00000000..d5001ea0
--- /dev/null
+++ b/.github/collectivex-source-run.env
@@ -0,0 +1,5 @@
+source_run_id=28629100934
+source_sha=b21a7206b5bc58aee42f8dddc71e5fe3d9197779
+source_workflow=CollectiveX Sweep
+source_run_url=https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934
+triggered_at=2026-07-03T00:03:25Z
diff --git a/.github/workflows/update-collectivex-data.yml b/.github/workflows/update-collectivex-data.yml
new file mode 100644
index 00000000..ba6eab5f
--- /dev/null
+++ b/.github/workflows/update-collectivex-data.yml
@@ -0,0 +1,99 @@
+name: Update CollectiveX Data
+
+on:
+  push:
+    branches:
+      - collectivex
+    paths-ignore:
+      - packages/app/public/data/collectivex.json
+  workflow_dispatch:
+    inputs:
+      source_run_id:
+        description: Optional CollectiveX workflow run ID to include before it reaches completed status
+        type: string
+        required: false
+        default: ''
+  repository_dispatch:
+    types: [update-collectivex-data]
+
+concurrency:
+  group: update-collectivex-data
+  cancel-in-progress: false
+
+permissions:
+  contents: write
+
+jobs:
+  update:
+    timeout-minutes: 30
+    runs-on: ubuntu-latest
+    env:
+      TARGET_BRANCH: collectivex
+      SOURCE_RUN_ID: ${{ github.event.client_payload.source_run_id || github.event.inputs.source_run_id || '' }}
+    steps:
+      - name: Checkout app repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
+        with:
+          ref: ${{ env.TARGET_BRANCH }}
+          token: ${{ secrets.PAT }}
+          fetch-depth: 0
+
+      - name: Setup pnpm
+        uses: pnpm/action-setup@0e279bb959325dab635dd2c09392533439d90093 # v6.0.8
+
+      - name: Setup Node.js
+        uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
+        with:
+          node-version: '24'
+          cache: pnpm
+
+      - name: Install app dependencies
+        run: pnpm install --frozen-lockfile --filter @semianalysisai/inferencex-app...
+        env:
+          CYPRESS_INSTALL_BINARY: '0'
+
+      - name: Generate static CollectiveX snapshot
+        env:
+          GITHUB_TOKEN: ${{ secrets.PAT }}
+        run: |
+          set -euo pipefail
+          source_run_id="$SOURCE_RUN_ID"
+          if [ -z "$source_run_id" ] && [ "${GITHUB_EVENT_NAME:-}" = "push" ]; then
+            if git diff-tree --no-commit-id --name-only -r "$GITHUB_SHA" | grep -qx '.github/collectivex-source-run.env'; then
+              source_run_id="$(sed -n 's/^source_run_id=//p' .github/collectivex-source-run.env | head -n 1 | tr -cd '0-9')"
+            fi
+          fi
+
+          # Export the resolved ID (digits only) so later steps can report it even
+          # when it came from the env file on push rather than a dispatch payload.
+          printf 'RESOLVED_SOURCE_RUN_ID=%s\n' "$(printf '%s' "$source_run_id" | tr -cd '0-9')" >> "$GITHUB_ENV"
+
+          if [ -n "$source_run_id" ]; then
+            pnpm --filter @semianalysisai/inferencex-app generate:collectivex -- \
+              --source-run-id "$source_run_id"
+          else
+            pnpm --filter @semianalysisai/inferencex-app generate:collectivex
+          fi
+
+      - name: Commit snapshot when data changed
+        run: |
+          set -euo pipefail
+          SNAPSHOT=packages/app/public/data/collectivex.json
+          git add "$SNAPSHOT"
+          if git diff --cached --quiet; then
+            echo "CollectiveX snapshot is already current." >> "$GITHUB_STEP_SUMMARY"
+            exit 0
+          fi
+
+          git config user.name "InferenceX Data Bot"
+          git config user.email "actions@users.noreply.github.com"
+          git commit -m "chore: update CollectiveX data"
+          git pull --rebase origin "$TARGET_BRANCH"
+          git push origin "HEAD:$TARGET_BRANCH"
+
+          {
+            echo "Updated \`$SNAPSHOT\`."
+            if [ -n "${RESOLVED_SOURCE_RUN_ID:-}" ]; then
+              echo "Included source run: \`$RESOLVED_SOURCE_RUN_ID\`."
+            fi
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.gitignore b/.gitignore
index a86f6e23..18acd7ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,6 +52,7 @@
 **/public/data/*
 !**/public/data/github-stars.json
 !**/public/data/evaluation/dummy_eval_data.json
+!**/public/data/collectivex.json
 
 # cypress
 **/cypress/videos/
diff --git a/packages/app/cypress/component/tab-nav.cy.tsx b/packages/app/cypress/component/tab-nav.cy.tsx
index 2c24d256..31229ac7 100644
--- a/packages/app/cypress/component/tab-nav.cy.tsx
+++ b/packages/app/cypress/component/tab-nav.cy.tsx
@@ -70,6 +70,11 @@ describe('TabNav — unofficialrun URL preservation (issue #319)', () => {
       'href',
       '/submissions?unofficialruns=12345',
     );
+    cy.get('[data-testid="tab-trigger-collectivex"]').should(
+      'have.attr',
+      'href',
+      '/collectivex?unofficialruns=12345',
+    );
     cy.get('[data-testid="tab-trigger-historical"]').should(
       'have.attr',
       'href',
@@ -109,6 +114,7 @@ describe('TabNav — Hidden popover for gated tabs', () => {
     mountTabNav({});
     cy.get('[data-testid="tab-trigger-inference"]').should('exist');
     cy.get('[data-testid="tab-trigger-gpu-specs"]').should('exist');
+    cy.get('[data-testid="tab-trigger-collectivex"]').should('exist');
     cy.get('[data-testid="tab-trigger-submissions"]').should('exist');
     cy.get('[data-testid="tab-trigger-hidden"]').should('not.exist');
     cy.get('[data-testid="tab-trigger-feedback"]').should('not.exist');
diff --git a/packages/app/cypress/e2e/collectivex.cy.ts b/packages/app/cypress/e2e/collectivex.cy.ts
new file mode 100644
index 00000000..7072b55c
--- /dev/null
+++ b/packages/app/cypress/e2e/collectivex.cy.ts
@@ -0,0 +1,190 @@
+/** Asserts that the toggle group `testId` renders exactly `labels` as its buttons, in order. */
+function expectToggleOptions(testId: string, labels: readonly string[]) {
+  cy.get(`[data-testid="${testId}"]`)
+    .find('button')
+    .then(($buttons) => {
+      expect($buttons.toArray().map((button) => button.textContent?.trim())).to.deep.equal(labels);
+    });
+}
+
+/** Yields the trimmed, non-empty text of each x-axis tick in the explorer chart. */
+function xTickLabels() {
+  return cy.get('[data-testid="collectivex-explorer-chart"] .x-axis .tick text').then(($ticks) =>
+    $ticks
+      .toArray()
+      .map((tick) => tick.textContent?.trim())
+      .filter(Boolean),
+  );
+}
+
+describe('CollectiveX', () => {
+  beforeEach(() => {
+    cy.intercept('GET', '/data/collectivex.json', { fixture: 'api/collectivex.json' }).as(
+      'collectivexData',
+    );
+    cy.visit('/collectivex');
+    cy.wait('@collectivexData');
+  });
+
+  it('renders the v3 report hierarchy with headline defaults', () => {
+    cy.get('[data-testid="collectivex-display"]')
+      .should('contain.text', 'CollectiveX')
+      .and('contain.text', 'Retained sweeps');
+
+    cy.contains('[role="tab"]', 'EP dispatch / combine').should(
+      'have.attr',
+      'aria-selected',
+      'true',
+    );
+    cy.contains('[role="tab"]', 'Decision').should('be.visible');
+    cy.contains('[role="tab"]', 'All-reduce').should('be.visible');
+    cy.contains('[role="tab"]', 'KV-cache transfer').should('be.visible');
+
+    cy.get('[data-testid="collectivex-main-chart"]')
+      .should('contain.text', 'Round trip (measured) · decode · p99')
+      .and('contain.text', 'DeepSeek-V3/V4')
+      .and('contain.text', 'BF16')
+      .and('contain.text', 'EP8');
+    cy.get('[data-testid="collectivex-explorer-chart"] svg').should('be.visible');
+    cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 3);
+    cy.get('[data-testid="collectivex-explorer-chart"] .point').should('have.length', 20);
+    xTickLabels().should('include.members', ['1', '8', '128']);
+  });
+
+  it('keeps log axes sparse and byte/tokens axes readable', () => {
+    cy.get('[data-testid="collectivex-explorer-chart"] .y-axis .tick').should(($ticks) => {
+      expect($ticks.length).to.be.greaterThan(0);
+      expect($ticks.length).to.be.at.most(5);
+    });
+
+    cy.contains('[role="tab"]', 'All-reduce').click();
+    cy.get('[data-testid="collectivex-all-reduce-chart"] svg').should('be.visible');
+    cy.get('[data-testid="collectivex-all-reduce-chart"] .x-axis .tick').should(($ticks) => {
+      expect($ticks.length).to.be.at.most(10);
+    });
+  });
+
+  it('exposes model, backend, precision, activation, EP, publication, and axis controls', () => {
+    expectToggleOptions('collectivex-operation-toggle', [
+      'Dispatch',
+      'Combine',
+      'Round trip',
+      'Isolated sum',
+    ]);
+    expectToggleOptions('collectivex-phase-toggle', ['Decode', 'Prefill']);
+    expectToggleOptions('collectivex-percentile-toggle', ['p50', 'p90', 'p99']);
+    expectToggleOptions('collectivex-suite-toggle', [
+      'All',
+      'Backend default',
+      'Resource constrained',
+    ]);
+    expectToggleOptions('collectivex-publication-toggle', [
+      'Official headline',
+      'Publishable',
+      'Official only',
+      'All',
+    ]);
+    expectToggleOptions('collectivex-precision-toggle', ['All', 'BF16', 'FP8']);
+    expectToggleOptions('collectivex-ep-toggle', ['All', 'EP8']);
+    expectToggleOptions('collectivex-x-scale-toggle', ['Log', 'Linear']);
+    expectToggleOptions('collectivex-y-scale-toggle', ['Log', 'Linear']);
+
+    cy.get('[data-testid="collectivex-model-shape-select"]').should(
+      'contain.text',
+      'DeepSeek-V3/V4',
+    );
+    cy.get('[data-testid="collectivex-backend-select"]').click();
+    cy.get('[role="option"]').then(($options) => {
+      expect($options.toArray().map((option) => option.textContent?.trim())).to.include.members([
+        'All',
+        'deepep',
+        'deepep v2',
+      ]);
+    });
+    cy.contains('[role="option"]', 'All').click();
+    cy.get('[data-testid="collectivex-activation-select"]').click();
+    cy.get('[role="option"]').then(($options) => {
+      expect($options.toArray().map((option) => option.textContent?.trim())).to.include.members([
+        'All',
+        'Normal',
+      ]);
+    });
+    cy.contains('[role="option"]', 'Normal').click();
+    cy.get('[data-testid="collectivex-activation-select"]').should('contain.text', 'Normal');
+  });
+
+  it('filters the EP explorer by precision, activation, routing, and phase', () => {
+    cy.get('[data-testid="collectivex-suite-toggle"]')
+      .contains('button', 'Backend default')
+      .click();
+    cy.get('[data-testid="collectivex-precision-toggle"]').contains('button', 'FP8').click();
+    cy.get('[data-testid="collectivex-main-chart"]')
+      .should('contain.text', 'FP8')
+      .and('contain.text', 'B300 EP8 · deepep v2 · fp8');
+    cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 1);
+
+    cy.get('[data-testid="collectivex-suite-toggle"]').contains('button', 'All').click();
+    cy.get('[data-testid="collectivex-precision-toggle"]').contains('button', 'BF16').click();
+    cy.get('[data-testid="collectivex-routing-select"]').click();
+    cy.contains('[role="option"]', 'zipf').click();
+    cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'zipf');
+    cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should(
+      'have.length.at.least',
+      1,
+    );
+
+    cy.get('[data-testid="collectivex-routing-select"]').click();
+    cy.contains('[role="option"]', 'uniform').click();
+    cy.get('[data-testid="collectivex-phase-toggle"]').contains('button', 'Prefill').click();
+    cy.get('[data-testid="collectivex-main-chart"]').should('contain.text', 'prefill');
+    cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2);
+    xTickLabels().should('include.members', ['128', '256']);
+  });
+
+  it('renders Decision and Evidence tabs from the static snapshot', () => {
+    cy.contains('[role="tab"]', 'Decision').click();
+    cy.get('[data-testid="collectivex-decision"]')
+      .should('contain.text', 'Best backend')
+      .and('contain.text', 'Max tokens under round-trip p99 budget');
+    cy.get('[data-testid="collectivex-summary-card"]').should('have.length', 7);
+    cy.get('[data-testid="collectivex-budget-table"]').should('contain.text', '<= 100 us');
+
+    cy.contains('[role="tab"]', 'Evidence').click();
+    cy.get('[data-testid="collectivex-sensitivity-table"]').should('exist');
+    cy.get('[data-testid="collectivex-failures-table"]').should('contain.text', 'diagnostic');
+    cy.get('[data-testid="collectivex-coverage-table"]').should('contain.text', 'official');
+    cy.get('section[data-testid="collectivex-display"]').should('contain.text', 'Provenance');
+  });
+
+  it('renders all new collective and transfer family tabs', () => {
+    // Each entry: [tab label, chart test id, metric-toggle test id].
+    const familyTabs = [
+      ['All-reduce', 'collectivex-all-reduce-chart', 'collectivex-all-reduce-metric-toggle'],
+      ['All-gather', 'collectivex-all-gather-chart', 'collectivex-all-gather-metric-toggle'],
+      ['CPU-GPU offload', 'collectivex-offload-chart', 'collectivex-offload-metric-toggle'],
+      ['KV-cache transfer', 'collectivex-kv-cache-chart', 'collectivex-kv-cache-metric-toggle'],
+      [
+        'Copy-engine / SDMA',
+        'collectivex-copy-engine-chart',
+        'collectivex-copy-engine-metric-toggle',
+      ],
+      ['RL mesh', 'collectivex-rl-mesh-chart', 'collectivex-rl-mesh-metric-toggle'],
+    ] as const;
+
+    for (const [label, chartTestId, metricTestId] of familyTabs) {
+      cy.contains('[role="tab"]', label).click();
+      cy.get(`[data-testid="${metricTestId}"]`).should('be.visible');
+      cy.get(`[data-testid="${chartTestId}"] svg`).should('be.visible');
+      cy.get(`[data-testid="${chartTestId}"] .line-path`).should('have.length.at.least', 1);
+    }
+  });
+
+  it('legend toggles remove and restore a rendered EP series', () => {
+    cy.get('[data-testid="collectivex-main-chart"]').within(() => {
+      cy.contains('label', 'B300 EP8 · deepep · bf16').click();
+    });
+    cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 2);
+    cy.get('[data-testid="collectivex-main-chart"]').contains('button', 'Reset filter').click();
+    cy.get('[data-testid="collectivex-explorer-chart"] .line-path').should('have.length', 3);
+  });
+});
diff --git a/packages/app/cypress/fixtures/api/collectivex.json b/packages/app/cypress/fixtures/api/collectivex.json new file mode 100644 index 00000000..5dd8acb7 --- /dev/null +++ b/packages/app/cypress/fixtures/api/collectivex.json @@ -0,0 +1,5669 @@ +{ + "snapshotVersion": 3, + "series": [ + { + "id": "cx-f0dd83d8", + "identity": 
"b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "b300_c1ad910f", + "comparisonKey": "80e2eefb7447672f", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:41:08.828331+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254469772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:41:08.828331+00:00", + "sha": 
"60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.992001831531525, + "p90": 59.039998799562454, + "p95": 61.824001371860504, + "p99": 73.44000041484833 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 67.4239993095398, + "p95": 68.15999746322632, + "p99": 77.47200131416321 + }, + "roundtrip": { + "p50": 106.81600123643875, + "p90": 113.08799684047699, + "p95": 114.23999816179276, + "p99": 135.6479972600937 + }, + "isolatedSum": { + "p50": 123.32800030708313, + "p90": 126.46399810910225, + "p95": 129.98399883508682, + "p99": 150.91200172901154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.992001831531525, + "p90": 58.78400057554245, + "p95": 60.92799827456474, + "p99": 73.21599870920181 + }, + "combine": { + "p50": 67.32799857854843, + "p90": 69.11999732255936, + "p95": 70.65600156784058, + "p99": 79.93599772453308 + }, + "roundtrip": { + "p50": 106.9440022110939, + "p90": 109.40799862146378, + "p95": 110.88000237941742, + "p99": 119.39200013875961 + }, + "isolatedSum": { + "p50": 124.32000041007996, + "p90": 127.9039978981018, + "p95": 131.58399984240532, + "p99": 153.1519964337349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.792000472545624, + "p90": 59.39200147986412, + "p95": 61.28000095486641, + "p99": 68.09599697589874 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 69.66400146484375, + "p95": 76.99199765920639, + "p99": 78.75200361013412 + }, + 
"roundtrip": { + "p50": 116.22399836778641, + "p90": 122.68800288438797, + "p95": 124.35200065374374, + "p99": 127.93600559234619 + }, + "isolatedSum": { + "p50": 125.60000270605087, + "p90": 129.05600294470787, + "p95": 138.2719986140728, + "p99": 146.84800058603287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.29600074887276, + "p90": 61.15199998021126, + "p95": 62.39999830722809, + "p99": 68.1919977068901 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 77.31200009584427, + "p95": 77.72800326347351, + "p99": 78.78399640321732 + }, + "roundtrip": { + "p50": 120.25599926710129, + "p90": 125.82400441169739, + "p95": 126.75200402736664, + "p99": 133.44000279903412 + }, + "isolatedSum": { + "p50": 127.67999991774559, + "p90": 138.46400007605553, + "p95": 140.1280015707016, + "p99": 146.97599411010742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.78400123119354, + "p90": 69.023996591568, + "p95": 71.03999704122543, + "p99": 76.73600316047668 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 78.5600021481514, + "p95": 78.72000336647034, + "p99": 80.86399734020233 + }, + "roundtrip": { + "p50": 119.61600184440613, + "p90": 122.72000312805176, + "p95": 124.35200065374374, + "p99": 131.29599392414093 + }, + "isolatedSum": { + "p50": 140.03200083971024, + "p90": 147.5839987397194, + "p95": 149.76000040769577, + "p99": 157.60000050067902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 
9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.24799829721451, + "p90": 70.91200351715088, + "p95": 73.69600236415863, + "p99": 81.69600367546082 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 79.80799674987793, + "p95": 80.73599636554718, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 130.68799674510956, + "p90": 135.23200154304504, + "p95": 136.51199638843536, + "p99": 140.47999680042267 + }, + "isolatedSum": { + "p50": 147.8400006890297, + "p90": 150.7200002670288, + "p95": 154.4319987297058, + "p99": 172.64000326395035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 82.49600231647491, + "p90": 92.70399808883667, + "p95": 95.0080007314682, + "p99": 99.45599734783173 + }, + "combine": { + "p50": 92.25600212812424, + "p90": 100.09600222110748, + "p95": 102.36799716949463, + "p99": 106.65600001811981 + }, + "roundtrip": { + "p50": 158.65600109100342, + "p90": 163.00800442695618, + "p95": 164.19200599193573, + "p99": 169.50400173664093 + }, + "isolatedSum": { + "p50": 174.75200444459915, + "p90": 192.80000030994415, + "p95": 197.37599790096283, + "p99": 206.11199736595154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.91999989748001, + "p90": 95.83999961614609, + "p95": 98.04800152778625, + "p99": 104.99200224876404 + }, + "combine": { + "p50": 
115.35999923944473, + "p90": 115.93600362539291, + "p95": 116.60800129175186, + "p99": 119.45600062608719 + }, + "roundtrip": { + "p50": 192.51200556755066, + "p90": 198.88000190258026, + "p95": 199.48799908161163, + "p99": 209.47200059890747 + }, + "isolatedSum": { + "p50": 209.27999913692474, + "p90": 211.776003241539, + "p95": 214.65600281953812, + "p99": 224.44800287485123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4ad32f1a", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|fp8-saturation|none|none|0|normalized|0.18|8c8497a77d9085d", + "colorKey": "h100_7b3247bf", + "comparisonKey": "2a087c80bac58077", + "schemaVersion": 3, + "generatedAt": "2026-06-26T15:27:59.966964+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_12", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "fp8-saturation", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "unknown", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": false + }, 
+ "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8c8497a77d9085d", + "workloadId": "set:4:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28247603308", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28247603308", + "createdAt": "2026-06-26T15:27:59.966964+00:00", + "sha": "fd23d02b65dba6f1ed963342b188022fc27263d1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.73599898815155, + "p90": 102.49599814414978, + "p95": 104.12800312042236, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 79.42400127649307, + "p90": 81.4720019698143, + "p95": 82.14399963617325, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 146.84799313545227, + "p90": 156.15999698638916, + "p95": 159.13599729537964, + "p99": 164.000004529953 + }, + "isolatedSum": { + "p50": 176.16000026464462, + "p90": 183.96800011396408, + "p95": 186.2720027565956, + "p99": 200.1279965043068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.33600372076035, + "p90": 103.93600165843964, + "p95": 106.52799904346466, + "p99": 111.58400028944016 + }, + "combine": { + "p50": 80.03199845552444, + "p90": 86.84799820184708, + "p95": 87.61599659919739, + "p99": 88.06400001049042 + }, + "roundtrip": { + "p50": 151.64799988269806, + "p90": 159.16800498962402, + "p95": 160.35200655460358, + "p99": 165.50399363040924 + }, + "isolatedSum": { + "p50": 
178.3680021762848, + "p90": 190.7839998602867, + "p95": 194.14399564266205, + "p99": 199.64800029993057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.90400075912476, + "p90": 105.76000064611435, + "p95": 108.15999656915665, + "p99": 116.60800129175186 + }, + "combine": { + "p50": 87.90399879217148, + "p90": 90.55999666452408, + "p95": 95.23200243711472, + "p99": 96.57599776983261 + }, + "roundtrip": { + "p50": 157.82399475574493, + "p90": 163.7759953737259, + "p95": 166.78400337696075, + "p99": 169.95200514793396 + }, + "isolatedSum": { + "p50": 187.80799955129623, + "p90": 196.31999731063843, + "p95": 203.39199900627136, + "p99": 213.18399906158447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.60800325870514, + "p90": 133.53599607944489, + "p95": 135.51999628543854, + "p99": 138.49599659442902 + }, + "combine": { + "p50": 112.57600039243698, + "p90": 120.4800009727478, + "p95": 120.7680031657219, + "p99": 122.40000069141388 + }, + "roundtrip": { + "p50": 208.3519995212555, + "p90": 215.71199595928192, + "p95": 217.56799519062042, + "p99": 220.5439954996109 + }, + "isolatedSum": { + "p50": 241.18400365114212, + "p90": 254.0159970521927, + "p95": 256.28799945116043, + "p99": 260.8959972858429 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, 
+ { + "id": "cx-0d6ef23b", + "identity": "h200|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "h200_c851a534", + "comparisonKey": "6b4f4d7f65293019", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:29:45.312905+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254392935", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254392935", + "createdAt": 
"2026-06-26T17:29:45.312905+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.11199808120728, + "p90": 94.11200135946274, + "p95": 104.35199737548828, + "p99": 138.0160003900528 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 78.72000336647034, + "p95": 83.48800241947174, + "p99": 105.72800040245056 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 144.31999623775482, + "p95": 156.3200056552887, + "p99": 193.53599846363068 + }, + "isolatedSum": { + "p50": 142.5279974937439, + "p90": 172.83200472593307, + "p95": 187.83999979496002, + "p99": 243.74400079250336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.33599978685379, + "p90": 99.42399710416794, + "p95": 109.66400057077408, + "p99": 131.71200454235077 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 83.00799876451492, + "p95": 90.40000289678574, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 122.43200093507767, + "p90": 144.6080058813095, + "p95": 154.62400019168854, + "p99": 173.69599640369415 + }, + "isolatedSum": { + "p50": 144.19200271368027, + "p90": 182.43199586868286, + "p95": 200.06400346755981, + "p99": 246.0480034351349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.97599720954895, + "p90": 95.29600292444229, + "p95": 104.12800312042236, + "p99": 139.74399864673615 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 81.63200318813324, + "p95": 
88.22400122880936, + "p99": 119.4240003824234 + }, + "roundtrip": { + "p50": 123.74400347471237, + "p90": 150.36800503730774, + "p95": 160.3199988603592, + "p99": 204.8960030078888 + }, + "isolatedSum": { + "p50": 144.3839967250824, + "p90": 176.92800611257553, + "p95": 192.35200434923172, + "p99": 259.16799902915955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.78400319814682, + "p90": 92.25600212812424, + "p95": 102.91200131177902, + "p99": 123.16799908876419 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 81.95199817419052, + "p95": 87.48800307512283, + "p99": 100.51199793815613 + }, + "roundtrip": { + "p50": 124.03199821710587, + "p90": 147.20000326633453, + "p95": 153.9199948310852, + "p99": 180.00000715255737 + }, + "isolatedSum": { + "p50": 145.31200379133224, + "p90": 174.20800030231476, + "p95": 190.40000438690186, + "p99": 223.67999702692032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.18399846553802, + "p90": 92.83199906349182, + "p95": 103.61599922180176, + "p99": 195.93599438667297 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 86.33600175380707, + "p95": 92.03200042247772, + "p99": 120.80000340938568 + }, + "roundtrip": { + "p50": 129.72800433635712, + "p90": 161.31199896335602, + "p95": 172.86400496959686, + "p99": 215.10399878025055 + }, + "isolatedSum": { + "p50": 144.51199769973755, + "p90": 179.1680008172989, + "p95": 195.64799964427948, + "p99": 316.73599779605865 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.2720006108284, + "p90": 100.80000013113022, + "p95": 108.92800241708755, + "p99": 134.88000631332397 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 89.40800279378891, + "p95": 94.97600048780441, + "p99": 117.95199662446976 + }, + "roundtrip": { + "p50": 130.8480054140091, + "p90": 154.33600544929504, + "p95": 164.73600268363953, + "p99": 204.0639966726303 + }, + "isolatedSum": { + "p50": 158.30399841070175, + "p90": 190.20800292491913, + "p95": 203.90400290489197, + "p99": 252.83200293779373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 91.32800251245499, + "p90": 110.04800349473953, + "p95": 116.86400324106216, + "p99": 146.84799313545227 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 98.36799651384354, + "p95": 104.70400005578995, + "p99": 124.92799758911133 + }, + "roundtrip": { + "p50": 156.031996011734, + "p90": 173.24799299240112, + "p95": 180.38399517536163, + "p99": 215.39199352264404 + }, + "isolatedSum": { + "p50": 178.6240041255951, + "p90": 208.41600000858307, + "p95": 221.5680032968521, + "p99": 271.7759907245636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.03199690580368, + "p90": 129.7599971294403, + "p95": 136.57599687576294, + 
"p99": 149.24800395965576 + }, + "combine": { + "p50": 103.42399775981903, + "p90": 116.54400080442429, + "p95": 123.3920007944107, + "p99": 141.95199310779572 + }, + "roundtrip": { + "p50": 192.54399836063385, + "p90": 208.8959962129593, + "p95": 215.64799547195435, + "p99": 228.7359982728958 + }, + "isolatedSum": { + "p50": 219.4559946656227, + "p90": 246.3039979338646, + "p95": 259.96799767017365, + "p99": 291.1999970674515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e7727ce9", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "b300_c1ad910f", + "comparisonKey": "9532205a80f3d757", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:38:48.516779+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_15", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1824, + "configuredUnits": 27, + "deviceUnits": 148, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + 
"fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254469772", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254469772", + "createdAt": "2026-06-26T17:38:48.516779+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.11200135946274, + "p90": 98.9760011434555, + "p95": 100.54399818181992, + "p99": 116.44800007343292 + }, + "combine": { + "p50": 115.1999980211258, + "p90": 115.9679964184761, + "p95": 116.89600348472595, + "p99": 129.02399897575378 + }, + "roundtrip": { + "p50": 193.2159960269928, + "p90": 198.43199849128723, + "p95": 199.8080015182495, + "p99": 217.50399470329285 + }, + "isolatedSum": { + "p50": 209.31199938058853, + "p90": 214.9439975619316, + "p95": 217.44000166654587, + "p99": 245.4719990491867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 135.42400300502777, + "p90": 138.75199854373932, + "p95": 141.184002161026, + "p99": 151.0079950094223 + }, + "combine": { + "p50": 154.59200739860535, + "p90": 163.90399634838104, + "p95": 164.5440012216568, + "p99": 176.54399573802948 + }, + "roundtrip": { + "p50": 271.67999744415283, + "p90": 277.6319980621338, + "p95": 
280.70399165153503, + "p99": 291.3599908351898 + }, + "isolatedSum": { + "p50": 290.0160104036331, + "p90": 302.65599489212036, + "p95": 305.7280033826828, + "p99": 327.5519907474518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.24800372123718, + "p90": 199.61600005626678, + "p95": 200.80000162124634, + "p99": 206.68800175189972 + }, + "combine": { + "p50": 265.8880054950714, + "p90": 274.59201216697693, + "p95": 275.2000093460083, + "p99": 286.78399324417114 + }, + "roundtrip": { + "p50": 442.59199500083923, + "p90": 448.96000623703003, + "p95": 455.00800013542175, + "p99": 461.40798926353455 + }, + "isolatedSum": { + "p50": 459.1360092163086, + "p90": 474.2080122232437, + "p95": 476.00001096725464, + "p99": 493.47199499607086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 326.2079954147339, + "p90": 329.75998520851135, + "p95": 331.6799998283386, + "p99": 341.6000008583069 + }, + "combine": { + "p50": 457.66401290893555, + "p90": 459.77601408958435, + "p95": 469.760000705719, + "p99": 473.7600088119507 + }, + "roundtrip": { + "p50": 762.5920176506042, + "p90": 771.7440128326416, + "p95": 774.2080092430115, + "p99": 789.6320223808289 + }, + "isolatedSum": { + "p50": 783.8720083236694, + "p90": 789.5359992980957, + "p95": 801.4400005340576, + "p99": 815.3600096702576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + 
"recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 577.1200060844421, + "p90": 582.5920104980469, + "p95": 583.5520029067993, + "p99": 591.2960171699524 + }, + "combine": { + "p50": 817.2799944877625, + "p90": 828.4159898757935, + "p95": 831.8719863891602, + "p99": 913.4079813957214 + }, + "roundtrip": { + "p50": 1376.9279718399048, + "p90": 1386.9119882583618, + "p95": 1392.7680253982544, + "p99": 1453.8240432739258 + }, + "isolatedSum": { + "p50": 1394.4000005722046, + "p90": 1411.0080003738403, + "p95": 1415.4239892959595, + "p99": 1504.7039985656738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1069.5040225982666, + "p90": 1078.0160427093506, + "p95": 1080.2559852600098, + "p99": 1090.880036354065 + }, + "combine": { + "p50": 1528.8959741592407, + "p90": 1540.4479503631592, + "p95": 1542.688012123108, + "p99": 1554.751992225647 + }, + "roundtrip": { + "p50": 2581.9520950317383, + "p90": 2594.6240425109863, + "p95": 2602.303981781006, + "p99": 2637.9199028015137 + }, + "isolatedSum": { + "p50": 2598.3999967575073, + "p90": 2618.4639930725098, + "p95": 2622.9439973831177, + "p99": 2645.632028579712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19a8d159", + "identity": "h100|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "h100_7b3247bf", 
+ "comparisonKey": "0ac8f8817cb63abb", + "schemaVersion": 3, + "generatedAt": "2026-06-26T17:30:47.651979+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.1818, + "configuredUnits": 24, + "deviceUnits": 132, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": "set:6:a426d66e479dc893", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28254315809", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254315809", + "createdAt": "2026-06-26T17:30:47.651979+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.46399921178818, + "p90": 
116.35199934244156, + "p95": 117.8240031003952, + "p99": 166.01599752902985 + }, + "combine": { + "p50": 106.1440035700798, + "p90": 111.51999980211258, + "p95": 112.06399649381638, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 197.40800559520721, + "p90": 200.9280025959015, + "p95": 203.0400037765503, + "p99": 206.01600408554077 + }, + "isolatedSum": { + "p50": 216.60800278186798, + "p90": 227.87199914455414, + "p95": 229.88799959421158, + "p99": 280.09599447250366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.39200472831726, + "p90": 150.68799257278442, + "p95": 151.7760008573532, + "p99": 154.33600544929504 + }, + "combine": { + "p50": 145.1839953660965, + "p90": 149.88799393177032, + "p95": 151.67999267578125, + "p99": 154.7199934720993 + }, + "roundtrip": { + "p50": 262.4000012874603, + "p90": 267.2640085220337, + "p95": 269.27998661994934, + "p99": 357.34400153160095 + }, + "isolatedSum": { + "p50": 292.57600009441376, + "p90": 300.57598650455475, + "p95": 303.45599353313446, + "p99": 309.05599892139435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.92799580097198, + "p90": 219.39200162887573, + "p95": 221.76000475883484, + "p99": 226.4000028371811 + }, + "combine": { + "p50": 217.15199947357178, + "p90": 221.3120013475418, + "p95": 224.57599639892578, + "p99": 227.743998169899 + }, + "roundtrip": { + "p50": 392.60798692703247, + "p90": 397.47199416160583, + "p95": 400.09599924087524, + "p99": 
421.37598991394043 + }, + "isolatedSum": { + "p50": 422.07999527454376, + "p90": 440.70400297641754, + "p95": 446.3360011577606, + "p99": 454.1440010070801 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 319.93600726127625, + "p90": 324.8960077762604, + "p95": 327.1679878234863, + "p99": 330.55999875068665 + }, + "combine": { + "p50": 330.01598715782166, + "p90": 335.1680040359497, + "p95": 336.64000034332275, + "p99": 340.2239978313446 + }, + "roundtrip": { + "p50": 624.064028263092, + "p90": 629.2480230331421, + "p95": 631.6159963607788, + "p99": 638.2399797439575 + }, + "isolatedSum": { + "p50": 649.9519944190979, + "p90": 660.0640118122101, + "p95": 663.8079881668091, + "p99": 670.7839965820312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.9440112113953, + "p90": 584.5119953155518, + "p95": 589.1519784927368, + "p99": 593.9199924468994 + }, + "combine": { + "p50": 564.9920105934143, + "p90": 574.3039846420288, + "p95": 576.7999887466431, + "p99": 583.5199952125549 + }, + "roundtrip": { + "p50": 1105.5680513381958, + "p90": 1120.1599836349487, + "p95": 1124.7680187225342, + "p99": 1134.719967842102 + }, + "isolatedSum": { + "p50": 1135.9360218048096, + "p90": 1158.8159799575806, + "p95": 1165.9519672393799, + "p99": 1177.4399876594543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1075.8719444274902, + "p90": 1088.703989982605, + "p95": 1093.5360193252563, + "p99": 1102.463960647583 + }, + "combine": { + "p50": 1031.872034072876, + "p90": 1041.3119792938232, + "p95": 1044.4799661636353, + "p99": 1055.359959602356 + }, + "roundtrip": { + "p50": 2082.304000854492, + "p90": 2096.640110015869, + "p95": 2100.895881652832, + "p99": 2108.031988143921 + }, + "isolatedSum": { + "p50": 2107.743978500366, + "p90": 2130.015969276428, + "p95": 2138.0159854888916, + "p99": 2157.823920249939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7d11224e", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_8d2811e3", + "comparisonKey": "801e704d68c28ca9", + "schemaVersion": 3, + "generatedAt": "2026-06-27T09:48:25.920368+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_09", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": "set:8:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28285620595", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28285620595", + "createdAt": "2026-06-27T09:48:25.920368+00:00", + "sha": "149586650dbed5b7579537347e9489d5b41543c1" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.384000927209854, + "p90": 58.81600081920624, + "p95": 61.37600168585777, + "p99": 80.60800284147263 + }, + "combine": { + "p50": 65.47199934720993, + "p90": 66.3679987192154, + "p95": 66.72000139951706, + "p99": 68.09599697589874 + }, + "roundtrip": { + "p50": 107.42399841547012, + "p90": 111.84000223875046, + "p95": 112.96000331640244, + "p99": 126.14400684833527 + }, + "isolatedSum": { + "p50": 121.85600027441978, + "p90": 125.18399953842163, + "p95": 128.09600308537483, + "p99": 148.70399981737137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.2559999525547, + "p90": 58.33600088953972, + "p95": 60.447998344898224, + "p99": 72.83200323581696 + }, + "combine": { + "p50": 
66.01600348949432, + "p90": 66.68800115585327, + "p95": 67.48799979686737, + "p99": 91.90399944782257 + }, + "roundtrip": { + "p50": 105.02400249242783, + "p90": 112.41599917411804, + "p95": 113.0559965968132, + "p99": 119.64800208806992 + }, + "isolatedSum": { + "p50": 122.27200344204903, + "p90": 125.02400204539299, + "p95": 127.9359981417656, + "p99": 164.73600268363953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 56.73599988222122, + "p90": 59.29600074887276, + "p95": 60.99199876189232, + "p99": 73.11999797821045 + }, + "combine": { + "p50": 66.20799750089645, + "p90": 67.55200028419495, + "p95": 68.80000233650208, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 105.85600137710571, + "p90": 108.73600095510483, + "p95": 110.43199896812439, + "p99": 124.92799758911133 + }, + "isolatedSum": { + "p50": 122.94399738311768, + "p90": 126.8480010330677, + "p95": 129.7920010983944, + "p99": 152.8640016913414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.30400064587593, + "p90": 64.57599997520447, + "p95": 65.85600227117538, + "p99": 70.88000327348709 + }, + "combine": { + "p50": 66.81600213050842, + "p90": 68.51200014352798, + "p95": 69.023996591568, + "p99": 78.17599922418594 + }, + "roundtrip": { + "p50": 114.56000059843063, + "p90": 121.15199863910675, + "p95": 122.5920021533966, + "p99": 138.72000575065613 + }, + "isolatedSum": { + "p50": 125.12000277638435, + "p90": 133.08800011873245, + "p95": 134.87999886274338, + "p99": 149.05600249767303 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.29600074887276, + "p90": 63.45599889755249, + "p95": 66.3679987192154, + "p99": 85.82399785518646 + }, + "combine": { + "p50": 67.03999638557434, + "p90": 69.023996591568, + "p95": 70.3359991312027, + "p99": 79.93599772453308 + }, + "roundtrip": { + "p50": 122.6240023970604, + "p90": 125.66399574279785, + "p95": 126.65599584579468, + "p99": 131.9359987974167 + }, + "isolatedSum": { + "p50": 126.3359971344471, + "p90": 132.47999548912048, + "p95": 136.7039978504181, + "p99": 165.75999557971954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.14399832487106, + "p90": 76.54400169849396, + "p95": 77.85599678754807, + "p99": 89.4400030374527 + }, + "combine": { + "p50": 77.11999863386154, + "p90": 78.52800190448761, + "p95": 78.68800312280655, + "p99": 89.4400030374527 + }, + "roundtrip": { + "p50": 127.10399925708771, + "p90": 132.1280002593994, + "p95": 133.760005235672, + "p99": 136.3839954137802 + }, + "isolatedSum": { + "p50": 151.2639969587326, + "p90": 155.07200360298157, + "p95": 156.54399991035461, + "p99": 178.8800060749054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.77599650621414, + "p90": 81.53600245714188, + "p95": 
82.8159973025322, + "p99": 89.9839997291565 + }, + "combine": { + "p50": 90.87999910116196, + "p90": 102.88000106811523, + "p95": 104.41599786281586, + "p99": 115.58400094509125 + }, + "roundtrip": { + "p50": 157.95199573040009, + "p90": 162.59199380874634, + "p95": 164.19200599193573, + "p99": 182.68799781799316 + }, + "isolatedSum": { + "p50": 170.6559956073761, + "p90": 184.4160035252571, + "p95": 187.23199516534805, + "p99": 205.56800067424774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.24800354242325, + "p90": 104.22399640083313, + "p95": 105.3759977221489, + "p99": 124.67200309038162 + }, + "combine": { + "p50": 126.17599964141846, + "p90": 127.71199643611908, + "p95": 128.31999361515045, + "p99": 139.93600010871887 + }, + "roundtrip": { + "p50": 208.92800390720367, + "p90": 213.76000344753265, + "p95": 214.78399634361267, + "p99": 229.0239930152893 + }, + "isolatedSum": { + "p50": 227.4240031838417, + "p90": 231.9359928369522, + "p95": 233.69599133729935, + "p99": 264.6080031991005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cc647506", + "identity": "b300|deepep|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1fa7fe74d0e30a3", + "colorKey": "b300_8d2811e3", + "comparisonKey": "478acd4108c50326", + "schemaVersion": 3, + "generatedAt": "2026-06-26T23:58:32.426052+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1fa7fe74d0e30a3", + "workloadId": "set:4:f5576e2b712d38c3", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28271886823", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28271886823", + "createdAt": "2026-06-26T23:58:32.426052+00:00", + "sha": "45fa5044582f50ee3282fe889d2e2e2f5ab8ba13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.703999638557434, + "p90": 59.90400165319443, + "p95": 62.65600025653839, + "p99": 69.98399645090103 + }, + "combine": { + "p50": 65.88800251483917, + "p90": 66.43199920654297, + "p95": 66.72000139951706, + "p99": 73.7600028514862 + }, + "roundtrip": { + "p50": 107.16799646615982, + "p90": 112.83200234174728, + "p95": 114.14399743080139, + "p99": 
120.44800072908401 + }, + "isolatedSum": { + "p50": 122.5920021533966, + "p90": 126.3360008597374, + "p95": 129.37600165605545, + "p99": 143.74399930238724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.848001062870026, + "p90": 60.80000102519989, + "p95": 62.84800171852112, + "p99": 74.40000027418137 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 70.30399888753891, + "p95": 76.99199765920639, + "p99": 78.5600021481514 + }, + "roundtrip": { + "p50": 116.54400080442429, + "p90": 123.29600006341934, + "p95": 124.83199685811996, + "p99": 130.46400249004364 + }, + "isolatedSum": { + "p50": 126.848004758358, + "p90": 131.1039999127388, + "p95": 139.8399993777275, + "p99": 152.96000242233276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.9039968252182, + "p90": 78.27199995517731, + "p95": 79.52000200748444, + "p99": 87.5839963555336 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 79.19999957084656, + "p95": 79.71200346946716, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 134.24000144004822, + "p90": 138.20800185203552, + "p95": 139.5840048789978, + "p99": 144.3520039319992 + }, + "isolatedSum": { + "p50": 154.30399775505066, + "p90": 157.47199952602386, + "p95": 159.2320054769516, + "p99": 171.23199999332428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.07200253009796, + "p90": 105.98400235176086, + "p95": 107.04000294208527, + "p99": 113.21599781513214 + }, + "combine": { + "p50": 127.13600695133209, + "p90": 128.1599998474121, + "p95": 128.57599556446075, + "p99": 131.04000687599182 + }, + "roundtrip": { + "p50": 209.1200053691864, + "p90": 214.30400013923645, + "p95": 216.12800657749176, + "p99": 229.66399788856506 + }, + "isolatedSum": { + "p50": 230.20800948143005, + "p90": 234.14400219917297, + "p95": 235.61599850654602, + "p99": 244.25600469112396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c27e2cad", + "identity": "b300|deepep|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_d6fd14c3", + "comparisonKey": "ac13ebc2bb2c560a", + "schemaVersion": 3, + "generatedAt": "2026-06-27T10:26:01.213105+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_10", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, 
+ "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": "set:8:d8d49658059863f2", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28286436120", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28286436120", + "createdAt": "2026-06-27T10:26:01.213105+00:00", + "sha": "91c7acf59a5e524f37742922ec67721d86a03f6b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.2559999525547, + "p90": 58.78400057554245, + "p95": 61.28000095486641, + "p99": 77.69600301980972 + }, + "combine": { + "p50": 61.983998864889145, + "p90": 78.8159966468811, + "p95": 86.87999844551086, + "p99": 95.10400146245956 + }, + "roundtrip": { + "p50": 120.44800072908401, + "p90": 123.19999933242798, + "p95": 125.82400441169739, + "p99": 144.03200149536133 + }, + "isolatedSum": { + "p50": 118.23999881744385, + "p90": 137.59999722242355, + "p95": 148.15999940037727, + "p99": 172.8000044822693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.151999324560165, + "p90": 59.039998799562454, + "p95": 59.99999865889549, + "p99": 73.11999797821045 + }, + "combine": { + "p50": 64.54399973154068, + "p90": 66.17599725723267, + "p95": 67.16799736022949, + "p99": 
74.23999905586243 + }, + "roundtrip": { + "p50": 124.15999919176102, + "p90": 126.39999389648438, + "p95": 129.60000336170197, + "p99": 138.49599659442902 + }, + "isolatedSum": { + "p50": 121.69599905610085, + "p90": 125.21599605679512, + "p95": 127.16799601912498, + "p99": 147.35999703407288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.88000130653381, + "p90": 61.37600168585777, + "p95": 63.10400366783142, + "p99": 91.10400080680847 + }, + "combine": { + "p50": 67.35999882221222, + "p90": 69.50400024652481, + "p95": 70.14399766921997, + "p99": 86.30400151014328 + }, + "roundtrip": { + "p50": 127.68000364303589, + "p90": 130.14400005340576, + "p95": 131.55199587345123, + "p99": 137.08800077438354 + }, + "isolatedSum": { + "p50": 126.24000012874603, + "p90": 130.88000193238258, + "p95": 133.2480013370514, + "p99": 177.40800231695175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.03199890255928, + "p90": 62.30400130152702, + "p95": 63.26399743556976, + "p99": 69.2799985408783 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 70.46400010585785, + "p95": 71.3919997215271, + "p99": 87.74399757385254 + }, + "roundtrip": { + "p50": 130.62399625778198, + "p90": 133.08799266815186, + "p95": 134.94400680065155, + "p99": 141.88799262046814 + }, + "isolatedSum": { + "p50": 128.80000099539757, + "p90": 132.76800140738487, + "p95": 134.65599715709686, + "p99": 157.02399611473083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + 
"combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.28000095486641, + "p90": 63.551999628543854, + "p95": 64.89600241184235, + "p99": 75.58400183916092 + }, + "combine": { + "p50": 69.47200000286102, + "p90": 71.45600020885468, + "p95": 72.38399982452393, + "p99": 76.67200267314911 + }, + "roundtrip": { + "p50": 132.9919993877411, + "p90": 135.55200397968292, + "p95": 137.37599551677704, + "p99": 149.63200688362122 + }, + "isolatedSum": { + "p50": 130.75200095772743, + "p90": 135.00799983739853, + "p95": 137.28000223636627, + "p99": 152.25600451231003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 64.2239972949028, + "p90": 66.39999896287918, + "p95": 67.87200272083282, + "p99": 82.8159973025322 + }, + "combine": { + "p50": 75.39200037717819, + "p90": 77.02399790287018, + "p95": 77.72800326347351, + "p99": 85.82399785518646 + }, + "roundtrip": { + "p50": 145.37599682807922, + "p90": 147.8399932384491, + "p95": 148.83199334144592, + "p99": 160.41600704193115 + }, + "isolatedSum": { + "p50": 139.615997672081, + "p90": 143.42399686574936, + "p95": 145.60000598430634, + "p99": 168.63999515771866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 72.9919970035553, + "p90": 75.6480023264885, + "p95": 76.89599692821503, + "p99": 89.79199826717377 + }, + "combine": { + 
"p50": 89.24800157546997, + "p90": 91.2960022687912, + "p95": 92.99200028181076, + "p99": 104.76800054311752 + }, + "roundtrip": { + "p50": 173.92000555992126, + "p90": 176.9919991493225, + "p95": 179.1040003299713, + "p99": 198.08000326156616 + }, + "isolatedSum": { + "p50": 162.23999857902527, + "p90": 166.9440045952797, + "p95": 169.8879972100258, + "p99": 194.5599988102913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.90399813652039, + "p90": 86.65599673986435, + "p95": 87.96799927949905, + "p99": 94.2080020904541 + }, + "combine": { + "p50": 110.20799726247787, + "p90": 112.92800307273865, + "p95": 113.88800293207169, + "p99": 120.92799693346024 + }, + "roundtrip": { + "p50": 220.19200026988983, + "p90": 223.4240025281906, + "p95": 224.99200701713562, + "p99": 245.08799612522125 + }, + "isolatedSum": { + "p50": 194.11199539899826, + "p90": 199.583999812603, + "p95": 201.85600221157074, + "p99": 215.13599902391434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0eafa1d5", + "identity": "b300|deepep|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_c9569580", + "comparisonKey": "62e1e2299cdc509d", + "schemaVersion": 3, + "generatedAt": "2026-06-27T11:14:16.179311+00:00", + "status": "valid", + "publicationStatus": "official", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": "set:8:d1b92539bddfb570", + "workloadSource": "canonical-serialized", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28287508460", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28287508460", + "createdAt": "2026-06-27T11:14:16.179311+00:00", + "sha": "df7fddee0a275156d4c72fa006cd2b73bce72613" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.992001831531525, + "p90": 59.328000992536545, + "p95": 62.55999952554703, + "p99": 80.38400113582611 + }, + "combine": { + "p50": 55.00800162553787, + "p90": 57.0559985935688, + "p95": 64.41599875688553, + "p99": 65.92000275850296 + }, + "roundtrip": { + "p50": 94.81599926948547, + "p90": 97.63199836015701, + "p95": 99.04000163078308, + "p99": 108.0000028014183 + }, + "isolatedSum": { + "p50": 112.0000034570694, + "p90": 
116.38399958610535, + "p95": 126.97599828243256, + "p99": 146.30400389432907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.89600110054016, + "p90": 59.039998799562454, + "p95": 61.15199998021126, + "p99": 82.04799890518188 + }, + "combine": { + "p50": 55.67999929189682, + "p90": 58.400001376867294, + "p95": 64.67200070619583, + "p99": 76.67200267314911 + }, + "roundtrip": { + "p50": 95.16800194978714, + "p90": 98.11200201511383, + "p95": 100.67199915647507, + "p99": 112.03200370073318 + }, + "isolatedSum": { + "p50": 112.57600039243698, + "p90": 117.44000017642975, + "p95": 125.82400068640709, + "p99": 158.720001578331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.21599981188774, + "p90": 59.74400043487549, + "p95": 61.664000153541565, + "p99": 77.18399912118912 + }, + "combine": { + "p50": 56.063998490571976, + "p90": 58.14399942755699, + "p95": 64.92800265550613, + "p99": 78.68800312280655 + }, + "roundtrip": { + "p50": 95.74399888515472, + "p90": 98.78399968147278, + "p95": 103.26399654150009, + "p99": 113.0559965968132 + }, + "isolatedSum": { + "p50": 113.27999830245972, + "p90": 117.88799986243248, + "p95": 126.5920028090477, + "p99": 155.87200224399567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + 
"p50": 58.079998940229416, + "p90": 61.08799949288368, + "p95": 62.65600025653839, + "p99": 71.68000191450119 + }, + "combine": { + "p50": 64.44799900054932, + "p90": 66.23999774456024, + "p95": 66.59200042486191, + "p99": 69.023996591568 + }, + "roundtrip": { + "p50": 108.8000014424324, + "p90": 113.95200341939926, + "p95": 114.84800279140472, + "p99": 122.72000312805176 + }, + "isolatedSum": { + "p50": 122.52799794077873, + "p90": 127.32799723744392, + "p95": 129.2480006814003, + "p99": 140.70399850606918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 58.687999844551086, + "p90": 61.055999249219894, + "p95": 63.00800293684006, + "p99": 71.96799665689468 + }, + "combine": { + "p50": 57.82400071620941, + "p90": 66.3679987192154, + "p95": 66.81600213050842, + "p99": 77.98399776220322 + }, + "roundtrip": { + "p50": 111.39199882745743, + "p90": 122.04799801111221, + "p95": 126.5919953584671, + "p99": 132.86399841308594 + }, + "isolatedSum": { + "p50": 116.5120005607605, + "p90": 127.42399796843529, + "p95": 129.82400506734848, + "p99": 149.9519944190979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.8480030298233, + "p90": 74.68800246715546, + "p95": 75.71200281381607, + "p99": 81.31200075149536 + }, + "combine": { + "p50": 66.30399823188782, + "p90": 67.07199662923813, + "p95": 67.71200150251389, + "p99": 77.15199887752533 + }, + "roundtrip": { + "p50": 108.99200290441513, + "p90": 114.07999694347382, + "p95": 116.7680025100708, + "p99": 
132.47999548912048 + }, + "isolatedSum": { + "p50": 137.15200126171112, + "p90": 141.75999909639359, + "p95": 143.42400431632996, + "p99": 158.4639996290207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 70.72000205516815, + "p90": 72.95999675989151, + "p95": 74.8480036854744, + "p99": 81.02399855852127 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 79.55200225114822, + "p95": 80.19199967384338, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 131.77600502967834, + "p90": 136.63999736309052, + "p95": 138.91200721263885, + "p99": 158.04800391197205 + }, + "isolatedSum": { + "p50": 149.47200566530228, + "p90": 152.51199901103973, + "p95": 155.04000335931778, + "p99": 176.9919991493225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.5280025601387, + "p90": 85.21600067615509, + "p95": 88.16000074148178, + "p99": 100.80000013113022 + }, + "combine": { + "p50": 91.77599847316742, + "p90": 94.59199756383896, + "p95": 101.72799974679947, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 157.53600001335144, + "p90": 165.24800658226013, + "p95": 166.97600483894348, + "p99": 184.76800620555878 + }, + "isolatedSum": { + "p50": 174.30400103330612, + "p90": 179.80799823999405, + "p95": 189.88800048828125, + "p99": 205.72800189256668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + } + ] + } + ], + "failures": [ + { + "id": "cxf-6e691abd", + "identity": "h100|deepep|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "generatedAt": "2026-06-26T17:32:59.549027+00:00", + "publicationStatus": "diagnostic", + "status": "valid", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "config": "fp8/ll/layout-and-dispatch", + "reason": "anomaly:roundtrip_gt_isolated_sum", + "returnCode": null, + "run": { + "id": "28254359089", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28254359089", + "createdAt": "2026-06-26T17:32:59.549027+00:00", + "sha": "60dec7d70f554e252fec87709e2be52752947db1" + } + } + ], + "summaryCards": [ + { + "title": "Best backend · decode EP8", + "value": "flashinfer · B300", + "sub": "71 us RT p99 · mxfp8 · T=64" + }, + { + "title": "Best backend · prefill EP8", + "value": "flashinfer · B300", + "sub": "85 us RT p99 · nvfp4 · T=256" + }, + { + "title": "LL -> normal crossover", + "value": "T~128 tok/rank", + "sub": "H100 EP8 fp8 · normal RT p50 wins above this" + }, + { + "title": "Resource-normalized winner", + "value": "deepep · H100", + "sub": "113 us RT p99 · bf16 · T=64" + }, + { + "title": "Backend-default winner", + "value": "flashinfer · B300", + "sub": "71 us RT p99 · mxfp8 · T=64" + }, + { + "title": "Most unstable config", + "value": "H100 · deepep decode", + "sub": "3.27x p99 under zipf-heavy vs uniform", + "warning": true + }, + { + "title": "Invalid / diagnostic cases", + "value": "8", + "sub": "see Evidence failed table", + "warning": true, + "href": "#tab-evidence" + } + ], + "decision": { + "budgetsUs": [100, 250, 500], + "maxTokensUnderBudget": [ + { + "id": "cxb-3f6620d0", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": 
"cxb-c27e2cad", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-567c4192", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-directcast", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-10314900", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "dispatchDtype": "fp8-pertoken", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 128 + } + }, + { + "id": "cxb-238797ce", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "bf16", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 512 + } + }, + { + "id": "cxb-67e5feea", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "dispatchDtype": "fp8", + "epSize": 8, + "mode": "normal", + "budgets": { + "100": null, + "250": 128, + "500": 256 + } + } + ], + "recommendations": [ + { + "id": "cxr-d2992d7c", + "sku": "b300", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 71.4, + "config": "mxfp8/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-1c3060b2", + "sku": "b300", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 85, + "config": "nvfp4/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-8fcf986c", + "sku": "h100", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 53.1, + "config": "fp8/ll/layout-and-dispatch-v1/uniform/normalized", + "epSize": 8 + }, + { + "id": "cxr-466c0bc2", + "sku": "h100", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 104.6, + "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned", + "epSize": 8 + } + ], + "llCrossover": [ + { + "sku": "h100", + "ep": 
8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + } + ], + "resourcePareto": [ + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.2, + "dispatch_p99": 93, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57, + "dispatch_p99": 73.4, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 80.9, + "dispatch_p99": 89.8, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57, + "dispatch_p99": 73.2, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.4, + "dispatch_p99": 107, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 57.8, + "dispatch_p99": 68.1, + "resource_class": "resource-constrained" + } + ] + }, + { + "sku": "b300", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "n_points": 2, + "curve": [ + { + "achieved_fraction": 0.1351, + "dispatch_p50": 81.4, + "dispatch_p99": 93.3, + "resource_class": "backend-tuned" + }, + { + "achieved_fraction": 0.1824, + "dispatch_p50": 59.3, + "dispatch_p99": 68.2, + "resource_class": 
"resource-constrained" + } + ] + } + ], + "topologyPenalty": [ + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "ep8_p50": 123.6, + "ep16_p50": 578.4, + "penalty_pct": 367.9 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "ep8_p50": 114.8, + "ep16_p50": 547.2, + "penalty_pct": 376.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "ep8_p50": 111.7, + "ep16_p50": 621.5, + "penalty_pct": 456.5 + }, + { + "sku": "h200", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "ep8_p50": 112.8, + "ep16_p50": 611.8, + "penalty_pct": 442.2 + } + ], + "skewPenalty": [ + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.694, + "p99_amplification": 0.867 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.695, + "p99_amplification": 0.811 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.697, + "p99_amplification": 0.683 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.716, + "p99_amplification": 0.76 + } + ] + }, + "nccl": [ + { + "id": "cxn-940e3e1c", + "identity": "nccl|b300|all_reduce|b300-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_03", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "139076c9959b0653", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 218.816, + "status": "valid", + "valid": true, + "colorKey": "b300_940e3e1c", + "label": "B300 · b300-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:24.142157+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:24.142157+00:00", + "sha": null + }, + "rows": [ 
+ { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 28.3, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 28.3, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 27.27, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.27, + "inPlaceUs": 27.06, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 27.25, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.25, + "inPlaceUs": 27.3, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 27.32, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.32, + "inPlaceUs": 27.28, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 27.42, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 27.42, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 27.26, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 27.26, + "inPlaceUs": 27.32, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 27.16, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 27.16, + "inPlaceUs": 27.38, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 27.33, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.07, + "outOfPlaceUs": 27.33, + "inPlaceUs": 27.14, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 27.36, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 27.36, + "inPlaceUs": 27.33, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 27.3, + "algBandwidthGbps": 0.15, + "busBandwidthGbps": 0.26, + "outOfPlaceUs": 27.3, + "inPlaceUs": 27.35, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 27.52, + "algBandwidthGbps": 0.3, + 
"busBandwidthGbps": 0.52, + "outOfPlaceUs": 27.52, + "inPlaceUs": 27.59, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 27.64, + "algBandwidthGbps": 0.59, + "busBandwidthGbps": 1.04, + "outOfPlaceUs": 27.64, + "inPlaceUs": 27.61, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 27.76, + "algBandwidthGbps": 1.18, + "busBandwidthGbps": 2.07, + "outOfPlaceUs": 27.76, + "inPlaceUs": 27.85, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 27.7, + "algBandwidthGbps": 2.37, + "busBandwidthGbps": 4.14, + "outOfPlaceUs": 28.19, + "inPlaceUs": 27.7, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 28.21, + "algBandwidthGbps": 4.65, + "busBandwidthGbps": 8.13, + "outOfPlaceUs": 28.59, + "inPlaceUs": 28.21, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 28.56, + "algBandwidthGbps": 9.18, + "busBandwidthGbps": 16.06, + "outOfPlaceUs": 29.16, + "inPlaceUs": 28.56, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 29.89, + "algBandwidthGbps": 17.54, + "busBandwidthGbps": 30.7, + "outOfPlaceUs": 29.89, + "inPlaceUs": 29.93, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 32.16, + "algBandwidthGbps": 32.61, + "busBandwidthGbps": 57.06, + "outOfPlaceUs": 32.16, + "inPlaceUs": 32.67, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 37.47, + "algBandwidthGbps": 55.97, + "busBandwidthGbps": 97.94, + "outOfPlaceUs": 37.47, + "inPlaceUs": 38.07, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 56.79, + "algBandwidthGbps": 73.86, + "busBandwidthGbps": 129.26, + "outOfPlaceUs": 56.88, + "inPlaceUs": 56.79, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 77.08, + "algBandwidthGbps": 108.83, + "busBandwidthGbps": 190.45, + 
"outOfPlaceUs": 78.24, + "inPlaceUs": 77.08, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 104.77, + "algBandwidthGbps": 160.14, + "busBandwidthGbps": 280.24, + "outOfPlaceUs": 106.93, + "inPlaceUs": 104.77, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 166.18, + "algBandwidthGbps": 201.91, + "busBandwidthGbps": 353.34, + "outOfPlaceUs": 168.44, + "inPlaceUs": 166.18, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 274.52, + "algBandwidthGbps": 244.46, + "busBandwidthGbps": 427.8, + "outOfPlaceUs": 274.52, + "inPlaceUs": 275.23, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 391.34, + "algBandwidthGbps": 342.97, + "busBandwidthGbps": 600.19, + "outOfPlaceUs": 391.34, + "inPlaceUs": 392.6, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 711.09, + "algBandwidthGbps": 377.5, + "busBandwidthGbps": 660.62, + "outOfPlaceUs": 711.09, + "inPlaceUs": 712.3, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1324.96, + "algBandwidthGbps": 405.2, + "busBandwidthGbps": 709.1, + "outOfPlaceUs": 1324.96, + "inPlaceUs": 1327.33, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2553.96, + "algBandwidthGbps": 420.42, + "busBandwidthGbps": 735.74, + "outOfPlaceUs": 2558.96, + "inPlaceUs": 2553.96, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 4571.5, + "algBandwidthGbps": 469.75, + "busBandwidthGbps": 822.07, + "outOfPlaceUs": 4576.46, + "inPlaceUs": 4571.5, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 9024.56, + "algBandwidthGbps": 475.92, + "busBandwidthGbps": 832.86, + "outOfPlaceUs": 9034.78, + "inPlaceUs": 9024.56, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 17971.9, + 
"algBandwidthGbps": 477.96, + "busBandwidthGbps": 836.44, + "outOfPlaceUs": 17991.5, + "inPlaceUs": 17971.9, + "correct": true + } + ] + }, + { + "id": "cxn-fd5a787b", + "identity": "allreduce-fw|b300|flashinfer-oneshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_11", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "81bfaa10f5beda36", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "b300_fd5a787b", + "label": "B300 · flashinfer-oneshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:48.908164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:48.908164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 11.661, + "algBandwidthGbps": 0.351, + "busBandwidthGbps": 0.615, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 11.601, + "algBandwidthGbps": 1.412, + "busBandwidthGbps": 2.472, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 12.381, + "algBandwidthGbps": 5.293, + "busBandwidthGbps": 9.263, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 14.274, + "algBandwidthGbps": 18.365, + "busBandwidthGbps": 32.139, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 23.854, + "algBandwidthGbps": 43.958, + "busBandwidthGbps": 76.926, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 75.394, + "algBandwidthGbps": 55.632, + "busBandwidthGbps": 97.356, + 
"outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 244.644, + "algBandwidthGbps": 68.578, + "busBandwidthGbps": 120.011, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 956.149, + "algBandwidthGbps": 70.187, + "busBandwidthGbps": 122.827, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-087af4ad", + "identity": "allreduce-fw|b300|flashinfer-twoshot|b300-nvlink-island|nvlink|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "b300", + "runner": "b300-nv_11", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "183298dcd11c3e1e", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "b300_087af4ad", + "label": "B300 · flashinfer-twoshot (fw-AR · ws8)", + "generatedAt": "2026-06-28T01:47:48.908164+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-28T01:47:48.908164+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 385.191, + "algBandwidthGbps": 0.17, + "busBandwidthGbps": 0.298, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 118.644, + "algBandwidthGbps": 2.209, + "busBandwidthGbps": 3.867, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 47.46, + "algBandwidthGbps": 22.094, + "busBandwidthGbps": 38.664, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 43.002, + "algBandwidthGbps": 97.537, + "busBandwidthGbps": 170.69, + "outOfPlaceUs": null, + "inPlaceUs": null, + 
"correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 90.81, + "algBandwidthGbps": 184.75, + "busBandwidthGbps": 323.313, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 353.165, + "algBandwidthGbps": 190.021, + "busBandwidthGbps": 332.537, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-a8203ce9", + "identity": "nccl|b300|all_gather|b300-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_gather", + "sku": "b300", + "runner": "b300-nv_03", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "e6eafb7204b78dd3", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 186.922, + "status": "valid", + "valid": true, + "colorKey": "b300_a8203ce9", + "label": "B300 · b300-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:41.342024+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:41.342024+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 27.36, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 27.36, + "inPlaceUs": 27.26, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 26.88, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 26.88, + "inPlaceUs": 26.89, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 27.11, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 27.11, + "inPlaceUs": 27.07, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 26.64, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 26.64, + "inPlaceUs": 26.87, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 
27.03, + "algBandwidthGbps": 0.08, + "busBandwidthGbps": 0.07, + "outOfPlaceUs": 27.03, + "inPlaceUs": 26.8, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 26.95, + "algBandwidthGbps": 0.15, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 26.95, + "inPlaceUs": 27.51, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 26.84, + "algBandwidthGbps": 0.31, + "busBandwidthGbps": 0.27, + "outOfPlaceUs": 27.05, + "inPlaceUs": 26.84, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 27.2, + "algBandwidthGbps": 0.6, + "busBandwidthGbps": 0.53, + "outOfPlaceUs": 27.2, + "inPlaceUs": 26.86, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 26.68, + "algBandwidthGbps": 1.23, + "busBandwidthGbps": 1.07, + "outOfPlaceUs": 26.98, + "inPlaceUs": 26.68, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 26.75, + "algBandwidthGbps": 2.45, + "busBandwidthGbps": 2.14, + "outOfPlaceUs": 26.89, + "inPlaceUs": 26.75, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 27.63, + "algBandwidthGbps": 4.74, + "busBandwidthGbps": 4.15, + "outOfPlaceUs": 27.63, + "inPlaceUs": 27.81, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 28.34, + "algBandwidthGbps": 9.25, + "busBandwidthGbps": 8.09, + "outOfPlaceUs": 28.34, + "inPlaceUs": 28.46, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 29.45, + "algBandwidthGbps": 17.8, + "busBandwidthGbps": 15.58, + "outOfPlaceUs": 29.49, + "inPlaceUs": 29.45, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 31.36, + "algBandwidthGbps": 33.43, + "busBandwidthGbps": 29.25, + "outOfPlaceUs": 31.51, + "inPlaceUs": 31.36, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 35.8, + "algBandwidthGbps": 58.58, + 
"busBandwidthGbps": 51.26, + "outOfPlaceUs": 35.94, + "inPlaceUs": 35.8, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 36.17, + "algBandwidthGbps": 115.95, + "busBandwidthGbps": 101.45, + "outOfPlaceUs": 36.29, + "inPlaceUs": 36.17, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 36.99, + "algBandwidthGbps": 226.76, + "busBandwidthGbps": 198.42, + "outOfPlaceUs": 37.02, + "inPlaceUs": 36.99, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 47.07, + "algBandwidthGbps": 356.41, + "busBandwidthGbps": 311.86, + "outOfPlaceUs": 47.08, + "inPlaceUs": 47.07, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 74.95, + "algBandwidthGbps": 447.68, + "busBandwidthGbps": 391.72, + "outOfPlaceUs": 75.78, + "inPlaceUs": 74.95, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 138.64, + "algBandwidthGbps": 484.06, + "busBandwidthGbps": 423.55, + "outOfPlaceUs": 139.26, + "inPlaceUs": 138.64, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 211.47, + "algBandwidthGbps": 634.68, + "busBandwidthGbps": 555.34, + "outOfPlaceUs": 211.47, + "inPlaceUs": 211.53, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 399.32, + "algBandwidthGbps": 672.24, + "busBandwidthGbps": 588.21, + "outOfPlaceUs": 399.32, + "inPlaceUs": 399.95, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 779.11, + "algBandwidthGbps": 689.08, + "busBandwidthGbps": 602.95, + "outOfPlaceUs": 779.96, + "inPlaceUs": 779.11, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 1532.87, + "algBandwidthGbps": 700.48, + "busBandwidthGbps": 612.92, + "outOfPlaceUs": 1533.45, + "inPlaceUs": 1532.87, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 
3010.48, + "algBandwidthGbps": 713.34, + "busBandwidthGbps": 624.17, + "outOfPlaceUs": 3010.48, + "inPlaceUs": 3011.29, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 5911.41, + "algBandwidthGbps": 726.55, + "busBandwidthGbps": 635.74, + "outOfPlaceUs": 5949.57, + "inPlaceUs": 5911.41, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 11675.3, + "algBandwidthGbps": 735.74, + "busBandwidthGbps": 643.77, + "outOfPlaceUs": 11728.1, + "inPlaceUs": 11675.3, + "correct": true + } + ] + }, + { + "id": "cxn-17454439", + "identity": "nccl|h100|all_gather|h100-nvlink-island|nvlink|8|nccl-tests-v1", + "op": "all_gather", + "sku": "h100", + "runner": "h100-dgxc-slurm_09", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "dacea770825df094", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 110.587, + "status": "valid", + "valid": true, + "colorKey": "h100_17454439", + "label": "H100 · h100-nvlink-island · nvlink (ws8)", + "generatedAt": "2026-06-27T11:18:57.699787+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T11:18:57.699787+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 40.4, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 40.4, + "inPlaceUs": 39.34, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 38.62, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 38.62, + "inPlaceUs": 38.09, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 38.41, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 38.41, + "inPlaceUs": 38.32, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 38.68, + "algBandwidthGbps": 0.03, + 
"busBandwidthGbps": 0.02, + "outOfPlaceUs": 38.68, + "inPlaceUs": 37.58, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 37.29, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 37.29, + "inPlaceUs": 37.12, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 37.53, + "algBandwidthGbps": 0.11, + "busBandwidthGbps": 0.1, + "outOfPlaceUs": 37.53, + "inPlaceUs": 37.17, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 37.52, + "algBandwidthGbps": 0.22, + "busBandwidthGbps": 0.19, + "outOfPlaceUs": 37.52, + "inPlaceUs": 37.53, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 37.13, + "algBandwidthGbps": 0.44, + "busBandwidthGbps": 0.39, + "outOfPlaceUs": 37.13, + "inPlaceUs": 37.09, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 37.43, + "algBandwidthGbps": 0.88, + "busBandwidthGbps": 0.77, + "outOfPlaceUs": 37.43, + "inPlaceUs": 37.42, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 37.64, + "algBandwidthGbps": 1.74, + "busBandwidthGbps": 1.52, + "outOfPlaceUs": 37.64, + "inPlaceUs": 37.63, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 38.19, + "algBandwidthGbps": 3.43, + "busBandwidthGbps": 3, + "outOfPlaceUs": 38.48, + "inPlaceUs": 38.19, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 39.66, + "algBandwidthGbps": 6.61, + "busBandwidthGbps": 5.78, + "outOfPlaceUs": 39.66, + "inPlaceUs": 40.15, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 41.79, + "algBandwidthGbps": 12.55, + "busBandwidthGbps": 10.98, + "outOfPlaceUs": 42.17, + "inPlaceUs": 41.79, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 43.89, + "algBandwidthGbps": 23.89, + "busBandwidthGbps": 20.9, + "outOfPlaceUs": 45.09, + 
"inPlaceUs": 43.89, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 44.32, + "algBandwidthGbps": 47.31, + "busBandwidthGbps": 41.4, + "outOfPlaceUs": 44.55, + "inPlaceUs": 44.32, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 44.97, + "algBandwidthGbps": 93.27, + "busBandwidthGbps": 81.61, + "outOfPlaceUs": 44.97, + "inPlaceUs": 45, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 45.6, + "algBandwidthGbps": 183.98, + "busBandwidthGbps": 160.98, + "outOfPlaceUs": 46.08, + "inPlaceUs": 45.6, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 67.94, + "algBandwidthGbps": 246.95, + "busBandwidthGbps": 216.08, + "outOfPlaceUs": 70.1, + "inPlaceUs": 67.94, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 122.71, + "algBandwidthGbps": 273.44, + "busBandwidthGbps": 239.26, + "outOfPlaceUs": 125.34, + "inPlaceUs": 122.71, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 206.56, + "algBandwidthGbps": 324.88, + "busBandwidthGbps": 284.27, + "outOfPlaceUs": 210.98, + "inPlaceUs": 206.56, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 390.25, + "algBandwidthGbps": 343.93, + "busBandwidthGbps": 300.94, + "outOfPlaceUs": 396.19, + "inPlaceUs": 390.25, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 728.52, + "algBandwidthGbps": 368.47, + "busBandwidthGbps": 322.41, + "outOfPlaceUs": 733.59, + "inPlaceUs": 728.52, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1394.3, + "algBandwidthGbps": 385.05, + "busBandwidthGbps": 336.92, + "outOfPlaceUs": 1397.39, + "inPlaceUs": 1394.3, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2705.03, + "algBandwidthGbps": 396.94, + "busBandwidthGbps": 347.33, + 
"outOfPlaceUs": 2729.3, + "inPlaceUs": 2705.03, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 5306.37, + "algBandwidthGbps": 404.7, + "busBandwidthGbps": 354.11, + "outOfPlaceUs": 5374.68, + "inPlaceUs": 5306.37, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 10451.7, + "algBandwidthGbps": 410.93, + "busBandwidthGbps": 359.57, + "outOfPlaceUs": 10616.4, + "inPlaceUs": 10451.7, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 20734.1, + "algBandwidthGbps": 414.29, + "busBandwidthGbps": 362.5, + "outOfPlaceUs": 21013.2, + "inPlaceUs": 20734.1, + "correct": true + } + ] + } + ], + "offload": [ + { + "id": "cxt-2254035a", + "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pageable|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_2254035a", + "label": "B300 · d2h · pageable", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.319, + "latency": 12.8224, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.197, + "latency": 13.6896, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.07, + "latency": 16.1008, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 10.171, + "latency": 25.7744, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 16.232, + "latency": 64.5984, + 
"sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 22.845, + "latency": 183.6016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 25.057, + "latency": 669.5584, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 25.741, + "latency": 2607.0801, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 25.884, + "latency": 10370.5231, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-ec9c695d", + "identity": "offload|b300|b300-nvlink-island|nvlink|d2h|pinned|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "d2h", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_ec9c695d", + "label": "B300 · d2h · pinned", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.314, + "latency": 3.1168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.911, + "latency": 3.336, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 16.26, + "latency": 4.0304, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.371, + "latency": 7.4112, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.656, + "latency": 21.1168, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 55.179, + "latency": 76.0128, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + 
"bandwidthGbps": 56.698, + "latency": 295.9056, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.243, + "latency": 1172.3568, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.376, + "latency": 4678.5118, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-0325201a", + "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pageable|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_0325201a", + "label": "B300 · h2d · pageable", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.48, + "latency": 8.5408, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.73, + "latency": 9.4704, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.35, + "latency": 15.0656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 11.573, + "latency": 22.6512, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 19.272, + "latency": 54.408, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 32.974, + "latency": 127.2, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 38.009, + "latency": 441.4016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 39.678, + "latency": 1691.3168, + "sizeClass": null, + 
"correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 40.13, + "latency": 6689.2288, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-6112e71d", + "identity": "offload|b300|b300-nvlink-island|nvlink|h2d|pinned|us", + "cohortIdentity": "offload|b300|b300-nvlink-island|nvlink", + "family": "offload", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "h2d", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 58 GB/s · copy/compute overlap 26% · 8 NUMA node(s)", + "peakBandwidthGbps": 57.71, + "latencyUnit": "us", + "colorKey": "b300_6112e71d", + "label": "B300 · h2d · pinned", + "generatedAt": "2026-06-27T13:14:13.476946+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:13.476946+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.204, + "latency": 3.4032, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.481, + "latency": 3.656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 15.087, + "latency": 4.344, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 32.966, + "latency": 7.952, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 49.231, + "latency": 21.2992, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 55.149, + "latency": 76.0544, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 57.026, + "latency": 294.2016, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 57.572, + "latency": 1165.6432, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.71, + "latency": 4651.4656, + "sizeClass": null, + "correct": null + } + ] + } + ], + "copyEngine": [ + { + "id": 
"cxt-6e3131b7", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_6e3131b7", + "label": "B300 · dtod · copy-engine", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.729, + "latency": 8.4789, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.851, + "latency": 8.2304, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 131.475, + "latency": 7.9755, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 506.069, + "latency": 8.288, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2092.131, + "latency": 8.0192, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8232.735, + "latency": 8.1515, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 33743.395, + "latency": 7.9552, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-214329f7", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|dtod|sm|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero 
SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_214329f7", + "label": "B300 · dtod · sm", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.772, + "latency": 8.432, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.011, + "latency": 8.4533, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 127.139, + "latency": 8.2475, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 515.355, + "latency": 8.1387, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2004.925, + "latency": 8.368, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8245.683, + "latency": 8.1387, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32844.98, + "latency": 8.1728, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-64e7ea33", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|copy-engine|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_64e7ea33", + "label": "B300 · htod · copy-engine", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 8.922, + "latency": 7.3451, + "sizeClass": null, + 
"correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 35.159, + "latency": 7.456, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 139.617, + "latency": 7.5104, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 525.479, + "latency": 7.9819, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2004.925, + "latency": 8.368, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8211.245, + "latency": 8.1728, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32556.046, + "latency": 8.2453, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-4b3f523b", + "identity": "copy-engine|b300|b300-nvlink-island|nvlink|htod|sm|us", + "cohortIdentity": "copy-engine|b300|b300-nvlink-island|nvlink", + "family": "copy-engine", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 33743 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 33743.395, + "latencyUnit": "us", + "colorKey": "b300_4b3f523b", + "label": "B300 · htod · sm", + "generatedAt": "2026-06-27T13:14:14.567612+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:14.567612+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.918, + "latency": 8.2773, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 31.703, + "latency": 8.2688, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 127.9, + "latency": 8.1984, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 509.743, + "latency": 8.2283, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 
2022.716, + "latency": 8.2944, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 8166.48, + "latency": 8.2176, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 32413.478, + "latency": 8.2816, + "sizeClass": null, + "correct": null + } + ] + } + ], + "kvCache": [ + { + "id": "cxt-72e44191", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|contiguous/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_72e44191", + "label": "B300 · dtod-local · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 4.86, + "latency": 0.00337, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 18.31, + "latency": 0.00358, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 79.48, + "latency": 0.0033, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 315.89, + "latency": 0.00332, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 1140.42, + "latency": 0.00368, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 2696.03, + "latency": 0.00622, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 2724.4, + "latency": 0.02463, + "sizeClass": "prefill", + "correct": true + }, + { + 
"sizeBytes": 268435456, + "bandwidthGbps": 3189.99, + "latency": 0.08415, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-0198272e", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-local|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-local", + "subtype": "paged/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_0198272e", + "label": "B300 · dtod-local · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 3.27, + "latency": 0.005, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 13.15, + "latency": 0.00498, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 13.46, + "latency": 0.01948, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 13.76, + "latency": 0.07619, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 13.84, + "latency": 0.30311, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 13.87, + "latency": 1.20968, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 13.83, + "latency": 4.85211, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 13.89, + "latency": 19.32599, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-65e093de", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|contiguous/memcpy|ms", 
+ "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "contiguous/memcpy", + "valid": true, + "status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_65e093de", + "label": "B300 · dtod-remote · contiguous/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.08, + "latency": 0.01514, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.52, + "latency": 0.01451, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 17.43, + "latency": 0.01504, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 67.07, + "latency": 0.01563, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 205.84, + "latency": 0.02038, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 409.12, + "latency": 0.04101, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 644.24, + "latency": 0.10417, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 736.42, + "latency": 0.36451, + "sizeClass": "prefill", + "correct": true + } + ] + }, + { + "id": "cxt-502d7923", + "identity": "kv-cache|b300|b300-nvlink-island|nvlink|dtod-remote|paged/memcpy|ms", + "cohortIdentity": "kv-cache|b300|nvlink", + "family": "kv-cache", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "dtod-remote", + "subtype": "paged/memcpy", + "valid": true, + 
"status": "valid", + "note": "wired: memcpy, pinned · declared-unwired: nixl, mooncake, mori-io, nccl", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "b300_502d7923", + "label": "B300 · dtod-remote · paged/memcpy", + "generatedAt": "2026-06-27T13:14:28.674652+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:14:28.674652+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 16384, + "bandwidthGbps": 1.11, + "latency": 0.01473, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.35, + "latency": 0.01507, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 4.3, + "latency": 0.06098, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 4.27, + "latency": 0.24556, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 4.26, + "latency": 0.98559, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 4.24, + "latency": 3.9593, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 4.27, + "latency": 15.72352, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 4.25, + "latency": 63.14588, + "sizeClass": "prefill", + "correct": true + } + ] + } + ], + "rlMesh": [ + { + "id": "cxt-e28663d4", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|paired|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_e28663d4", + "label": "B300 · gen->trn · paired", + "generatedAt": 
"2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 22.43, + "latency": 0.04675, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 210.03, + "latency": 0.01997, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 444.24, + "latency": 0.03777, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 613.35, + "latency": 0.10941, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 672.64, + "latency": 0.39908, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 681.89, + "latency": 1.57465, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-abc63f3d", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|generator_to_trainer|redistribute|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "generator_to_trainer", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_abc63f3d", + "label": "B300 · gen->trn · redistribute", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.02, + "latency": 44.24712, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 56.86, + "latency": 0.07377, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 168.78, + "latency": 0.0994, + "sizeClass": null, + "correct": true 
+ }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 466.61, + "latency": 0.14382, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 565.6, + "latency": 0.4746, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 656.22, + "latency": 1.63626, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-08ab0854", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|paired|ms", + "cohortIdentity": "rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_08ab0854", + "label": "B300 · trn->gen · paired", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 8.13, + "latency": 0.12892, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 161.07, + "latency": 0.02604, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 455.8, + "latency": 0.03681, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 613.96, + "latency": 0.10931, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 670.34, + "latency": 0.40045, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 681.46, + "latency": 1.57564, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-bea1bfbd", + "identity": "rl-mesh|b300|b300-nvlink-island|nvlink|trainer_to_generator|redistribute|ms", + "cohortIdentity": 
"rl-mesh|b300|nvlink", + "family": "rl-mesh", + "sku": "b300", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "operation": "trainer_to_generator", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 682 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 681.89, + "latencyUnit": "ms", + "colorKey": "b300_bea1bfbd", + "label": "B300 · trn->gen · redistribute", + "generatedAt": "2026-06-27T13:38:50.291192+00:00", + "run": { + "id": null, + "url": null, + "createdAt": "2026-06-27T13:38:50.291192+00:00", + "sha": null + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.01, + "latency": 74.91642, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 66.21, + "latency": 0.06334, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 295.56, + "latency": 0.05676, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 581.82, + "latency": 0.11534, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 543.6, + "latency": 0.49381, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 659.57, + "latency": 1.62794, + "sizeClass": null, + "correct": true + } + ] + } + ], + "scannedRuns": 3, + "scannedArtifacts": 42, + "contributingRuns": 3, + "generatedAt": "2026-06-29T02:42:52.989Z" +} diff --git a/packages/app/package.json b/packages/app/package.json index d1a41ae9..b743a80b 100644 --- a/packages/app/package.json +++ b/packages/app/package.json @@ -26,7 +26,8 @@ "clean:all": "rimraf .next out cypress/videos cypress/screenshots coverage", "cache:invalidate": "dotenv -e ../../.env -- tsx scripts/invalidate-cache.ts", "cache:warmup": "dotenv -e ../../.env -- tsx scripts/warmup-cache.ts", - "capture:fixtures": "tsx scripts/capture-cypress-fixtures.ts" + "capture:fixtures": "tsx 
scripts/capture-cypress-fixtures.ts", + "generate:collectivex": "tsx scripts/generate-collectivex-data.ts" }, "dependencies": { "@chenglou/pretext": "^0.0.8", diff --git a/packages/app/public/data/collectivex.json b/packages/app/public/data/collectivex.json new file mode 100644 index 00000000..c9d31902 --- /dev/null +++ b/packages/app/public/data/collectivex.json @@ -0,0 +1,547260 @@ +{ + "snapshotVersion": 3, + "series": [ + { + "id": "cx-e1bccb49", + "identity": "b200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b200_f0618d20", + "comparisonKey": "31af2b075ec4ea25", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:26:51.843963+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.05599880218506, + "p90": 89.34400230646133, + "p95": 95.67999839782715, + "p99": 106.81600123643875 + }, + "combine": { + "p50": 64.19199705123901, + "p90": 66.81600213050842, + "p95": 71.68000191450119, + "p99": 79.96799796819687 + }, + "roundtrip": { + "p50": 115.29599875211716, + "p90": 127.10399925708771, + "p95": 132.89600610733032, + "p99": 142.14399456977844 + }, + "isolatedSum": { + "p50": 145.24799585342407, + "p90": 156.16000443696976, + "p95": 167.36000031232834, + "p99": 186.78399920463562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.59200239181519, + "p90": 87.3280018568039, + "p95": 94.36800330877304, + "p99": 102.75200009346008 + }, + "combine": { + "p50": 64.57599997520447, + "p90": 67.55200028419495, + "p95": 73.34399968385696, + "p99": 80.60800284147263 + }, + "roundtrip": { + "p50": 121.76000326871872, + "p90": 131.74399733543396, + "p95": 135.96799969673157, + "p99": 140.54399728775024 + }, + "isolatedSum": { + "p50": 143.16800236701965, + "p90": 154.88000214099884, + "p95": 167.71200299263, + "p99": 183.3600029349327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + 
"recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.74400305747986, + "p90": 85.60000360012054, + "p95": 92.12800115346909, + "p99": 97.75999933481216 + }, + "combine": { + "p50": 66.0799965262413, + "p90": 75.9039968252182, + "p95": 80.03199845552444, + "p99": 82.87999778985977 + }, + "roundtrip": { + "p50": 124.92799758911133, + "p90": 132.89600610733032, + "p95": 137.53600418567657, + "p99": 149.05600249767303 + }, + "isolatedSum": { + "p50": 141.82399958372116, + "p90": 161.50400042533875, + "p95": 172.15999960899353, + "p99": 180.63999712467194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.9039968252182, + "p90": 86.01599931716919, + "p95": 91.71199798583984, + "p99": 97.56799787282944 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 75.96799731254578, + "p95": 78.33600044250488, + "p99": 82.33600109815598 + }, + "roundtrip": { + "p50": 123.87199699878693, + "p90": 134.8160058259964, + "p95": 139.96799290180206, + "p99": 147.64800667762756 + }, + "isolatedSum": { + "p50": 142.14399456977844, + "p90": 161.98399662971497, + "p95": 170.04799842834473, + "p99": 179.9039989709854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.9599980711937, + "p90": 99.20000284910202, + "p95": 108.22399705648422, + "p99": 119.55200135707855 + }, + "combine": { + "p50": 68.80000233650208, + "p90": 77.18399912118912, + "p95": 
80.64000308513641, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 124.57600235939026, + "p90": 138.75199854373932, + "p95": 143.8080072402954, + "p99": 149.47199821472168 + }, + "isolatedSum": { + "p50": 149.76000040769577, + "p90": 176.38400197029114, + "p95": 188.86400014162064, + "p99": 208.54400098323822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.54400235414505, + "p90": 92.83199906349182, + "p95": 96.8639999628067, + "p99": 105.27999699115753 + }, + "combine": { + "p50": 75.93599706888199, + "p90": 77.63200253248215, + "p95": 80.51200211048126, + "p99": 85.9839990735054 + }, + "roundtrip": { + "p50": 134.88000631332397, + "p90": 149.59999918937683, + "p95": 152.0320028066635, + "p99": 166.78400337696075 + }, + "isolatedSum": { + "p50": 156.47999942302704, + "p90": 170.46400159597397, + "p95": 177.37600207328796, + "p99": 191.26399606466293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.34400230646133, + "p90": 102.20800340175629, + "p95": 106.175996363163, + "p99": 112.38399893045425 + }, + "combine": { + "p50": 88.79999816417694, + "p90": 91.80799871683121, + "p95": 96.57599776983261, + "p99": 105.79200088977814 + }, + "roundtrip": { + "p50": 149.59999918937683, + "p90": 158.78400206565857, + "p95": 163.13600540161133, + "p99": 168.99199783802032 + }, + "isolatedSum": { + "p50": 178.14400047063828, + "p90": 194.0160021185875, + "p95": 202.7519941329956, + "p99": 218.1759998202324 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.44799810647964, + "p90": 115.52000045776367, + "p95": 119.74400281906128, + "p99": 130.17599284648895 + }, + "combine": { + "p50": 102.11200267076492, + "p90": 111.16799712181091, + "p95": 114.52800035476685, + "p99": 118.367999792099 + }, + "roundtrip": { + "p50": 183.67999792099, + "p90": 189.66400623321533, + "p95": 192.60799884796143, + "p99": 200.6399929523468 + }, + "isolatedSum": { + "p50": 206.56000077724457, + "p90": 226.68799757957458, + "p95": 234.27200317382812, + "p99": 248.54399263858795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8463349c", + "identity": "b200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b200_f0618d20", + "comparisonKey": "b74f93ed63d2202e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:28:41.569557+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.64800298213959, + "p90": 84.51200276613235, + "p95": 87.77599781751633, + "p99": 95.29600292444229 + }, + "combine": { + "p50": 65.08799642324448, + "p90": 70.36799937486649, + "p95": 73.91999661922455, + "p99": 80.64000308513641 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 129.92000579833984, + "p95": 135.3600025177002, + "p99": 143.8080072402954 + }, + "isolatedSum": { + "p50": 144.73599940538406, + "p90": 154.88000214099884, + "p95": 161.69599443674088, + "p99": 175.9360060095787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.58400183916092, + "p90": 
86.07999980449677, + "p95": 88.99199962615967, + "p99": 97.6639986038208 + }, + "combine": { + "p50": 65.47199934720993, + "p90": 74.0479975938797, + "p95": 77.15199887752533, + "p99": 81.24800026416779 + }, + "roundtrip": { + "p50": 127.16799974441528, + "p90": 140.86399972438812, + "p95": 145.31199634075165, + "p99": 158.4639996290207 + }, + "isolatedSum": { + "p50": 141.05600118637085, + "p90": 160.12799739837646, + "p95": 166.143998503685, + "p99": 178.9119988679886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.12799853086472, + "p90": 84.73599702119827, + "p95": 87.80799806118011, + "p99": 94.91200000047684 + }, + "combine": { + "p50": 74.5920017361641, + "p90": 77.40800082683563, + "p95": 80.32000064849854, + "p99": 91.67999774217606 + }, + "roundtrip": { + "p50": 123.99999797344208, + "p90": 133.40799510478973, + "p95": 138.43199610710144, + "p99": 147.13600277900696 + }, + "isolatedSum": { + "p50": 150.7200002670288, + "p90": 162.1439978480339, + "p95": 168.12799870967865, + "p99": 186.5919977426529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.87199658155441, + "p90": 88.32000195980072, + "p95": 94.36800330877304, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 76.09599828720093, + "p90": 78.07999849319458, + "p95": 80.60800284147263, + "p99": 91.67999774217606 + }, + "roundtrip": { + "p50": 128.03199887275696, + "p90": 136.80000603199005, + "p95": 141.79199934005737, + "p99": 150.78400075435638 + }, + "isolatedSum": { + "p50": 
151.96799486875534, + "p90": 166.4000004529953, + "p95": 174.97600615024567, + "p99": 201.05599611997604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.4160007238388, + "p90": 82.46400207281113, + "p95": 87.64799684286118, + "p99": 94.14400160312653 + }, + "combine": { + "p50": 76.12799853086472, + "p90": 77.53600180149078, + "p95": 80.19199967384338, + "p99": 88.48000317811966 + }, + "roundtrip": { + "p50": 133.05599987506866, + "p90": 138.94400000572205, + "p95": 142.11200177669525, + "p99": 151.93599462509155 + }, + "isolatedSum": { + "p50": 152.54399925470352, + "p90": 160.0000038743019, + "p95": 167.83999651670456, + "p99": 182.62400478124619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.76799792051315, + "p90": 95.16800194978714, + "p95": 99.29600358009338, + "p99": 109.76000130176544 + }, + "combine": { + "p50": 78.33600044250488, + "p90": 87.13600039482117, + "p95": 90.20800143480301, + "p99": 100.00000149011612 + }, + "roundtrip": { + "p50": 135.42400300502777, + "p90": 145.85599303245544, + "p95": 151.5199989080429, + "p99": 157.50400722026825 + }, + "isolatedSum": { + "p50": 167.10399836301804, + "p90": 182.3040023446083, + "p95": 189.5040050148964, + "p99": 209.76000279188156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.70399743318558, + "p90": 96.73599898815155, + "p95": 100.92800110578537, + "p99": 111.64800077676773 + }, + "combine": { + "p50": 90.30400216579437, + "p90": 98.84800016880035, + "p95": 100.60799866914749, + "p99": 105.31199723482132 + }, + "roundtrip": { + "p50": 163.07200491428375, + "p90": 167.67999529838562, + "p95": 170.6559956073761, + "p99": 176.4799952507019 + }, + "isolatedSum": { + "p50": 179.00799959897995, + "p90": 195.5839991569519, + "p95": 201.53599977493286, + "p99": 216.95999801158905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.68799960613251, + "p90": 114.1119971871376, + "p95": 121.18399888277054, + "p99": 131.6159963607788 + }, + "combine": { + "p50": 112.96000331640244, + "p90": 116.2559986114502, + "p95": 118.52800101041794, + "p99": 127.93600559234619 + }, + "roundtrip": { + "p50": 192.03199446201324, + "p90": 202.4639993906021, + "p95": 206.65599405765533, + "p99": 212.8639966249466 + }, + "isolatedSum": { + "p50": 215.64800292253494, + "p90": 230.3679957985878, + "p95": 239.71199989318848, + "p99": 259.552001953125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b6cdc4c9", + "identity": "b200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_f0618d20", + "comparisonKey": "1f0a5c9cfc120672", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:30:34.471154+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.87999647855759, + "p90": 84.35200154781342, + "p95": 87.52000331878662, + "p99": 96.8639999628067 + }, + "combine": { + "p50": 66.43199920654297, + "p90": 75.96799731254578, + "p95": 77.56800204515457, + "p99": 
80.60800284147263 + }, + "roundtrip": { + "p50": 122.6240023970604, + "p90": 129.85600531101227, + "p95": 133.40799510478973, + "p99": 144.83200013637543 + }, + "isolatedSum": { + "p50": 141.31199568510056, + "p90": 160.3199988603592, + "p95": 165.0880053639412, + "p99": 177.47200280427933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.11999863386154, + "p90": 96.44799679517746, + "p95": 99.58399832248688, + "p99": 116.70400202274323 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 77.40800082683563, + "p95": 80.4160013794899, + "p99": 87.48800307512283 + }, + "roundtrip": { + "p50": 122.8799968957901, + "p90": 131.52000308036804, + "p95": 137.37599551677704, + "p99": 147.0080018043518 + }, + "isolatedSum": { + "p50": 151.10399574041367, + "p90": 173.8559976220131, + "p95": 179.99999970197678, + "p99": 204.19200509786606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.16799867153168, + "p90": 83.64800363779068, + "p95": 91.42400324344635, + "p99": 97.9200005531311 + }, + "combine": { + "p50": 76.86399668455124, + "p90": 80.32000064849854, + "p95": 85.9839990735054, + "p99": 92.3520028591156 + }, + "roundtrip": { + "p50": 134.39999520778656, + "p90": 141.40799641609192, + "p95": 146.84799313545227, + "p99": 156.19200468063354 + }, + "isolatedSum": { + "p50": 152.03199535608292, + "p90": 163.96800428628922, + "p95": 177.40800231695175, + "p99": 190.2720034122467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + 
"combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.87199658155441, + "p90": 82.24000036716461, + "p95": 89.9839997291565, + "p99": 98.52799773216248 + }, + "combine": { + "p50": 76.54400169849396, + "p90": 80.9599980711937, + "p95": 87.23200112581253, + "p99": 92.96000003814697 + }, + "roundtrip": { + "p50": 138.7840062379837, + "p90": 149.21599626541138, + "p95": 156.8319946527481, + "p99": 182.3039948940277 + }, + "isolatedSum": { + "p50": 152.41599828004837, + "p90": 163.1999984383583, + "p95": 177.21600085496902, + "p99": 191.48799777030945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.99199765920639, + "p90": 88.03199976682663, + "p95": 91.20000153779984, + "p99": 99.7759997844696 + }, + "combine": { + "p50": 77.88799703121185, + "p90": 86.75199747085571, + "p95": 89.63199704885483, + "p99": 98.75199943780899 + }, + "roundtrip": { + "p50": 137.9839926958084, + "p90": 149.47199821472168, + "p95": 156.6080003976822, + "p99": 168.41599345207214 + }, + "isolatedSum": { + "p50": 154.87999469041824, + "p90": 174.78399723768234, + "p95": 180.83199858665466, + "p99": 198.5279992222786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 93.72799843549728, + "p90": 97.56799787282944, + "p95": 103.2319962978363, + "p99": 111.00800335407257 + }, + "combine": { + "p50": 
78.40000092983246, + "p90": 88.25600147247314, + "p95": 91.61599725484848, + "p99": 95.36000341176987 + }, + "roundtrip": { + "p50": 147.13600277900696, + "p90": 152.6080071926117, + "p95": 160.51200032234192, + "p99": 166.97600483894348 + }, + "isolatedSum": { + "p50": 172.12799936532974, + "p90": 185.82399934530258, + "p95": 194.84799355268478, + "p99": 206.36800676584244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.36799651384354, + "p90": 108.25599730014801, + "p95": 111.48799955844879, + "p99": 121.37600034475327 + }, + "combine": { + "p50": 99.23200309276581, + "p90": 102.20800340175629, + "p95": 104.89600151777267, + "p99": 114.3999993801117 + }, + "roundtrip": { + "p50": 169.3120002746582, + "p90": 178.01600694656372, + "p95": 183.48799645900726, + "p99": 192.9599940776825 + }, + "isolatedSum": { + "p50": 197.59999960660934, + "p90": 210.4640007019043, + "p95": 216.38400107622147, + "p99": 235.77599972486496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.64000087976456, + "p90": 119.10399794578552, + "p95": 121.5360015630722, + "p99": 133.59999656677246 + }, + "combine": { + "p50": 114.84800279140472, + "p90": 123.77600371837616, + "p95": 126.08000636100769, + "p99": 130.23999333381653 + }, + "roundtrip": { + "p50": 201.56799256801605, + "p90": 211.0079973936081, + "p95": 214.7199958562851, + "p99": 224.12799298763275 + }, + "isolatedSum": { + "p50": 227.48800367116928, + "p90": 242.88000166416168, + "p95": 247.6160079240799, + 
"p99": 263.839989900589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b0cf7b86", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_7c871228", + "comparisonKey": "f9228511145d4265", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:18:23.694147+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.58400183916092, + "p90": 83.64800363779068, + "p95": 85.82399785518646, + "p99": 93.75999867916107 + }, + "combine": { + "p50": 75.99999755620956, + "p90": 77.63200253248215, + "p95": 79.77599650621414, + "p99": 82.07999914884567 + }, + "roundtrip": { + "p50": 126.3359934091568, + "p90": 138.91200721263885, + "p95": 147.16799557209015, + "p99": 173.98400604724884 + }, + "isolatedSum": { + "p50": 151.58399939537048, + "p90": 161.28000617027283, + "p95": 165.5999943614006, + "p99": 175.83999782800674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.79199630022049, + "p90": 90.36800265312195, + "p95": 96.92800045013428, + "p99": 108.76800119876862 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 80.86399734020233, + "p95": 85.66399663686752, + "p99": 91.80799871683121 + }, + "roundtrip": { + "p50": 130.20800054073334, + "p90": 137.02400028705597, + "p95": 140.35199582576752, + "p99": 147.64800667762756 + }, + "isolatedSum": { + "p50": 154.94399517774582, + "p90": 171.23199999332428, + "p95": 182.5919970870018, + "p99": 200.57599991559982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 75.68000257015228, + "p90": 88.70399743318558, + "p95": 94.65599805116653, + "p99": 111.7440015077591 + }, + "combine": { + "p50": 77.66400277614594, + "p90": 86.20800077915192, + "p95": 88.3840024471283, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 137.31199502944946, + "p90": 143.00799369812012, + "p95": 146.7200070619583, + "p99": 152.41600573062897 + }, + "isolatedSum": { + "p50": 153.34400534629822, + "p90": 174.9119982123375, + "p95": 183.04000049829483, + "p99": 203.61600071191788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.06399804353714, + "p90": 89.02399986982346, + "p95": 94.33600306510925, + "p99": 101.95200145244598 + }, + "combine": { + "p50": 78.015998005867, + "p90": 86.56000345945358, + "p95": 88.28800171613693, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 134.8160058259964, + "p90": 143.51999759674072, + "p95": 146.36799693107605, + "p99": 161.79199516773224 + }, + "isolatedSum": { + "p50": 154.07999604940414, + "p90": 175.58400332927704, + "p95": 182.62400478124619, + "p99": 192.1280026435852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.77599650621414, + "p90": 89.37600255012512, + "p95": 91.77599847316742, + "p99": 101.40799731016159 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 88.19200098514557, + "p95": 89.05600011348724, + "p99": 92.22400188446045 + }, + "roundtrip": { + "p50": 138.91200721263885, + "p90": 145.9839940071106, + "p95": 
148.80000054836273, + "p99": 153.50399911403656 + }, + "isolatedSum": { + "p50": 158.36799889802933, + "p90": 177.5680035352707, + "p95": 180.83199858665466, + "p99": 193.63199919462204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.83199906349182, + "p90": 96.63999825716019, + "p95": 99.20000284910202, + "p99": 104.67199981212616 + }, + "combine": { + "p50": 88.54400366544724, + "p90": 92.76799857616425, + "p95": 98.62399846315384, + "p99": 102.94400155544281 + }, + "roundtrip": { + "p50": 146.4959979057312, + "p90": 151.74399316310883, + "p95": 154.27200496196747, + "p99": 159.04000401496887 + }, + "isolatedSum": { + "p50": 181.37600272893906, + "p90": 189.40799683332443, + "p95": 197.82400131225586, + "p99": 207.61600136756897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.95200210809708, + "p90": 109.27999764680862, + "p95": 110.78400164842606, + "p99": 118.9119964838028 + }, + "combine": { + "p50": 102.24000364542007, + "p90": 108.86400192975998, + "p95": 112.41599917411804, + "p99": 115.1999980211258 + }, + "roundtrip": { + "p50": 178.6240041255951, + "p90": 187.00799345970154, + "p95": 188.92799317836761, + "p99": 194.39999759197235 + }, + "isolatedSum": { + "p50": 208.19200575351715, + "p90": 218.1439995765686, + "p95": 223.2000008225441, + "p99": 234.1119945049286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 
0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.7120019197464, + "p90": 121.40800058841705, + "p95": 123.64800274372101, + "p99": 129.2479932308197 + }, + "combine": { + "p50": 125.40799379348755, + "p90": 129.5360028743744, + "p95": 132.51200318336487, + "p99": 139.3280029296875 + }, + "roundtrip": { + "p50": 210.59200167655945, + "p90": 219.00799870491028, + "p95": 222.6880043745041, + "p99": 229.98400032520294 + }, + "isolatedSum": { + "p50": 241.11999571323395, + "p90": 250.94400346279144, + "p95": 256.1600059270859, + "p99": 268.5759961605072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ea6097e", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_f0618d20", + "comparisonKey": "b0eeb888df54a33e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:20:11.847580+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + 
}, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.07199794054031, + "p90": 86.27200126647949, + "p95": 93.40800344944, + "p99": 101.69599950313568 + }, + "combine": { + "p50": 76.06399804353714, + "p90": 78.11199873685837, + "p95": 81.02399855852127, + "p99": 90.08000046014786 + }, + "roundtrip": { + "p50": 126.20800733566284, + "p90": 139.20000195503235, + "p95": 145.9520012140274, + "p99": 155.008003115654 + }, + "isolatedSum": { + "p50": 151.13599598407745, + "p90": 164.38400000333786, + "p95": 174.43200200796127, + "p99": 191.77599996328354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.65600222349167, + "p90": 85.4720026254654, + "p95": 91.839998960495, + "p99": 98.59199821949005 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 87.64799684286118, + 
"p95": 92.03200042247772, + "p99": 99.7759997844696 + }, + "roundtrip": { + "p50": 130.91200590133667, + "p90": 143.99999380111694, + "p95": 150.2400040626526, + "p99": 155.83999454975128 + }, + "isolatedSum": { + "p50": 151.0400027036667, + "p90": 173.11999946832657, + "p95": 183.87199938297272, + "p99": 198.36799800395966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.08799839019775, + "p90": 92.38400310277939, + "p95": 100.28800368309021, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 79.00799810886383, + "p90": 88.19200098514557, + "p95": 92.44800359010696, + "p99": 97.95200079679489 + }, + "roundtrip": { + "p50": 136.9599997997284, + "p90": 150.9760022163391, + "p95": 156.6080003976822, + "p99": 165.75999557971954 + }, + "isolatedSum": { + "p50": 156.09599649906158, + "p90": 180.57600408792496, + "p95": 192.73600727319717, + "p99": 208.99200439453125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.80800354480743, + "p90": 86.496002972126, + "p95": 94.36800330877304, + "p99": 102.04800218343735 + }, + "combine": { + "p50": 78.17599922418594, + "p90": 87.99999952316284, + "p95": 91.90399944782257, + "p99": 94.52799707651138 + }, + "roundtrip": { + "p50": 136.86400651931763, + "p90": 152.12799608707428, + "p95": 156.47999942302704, + "p99": 166.9439971446991 + }, + "isolatedSum": { + "p50": 153.98400276899338, + "p90": 174.49600249528885, + "p95": 186.2720027565956, + "p99": 196.57599925994873 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.98399776220322, + "p90": 87.07199990749359, + "p95": 91.45600348711014, + "p99": 99.2640033364296 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 91.87199920415878, + "p95": 95.13600170612335, + "p99": 104.41599786281586 + }, + "roundtrip": { + "p50": 139.1039937734604, + "p90": 150.68799257278442, + "p95": 156.70399367809296, + "p99": 164.41600024700165 + }, + "isolatedSum": { + "p50": 157.47199952602386, + "p90": 178.94399911165237, + "p95": 186.5920051932335, + "p99": 203.68000119924545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.04800087213516, + "p90": 99.87200051546097, + "p95": 105.40799796581268, + "p99": 115.99999666213989 + }, + "combine": { + "p50": 88.60799670219421, + "p90": 92.06400066614151, + "p95": 96.3520035147667, + "p99": 105.85600137710571 + }, + "roundtrip": { + "p50": 148.8959938287735, + "p90": 162.88000345230103, + "p95": 170.23999989032745, + "p99": 187.1040016412735 + }, + "isolatedSum": { + "p50": 182.65599757432938, + "p90": 191.93600118160248, + "p95": 201.76000148057938, + "p99": 221.8559980392456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.04000294208527, + "p90": 112.47999966144562, + "p95": 118.17599833011627, + "p99": 
129.08799946308136 + }, + "combine": { + "p50": 101.72799974679947, + "p90": 109.53599959611893, + "p95": 114.81600254774094, + "p99": 117.95199662446976 + }, + "roundtrip": { + "p50": 176.9919991493225, + "p90": 187.42400407791138, + "p95": 193.27999651432037, + "p99": 198.97599518299103 + }, + "isolatedSum": { + "p50": 208.76800268888474, + "p90": 222.01599925756454, + "p95": 232.9920008778572, + "p99": 247.03999608755112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.65600198507309, + "p90": 129.34400141239166, + "p95": 133.5040032863617, + "p99": 144.44799721240997 + }, + "combine": { + "p50": 125.34399330615997, + "p90": 129.85600531101227, + "p95": 138.59200477600098, + "p99": 141.4400041103363 + }, + "roundtrip": { + "p50": 210.62399446964264, + "p90": 219.7439968585968, + "p95": 223.77599775791168, + "p99": 230.84799945354462 + }, + "isolatedSum": { + "p50": 243.99999529123306, + "p90": 259.20000672340393, + "p95": 272.09600806236267, + "p99": 285.8880013227463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7acb3403", + "identity": "b200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b200_f0618d20", + "comparisonKey": "134a8c7c073f4930", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:25:00.269010+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 80.44800162315369, + "p90": 88.48000317811966, + "p95": 94.17600184679031, + "p99": 99.67999905347824 + }, + "combine": { + "p50": 75.48800110816956, + "p90": 79.99999821186066, + "p95": 82.71999657154083, + "p99": 90.84799885749817 + }, + "roundtrip": { + "p50": 123.71200323104858, + "p90": 145.21600306034088, + "p95": 149.4400054216385, + "p99": 157.02399611473083 + }, + 
"isolatedSum": { + "p50": 155.93600273132324, + "p90": 168.48000138998032, + "p95": 176.89599841833115, + "p99": 190.5279979109764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.87199658155441, + "p90": 89.24800157546997, + "p95": 94.14400160312653, + "p99": 103.20000350475311 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 80.70400357246399, + "p95": 88.25600147247314, + "p99": 95.0080007314682 + }, + "roundtrip": { + "p50": 129.82399761676788, + "p90": 141.66399836540222, + "p95": 148.83199334144592, + "p99": 156.15999698638916 + }, + "isolatedSum": { + "p50": 152.47999876737595, + "p90": 169.95200514793396, + "p95": 182.40000307559967, + "p99": 198.2080042362213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.17599922418594, + "p90": 93.79199892282486, + "p95": 101.3759970664978, + "p99": 116.57600104808807 + }, + "combine": { + "p50": 79.19999957084656, + "p90": 92.06400066614151, + "p95": 95.20000219345093, + "p99": 105.69600015878677 + }, + "roundtrip": { + "p50": 139.96799290180206, + "p90": 151.13599598407745, + "p95": 159.55199301242828, + "p99": 169.40799355506897 + }, + "isolatedSum": { + "p50": 157.3759987950325, + "p90": 185.85599958896637, + "p95": 196.57599925994873, + "p99": 222.27200120687485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.19199901819229, + "p90": 88.3840024471283, + "p95": 95.8079993724823, + "p99": 101.50399804115295 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 90.62399715185165, + "p95": 94.04800087213516, + "p99": 103.67999970912933 + }, + "roundtrip": { + "p50": 139.80799913406372, + "p90": 149.1519957780838, + "p95": 154.78399395942688, + "p99": 164.8319959640503 + }, + "isolatedSum": { + "p50": 154.88000214099884, + "p90": 179.00799959897995, + "p95": 189.85600024461746, + "p99": 205.1839977502823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.95199751853943, + "p90": 88.76799792051315, + "p95": 95.36000341176987, + "p99": 103.16800326108932 + }, + "combine": { + "p50": 78.8159966468811, + "p90": 89.24800157546997, + "p95": 93.08800101280212, + "p99": 101.98400169610977 + }, + "roundtrip": { + "p50": 139.00800049304962, + "p90": 152.0960032939911, + "p95": 157.18400478363037, + "p99": 164.70399498939514 + }, + "isolatedSum": { + "p50": 156.76799416542053, + "p90": 178.01599949598312, + "p95": 188.448004424572, + "p99": 205.1520049571991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.96800059080124, + "p90": 107.61599987745285, + "p95": 113.98400366306305, + "p99": 128.25599312782288 + }, + "combine": { + "p50": 88.79999816417694, + "p90": 92.6079973578453, + "p95": 95.23200243711472, + "p99": 105.05600273609161 + }, + "roundtrip": { + "p50": 148.51200580596924, + "p90": 
160.3199988603592, + "p95": 165.40800034999847, + "p99": 176.06399953365326 + }, + "isolatedSum": { + "p50": 184.76799875497818, + "p90": 200.22399723529816, + "p95": 209.21600610017776, + "p99": 233.3119958639145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.39199817180634, + "p90": 114.17599767446518, + "p95": 121.50400131940842, + "p99": 134.07999277114868 + }, + "combine": { + "p50": 102.59199887514114, + "p90": 114.1119971871376, + "p95": 117.40799993276596, + "p99": 125.82400441169739 + }, + "roundtrip": { + "p50": 177.15199291706085, + "p90": 194.20799612998962, + "p95": 198.36799800395966, + "p99": 201.9840031862259 + }, + "isolatedSum": { + "p50": 209.98399704694748, + "p90": 228.28799486160278, + "p95": 238.91200125217438, + "p99": 259.90399718284607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.15999853610992, + "p90": 130.62399625778198, + "p95": 135.0719928741455, + "p99": 150.91200172901154 + }, + "combine": { + "p50": 125.47199428081512, + "p90": 130.65600395202637, + "p95": 138.65600526332855, + "p99": 142.30400323867798 + }, + "roundtrip": { + "p50": 212.54399418830872, + "p90": 221.79199755191803, + "p95": 227.39200294017792, + "p99": 233.40800404548645 + }, + "isolatedSum": { + "p50": 245.63199281692505, + "p90": 261.28000020980835, + "p95": 273.72799813747406, + "p99": 293.2160049676895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 
5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-76e66eb1", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b200_027514d0", + "comparisonKey": "5479e293532130d2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:31:29.212840+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", 
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.26400005817413, + "p90": 84.6719965338707, + "p95": 86.84799820184708, + "p99": 93.28000247478485 + }, + "combine": { + "p50": 77.02399790287018, + "p90": 79.74400371313095, + "p95": 81.18399977684021, + "p99": 89.24800157546997 + }, + "roundtrip": { + "p50": 132.47999548912048, + "p90": 138.2399946451187, + "p95": 140.51200449466705, + "p99": 148.5760062932968 + }, + "isolatedSum": { + "p50": 156.2879979610443, + "p90": 164.41600024700165, + "p95": 168.0319979786873, + "p99": 182.52800405025482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.97599786520004, + "p90": 90.7839983701706, + "p95": 92.99200028181076, + "p99": 100.47999769449234 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 86.36800199747086, + "p95": 87.52000331878662, + "p99": 91.77599847316742 + }, + "roundtrip": { + "p50": 138.08000087738037, + "p90": 144.06399428844452, + "p95": 146.30399644374847, + "p99": 152.54400670528412 + }, + "isolatedSum": { + "p50": 157.60000050067902, + "p90": 177.15200036764145, + "p95": 180.51200360059738, + "p99": 192.25599616765976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.44000107049942, + "p90": 86.40000224113464, + "p95": 90.01599997282028, + "p99": 97.37599641084671 + }, + "combine": { + 
"p50": 84.76799726486206, + "p90": 91.10400080680847, + "p95": 94.17600184679031, + "p99": 99.16800260543823 + }, + "roundtrip": { + "p50": 140.9280002117157, + "p90": 152.8960019350052, + "p95": 156.92800283432007, + "p99": 163.83999586105347 + }, + "isolatedSum": { + "p50": 162.20799833536148, + "p90": 177.50400304794312, + "p95": 184.1920018196106, + "p99": 196.54399901628494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 82.30400085449219, + "p90": 91.13600105047226, + "p95": 95.07200121879578, + "p99": 106.23999685049057 + }, + "combine": { + "p50": 85.9839990735054, + "p90": 89.15200084447861, + "p95": 90.81599861383438, + "p99": 93.02400052547455 + }, + "roundtrip": { + "p50": 139.0399932861328, + "p90": 146.464005112648, + "p95": 149.21599626541138, + "p99": 153.76000106334686 + }, + "isolatedSum": { + "p50": 168.2879999279976, + "p90": 180.28800189495087, + "p95": 185.88799983263016, + "p99": 199.26399737596512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.64000308513641, + "p90": 88.70399743318558, + "p95": 91.07200056314468, + "p99": 96.6079980134964 + }, + "combine": { + "p50": 88.54400366544724, + "p90": 93.75999867916107, + "p95": 97.72799909114838, + "p99": 106.20799660682678 + }, + "roundtrip": { + "p50": 148.44800531864166, + "p90": 154.30399775505066, + "p95": 156.70399367809296, + "p99": 161.53599321842194 + }, + "isolatedSum": { + "p50": 169.18400675058365, + "p90": 182.46399611234665, + "p95": 188.79999965429306, + "p99": 
202.81599462032318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.27200192213058, + "p90": 106.59199953079224, + "p95": 108.86400192975998, + "p99": 111.96800321340561 + }, + "combine": { + "p50": 90.30400216579437, + "p90": 98.04800152778625, + "p95": 99.7759997844696, + "p99": 102.78400033712387 + }, + "roundtrip": { + "p50": 160.38399934768677, + "p90": 167.7439957857132, + "p95": 171.83999717235565, + "p99": 176.1920005083084 + }, + "isolatedSum": { + "p50": 180.57600408792496, + "p90": 204.6400010585785, + "p95": 208.64000171422958, + "p99": 214.75200355052948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 103.71199995279312, + "p90": 124.51200187206268, + "p95": 127.03999876976013, + "p99": 132.47999548912048 + }, + "combine": { + "p50": 106.20799660682678, + "p90": 113.47199976444244, + "p95": 114.49600011110306, + "p99": 116.73600226640701 + }, + "roundtrip": { + "p50": 187.51999735832214, + "p90": 194.72000002861023, + "p95": 197.31199741363525, + "p99": 201.92000269889832 + }, + "isolatedSum": { + "p50": 209.9199965596199, + "p90": 237.98400163650513, + "p95": 241.5359988808632, + "p99": 249.2159977555275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.51999497413635, + "p90": 
134.24000144004822, + "p95": 136.76799833774567, + "p99": 141.92000031471252 + }, + "combine": { + "p50": 145.79200744628906, + "p90": 151.87199413776398, + "p95": 153.82400155067444, + "p99": 157.6640009880066 + }, + "roundtrip": { + "p50": 251.0719895362854, + "p90": 258.5279941558838, + "p95": 262.36799359321594, + "p99": 269.79199051856995 + }, + "isolatedSum": { + "p50": 273.3120024204254, + "p90": 286.1119955778122, + "p95": 290.5919998884201, + "p99": 299.5840013027191 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2645782a", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b200_d85136b3", + "comparisonKey": "3321baaca126ad11", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:32:24.734815+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + 
"deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.40000092983246, + "p90": 85.21600067615509, + "p95": 88.57599645853043, + "p99": 96.89600020647049 + }, + "combine": { + "p50": 62.97600269317627, + "p90": 64.57599997520447, + "p95": 65.92000275850296, + "p99": 68.64000111818314 + }, + "roundtrip": { + "p50": 109.8560020327568, + "p90": 118.52800101041794, + "p95": 121.56800180673599, + "p99": 130.97600638866425 + }, + "isolatedSum": { + "p50": 141.37600362300873, + "p90": 149.79200065135956, + "p95": 154.4959992170334, + "p99": 165.53600132465363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.43200051784515, + "p90": 83.61600339412689, + "p95": 85.53600311279297, + "p99": 90.46400338411331 + }, + "combine": { + "p50": 65.95200300216675, + "p90": 74.72000271081924, + "p95": 76.22399926185608, + "p99": 79.13599908351898 + }, + "roundtrip": { + "p50": 126.5919953584671, + "p90": 
131.9359987974167, + "p95": 134.20799374580383, + "p99": 140.32000303268433 + }, + "isolatedSum": { + "p50": 140.3840035200119, + "p90": 158.33600610494614, + "p95": 161.76000237464905, + "p99": 169.6000024676323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.08000111579895, + "p90": 97.15200215578079, + "p95": 99.2640033364296, + "p99": 106.91200196743011 + }, + "combine": { + "p50": 73.88799637556076, + "p90": 76.7040029168129, + "p95": 78.40000092983246, + "p99": 84.89599823951721 + }, + "roundtrip": { + "p50": 134.62400436401367, + "p90": 144.16000247001648, + "p95": 148.41599762439728, + "p99": 152.96000242233276 + }, + "isolatedSum": { + "p50": 167.9679974913597, + "p90": 173.8560050725937, + "p95": 177.66400426626205, + "p99": 191.80800020694733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.2960016131401, + "p90": 97.08800166845322, + "p95": 98.75199943780899, + "p99": 106.59199953079224 + }, + "combine": { + "p50": 77.72800326347351, + "p90": 83.5840031504631, + "p95": 87.36000210046768, + "p99": 94.33600306510925 + }, + "roundtrip": { + "p50": 147.87200093269348, + "p90": 151.99999511241913, + "p95": 155.39200603961945, + "p99": 158.78400206565857 + }, + "isolatedSum": { + "p50": 165.02400487661362, + "p90": 180.67200481891632, + "p95": 186.11200153827667, + "p99": 200.9280025959015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 
7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-03652303", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b200_df12795e", + "comparisonKey": "71ced5b9429afe12", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:55.558919+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.77599650621414, + "p90": 86.81599795818329, + "p95": 89.75999802350998, + "p99": 98.24000298976898 + }, + "combine": { + "p50": 64.19199705123901, + "p90": 67.23199784755707, + "p95": 69.05599683523178, + "p99": 77.63200253248215 + }, + "roundtrip": { + "p50": 122.40000069141388, + "p90": 127.83999741077423, + "p95": 131.3599944114685, + "p99": 137.7280056476593 + }, + "isolatedSum": { + "p50": 143.96799355745316, + "p90": 154.04799580574036, + "p95": 158.81599485874176, + "p99": 175.87200552225113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.68800312280655, + "p90": 84.22400057315826, + "p95": 86.68799698352814, + "p99": 92.16000139713287 + }, + "combine": { + "p50": 64.83200192451477, + "p90": 68.28799843788147, + "p95": 72.73600250482559, + "p99": 76.99199765920639 + }, + "roundtrip": { + "p50": 124.35200065374374, + "p90": 129.66400384902954, + "p95": 133.59999656677246, + "p99": 140.60799777507782 + }, + "isolatedSum": { + "p50": 143.52000504732132, + "p90": 152.51199901103973, + "p95": 159.42399948835373, + "p99": 169.15199905633926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.27999985218048, + "p90": 93.40800344944, + "p95": 95.29600292444229, + "p99": 99.64799880981445 + }, + "combine": { + "p50": 
65.5359998345375, + "p90": 73.34399968385696, + "p95": 74.46400076150894, + "p99": 77.40800082683563 + }, + "roundtrip": { + "p50": 126.24000012874603, + "p90": 134.8160058259964, + "p95": 139.26400244235992, + "p99": 145.50399780273438 + }, + "isolatedSum": { + "p50": 142.815999686718, + "p90": 166.75200313329697, + "p95": 169.76000368595123, + "p99": 177.05599963665009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.55200159549713, + "p90": 84.48000252246857, + "p95": 87.39200234413147, + "p99": 94.7519987821579 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 76.31999999284744, + "p95": 77.40800082683563, + "p99": 79.6160027384758 + }, + "roundtrip": { + "p50": 122.97599762678146, + "p90": 130.97600638866425, + "p95": 133.59999656677246, + "p99": 137.88799941539764 + }, + "isolatedSum": { + "p50": 141.82399958372116, + "p90": 160.800002515316, + "p95": 164.8000031709671, + "p99": 174.3680015206337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.9599974155426, + "p90": 86.56000345945358, + "p95": 89.75999802350998, + "p99": 98.52799773216248 + }, + "combine": { + "p50": 69.98399645090103, + "p90": 76.64000242948532, + "p95": 77.7600035071373, + "p99": 85.4720026254654 + }, + "roundtrip": { + "p50": 126.97599828243256, + "p90": 140.99200069904327, + "p95": 146.2080031633377, + "p99": 153.02400290966034 + }, + "isolatedSum": { + "p50": 146.94399386644363, + "p90": 163.2000058889389, + "p95": 167.52000153064728, + "p99": 184.00000035762787 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.80000364780426, + "p90": 83.61600339412689, + "p95": 87.2960016131401, + "p99": 94.43199634552002 + }, + "combine": { + "p50": 75.52000135183334, + "p90": 77.27999985218048, + "p95": 79.42400127649307, + "p99": 85.50400286912918 + }, + "roundtrip": { + "p50": 132.79999792575836, + "p90": 141.82400703430176, + "p95": 146.27200365066528, + "p99": 150.62400698661804 + }, + "isolatedSum": { + "p50": 152.3200049996376, + "p90": 160.89600324630737, + "p95": 166.72000288963318, + "p99": 179.9359992146492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.91999924182892, + "p90": 95.71199864149094, + "p95": 97.34400361776352, + "p99": 104.60799932479858 + }, + "combine": { + "p50": 77.91999727487564, + "p90": 85.88799834251404, + "p95": 87.2960016131401, + "p99": 90.7519981265068 + }, + "roundtrip": { + "p50": 140.3840035200119, + "p90": 147.71200716495514, + "p95": 150.30400454998016, + "p99": 155.03999590873718 + }, + "isolatedSum": { + "p50": 167.83999651670456, + "p90": 181.59999698400497, + "p95": 184.64000523090363, + "p99": 195.3599974513054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.08000177145004, + "p90": 106.27199709415436, + "p95": 108.19199681282043, 
+ "p99": 113.56800049543381 + }, + "combine": { + "p50": 91.10400080680847, + "p90": 99.84000027179718, + "p95": 101.6639992594719, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 163.7440025806427, + "p90": 170.49600183963776, + "p95": 173.0560064315796, + "p99": 177.59999632835388 + }, + "isolatedSum": { + "p50": 189.18400257825851, + "p90": 206.11199736595154, + "p95": 209.85599607229233, + "p99": 221.28000110387802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d2d05732", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b200_f1e6dd2a", + "comparisonKey": "85350196b91c44cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:35:03.392969+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 80.73599636554718, + "p90": 88.639996945858, + "p95": 93.1520015001297, + "p99": 103.45599800348282 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 77.79199630022049, + "p95": 81.05599880218506, + "p99": 89.6959975361824 + }, + "roundtrip": { + "p50": 123.71200323104858, + "p90": 144.73600685596466, + "p95": 149.08799529075623, + "p99": 165.3759926557541 + }, + "isolatedSum": { + "p50": 155.39199858903885, + "p90": 166.4319932460785, + "p95": 174.20800030231476, + "p99": 193.15199553966522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.26399940252304, + "p90": 94.71999853849411, + "p95": 97.47199714183807, + "p99": 114.3679991364479 + }, + "combine": { + "p50": 76.86399668455124, + "p90": 79.29600030183792, + "p95": 85.15200018882751, + "p99": 93.12000125646591 + }, + "roundtrip": { + "p50": 129.40800189971924, + "p90": 143.5520052909851, + "p95": 150.59199929237366, + "p99": 
161.1199975013733 + }, + "isolatedSum": { + "p50": 152.12799608707428, + "p90": 174.01599884033203, + "p95": 182.6239973306656, + "p99": 207.48800039291382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.12799853086472, + "p90": 90.01599997282028, + "p95": 96.70399874448776, + "p99": 106.08000308275223 + }, + "combine": { + "p50": 77.7600035071373, + "p90": 86.46400272846222, + "p95": 91.80799871683121, + "p99": 98.39999675750732 + }, + "roundtrip": { + "p50": 138.97599279880524, + "p90": 153.50399911403656, + "p95": 160.288006067276, + "p99": 176.64000391960144 + }, + "isolatedSum": { + "p50": 153.888002038002, + "p90": 176.4800027012825, + "p95": 188.51199746131897, + "p99": 204.47999984025955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.93599706888199, + "p90": 86.14400029182434, + "p95": 92.76799857616425, + "p99": 100.3199964761734 + }, + "combine": { + "p50": 78.23999971151352, + "p90": 88.83199840784073, + "p95": 91.87199920415878, + "p99": 94.7519987821579 + }, + "roundtrip": { + "p50": 139.23199474811554, + "p90": 151.74399316310883, + "p95": 158.65600109100342, + "p99": 165.3439998626709 + }, + "isolatedSum": { + "p50": 154.1759967803955, + "p90": 174.97599869966507, + "p95": 184.63999778032303, + "p99": 195.0719952583313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.44000107049942, + "p90": 88.83199840784073, + "p95": 97.63199836015701, + "p99": 106.88000172376633 + }, + "combine": { + "p50": 79.32800054550171, + "p90": 91.26400202512741, + "p95": 93.37600320577621, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 137.53600418567657, + "p90": 153.18399667739868, + "p95": 158.55999290943146, + "p99": 168.35199296474457 + }, + "isolatedSum": { + "p50": 156.76800161600113, + "p90": 180.09600043296814, + "p95": 191.00800156593323, + "p99": 210.33599972724915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.65599805116653, + "p90": 100.00000149011612, + "p95": 107.93600231409073, + "p99": 117.66400188207626 + }, + "combine": { + "p50": 89.15200084447861, + "p90": 92.83199906349182, + "p95": 100.92800110578537, + "p99": 106.01600259542465 + }, + "roundtrip": { + "p50": 149.05600249767303, + "p90": 162.4639928340912, + "p95": 168.7680035829544, + "p99": 180.06399273872375 + }, + "isolatedSum": { + "p50": 183.80799889564514, + "p90": 192.83200055360794, + "p95": 208.8640034198761, + "p99": 223.68000447750092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.84000158309937, + "p90": 115.7120019197464, + "p95": 119.90399658679962, + "p99": 129.05600666999817 + }, + "combine": { + "p50": 103.64799946546555, + "p90": 112.73600161075592, + "p95": 115.52000045776367, + "p99": 127.68000364303589 + }, + "roundtrip": 
{ + "p50": 182.6239973306656, + "p90": 190.49599766731262, + "p95": 196.06399536132812, + "p99": 201.53599977493286 + }, + "isolatedSum": { + "p50": 211.4880010485649, + "p90": 228.44800353050232, + "p95": 235.4239970445633, + "p99": 256.73601031303406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.65599584579468, + "p90": 136.76799833774567, + "p95": 140.1280015707016, + "p99": 154.04799580574036 + }, + "combine": { + "p50": 138.72000575065613, + "p90": 147.77599275112152, + "p95": 151.93599462509155, + "p99": 154.94400262832642 + }, + "roundtrip": { + "p50": 238.20799589157104, + "p90": 245.02399563789368, + "p95": 251.3279914855957, + "p99": 260.22401452064514 + }, + "isolatedSum": { + "p50": 265.3760015964508, + "p90": 284.5439910888672, + "p95": 292.06399619579315, + "p99": 308.9919984340668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2a06f2f", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b200_44d733a4", + "comparisonKey": "8cecf5a89197dc7a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:43:18.035119+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.24800026416779, + "p90": 85.28000116348267, + "p95": 88.3840024471283, + "p99": 97.28000313043594 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 77.79199630022049, + "p95": 79.55200225114822, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 127.3919939994812, + "p90": 154.91199493408203, + "p95": 173.8239973783493, + "p99": 197.88800179958344 + }, + "isolatedSum": { + "p50": 157.05600380897522, + "p90": 163.07199746370316, + "p95": 
167.93600469827652, + "p99": 184.6720054745674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.4160007238388, + "p90": 84.927998483181, + "p95": 88.16000074148178, + "p99": 91.839998960495 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 78.59200239181519, + "p95": 80.6720033288002, + "p99": 88.60799670219421 + }, + "roundtrip": { + "p50": 127.96799838542938, + "p90": 135.45599579811096, + "p95": 137.7599984407425, + "p99": 144.3839967250824 + }, + "isolatedSum": { + "p50": 153.34399789571762, + "p90": 163.52000087499619, + "p95": 168.83200407028198, + "p99": 180.4479956626892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 79.03999835252762, + "p90": 99.5199978351593, + "p95": 110.81600189208984, + "p99": 197.37599790096283 + }, + "combine": { + "p50": 79.74400371313095, + "p90": 85.60000360012054, + "p95": 88.95999938249588, + "p99": 93.31200271844864 + }, + "roundtrip": { + "p50": 139.45600390434265, + "p90": 147.32800424098969, + "p95": 152.96000242233276, + "p99": 162.27200627326965 + }, + "isolatedSum": { + "p50": 158.78400206565857, + "p90": 185.12000143527985, + "p95": 199.77600127458572, + "p99": 290.68800061941147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.2159993648529, + "p90": 
82.49600231647491, + "p95": 87.26400136947632, + "p99": 90.87999910116196 + }, + "combine": { + "p50": 78.5600021481514, + "p90": 86.56000345945358, + "p95": 88.32000195980072, + "p99": 91.64799749851227 + }, + "roundtrip": { + "p50": 140.57600498199463, + "p90": 148.15999567508698, + "p95": 151.296004652977, + "p99": 155.39200603961945 + }, + "isolatedSum": { + "p50": 155.7760015130043, + "p90": 169.0560057759285, + "p95": 175.58400332927704, + "p99": 182.52799659967422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.78399640321732, + "p90": 89.28000181913376, + "p95": 92.00000017881393, + "p99": 98.7199991941452 + }, + "combine": { + "p50": 79.74400371313095, + "p90": 88.60799670219421, + "p95": 89.6959975361824, + "p99": 92.19200164079666 + }, + "roundtrip": { + "p50": 138.62399756908417, + "p90": 152.99199521541595, + "p95": 156.8640023469925, + "p99": 169.0559983253479 + }, + "isolatedSum": { + "p50": 158.52800011634827, + "p90": 177.88799852132797, + "p95": 181.69599771499634, + "p99": 190.91200083494186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.61599791049957, + "p90": 102.1760031580925, + "p95": 104.19200360774994, + "p99": 110.78400164842606 + }, + "combine": { + "p50": 89.34400230646133, + "p90": 94.11200135946274, + "p95": 95.83999961614609, + "p99": 100.19200295209885 + }, + "roundtrip": { + "p50": 149.79200065135956, + "p90": 157.31200575828552, + "p95": 161.21600568294525, + "p99": 166.84800386428833 + }, + "isolatedSum": { + 
"p50": 184.9600002169609, + "p90": 196.28800451755524, + "p95": 200.03200322389603, + "p99": 210.9760046005249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.35999792814255, + "p90": 111.55200004577637, + "p95": 112.86400258541107, + "p99": 117.3119992017746 + }, + "combine": { + "p50": 102.75200009346008, + "p90": 107.93600231409073, + "p95": 112.22399771213531, + "p99": 114.94400352239609 + }, + "roundtrip": { + "p50": 178.52799594402313, + "p90": 185.5359971523285, + "p95": 188.960000872612, + "p99": 193.27999651432037 + }, + "isolatedSum": { + "p50": 210.11199802160263, + "p90": 219.4880023598671, + "p95": 225.0880002975464, + "p99": 232.25600272417068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.68800222873688, + "p90": 124.89599734544754, + "p95": 126.97599828243256, + "p99": 132.86399841308594 + }, + "combine": { + "p50": 126.24000012874603, + "p90": 129.56799566745758, + "p95": 130.91200590133667, + "p99": 138.3039951324463 + }, + "roundtrip": { + "p50": 218.52800250053406, + "p90": 226.84800624847412, + "p95": 230.24000227451324, + "p99": 234.3679964542389 + }, + "isolatedSum": { + "p50": 244.9280023574829, + "p90": 254.46399301290512, + "p95": 257.8880041837692, + "p99": 271.1679935455322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + } + ] + }, + { + "id": "cx-2ab175b0", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b200_e7d2ef86", + "comparisonKey": "ab210e683097ab6d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:00.728766+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 82.04799890518188, + "p90": 88.76799792051315, + "p95": 96.0640013217926, + "p99": 104.60799932479858 + }, + "combine": { + "p50": 75.9039968252182, + "p90": 78.78399640321732, + "p95": 82.75199681520462, + "p99": 93.21600198745728 + }, + "roundtrip": { + "p50": 127.87200510501862, + "p90": 140.57600498199463, + "p95": 147.45600521564484, + "p99": 154.7199934720993 + }, + "isolatedSum": { + "p50": 157.95199573040009, + "p90": 167.55199432373047, + "p95": 178.81599813699722, + "p99": 197.82400131225586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 90.2400016784668, + "p90": 158.720001578331, + "p95": 168.35199296474457, + "p99": 182.72000551223755 + }, + "combine": { + "p50": 76.73600316047668, + "p90": 89.56799656152725, + "p95": 97.28000313043594, + "p99": 113.82400244474411 + }, + "roundtrip": { + "p50": 131.52000308036804, + "p90": 157.50400722026825, + "p95": 164.000004529953, + "p99": 180.12799322605133 + }, + "isolatedSum": { + "p50": 166.97600483894348, + "p90": 248.28799813985825, + "p95": 265.6319960951805, + "p99": 296.54400795698166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.504001557827, + "p90": 90.30400216579437, + "p95": 97.43999689817429, + "p99": 106.78400099277496 + }, + "combine": { + 
"p50": 77.7600035071373, + "p90": 86.97599917650223, + "p95": 92.3520028591156, + "p99": 101.59999877214432 + }, + "roundtrip": { + "p50": 139.93600010871887, + "p90": 149.59999918937683, + "p95": 156.89599514007568, + "p99": 163.42400014400482 + }, + "isolatedSum": { + "p50": 155.2640050649643, + "p90": 177.2800013422966, + "p95": 189.7919997572899, + "p99": 208.38399976491928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.79199630022049, + "p90": 92.6079973578453, + "p95": 98.65599870681763, + "p99": 112.03200370073318 + }, + "combine": { + "p50": 79.8719972372055, + "p90": 90.30400216579437, + "p95": 93.50399672985077, + "p99": 100.16000270843506 + }, + "roundtrip": { + "p50": 140.9599930047989, + "p90": 151.36000514030457, + "p95": 157.27999806404114, + "p99": 166.17600619792938 + }, + "isolatedSum": { + "p50": 157.663993537426, + "p90": 182.91199952363968, + "p95": 192.1599954366684, + "p99": 212.19200640916824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.68800312280655, + "p90": 91.0400003194809, + "p95": 96.79999947547913, + "p99": 102.81600058078766 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 89.9519994854927, + "p95": 92.99200028181076, + "p99": 101.56799852848053 + }, + "roundtrip": { + "p50": 138.5280042886734, + "p90": 156.67200088500977, + "p95": 160.76800227165222, + "p99": 177.66399681568146 + }, + "isolatedSum": { + "p50": 157.98400342464447, + "p90": 180.9919998049736, + "p95": 189.7919997572899, + "p99": 
204.3839991092682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.09600156545639, + "p90": 105.56799918413162, + "p95": 113.27999830245972, + "p99": 126.71999633312225 + }, + "combine": { + "p50": 89.15200084447861, + "p90": 92.79999881982803, + "p95": 98.59199821949005, + "p99": 105.27999699115753 + }, + "roundtrip": { + "p50": 149.1519957780838, + "p90": 160.47999262809753, + "p95": 165.95199704170227, + "p99": 173.12000691890717 + }, + "isolatedSum": { + "p50": 185.248002409935, + "p90": 198.36799800395966, + "p95": 211.87199652194977, + "p99": 231.99999332427979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 106.4319983124733, + "p90": 114.68800157308578, + "p95": 123.23199957609177, + "p99": 133.85599851608276 + }, + "combine": { + "p50": 102.24000364542007, + "p90": 111.61600053310394, + "p95": 116.60800129175186, + "p99": 124.12799894809723 + }, + "roundtrip": { + "p50": 179.00800704956055, + "p90": 194.68800723552704, + "p95": 198.65599274635315, + "p99": 206.94400370121002 + }, + "isolatedSum": { + "p50": 208.67200195789337, + "p90": 226.30400210618973, + "p95": 239.84000086784363, + "p99": 257.98399746418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
120.28799951076508, + "p90": 132.76800513267517, + "p95": 135.5839967727661, + "p99": 145.9839940071106 + }, + "combine": { + "p50": 126.14400684833527, + "p90": 132.32000172138214, + "p95": 139.20000195503235, + "p99": 142.20799505710602 + }, + "roundtrip": { + "p50": 213.69600296020508, + "p90": 222.6880043745041, + "p95": 229.5999974012375, + "p99": 234.94400084018707 + }, + "isolatedSum": { + "p50": 246.43200635910034, + "p90": 265.0880068540573, + "p95": 274.78399872779846, + "p99": 288.1919890642166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19a186a6", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_1439be5b", + "comparisonKey": "90afac04eef9b81c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:33:10.686401+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 84.70399677753448, + "p90": 96.19200229644775, + "p95": 102.81600058078766, + "p99": 123.16799908876419 + }, + "combine": { + "p50": 73.60000163316727, + "p90": 79.99999821186066, + "p95": 83.45600217580795, + "p99": 90.97599983215332 + }, + "roundtrip": { + "p50": 127.93600559234619, + "p90": 142.752006649971, + "p95": 149.21599626541138, + "p99": 162.52799332141876 + }, + "isolatedSum": { + "p50": 158.30399841070175, + "p90": 176.1920005083084, + "p95": 186.2720027565956, + "p99": 214.1439989209175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.69600367546082, + "p90": 95.29600292444229, + "p95": 100.25600343942642, + "p99": 112.41599917411804 + }, + "combine": { + "p50": 75.07199794054031, + "p90": 79.55200225114822, + "p95": 81.66400343179703, + "p99": 88.79999816417694 + }, + "roundtrip": { + "p50": 127.6479959487915, + "p90": 
146.33600413799286, + "p95": 151.74399316310883, + "p99": 171.83999717235565 + }, + "isolatedSum": { + "p50": 156.76800161600113, + "p90": 174.84800517559052, + "p95": 181.92000687122345, + "p99": 201.21599733829498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.27999985218048, + "p90": 97.15200215578079, + "p95": 102.33599692583084, + "p99": 125.40799379348755 + }, + "combine": { + "p50": 77.11999863386154, + "p90": 80.4160013794899, + "p95": 88.03199976682663, + "p99": 92.76799857616425 + }, + "roundtrip": { + "p50": 133.98399949073792, + "p90": 142.0159935951233, + "p95": 147.93600142002106, + "p99": 155.16799688339233 + }, + "isolatedSum": { + "p50": 154.39999848604202, + "p90": 177.5680035352707, + "p95": 190.36799669265747, + "p99": 218.1759923696518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.99999755620956, + "p90": 93.53599697351456, + "p95": 97.34400361776352, + "p99": 108.96000266075134 + }, + "combine": { + "p50": 78.20799946784973, + "p90": 87.0399996638298, + "p95": 91.10400080680847, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 140.09599387645721, + "p90": 150.4639983177185, + "p95": 157.151997089386, + "p99": 172.7360039949417 + }, + "isolatedSum": { + "p50": 154.2079970240593, + "p90": 180.57599663734436, + "p95": 188.448004424572, + "p99": 210.1760059595108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.64000242948532, + "p90": 86.43200248479843, + "p95": 92.32000261545181, + "p99": 99.84000027179718 + }, + "combine": { + "p50": 77.82399654388428, + "p90": 86.20800077915192, + "p95": 90.04800021648407, + "p99": 93.6959981918335 + }, + "roundtrip": { + "p50": 140.09599387645721, + "p90": 149.24800395965576, + "p95": 155.45600652694702, + "p99": 163.16799819469452 + }, + "isolatedSum": { + "p50": 154.4639989733696, + "p90": 172.64000326395035, + "p95": 182.36800283193588, + "p99": 193.53599846363068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.04800087213516, + "p90": 101.95200145244598, + "p95": 109.8880022764206, + "p99": 122.14399874210358 + }, + "combine": { + "p50": 82.97599852085114, + "p90": 92.67199784517288, + "p95": 95.48799693584442, + "p99": 104.38399761915207 + }, + "roundtrip": { + "p50": 151.39199793338776, + "p90": 159.8079949617386, + "p95": 166.30400717258453, + "p99": 177.72799730300903 + }, + "isolatedSum": { + "p50": 177.0239993929863, + "p90": 194.62399929761887, + "p95": 205.37599921226501, + "p99": 226.52799636125565 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.09600156545639, + "p90": 103.07200253009796, + "p95": 107.10400342941284, + "p99": 116.73600226640701 + }, + "combine": { + "p50": 101.69599950313568, + "p90": 105.40799796581268, + "p95": 
109.0880036354065, + "p99": 117.60000139474869 + }, + "roundtrip": { + "p50": 178.49600315093994, + "p90": 190.0160014629364, + "p95": 196.1279958486557, + "p99": 208.15999805927277 + }, + "isolatedSum": { + "p50": 197.79200106859207, + "p90": 208.48000049591064, + "p95": 216.19200706481934, + "p99": 234.3360036611557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.22399836778641, + "p90": 123.87199699878693, + "p95": 126.75200402736664, + "p99": 135.5839967727661 + }, + "combine": { + "p50": 137.28000223636627, + "p90": 142.7839994430542, + "p95": 148.47999811172485, + "p99": 163.32800686359406 + }, + "roundtrip": { + "p50": 229.66399788856506, + "p90": 244.83199417591095, + "p95": 251.42401456832886, + "p99": 258.14399123191833 + }, + "isolatedSum": { + "p50": 253.50400060415268, + "p90": 266.6559964418411, + "p95": 275.2320021390915, + "p99": 298.91200363636017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3318aef5", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b200_e357f621", + "comparisonKey": "07c61f1c15bcc74f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:34:07.176148+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 84.25600081682205, + "p90": 98.49599748849869, + "p95": 109.15199667215347, + "p99": 119.32799965143204 + }, + "combine": { + "p50": 65.50399959087372, + "p90": 74.91199672222137, + "p95": 77.504001557827, + "p99": 82.40000158548355 + }, + "roundtrip": { + "p50": 120.83200365304947, + "p90": 134.46399569511414, + "p95": 140.6719982624054, + "p99": 156.76799416542053 + }, + "isolatedSum": { + "p50": 149.76000040769577, + "p90": 
173.40799421072006, + "p95": 186.65599822998047, + "p99": 201.7280012369156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.79199695587158, + "p90": 92.28800237178802, + "p95": 99.80800002813339, + "p99": 114.656001329422 + }, + "combine": { + "p50": 65.5359998345375, + "p90": 74.30399954319, + "p95": 76.35200023651123, + "p99": 80.73599636554718 + }, + "roundtrip": { + "p50": 121.66400253772736, + "p90": 131.16799294948578, + "p95": 137.9839926958084, + "p99": 144.48000490665436 + }, + "isolatedSum": { + "p50": 147.3279967904091, + "p90": 166.59200191497803, + "p95": 176.16000026464462, + "p99": 195.39199769496918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 84.22400057315826, + "p90": 99.0080013871193, + "p95": 108.31999778747559, + "p99": 127.23200023174286 + }, + "combine": { + "p50": 65.92000275850296, + "p90": 74.97599720954895, + "p95": 78.43200117349625, + "p99": 86.40000224113464 + }, + "roundtrip": { + "p50": 127.83999741077423, + "p90": 143.0400013923645, + "p95": 149.24800395965576, + "p99": 167.1999990940094 + }, + "isolatedSum": { + "p50": 150.14400333166122, + "p90": 173.98399859666824, + "p95": 186.75199896097183, + "p99": 213.6320024728775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
75.26399940252304, + "p90": 88.16000074148178, + "p95": 95.07200121879578, + "p99": 101.69599950313568 + }, + "combine": { + "p50": 72.25599884986877, + "p90": 79.16799932718277, + "p95": 82.04799890518188, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 123.99999797344208, + "p90": 137.66400516033173, + "p95": 143.96800100803375, + "p99": 151.0079950094223 + }, + "isolatedSum": { + "p50": 147.51999825239182, + "p90": 167.32800006866455, + "p95": 177.12000012397766, + "p99": 191.9040009379387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.77600330114365, + "p90": 88.0960002541542, + "p95": 96.89600020647049, + "p99": 104.47999835014343 + }, + "combine": { + "p50": 75.39200037717819, + "p90": 78.11199873685837, + "p95": 81.34400099515915, + "p99": 91.10400080680847 + }, + "roundtrip": { + "p50": 127.83999741077423, + "p90": 139.1039937734604, + "p95": 144.80000734329224, + "p99": 154.23999726772308 + }, + "isolatedSum": { + "p50": 151.16800367832184, + "p90": 166.20799899101257, + "p95": 178.24000120162964, + "p99": 195.5839991569519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 86.59200370311737, + "p90": 94.52799707651138, + "p95": 100.51199793815613, + "p99": 112.67200112342834 + }, + "combine": { + "p50": 78.07999849319458, + "p90": 86.97599917650223, + "p95": 90.33600240945816, + "p99": 93.88799965381622 + }, + "roundtrip": { + "p50": 141.6960060596466, + "p90": 152.8960019350052, + "p95": 157.85600244998932, + "p99": 
174.97600615024567 + }, + "isolatedSum": { + "p50": 164.67200219631195, + "p90": 181.5039962530136, + "p95": 190.8480003476143, + "p99": 206.56000077724457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.83999961614609, + "p90": 102.1760031580925, + "p95": 110.52799969911575, + "p99": 125.02400577068329 + }, + "combine": { + "p50": 92.73599833250046, + "p90": 101.79200023412704, + "p95": 103.74400019645691, + "p99": 106.55999928712845 + }, + "roundtrip": { + "p50": 172.0000058412552, + "p90": 184.25600230693817, + "p95": 189.15200233459473, + "p99": 205.37599921226501 + }, + "isolatedSum": { + "p50": 188.57599794864655, + "p90": 203.96800339221954, + "p95": 214.27199989557266, + "p99": 231.58400505781174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.77600240707397, + "p90": 125.11999905109406, + "p95": 134.88000631332397, + "p99": 147.35999703407288 + }, + "combine": { + "p50": 128.25599312782288, + "p90": 137.43999600410461, + "p95": 138.87999951839447, + "p99": 141.12000167369843 + }, + "roundtrip": { + "p50": 223.03999960422516, + "p90": 230.1120012998581, + "p95": 233.98399353027344, + "p99": 239.6479994058609 + }, + "isolatedSum": { + "p50": 244.03199553489685, + "p90": 262.55999505519867, + "p95": 273.76000583171844, + "p99": 288.4799987077713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6f6cb123", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b200_5ec7faad", + "comparisonKey": "df1512ea1f111e18", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:42:22.849764+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.79199695587158, + "p90": 93.1520015001297, + "p95": 101.53599828481674, + "p99": 114.78400230407715 + }, + "combine": { + "p50": 73.88799637556076, + "p90": 79.45600152015686, + "p95": 86.62399649620056, + "p99": 93.66399794816971 + }, + "roundtrip": { + "p50": 132.57600367069244, + "p90": 149.53599870204926, + "p95": 166.78400337696075, + "p99": 182.6239973306656 + }, + "isolatedSum": { + "p50": 155.67999333143234, + "p90": 172.60800302028656, + "p95": 188.1599947810173, + "p99": 208.44800025224686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.3600001335144, + "p90": 86.01599931716919, + "p95": 94.33600306510925, + "p99": 99.58399832248688 + }, + "combine": { + "p50": 76.06399804353714, + "p90": 79.52000200748444, + "p95": 83.96799862384796, + "p99": 91.77599847316742 + }, + "roundtrip": { + "p50": 135.23200154304504, + "p90": 152.319997549057, + "p95": 163.00800442695618, + "p99": 179.00800704956055 + }, + "isolatedSum": { + "p50": 151.42399817705154, + "p90": 165.53600132465363, + "p95": 178.30400168895721, + "p99": 191.3599967956543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.25599950551987, + "p90": 84.54400300979614, + "p95": 92.00000017881393, + "p99": 101.56799852848053 + }, + "combine": { + 
"p50": 77.88799703121185, + "p90": 87.48800307512283, + "p95": 92.57599711418152, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 139.8719996213913, + "p90": 153.6639928817749, + "p95": 159.93599593639374, + "p99": 169.66399550437927 + }, + "isolatedSum": { + "p50": 154.14399653673172, + "p90": 172.03200608491898, + "p95": 184.57599729299545, + "p99": 205.72800189256668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.03999835252762, + "p90": 98.49599748849869, + "p95": 105.43999820947647, + "p99": 125.2480000257492 + }, + "combine": { + "p50": 79.00799810886383, + "p90": 90.2400016784668, + "p95": 97.15200215578079, + "p99": 104.22399640083313 + }, + "roundtrip": { + "p50": 138.94400000572205, + "p90": 154.65599298477173, + "p95": 160.64000129699707, + "p99": 172.4800020456314 + }, + "isolatedSum": { + "p50": 158.04799646139145, + "p90": 188.73599916696548, + "p95": 202.59200036525726, + "p99": 229.47199642658234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.88799703121185, + "p90": 87.93599903583527, + "p95": 98.36799651384354, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 80.19199967384338, + "p90": 91.39200299978256, + "p95": 94.24000233411789, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 145.47200500965118, + "p90": 166.55999422073364, + "p95": 177.824005484581, + "p99": 195.360004901886 + }, + "isolatedSum": { + "p50": 158.07999670505524, + "p90": 179.32800203561783, + "p95": 192.60799884796143, + "p99": 
213.85599672794342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.78399902582169, + "p90": 105.76000064611435, + "p95": 112.41599917411804, + "p99": 124.12799894809723 + }, + "combine": { + "p50": 88.51200342178345, + "p90": 92.96000003814697, + "p95": 100.89600086212158, + "p99": 107.13600367307663 + }, + "roundtrip": { + "p50": 148.44800531864166, + "p90": 159.5200002193451, + "p95": 166.17600619792938, + "p99": 173.47200214862823 + }, + "isolatedSum": { + "p50": 183.29600244760513, + "p90": 198.72000068426132, + "p95": 213.31200003623962, + "p99": 231.26400262117386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.51199859380722, + "p90": 109.3439981341362, + "p95": 114.07999694347382, + "p99": 120.35199999809265 + }, + "combine": { + "p50": 102.55999863147736, + "p90": 111.93600296974182, + "p95": 116.73600226640701, + "p99": 125.18399953842163 + }, + "roundtrip": { + "p50": 177.15199291706085, + "p90": 191.52000546455383, + "p95": 197.50399887561798, + "p99": 204.79999482631683 + }, + "isolatedSum": { + "p50": 207.07199722528458, + "p90": 221.28000110387802, + "p95": 230.81599920988083, + "p99": 245.53599953651428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
118.46400052309036, + "p90": 125.63200294971466, + "p95": 130.8159977197647, + "p99": 138.97599279880524 + }, + "combine": { + "p50": 125.47199428081512, + "p90": 133.56800377368927, + "p95": 139.93600010871887, + "p99": 149.56800639629364 + }, + "roundtrip": { + "p50": 212.3199999332428, + "p90": 221.72799706459045, + "p95": 228.28799486160278, + "p99": 236.64000630378723 + }, + "isolatedSum": { + "p50": 243.9359948039055, + "p90": 259.20000672340393, + "p95": 270.7519978284836, + "p99": 288.5439991950989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e5cde1c", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b200_49d52a3c", + "comparisonKey": "621700d9e27f0ef8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:38:40.761056+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + 
"deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.52000200748444, + "p90": 83.3280012011528, + "p95": 85.53600311279297, + "p99": 90.43200314044952 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 76.89599692821503, + "p95": 78.40000092983246, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 124.15999919176102, + "p90": 130.52800297737122, + "p95": 133.95200669765472, + "p99": 140.1599943637848 + }, + "isolatedSum": { + "p50": 153.21600437164307, + "p90": 160.22399812936783, + "p95": 163.93600404262543, + "p99": 178.97600680589676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.23199915885925, + "p90": 83.5840031504631, + "p95": 85.4400023818016, + "p99": 91.51999652385712 + }, + "combine": { + "p50": 76.19199901819229, + "p90": 80.38400113582611, + "p95": 81.24800026416779, + "p99": 88.128000497818 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 
133.215993642807, + "p95": 136.63999736309052, + "p99": 142.39999651908875 + }, + "isolatedSum": { + "p50": 151.42399817705154, + "p90": 163.96800428628922, + "p95": 166.6880026459694, + "p99": 179.6479970216751 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.3600001335144, + "p90": 84.44800227880478, + "p95": 90.27200192213058, + "p99": 168.03200542926788 + }, + "combine": { + "p50": 77.11999863386154, + "p90": 80.60800284147263, + "p95": 86.27200126647949, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 138.14400136470795, + "p90": 144.0960019826889, + "p95": 146.43199741840363, + "p99": 157.151997089386 + }, + "isolatedSum": { + "p50": 152.47999876737595, + "p90": 165.0560051202774, + "p95": 176.54400318861008, + "p99": 257.8880041837692 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.87199658155441, + "p90": 82.56000280380249, + "p95": 84.89599823951721, + "p99": 92.70399808883667 + }, + "combine": { + "p50": 77.47200131416321, + "p90": 84.32000130414963, + "p95": 87.10400015115738, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 142.0159935951233, + "p90": 152.0320028066635, + "p95": 159.10400450229645, + "p99": 175.9680062532425 + }, + "isolatedSum": { + "p50": 153.34399789571762, + "p90": 166.88000410795212, + "p95": 171.9999983906746, + "p99": 183.6479976773262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.80000364780426, + "p90": 87.96799927949905, + "p95": 91.64799749851227, + "p99": 102.75200009346008 + }, + "combine": { + "p50": 82.07999914884567, + "p90": 92.57599711418152, + "p95": 98.1760025024414, + "p99": 106.62399977445602 + }, + "roundtrip": { + "p50": 142.46399700641632, + "p90": 155.61600029468536, + "p95": 164.32000696659088, + "p99": 195.90400159358978 + }, + "isolatedSum": { + "p50": 158.88000279664993, + "p90": 180.54399639368057, + "p95": 189.82400000095367, + "p99": 209.3759998679161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 93.63199770450592, + "p90": 97.28000313043594, + "p95": 98.78399968147278, + "p99": 103.61599922180176 + }, + "combine": { + "p50": 88.3840024471283, + "p90": 90.62399715185165, + "p95": 92.25600212812424, + "p99": 98.04800152778625 + }, + "roundtrip": { + "p50": 150.14399588108063, + "p90": 158.720001578331, + "p95": 162.75200247764587, + "p99": 172.54400253295898 + }, + "isolatedSum": { + "p50": 182.01600015163422, + "p90": 187.9040002822876, + "p95": 191.04000180959702, + "p99": 201.664000749588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.06400263309479, + "p90": 112.47999966144562, + "p95": 118.07999759912491, + "p99": 165.6000018119812 + }, + "combine": { + "p50": 101.88800096511841, + "p90": 108.60799998044968, + "p95": 
111.90400272607803, + "p99": 115.03999680280685 + }, + "roundtrip": { + "p50": 178.65599691867828, + "p90": 189.28000330924988, + "p95": 192.1599954366684, + "p99": 196.51199877262115 + }, + "isolatedSum": { + "p50": 205.9520035982132, + "p90": 221.0879996418953, + "p95": 229.98400032520294, + "p99": 280.63999861478806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.46400117874146, + "p90": 130.91200590133667, + "p95": 134.11200046539307, + "p99": 146.7200070619583 + }, + "combine": { + "p50": 137.56799697875977, + "p90": 142.0159935951233, + "p95": 144.83200013637543, + "p99": 150.14399588108063 + }, + "roundtrip": { + "p50": 234.23999547958374, + "p90": 240.12799561023712, + "p95": 241.98399484157562, + "p99": 244.25600469112396 + }, + "isolatedSum": { + "p50": 260.0319981575012, + "p90": 272.92799949645996, + "p95": 278.9440006017685, + "p99": 296.86400294303894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-000ed0a4", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b200_75d4e512", + "comparisonKey": "73f7d8813dd7de33", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:39:04.701924+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 80.6720033288002, + "p90": 95.83999961614609, + "p95": 102.46399790048599, + "p99": 116.09599739313126 + }, + "combine": { + "p50": 74.52800124883652, + "p90": 80.38400113582611, + "p95": 86.11200004816055, + "p99": 95.45599669218063 + }, + "roundtrip": { + "p50": 127.61600315570831, + "p90": 146.55999839305878, + "p95": 152.8639942407608, + "p99": 179.26399409770966 + }, + "isolatedSum": { + "p50": 
155.20000457763672, + "p90": 176.2240007519722, + "p95": 188.57599794864655, + "p99": 211.5519940853119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.45600086450577, + "p90": 94.27200257778168, + "p95": 98.2080027461052, + "p99": 107.58399963378906 + }, + "combine": { + "p50": 76.25599950551987, + "p90": 82.04799890518188, + "p95": 87.87199854850769, + "p99": 100.73599964380264 + }, + "roundtrip": { + "p50": 132.28799402713776, + "p90": 146.43199741840363, + "p95": 153.1199961900711, + "p99": 164.2879992723465 + }, + "isolatedSum": { + "p50": 151.71200037002563, + "p90": 176.32000148296356, + "p95": 186.08000129461288, + "p99": 208.3199992775917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.08799904584885, + "p90": 95.16800194978714, + "p95": 102.68799960613251, + "p99": 127.36000120639801 + }, + "combine": { + "p50": 79.9039974808693, + "p90": 90.27200192213058, + "p95": 94.36800330877304, + "p99": 103.74400019645691 + }, + "roundtrip": { + "p50": 139.74399864673615, + "p90": 154.81600165367126, + "p95": 162.20800578594208, + "p99": 173.15199971199036 + }, + "isolatedSum": { + "p50": 160.99199652671814, + "p90": 185.44000387191772, + "p95": 197.05600291490555, + "p99": 231.10400140285492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 77.34400033950806, + "p90": 93.18400174379349, + "p95": 98.55999797582626, + "p99": 116.73600226640701 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 87.26400136947632, + "p95": 92.25600212812424, + "p99": 100.3199964761734 + }, + "roundtrip": { + "p50": 138.20800185203552, + "p90": 153.6320000886917, + "p95": 159.10400450229645, + "p99": 171.00800573825836 + }, + "isolatedSum": { + "p50": 155.13599663972855, + "p90": 180.4480031132698, + "p95": 190.8160001039505, + "p99": 217.0559987425804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.88799703121185, + "p90": 88.73599767684937, + "p95": 97.18400239944458, + "p99": 103.74400019645691 + }, + "combine": { + "p50": 86.07999980449677, + "p90": 91.839998960495, + "p95": 99.55199807882309, + "p99": 104.44799810647964 + }, + "roundtrip": { + "p50": 146.62399888038635, + "p90": 163.2319986820221, + "p95": 175.07199943065643, + "p99": 193.4400051832199 + }, + "isolatedSum": { + "p50": 163.96799683570862, + "p90": 180.57599663734436, + "p95": 196.73600047826767, + "p99": 208.19199830293655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.8959988951683, + "p90": 98.04800152778625, + "p95": 106.175996363163, + "p99": 116.15999788045883 + }, + "combine": { + "p50": 88.51200342178345, + "p90": 92.16000139713287, + "p95": 97.63199836015701, + "p99": 104.54399883747101 + }, + "roundtrip": { + "p50": 149.34399724006653, + "p90": 161.3440066576004, + 
"p95": 166.46400094032288, + "p99": 175.9680062532425 + }, + "isolatedSum": { + "p50": 177.40800231695175, + "p90": 190.20800292491913, + "p95": 203.80799472332, + "p99": 220.70399671792984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 106.33599758148193, + "p90": 112.64000087976456, + "p95": 115.07199704647064, + "p99": 121.60000205039978 + }, + "combine": { + "p50": 101.98400169610977, + "p90": 110.91200262308121, + "p95": 114.72000181674957, + "p99": 121.98399752378464 + }, + "roundtrip": { + "p50": 177.98399925231934, + "p90": 191.6159987449646, + "p95": 198.08000326156616, + "p99": 206.91199600696564 + }, + "isolatedSum": { + "p50": 208.3199992775917, + "p90": 223.55200350284576, + "p95": 229.79199886322021, + "p99": 243.58399957418442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.18399757146835, + "p90": 124.60800260305405, + "p95": 129.02399897575378, + "p99": 135.80800592899323 + }, + "combine": { + "p50": 125.34399330615997, + "p90": 130.3039938211441, + "p95": 134.65599715709686, + "p99": 142.39999651908875 + }, + "roundtrip": { + "p50": 212.92799711227417, + "p90": 222.33599424362183, + "p95": 229.37600314617157, + "p99": 238.49600553512573 + }, + "isolatedSum": { + "p50": 238.52799087762833, + "p90": 254.91199642419815, + "p95": 263.67999613285065, + "p99": 278.20800244808197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + 
"recvTokensMax": 688, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ba0dc288", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_e0f6e15b", + "comparisonKey": "d07ab0613df74ba8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:40:32.066798+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.39200103282928, + "p90": 83.96799862384796, + "p95": 85.82399785518646, + "p99": 93.40800344944 + }, + "combine": { + "p50": 65.98400324583054, + "p90": 75.16799867153168, + "p95": 76.38400048017502, + "p99": 80.1599994301796 + }, + "roundtrip": { + "p50": 122.5920021533966, + "p90": 131.1040073633194, + "p95": 133.88800621032715, + "p99": 137.05599308013916 + }, + "isolatedSum": { + "p50": 145.37600427865982, + "p90": 159.13599729537964, + "p95": 162.20799833536148, + "p99": 173.5680028796196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 79.45600152015686, + "p90": 92.92799979448318, + "p95": 94.7519987821579, + "p99": 102.46399790048599 + }, + "combine": { + "p50": 75.6480023264885, + "p90": 80.6720033288002, + "p95": 81.98399841785431, + "p99": 87.52000331878662 + }, + "roundtrip": { + "p50": 125.85599720478058, + "p90": 139.93600010871887, + "p95": 144.48000490665436, + "p99": 160.0639969110489 + }, + "isolatedSum": { + "p50": 155.10400384664536, + "p90": 173.6000031232834, + "p95": 176.7359972000122, + "p99": 189.9840012192726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.39200037717819, + "p90": 82.30400085449219, + "p95": 86.62399649620056, + "p99": 90.4960036277771 + }, + 
"combine": { + "p50": 76.64000242948532, + "p90": 78.8159966468811, + "p95": 80.76799660921097, + "p99": 87.96799927949905 + }, + "roundtrip": { + "p50": 135.04000008106232, + "p90": 147.45600521564484, + "p95": 151.296004652977, + "p99": 168.32000017166138 + }, + "isolatedSum": { + "p50": 152.0320028066635, + "p90": 161.1199975013733, + "p95": 167.39199310541153, + "p99": 178.46400290727615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.32799988985062, + "p90": 83.03999900817871, + "p95": 86.30400151014328, + "p99": 91.39200299978256 + }, + "combine": { + "p50": 77.69600301980972, + "p90": 84.927998483181, + "p95": 87.3280018568039, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 138.72000575065613, + "p90": 144.0960019826889, + "p95": 146.4959979057312, + "p99": 155.87200224399567 + }, + "isolatedSum": { + "p50": 153.02400290966034, + "p90": 167.9679974913597, + "p95": 173.63200336694717, + "p99": 182.46400356292725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.09599828720093, + "p90": 81.82399719953537, + "p95": 86.2400010228157, + "p99": 92.12800115346909 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 87.16800063848495, + "p95": 89.47200328111649, + "p99": 99.80800002813339 + }, + "roundtrip": { + "p50": 140.00000059604645, + "p90": 148.5760062932968, + "p95": 151.48800611495972, + "p99": 174.6239960193634 + }, + "isolatedSum": { + "p50": 154.6880006790161, + "p90": 168.99199783802032, + "p95": 175.7120043039322, + "p99": 
191.93600118160248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 91.48799628019333, + "p90": 96.19200229644775, + "p95": 97.43999689817429, + "p99": 103.74400019645691 + }, + "combine": { + "p50": 80.64000308513641, + "p90": 88.57599645853043, + "p95": 89.66399729251862, + "p99": 92.99200028181076 + }, + "roundtrip": { + "p50": 149.05600249767303, + "p90": 154.36799824237823, + "p95": 156.031996011734, + "p99": 158.87999534606934 + }, + "isolatedSum": { + "p50": 172.12799936532974, + "p90": 184.76799875497818, + "p95": 187.1039941906929, + "p99": 196.73600047826767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.73599898815155, + "p90": 101.6319990158081, + "p95": 104.44799810647964, + "p99": 113.3119985461235 + }, + "combine": { + "p50": 101.34399682283401, + "p90": 104.22399640083313, + "p95": 105.15200346708298, + "p99": 107.64800012111664 + }, + "roundtrip": { + "p50": 174.01599884033203, + "p90": 180.7360053062439, + "p95": 184.1599941253662, + "p99": 188.60800564289093 + }, + "isolatedSum": { + "p50": 198.07999581098557, + "p90": 205.85599541664124, + "p95": 209.60000157356262, + "p99": 220.95999866724014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
115.23199826478958, + "p90": 121.8239963054657, + "p95": 123.6800029873848, + "p99": 128.67200374603271 + }, + "combine": { + "p50": 137.2160017490387, + "p90": 140.57600498199463, + "p95": 141.85599982738495, + "p99": 149.02399480342865 + }, + "roundtrip": { + "p50": 227.90400683879852, + "p90": 234.592005610466, + "p95": 236.9920015335083, + "p99": 241.02400243282318 + }, + "isolatedSum": { + "p50": 252.44800001382828, + "p90": 262.4000012874603, + "p95": 265.53600281476974, + "p99": 277.69599854946136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7851c670", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_7d7375c3", + "comparisonKey": "c16927c7382e4175", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:40:55.710753+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.9039974808693, + "p90": 86.27200126647949, + "p95": 89.21600133180618, + "p99": 95.45599669218063 + }, + "combine": { + "p50": 73.47200065851212, + "p90": 76.80000364780426, + "p95": 77.91999727487564, + "p99": 81.15199953317642 + }, + "roundtrip": { + "p50": 126.68800354003906, + "p90": 134.5600038766861, + "p95": 138.62399756908417, + "p99": 142.04800128936768 + }, + "isolatedSum": { + "p50": 153.3759981393814, + "p90": 163.07200491428375, + "p95": 167.13599860668182, + "p99": 176.60799622535706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.6720033288002, + "p90": 88.16000074148178, + "p95": 91.13600105047226, + "p99": 95.45599669218063 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 79.32800054550171, + "p95": 81.60000294446945, + "p99": 
88.95999938249588 + }, + "roundtrip": { + "p50": 131.80799782276154, + "p90": 137.37599551677704, + "p95": 139.39200341701508, + "p99": 148.54399859905243 + }, + "isolatedSum": { + "p50": 157.82400220632553, + "p90": 167.4880012869835, + "p95": 172.7360039949417, + "p99": 184.4159960746765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.43200117349625, + "p90": 85.08799970149994, + "p95": 88.16000074148178, + "p99": 93.31200271844864 + }, + "combine": { + "p50": 78.20799946784973, + "p90": 83.71199667453766, + "p95": 86.87999844551086, + "p99": 91.26400202512741 + }, + "roundtrip": { + "p50": 140.03199338912964, + "p90": 146.01600170135498, + "p95": 148.80000054836273, + "p99": 153.6960005760193 + }, + "isolatedSum": { + "p50": 156.64000064134598, + "p90": 168.7999963760376, + "p95": 175.03999918699265, + "p99": 184.57600474357605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.27199995517731, + "p90": 86.01599931716919, + "p95": 90.08000046014786, + "p99": 93.9520001411438 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 86.7839977145195, + "p95": 88.35200220346451, + "p99": 92.41600334644318 + }, + "roundtrip": { + "p50": 140.57600498199463, + "p90": 149.08799529075623, + "p95": 152.79999375343323, + "p99": 158.49600732326508 + }, + "isolatedSum": { + "p50": 156.96000307798386, + "p90": 172.7999970316887, + "p95": 178.43200266361237, + "p99": 186.36800348758698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + 
"combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.9039974808693, + "p90": 89.40800279378891, + "p95": 91.93599969148636, + "p99": 98.43199700117111 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 87.2960016131401, + "p95": 88.70399743318558, + "p99": 91.71199798583984 + }, + "roundtrip": { + "p50": 140.4159963130951, + "p90": 148.76799285411835, + "p95": 151.296004652977, + "p99": 154.88000214099884 + }, + "isolatedSum": { + "p50": 159.19999778270721, + "p90": 176.70400440692902, + "p95": 180.63999712467194, + "p99": 190.14399498701096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.01600062847137, + "p90": 98.08000177145004, + "p95": 100.35199671983719, + "p99": 104.63999956846237 + }, + "combine": { + "p50": 89.21600133180618, + "p90": 91.16800129413605, + "p95": 93.28000247478485, + "p99": 101.6639992594719 + }, + "roundtrip": { + "p50": 152.99199521541595, + "p90": 158.07999670505524, + "p95": 160.7999950647354, + "p99": 165.72800278663635 + }, + "isolatedSum": { + "p50": 183.23200196027756, + "p90": 189.2480030655861, + "p95": 193.63199919462204, + "p99": 206.30399882793427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.5759990811348, + "p90": 109.56799983978271, + "p95": 111.455999314785, + "p99": 116.19199812412262 + }, + 
"combine": { + "p50": 102.84800082445145, + "p90": 108.64000022411346, + "p95": 113.02399635314941, + "p99": 117.76000261306763 + }, + "roundtrip": { + "p50": 180.7679980993271, + "p90": 188.1600022315979, + "p95": 191.3280040025711, + "p99": 195.99999487400055 + }, + "isolatedSum": { + "p50": 207.42399990558624, + "p90": 218.20800006389618, + "p95": 224.47999566793442, + "p99": 233.95200073719025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.6240017414093, + "p90": 124.38400089740753, + "p95": 126.46399438381195, + "p99": 132.09599256515503 + }, + "combine": { + "p50": 126.14400684833527, + "p90": 129.2479932308197, + "p95": 130.43199479579926, + "p99": 139.0399932861328 + }, + "roundtrip": { + "p50": 217.50399470329285, + "p90": 226.3679951429367, + "p95": 228.38400304317474, + "p99": 232.5119972229004 + }, + "isolatedSum": { + "p50": 244.76800858974457, + "p90": 253.63199412822723, + "p95": 256.8959891796112, + "p99": 271.13598585128784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-482bfd90", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_3facccc3", + "comparisonKey": "0df594e5f221239d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:37:45.982420+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + 
"suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.77599650621414, + "p90": 84.95999872684479, + "p95": 88.06400001049042, + "p99": 100.76799988746643 + }, + "combine": { + "p50": 68.03199648857117, + "p90": 76.31999999284744, + "p95": 76.92799717187881, + "p99": 86.81599795818329 + }, + "roundtrip": { + "p50": 122.20799922943115, + "p90": 137.08800077438354, + "p95": 143.8080072402954, + "p99": 
151.93599462509155 + }, + "isolatedSum": { + "p50": 147.8079929947853, + "p90": 161.27999871969223, + "p95": 164.99199718236923, + "p99": 187.58399784564972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.19199901819229, + "p90": 89.88799899816513, + "p95": 93.24800223112106, + "p99": 103.80800068378448 + }, + "combine": { + "p50": 76.19199901819229, + "p90": 79.19999957084656, + "p95": 80.35200089216232, + "p99": 88.95999938249588 + }, + "roundtrip": { + "p50": 127.23200023174286, + "p90": 136.89599931240082, + "p95": 140.1280015707016, + "p99": 148.83199334144592 + }, + "isolatedSum": { + "p50": 152.38399803638458, + "p90": 169.0879985690117, + "p95": 173.6000031232834, + "p99": 192.76800006628036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.93599706888199, + "p90": 85.88799834251404, + "p95": 89.1840010881424, + "p99": 98.65599870681763 + }, + "combine": { + "p50": 77.56800204515457, + "p90": 82.20800012350082, + "p95": 86.87999844551086, + "p99": 97.59999811649323 + }, + "roundtrip": { + "p50": 138.87999951839447, + "p90": 143.5520052909851, + "p95": 147.2959965467453, + "p99": 153.24799716472626 + }, + "isolatedSum": { + "p50": 153.50399911403656, + "p90": 168.09599846601486, + "p95": 176.06399953365326, + "p99": 196.25599682331085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.99999755620956, + "p90": 89.53599631786346, + "p95": 93.40800344944, + "p99": 100.8640006184578 + }, + "combine": { + "p50": 78.07999849319458, + "p90": 86.5280032157898, + "p95": 88.32000195980072, + "p99": 96.54399752616882 + }, + "roundtrip": { + "p50": 135.6160044670105, + "p90": 145.02400159835815, + "p95": 147.23199605941772, + "p99": 157.21599757671356 + }, + "isolatedSum": { + "p50": 154.07999604940414, + "p90": 176.06399953365326, + "p95": 181.72800540924072, + "p99": 197.40799814462662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.53600180149078, + "p90": 84.22400057315826, + "p95": 87.99999952316284, + "p99": 96.38399630784988 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 87.55200356245041, + "p95": 90.11200070381165, + "p99": 95.39200365543365 + }, + "roundtrip": { + "p50": 140.06400108337402, + "p90": 152.8639942407608, + "p95": 156.25600516796112, + "p99": 162.27200627326965 + }, + "isolatedSum": { + "p50": 156.12800419330597, + "p90": 171.77600413560867, + "p95": 178.1120002269745, + "p99": 191.77599996328354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.7519987821579, + "p90": 100.70399940013885, + "p95": 102.9760017991066, + "p99": 112.2559979557991 + }, + "combine": { + "p50": 88.44800293445587, + "p90": 91.2960022687912, + "p95": 93.02400052547455, + "p99": 104.63999956846237 + }, + "roundtrip": { + "p50": 
147.61599898338318, + "p90": 157.1200042963028, + "p95": 160.99199652671814, + "p99": 166.97600483894348 + }, + "isolatedSum": { + "p50": 183.20000171661377, + "p90": 192.00000166893005, + "p95": 196.00000232458115, + "p99": 216.89599752426147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.34399747848511, + "p90": 109.69600081443787, + "p95": 111.61600053310394, + "p99": 115.55200070142746 + }, + "combine": { + "p50": 102.52799838781357, + "p90": 110.72000116109848, + "p95": 112.92800307273865, + "p99": 122.6240023970604 + }, + "roundtrip": { + "p50": 179.07199263572693, + "p90": 188.38399648666382, + "p95": 191.83999300003052, + "p99": 198.7520009279251 + }, + "isolatedSum": { + "p50": 207.87199586629868, + "p90": 220.41600197553635, + "p95": 224.5440036058426, + "p99": 238.17600309848785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.59200149774551, + "p90": 130.2720010280609, + "p95": 133.02400708198547, + "p99": 138.84800672531128 + }, + "combine": { + "p50": 125.47199428081512, + "p90": 130.8159977197647, + "p95": 133.37600231170654, + "p99": 139.67999815940857 + }, + "roundtrip": { + "p50": 213.31200003623962, + "p90": 220.7999974489212, + "p95": 225.055992603302, + "p99": 235.9360009431839 + }, + "isolatedSum": { + "p50": 244.06399577856064, + "p90": 261.0879987478256, + "p95": 266.400009393692, + "p99": 278.52800488471985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 
76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b8a5bd7a", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_81246364", + "comparisonKey": "3b881ca5859a8e4e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:19:17.743215+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + 
"id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 63.07200342416763, + "p90": 73.21599870920181, + "p95": 79.39200103282928, + "p99": 85.63199639320374 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 78.40000092983246, + "p95": 80.99199831485748, + "p99": 88.60799670219421 + }, + "roundtrip": { + "p50": 116.12799763679504, + "p90": 126.43200159072876, + "p95": 134.5919966697693, + "p99": 141.40799641609192 + }, + "isolatedSum": { + "p50": 139.39200341701508, + "p90": 151.61599963903427, + "p95": 160.38399934768677, + "p99": 174.23999309539795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 63.968002796173096, + "p90": 80.22399991750717, + "p95": 81.60000294446945, + "p99": 92.38400310277939 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 87.2960016131401, + "p95": 89.53599631786346, + "p99": 94.81599926948547 + }, + "roundtrip": { + "p50": 119.39200013875961, + "p90": 133.34399461746216, + "p95": 138.33600282669067, + "p99": 148.92800152301788 + }, + "isolatedSum": { + "p50": 141.12000167369843, + "p90": 167.52000153064728, + "p95": 171.13599926233292, + "p99": 187.20000237226486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 64.09599632024765, + "p90": 71.26399874687195, + "p95": 76.38400048017502, + "p99": 
83.20000022649765 + }, + "combine": { + "p50": 77.98399776220322, + "p90": 87.39200234413147, + "p95": 90.68799763917923, + "p99": 94.87999975681305 + }, + "roundtrip": { + "p50": 126.8479973077774, + "p90": 132.54399597644806, + "p95": 137.63199746608734, + "p99": 144.41600441932678 + }, + "isolatedSum": { + "p50": 142.07999408245087, + "p90": 158.65600109100342, + "p95": 167.07199811935425, + "p99": 178.0799999833107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 66.94400310516357, + "p90": 82.8159973025322, + "p95": 88.25600147247314, + "p99": 101.95200145244598 + }, + "combine": { + "p50": 79.32800054550171, + "p90": 90.52799642086029, + "p95": 92.8959995508194, + "p99": 101.1200025677681 + }, + "roundtrip": { + "p50": 129.95199859142303, + "p90": 144.3520039319992, + "p95": 149.6639996767044, + "p99": 165.8560037612915 + }, + "isolatedSum": { + "p50": 146.27200365066528, + "p90": 173.3439937233925, + "p95": 181.15200102329254, + "p99": 203.07200402021408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 69.43999975919724, + "p90": 76.57600194215775, + "p95": 80.4160013794899, + "p99": 88.0960002541542 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 89.6959975361824, + "p95": 92.06400066614151, + "p99": 95.551997423172 + }, + "roundtrip": { + "p50": 130.3360015153885, + "p90": 139.67999815940857, + "p95": 145.63199877738953, + "p99": 153.53600680828094 + }, + "isolatedSum": { + "p50": 148.12800288200378, + "p90": 166.27199947834015, + 
"p95": 172.4800020456314, + "p99": 183.6479976773262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.50400221347809, + "p90": 86.91199868917465, + "p95": 91.93599969148636, + "p99": 101.85600072145462 + }, + "combine": { + "p50": 89.02399986982346, + "p90": 92.6079973578453, + "p95": 95.8079993724823, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 135.68000495433807, + "p90": 146.65600657463074, + "p95": 150.94399452209473, + "p99": 157.4079990386963 + }, + "isolatedSum": { + "p50": 170.52800208330154, + "p90": 179.51999604701996, + "p95": 187.74399906396866, + "p99": 206.33599907159805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.2640026807785, + "p90": 101.15200281143188, + "p95": 104.22399640083313, + "p99": 112.8000020980835 + }, + "combine": { + "p50": 101.88800096511841, + "p90": 107.64800012111664, + "p95": 112.64000087976456, + "p99": 117.18399822711945 + }, + "roundtrip": { + "p50": 166.78400337696075, + "p90": 173.88799786567688, + "p95": 178.14399302005768, + "p99": 182.97599256038666 + }, + "isolatedSum": { + "p50": 197.1520036458969, + "p90": 208.80000293254852, + "p95": 216.8639972805977, + "p99": 229.98400032520294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { 
+ "p50": 105.53599894046783, + "p90": 111.16799712181091, + "p95": 115.03999680280685, + "p99": 121.56800180673599 + }, + "combine": { + "p50": 125.66399574279785, + "p90": 129.18399274349213, + "p95": 131.67999684810638, + "p99": 140.00000059604645 + }, + "roundtrip": { + "p50": 201.85600221157074, + "p90": 208.48000049591064, + "p95": 212.19199895858765, + "p99": 216.76799654960632 + }, + "isolatedSum": { + "p50": 231.1999946832657, + "p90": 240.35198986530304, + "p95": 246.71999365091324, + "p99": 261.56800240278244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-36b35cf8", + "identity": "b200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b200_216f4a3f", + "comparisonKey": "674b54aa6fca456d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:25:56.758892+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 206.33600652217865, + "p90": 253.9519965648651, + "p95": 274.6559977531433, + "p99": 300.4480004310608 + }, + "combine": { + "p50": 58.14399942755699, + "p90": 64.70400094985962, + "p95": 72.1919983625412, + "p99": 83.00799876451492 + }, + "roundtrip": { + "p50": 239.26399648189545, + "p90": 270.687997341156, + "p95": 277.7920067310333, + "p99": 291.77600145339966 + }, + "isolatedSum": { + "p50": 264.48000594973564, + "p90": 318.65599751472473, + "p95": 346.8479961156845, + "p99": 383.4559991955757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 197.1520036458969, + "p90": 231.10400140285492, + "p95": 237.37600445747375, + "p99": 264.5440101623535 + }, + "combine": { + "p50": 59.29600074887276, + "p90": 63.519999384880066, + "p95": 69.85600292682648, + "p99": 75.87199658155441 + }, + "roundtrip": { + "p50": 242.78399348258972, + "p90": 271.1679935455322, 
+ "p95": 275.4240036010742, + "p99": 284.5759987831116 + }, + "isolatedSum": { + "p50": 256.44800439476967, + "p90": 294.624000787735, + "p95": 307.23200738430023, + "p99": 340.41600674390793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 196.48000597953796, + "p90": 231.07199370861053, + "p95": 236.86400055885315, + "p99": 275.9679853916168 + }, + "combine": { + "p50": 63.19999694824219, + "p90": 68.06399673223495, + "p95": 74.72000271081924, + "p99": 79.03999835252762 + }, + "roundtrip": { + "p50": 245.85600197315216, + "p90": 274.3360102176666, + "p95": 279.6800136566162, + "p99": 286.3039970397949 + }, + "isolatedSum": { + "p50": 259.68000292778015, + "p90": 299.1359904408455, + "p95": 311.5840032696724, + "p99": 355.00798374414444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 195.8400011062622, + "p90": 225.98400712013245, + "p95": 229.66399788856506, + "p99": 234.6239984035492 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 72.76800274848938, + "p95": 79.64800298213959, + "p99": 88.16000074148178 + }, + "roundtrip": { + "p50": 247.71200120449066, + "p90": 275.8080065250397, + "p95": 279.231995344162, + "p99": 287.9999876022339 + }, + "isolatedSum": { + "p50": 261.9839981198311, + "p90": 298.7520098686218, + "p95": 309.31200087070465, + "p99": 322.783999145031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 3, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 196.86399400234222, + "p90": 233.37599635124207, + "p95": 243.74400079250336, + "p99": 279.5200049877167 + }, + "combine": { + "p50": 66.94400310516357, + "p90": 72.86400347948074, + "p95": 79.1039988398552, + "p99": 87.77599781751633 + }, + "roundtrip": { + "p50": 249.02400374412537, + "p90": 282.81599283218384, + "p95": 292.1279966831207, + "p99": 325.24800300598145 + }, + "isolatedSum": { + "p50": 263.8079971075058, + "p90": 306.2399998307228, + "p95": 322.84799963235855, + "p99": 367.296002805233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 197.66399264335632, + "p90": 235.55199801921844, + "p95": 245.66400051116943, + "p99": 282.0799946784973 + }, + "combine": { + "p50": 67.9360032081604, + "p90": 72.41600006818771, + "p95": 79.48800176382065, + "p99": 83.99999886751175 + }, + "roundtrip": { + "p50": 274.84801411628723, + "p90": 445.21600008010864, + "p95": 453.8240134716034, + "p99": 471.3599979877472 + }, + "isolatedSum": { + "p50": 265.5999958515167, + "p90": 307.96799808740616, + "p95": 325.1520022749901, + "p99": 366.07999354600906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.0399956703186, + "p90": 230.43200373649597, + "p95": 234.72000658512115, + "p99": 243.26400458812714 + }, + "combine": { + "p50": 81.95199817419052, + "p90": 89.75999802350998, + "p95": 95.74399888515472, + "p99": 101.21600329875946 
+ }, + "roundtrip": { + "p50": 267.67998933792114, + "p90": 295.23199796676636, + "p95": 299.29599165916443, + "p99": 307.68001079559326 + }, + "isolatedSum": { + "p50": 280.9919938445091, + "p90": 320.19200176000595, + "p95": 330.4640054702759, + "p99": 344.4800078868866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 211.8079960346222, + "p90": 244.159996509552, + "p95": 253.53598594665527, + "p99": 273.21600914001465 + }, + "combine": { + "p50": 97.50399738550186, + "p90": 102.75200009346008, + "p95": 108.0000028014183, + "p99": 114.81600254774094 + }, + "roundtrip": { + "p50": 295.199990272522, + "p90": 323.8080143928528, + "p95": 330.4640054702759, + "p99": 355.1360070705414 + }, + "isolatedSum": { + "p50": 309.31199342012405, + "p90": 346.9119966030121, + "p95": 361.5359887480736, + "p99": 388.0320116877556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f305f01f", + "identity": "b200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b200_216f4a3f", + "comparisonKey": "3d16ef2a874aaa5b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:27:45.544334+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 185.72799861431122, + "p90": 206.84799551963806, + "p95": 212.67199516296387, + "p99": 221.0880070924759 + }, + "combine": { + "p50": 60.35200133919716, + "p90": 64.19199705123901, + "p95": 66.52799993753433, + "p99": 74.5600014925003 + }, + "roundtrip": { + "p50": 237.05600202083588, + "p90": 263.5200023651123, + "p95": 282.4000120162964, + "p99": 303.9039969444275 + }, + "isolatedSum": { + "p50": 246.07999995350838, + "p90": 271.0399925708771, + "p95": 279.1999951004982, + "p99": 295.6480085849762 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 186.52799725532532, + "p90": 206.43199980258942, + "p95": 210.68799495697021, + "p99": 235.80799996852875 + }, + "combine": { + "p50": 62.01599910855293, + "p90": 64.60800021886826, + "p95": 66.880002617836, + "p99": 73.34399968385696 + }, + "roundtrip": { + "p50": 237.63200640678406, + "p90": 252.25600600242615, + "p95": 254.65598702430725, + "p99": 264.3199861049652 + }, + "isolatedSum": { + "p50": 248.54399636387825, + "p90": 271.0400000214577, + "p95": 277.5679975748062, + "p99": 309.1519996523857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 186.0799938440323, + "p90": 207.16799795627594, + "p95": 213.82400393486023, + "p99": 220.7999974489212 + }, + "combine": { + "p50": 64.28799778223038, + "p90": 67.23199784755707, + "p95": 70.27199864387512, + "p99": 79.96799796819687 + }, + "roundtrip": { + "p50": 239.74399268627167, + "p90": 259.2639923095703, + "p95": 264.19198513031006, + "p99": 276.95998549461365 + }, + "isolatedSum": { + "p50": 250.36799162626266, + "p90": 274.399995803833, + "p95": 284.09600257873535, + "p99": 300.7679954171181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 186.46399676799774, + "p90": 205.82400262355804, + "p95": 213.15200626850128, + "p99": 
253.66398692131042 + }, + "combine": { + "p50": 66.880002617836, + "p90": 70.3359991312027, + "p95": 72.12799787521362, + "p99": 75.68000257015228 + }, + "roundtrip": { + "p50": 240.89600145816803, + "p90": 255.840003490448, + "p95": 258.87998938560486, + "p99": 267.36000180244446 + }, + "isolatedSum": { + "p50": 253.34399938583374, + "p90": 276.16000175476074, + "p95": 285.2800041437149, + "p99": 329.3439894914627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 187.9040002822876, + "p90": 222.9440063238144, + "p95": 237.2480034828186, + "p99": 262.91200518608093 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 70.94399631023407, + "p95": 74.0479975938797, + "p99": 81.7599967122078 + }, + "roundtrip": { + "p50": 242.88000166416168, + "p90": 271.4880108833313, + "p95": 289.5680069923401, + "p99": 310.33599376678467 + }, + "isolatedSum": { + "p50": 255.48800081014633, + "p90": 293.88800263404846, + "p95": 311.2960010766983, + "p99": 344.6720018982887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 196.57599925994873, + "p90": 241.18399620056152, + "p95": 254.62400913238525, + "p99": 279.3920040130615 + }, + "combine": { + "p50": 71.03999704122543, + "p90": 74.27199929952621, + "p95": 75.83999633789062, + "p99": 79.26400005817413 + }, + "roundtrip": { + "p50": 245.63199281692505, + "p90": 260.3839933872223, + "p95": 262.7840042114258, + "p99": 270.4960107803345 + }, + "isolatedSum": { + "p50": 267.61599630117416, + "p90": 315.45599550008774, + 
"p95": 330.4640054702759, + "p99": 358.65600407123566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 189.7599995136261, + "p90": 213.3760005235672, + "p95": 225.3119945526123, + "p99": 233.47200453281403 + }, + "combine": { + "p50": 86.68799698352814, + "p90": 90.97599983215332, + "p95": 93.40800344944, + "p99": 100.41599720716476 + }, + "roundtrip": { + "p50": 263.35999369621277, + "p90": 281.5999984741211, + "p95": 291.1680042743683, + "p99": 303.5520017147064 + }, + "isolatedSum": { + "p50": 276.44799649715424, + "p90": 304.3520003557205, + "p95": 318.7199980020523, + "p99": 333.8880017399788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 210.4640007019043, + "p90": 223.10400009155273, + "p95": 226.23999416828156, + "p99": 232.06399381160736 + }, + "combine": { + "p50": 105.53599894046783, + "p90": 109.18399691581726, + "p95": 110.84800213575363, + "p99": 119.19999867677689 + }, + "roundtrip": { + "p50": 310.4960024356842, + "p90": 343.03998947143555, + "p95": 356.7360043525696, + "p99": 393.3440148830414 + }, + "isolatedSum": { + "p50": 315.99999964237213, + "p90": 332.28799700737, + "p95": 337.0879963040352, + "p99": 351.26399248838425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02192f08", + "identity": 
"b200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_216f4a3f", + "comparisonKey": "380bc188218a1127", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:29:36.637262+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 178.30400168895721, + "p90": 199.8080015182495, + "p95": 205.4399996995926, + "p99": 218.52800250053406 + }, + "combine": { + "p50": 64.64000046253204, + "p90": 69.63200122117996, + "p95": 72.76800274848938, + "p99": 78.5600021481514 + }, + "roundtrip": { + "p50": 233.98399353027344, + "p90": 257.53599405288696, + "p95": 267.39200949668884, + "p99": 286.0479950904846 + }, + "isolatedSum": { + "p50": 242.94400215148926, + "p90": 269.4400027394295, + "p95": 278.20800244808197, + "p99": 297.08800464868546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 183.58400464057922, + "p90": 230.20799458026886, + "p95": 251.71199440956116, + "p99": 287.84000873565674 + }, + "combine": { + "p50": 65.79200178384781, + "p90": 73.5040009021759, + "p95": 76.51200145483017, + "p99": 83.13599973917007 + }, + "roundtrip": { + "p50": 233.72800648212433, + "p90": 259.2960000038147, + "p95": 265.79201221466064, + "p99": 278.49599719047546 + }, + "isolatedSum": { + "p50": 249.37600642442703, + "p90": 303.71199548244476, + "p95": 328.2239958643913, + "p99": 370.9760084748268 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 178.6240041255951, + "p90": 201.12000405788422, + "p95": 209.4080001115799, + "p99": 217.0879989862442 + }, + "combine": { + "p50": 66.46399945020676, + "p90": 69.47200000286102, + "p95": 71.6480016708374, + "p99": 80.64000308513641 + }, + "roundtrip": { + "p50": 234.55999791622162, + "p90": 
253.24800610542297, + "p95": 258.7519884109497, + "p99": 266.975998878479 + }, + "isolatedSum": { + "p50": 245.08800357580185, + "p90": 270.59200406074524, + "p95": 281.0560017824173, + "p99": 297.7280020713806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 178.71999740600586, + "p90": 201.02399587631226, + "p95": 208.80000293254852, + "p99": 216.95999801158905 + }, + "combine": { + "p50": 69.95200365781784, + "p90": 74.23999905586243, + "p95": 77.95199751853943, + "p99": 83.10399949550629 + }, + "roundtrip": { + "p50": 239.48800563812256, + "p90": 266.9439911842346, + "p95": 280.09599447250366, + "p99": 306.7840039730072 + }, + "isolatedSum": { + "p50": 248.6720010638237, + "p90": 275.2639949321747, + "p95": 286.75200045108795, + "p99": 300.06399750709534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 180.06399273872375, + "p90": 203.45599949359894, + "p95": 208.8959962129593, + "p99": 217.6000028848648 + }, + "combine": { + "p50": 69.72800195217133, + "p90": 72.95999675989151, + "p95": 74.81600344181061, + "p99": 82.78399705886841 + }, + "roundtrip": { + "p50": 239.9040013551712, + "p90": 280.5120050907135, + "p95": 287.9039943218231, + "p99": 310.2400004863739 + }, + "isolatedSum": { + "p50": 249.79199469089508, + "p90": 276.41599625349045, + "p95": 283.7119996547699, + "p99": 300.3839999437332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + 
"stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 179.9039989709854, + "p90": 218.20800006389618, + "p95": 227.52000391483307, + "p99": 272.67199754714966 + }, + "combine": { + "p50": 74.40000027418137, + "p90": 78.3040001988411, + "p95": 83.77599716186523, + "p99": 89.08800035715103 + }, + "roundtrip": { + "p50": 244.4159984588623, + "p90": 272.4800109863281, + "p95": 277.9200077056885, + "p99": 296.35199904441833 + }, + "isolatedSum": { + "p50": 254.30399924516678, + "p90": 296.5120002627373, + "p95": 311.2960010766983, + "p99": 361.7599979043007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 188.1600022315979, + "p90": 217.98400580883026, + "p95": 223.90399873256683, + "p99": 244.83199417591095 + }, + "combine": { + "p50": 89.66399729251862, + "p90": 94.17600184679031, + "p95": 96.6079980134964, + "p99": 103.29599678516388 + }, + "roundtrip": { + "p50": 263.64800333976746, + "p90": 283.7119996547699, + "p95": 300.4800081253052, + "p99": 336.5760147571564 + }, + "isolatedSum": { + "p50": 277.8239995241165, + "p90": 312.1600076556206, + "p95": 320.51199674606323, + "p99": 348.12799096107483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 210.7519954442978, + "p90": 234.68799889087677, + "p95": 241.2160038948059, + "p99": 266.6879892349243 + }, + "combine": { + "p50": 109.24799740314484, + "p90": 112.8000020980835, + "p95": 
115.1999980211258, + "p99": 120.25599926710129 + }, + "roundtrip": { + "p50": 307.1039915084839, + "p90": 337.40800619125366, + "p95": 345.984011888504, + "p99": 377.7279853820801 + }, + "isolatedSum": { + "p50": 319.9999928474426, + "p90": 347.48800098896027, + "p95": 356.4160019159317, + "p99": 386.9439885020256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c3d8bad5", + "identity": "b200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_bb944e8b", + "comparisonKey": "f65036d3b5e7f8a3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:21:06.708454+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 
8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.85600358247757, + "p90": 83.39200168848038, + "p95": 88.79999816417694, + "p99": 97.98400104045868 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 74.20799881219864, + "p95": 79.16799932718277, + "p99": 89.56799656152725 + }, + "roundtrip": { + "p50": 157.4079990386963, + "p90": 183.32800269126892, + "p95": 188.03200125694275, + "p99": 201.53599977493286 + }, + "isolatedSum": { + "p50": 142.14400202035904, + "p90": 157.60000050067902, + "p95": 167.9679974913597, + "p99": 187.55199760198593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.87199658155441, + "p90": 96.09600156545639, + "p95": 100.19200295209885, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 83.80799740552902, + "p95": 89.34400230646133, + "p99": 107.64800012111664 + }, + "roundtrip": { + "p50": 159.32799875736237, + "p90": 183.74399840831757, + "p95": 190.62399864196777, + "p99": 200.22399723529816 + }, + "isolatedSum": { + "p50": 146.39999717473984, + "p90": 179.9039989709854, + "p95": 189.53600525856018, + "p99": 219.16799992322922 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.15199887752533, + "p90": 86.36800199747086, + "p95": 94.59199756383896, + "p99": 100.76799988746643 + }, + "combine": { + "p50": 74.49600100517273, + "p90": 79.0719985961914, + "p95": 83.99999886751175, + "p99": 90.7839983701706 + }, + "roundtrip": { + "p50": 164.95999693870544, + "p90": 188.12799453735352, + "p95": 197.31199741363525, + "p99": 211.58400177955627 + }, + "isolatedSum": { + "p50": 151.64799988269806, + "p90": 165.44000059366226, + "p95": 178.5919964313507, + "p99": 191.55199825763702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.69600301980972, + "p90": 86.04799956083298, + "p95": 94.68799829483032, + "p99": 102.4319976568222 + }, + "combine": { + "p50": 75.68000257015228, + "p90": 82.11199939250946, + "p95": 88.06400001049042, + "p99": 93.31200271844864 + }, + "roundtrip": { + "p50": 164.51199352741241, + "p90": 189.15200233459473, + "p95": 197.02400267124176, + "p99": 207.23199844360352 + }, + "isolatedSum": { + "p50": 153.376005589962, + "p90": 168.15999895334244, + "p95": 182.75199830532074, + "p99": 195.74400037527084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.59200239181519, + "p90": 84.44800227880478, + "p95": 
92.54399687051773, + "p99": 102.75200009346008 + }, + "combine": { + "p50": 77.05599814653397, + "p90": 82.75199681520462, + "p95": 88.86399865150452, + "p99": 95.74399888515472 + }, + "roundtrip": { + "p50": 166.04800522327423, + "p90": 188.83199989795685, + "p95": 194.5600062608719, + "p99": 200.3519982099533 + }, + "isolatedSum": { + "p50": 155.64800053834915, + "p90": 167.1999990940094, + "p95": 181.40799552202225, + "p99": 198.4959989786148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.34400099515915, + "p90": 90.52799642086029, + "p95": 100.92800110578537, + "p99": 108.70400071144104 + }, + "combine": { + "p50": 82.04799890518188, + "p90": 89.4400030374527, + "p95": 95.48799693584442, + "p99": 103.90400141477585 + }, + "roundtrip": { + "p50": 172.4800020456314, + "p90": 200.95999538898468, + "p95": 208.41600000858307, + "p99": 235.52000522613525 + }, + "isolatedSum": { + "p50": 163.39199990034103, + "p90": 179.967999458313, + "p95": 196.4159980416298, + "p99": 212.6080021262169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.9519994854927, + "p90": 99.35999661684036, + "p95": 105.40799796581268, + "p99": 115.29599875211716 + }, + "combine": { + "p50": 96.89600020647049, + "p90": 101.72799974679947, + "p95": 106.75200074911118, + "p99": 113.34399878978729 + }, + "roundtrip": { + "p50": 192.83199310302734, + "p90": 210.84800362586975, + "p95": 215.7440036535263, + "p99": 224.09600019454956 + }, + "isolatedSum": { + "p50": 
186.8479996919632, + "p90": 201.08799636363983, + "p95": 212.15999871492386, + "p99": 228.63999754190445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.18400305509567, + "p90": 112.2559979557991, + "p95": 120.38400024175644, + "p99": 131.6159963607788 + }, + "combine": { + "p50": 118.65600198507309, + "p90": 126.49600207805634, + "p95": 131.00799918174744, + "p99": 140.25600254535675 + }, + "roundtrip": { + "p50": 238.0480021238327, + "p90": 245.60000002384186, + "p95": 252.16001272201538, + "p99": 261.4080011844635 + }, + "isolatedSum": { + "p50": 219.84000504016876, + "p90": 238.75200003385544, + "p95": 251.39199942350388, + "p99": 271.87199890613556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a6912e85", + "identity": "b200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_216f4a3f", + "comparisonKey": "b1713e9436adf7a0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:23:00.724882+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 185.31200289726257, + "p90": 214.65599536895752, + "p95": 219.35999393463135, + "p99": 226.33600234985352 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 72.92799651622772, + "p95": 79.6160027384758, + "p99": 88.19200098514557 + }, + "roundtrip": { + "p50": 244.7039932012558, + "p90": 283.03998708724976, + "p95": 293.88800263404846, + "p99": 329.02398705482483 + }, + "isolatedSum": { + "p50": 253.4720003604889, + "p90": 287.58399188518524, + "p95": 298.97599667310715, + "p99": 314.5280033349991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 189.37599658966064, + "p90": 233.75999927520752, + "p95": 244.28799748420715, + "p99": 282.1759879589081 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 76.89599692821503, + "p95": 84.63999629020691, + "p99": 91.42400324344635 + }, + "roundtrip": { + "p50": 246.5279996395111, + "p90": 284.0319871902466, + "p95": 293.5360074043274, + "p99": 328.575998544693 + }, + "isolatedSum": { + "p50": 260.0959986448288, + "p90": 310.65599620342255, + "p95": 328.92799377441406, + "p99": 373.59999120235443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 186.0159933567047, + "p90": 220.96000611782074, + "p95": 228.28799486160278, + "p99": 249.4720071554184 + }, + "combine": { + "p50": 74.72000271081924, + "p90": 81.37600123882294, + "p95": 87.16800063848495, + "p99": 92.00000017881393 + }, + "roundtrip": { + "p50": 251.52000784873962, + "p90": 280.35199642181396, + "p95": 287.1359884738922, + "p99": 299.5840013027191 + }, + "isolatedSum": { + "p50": 260.73599606752396, + "p90": 302.3360073566437, + "p95": 315.45599550008774, + "p99": 341.47200733423233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 188.73600661754608, + "p90": 231.48800432682037, + "p95": 255.840003490448, + "p99": 274.6239900588989 + }, + "combine": { + "p50": 76.35200023651123, + "p90": 82.87999778985977, + "p95": 91.77599847316742, + "p99": 99.80800002813339 + }, + "roundtrip": { + 
"p50": 251.8399953842163, + "p90": 282.6560139656067, + "p95": 286.8799865245819, + "p99": 314.39998745918274 + }, + "isolatedSum": { + "p50": 265.0880068540573, + "p90": 314.36800211668015, + "p95": 347.6160019636154, + "p99": 374.4319900870323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 186.88000738620758, + "p90": 226.81599855422974, + "p95": 234.27200317382812, + "p99": 272.0319926738739 + }, + "combine": { + "p50": 77.34400033950806, + "p90": 82.07999914884567, + "p95": 88.35200220346451, + "p99": 94.4959968328476 + }, + "roundtrip": { + "p50": 254.27201390266418, + "p90": 282.8480005264282, + "p95": 297.91998863220215, + "p99": 327.7440071105957 + }, + "isolatedSum": { + "p50": 264.22400772571564, + "p90": 308.8959977030754, + "p95": 322.62400537729263, + "p99": 366.5279895067215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 188.4479969739914, + "p90": 239.1359955072403, + "p95": 266.4960026741028, + "p99": 298.94399642944336 + }, + "combine": { + "p50": 82.97599852085114, + "p90": 94.24000233411789, + "p95": 97.28000313043594, + "p99": 104.5759990811348 + }, + "roundtrip": { + "p50": 262.56000995635986, + "p90": 309.34399366378784, + "p95": 328.2879889011383, + "p99": 351.936012506485 + }, + "isolatedSum": { + "p50": 271.42399549484253, + "p90": 333.3759978413582, + "p95": 363.7760058045387, + "p99": 403.51999551057816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 
5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 197.1520036458969, + "p90": 233.11999440193176, + "p95": 238.62400650978088, + "p99": 265.9519910812378 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 103.71199995279312, + "p95": 109.79200154542923, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 279.29601073265076, + "p90": 309.1840147972107, + "p95": 319.2639946937561, + "p99": 336.70398592948914 + }, + "isolatedSum": { + "p50": 294.40000653266907, + "p90": 336.8319943547249, + "p95": 348.4160080552101, + "p99": 381.21598958969116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 218.30399334430695, + "p90": 242.88000166416168, + "p95": 249.08800423145294, + "p99": 258.62398743629456 + }, + "combine": { + "p50": 118.9119964838028, + "p90": 123.64800274372101, + "p95": 129.02399897575378, + "p99": 135.77599823474884 + }, + "roundtrip": { + "p50": 326.81599259376526, + "p90": 354.91201281547546, + "p95": 364.47998881340027, + "p99": 389.3440067768097 + }, + "isolatedSum": { + "p50": 337.21598982810974, + "p90": 366.5280044078827, + "p95": 378.1120032072067, + "p99": 394.3999856710434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-73734d6c", + "identity": "b200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b200_216f4a3f", + "comparisonKey": 
"9df9cbdcbeb5bd05", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:24:01.269540+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 180.4800033569336, + "p90": 212.44800090789795, + "p95": 215.36000072956085, + "p99": 227.9359996318817 + }, 
+ "combine": { + "p50": 67.00800359249115, + "p90": 70.78400254249573, + "p95": 75.58400183916092, + "p99": 83.67999643087387 + }, + "roundtrip": { + "p50": 239.58399891853333, + "p90": 267.96799898147583, + "p95": 273.0239927768707, + "p99": 290.20801186561584 + }, + "isolatedSum": { + "p50": 247.48800694942474, + "p90": 283.2320034503937, + "p95": 290.94400256872177, + "p99": 311.6159960627556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 180.2240014076233, + "p90": 212.6079946756363, + "p95": 217.631995677948, + "p99": 226.6560047864914 + }, + "combine": { + "p50": 69.47200000286102, + "p90": 76.92799717187881, + "p95": 83.13599973917007, + "p99": 88.0960002541542 + }, + "roundtrip": { + "p50": 240.83200097084045, + "p90": 273.9520072937012, + "p95": 285.504013299942, + "p99": 321.21598720550537 + }, + "isolatedSum": { + "p50": 249.6960014104843, + "p90": 289.5359918475151, + "p95": 300.7679954171181, + "p99": 314.7520050406456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 181.18399381637573, + "p90": 213.3760005235672, + "p95": 219.80799734592438, + "p99": 232.92799293994904 + }, + "combine": { + "p50": 74.07999783754349, + "p90": 78.33600044250488, + "p95": 84.35200154781342, + "p99": 91.90399944782257 + }, + "roundtrip": { + "p50": 244.22399699687958, + "p90": 275.7759988307953, + "p95": 280.89600801467896, + "p99": 289.66400027275085 + }, + "isolatedSum": { + "p50": 255.26399165391922, + "p90": 291.7120009660721, + "p95": 304.1599988937378, + "p99": 
324.8319923877716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 180.4800033569336, + "p90": 216.5759950876236, + "p95": 222.1439927816391, + "p99": 233.63199830055237 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 84.35200154781342, + "p95": 90.65599739551544, + "p99": 98.94400089979172 + }, + "roundtrip": { + "p50": 246.94399535655975, + "p90": 277.3759961128235, + "p95": 283.1040024757385, + "p99": 296.57599329948425 + }, + "isolatedSum": { + "p50": 256.51200115680695, + "p90": 300.927996635437, + "p95": 312.79999017715454, + "p99": 332.5759992003441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 181.92000687122345, + "p90": 220.5120027065277, + "p95": 229.95199263095856, + "p99": 269.6639895439148 + }, + "combine": { + "p50": 77.40800082683563, + "p90": 81.34400099515915, + "p95": 90.30400216579437, + "p99": 95.83999961614609 + }, + "roundtrip": { + "p50": 263.0400061607361, + "p90": 308.4160089492798, + "p95": 323.743999004364, + "p99": 378.9440095424652 + }, + "isolatedSum": { + "p50": 259.3280076980591, + "p90": 301.85600370168686, + "p95": 320.25599479675293, + "p99": 365.5039891600609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 182.52800405025482, + "p90": 
217.56799519062042, + "p95": 225.43999552726746, + "p99": 268.48000288009644 + }, + "combine": { + "p50": 82.46400207281113, + "p90": 90.81599861383438, + "p95": 97.120001912117, + "p99": 104.73600029945374 + }, + "roundtrip": { + "p50": 252.57599353790283, + "p90": 280.5759906768799, + "p95": 285.18399596214294, + "p99": 302.0159900188446 + }, + "isolatedSum": { + "p50": 264.99200612306595, + "p90": 308.3839938044548, + "p95": 322.55999743938446, + "p99": 373.21600317955017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 193.9840018749237, + "p90": 232.96000063419342, + "p95": 252.25600600242615, + "p99": 281.0559868812561 + }, + "combine": { + "p50": 101.05600208044052, + "p90": 111.93600296974182, + "p95": 118.30399930477142, + "p99": 124.51200187206268 + }, + "roundtrip": { + "p50": 278.11199426651, + "p90": 316.1599934101105, + "p95": 335.42400598526, + "p99": 363.5520040988922 + }, + "isolatedSum": { + "p50": 295.0400039553642, + "p90": 344.89600360393524, + "p95": 370.56000530719757, + "p99": 405.5679887533188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 216.25599265098572, + "p90": 239.29600417613983, + "p95": 244.1280037164688, + "p99": 259.13599133491516 + }, + "combine": { + "p50": 118.56000125408173, + "p90": 124.64000284671783, + "p95": 128.51199507713318, + "p99": 135.55200397968292 + }, + "roundtrip": { + "p50": 325.72799921035767, + "p90": 354.52800989151, + "p95": 364.9919927120209, + "p99": 399.9679982662201 + }, + 
"isolatedSum": { + "p50": 334.81599390506744, + "p90": 363.93600702285767, + "p95": 372.639998793602, + "p99": 394.6879953145981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-90bacc01", + "identity": "b200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_e415a083", + "comparisonKey": "4d4d1ccda55690ec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:22:02.959709+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.44799900054932, + "p90": 74.17599856853485, + "p95": 82.62400329113007, + "p99": 90.94399958848953 + }, + "combine": { + "p50": 69.023996591568, + "p90": 78.49600166082382, + "p95": 83.99999886751175, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 152.51199901103973, + "p90": 189.7599995136261, + "p95": 212.70400285720825, + "p99": 231.77599906921387 + }, + "isolatedSum": { + "p50": 133.4719955921173, + "p90": 152.67200022935867, + "p95": 166.62400215864182, + "p99": 180.41600286960602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 64.7680014371872, + "p90": 73.15199822187424, + "p95": 81.216000020504, + "p99": 89.05600011348724 + }, + "combine": { + "p50": 70.75200229883194, + "p90": 77.2159993648529, + "p95": 84.1279998421669, + "p99": 88.95999938249588 + }, + "roundtrip": { + "p50": 151.71200037002563, + "p90": 178.17600071430206, + "p95": 184.51200425624847, + "p99": 207.74400234222412 + }, + "isolatedSum": { + "p50": 135.52000373601913, + "p90": 150.36799758672714, + "p95": 165.3439998626709, + "p99": 178.01599949598312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 67.00800359249115, + "p90": 73.53600114583969, + "p95": 78.59200239181519, + "p99": 85.79199761152267 + }, + "combine": { + "p50": 74.78400319814682, + "p90": 78.59200239181519, + "p95": 83.74399691820145, + "p99": 92.28800237178802 + }, + "roundtrip": { + "p50": 156.0640037059784, + "p90": 179.1359931230545, + "p95": 185.18400192260742, + "p99": 202.36800611019135 + }, + "isolatedSum": { + "p50": 141.79200679063797, + "p90": 152.12800353765488, + "p95": 162.33599931001663, + "p99": 178.0799999833107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.6800012588501, + "p90": 74.23999905586243, + "p95": 80.32000064849854, + "p99": 89.56799656152725 + }, + "combine": { + "p50": 76.25599950551987, + "p90": 80.60800284147263, + "p95": 86.20800077915192, + "p99": 93.37600320577621 + }, + "roundtrip": { + "p50": 156.8640023469925, + "p90": 182.559996843338, + "p95": 189.02400135993958, + "p99": 221.6320037841797 + }, + "isolatedSum": { + "p50": 143.93600076436996, + "p90": 154.84800189733505, + "p95": 166.52800142765045, + "p99": 182.94399976730347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 68.7360018491745, + "p90": 76.86399668455124, + "p95": 81.34400099515915, + "p99": 89.37600255012512 + }, + "combine": { + "p50": 78.23999971151352, + "p90": 84.16000008583069, + "p95": 87.77599781751633, + "p99": 95.83999961614609 + 
}, + "roundtrip": { + "p50": 160.44799983501434, + "p90": 185.37600338459015, + "p95": 194.46399807929993, + "p99": 209.60000157356262 + }, + "isolatedSum": { + "p50": 146.97600156068802, + "p90": 161.02399677038193, + "p95": 169.11999881267548, + "p99": 185.2160021662712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.3919997215271, + "p90": 79.3600007891655, + "p95": 84.60800349712372, + "p99": 93.05600076913834 + }, + "combine": { + "p50": 82.65600353479385, + "p90": 87.5839963555336, + "p95": 92.16000139713287, + "p99": 99.80800002813339 + }, + "roundtrip": { + "p50": 164.35199975967407, + "p90": 190.3039962053299, + "p95": 196.25599682331085, + "p99": 216.2880003452301 + }, + "isolatedSum": { + "p50": 154.04800325632095, + "p90": 166.9439971446991, + "p95": 176.7680048942566, + "p99": 192.86400079727173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.1599994301796, + "p90": 86.01599931716919, + "p95": 95.77599912881851, + "p99": 102.04800218343735 + }, + "combine": { + "p50": 97.63199836015701, + "p90": 104.38399761915207, + "p95": 109.02400314807892, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 182.6239973306656, + "p90": 204.76800203323364, + "p95": 210.40000021457672, + "p99": 231.58399760723114 + }, + "isolatedSum": { + "p50": 177.7919977903366, + "p90": 190.39999693632126, + "p95": 204.80000227689743, + "p99": 217.31200069189072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 
38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.26400202512741, + "p90": 98.2080027461052, + "p95": 103.93600165843964, + "p99": 114.59200084209442 + }, + "combine": { + "p50": 120.64000219106674, + "p90": 132.64000415802002, + "p95": 138.91200721263885, + "p99": 146.464005112648 + }, + "roundtrip": { + "p50": 229.40799593925476, + "p90": 241.37599766254425, + "p95": 246.59200012683868, + "p99": 262.9440128803253 + }, + "isolatedSum": { + "p50": 211.90400421619415, + "p90": 230.8480069041252, + "p95": 242.8480088710785, + "p99": 261.05600595474243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-71778916", + "identity": "b200|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b200_790e9497", + "comparisonKey": "fd3b9f6197d4f6c0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:26:22.111213+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.23999971151352, + "p90": 90.17600119113922, + "p95": 97.85600006580353, + "p99": 111.16799712181091 + }, + "combine": { + "p50": 66.39999896287918, + "p90": 75.83999633789062, + "p95": 79.48800176382065, + "p99": 86.40000224113464 + }, + "roundtrip": { + "p50": 121.11999839544296, + "p90": 136.9280070066452, + "p95": 140.54399728775024, + "p99": 150.2079963684082 + }, + "isolatedSum": { + "p50": 144.6399986743927, + "p90": 166.01599752902985, + "p95": 177.34400182962418, + "p99": 197.56799936294556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.66400277614594, + "p90": 96.41599655151367, + "p95": 102.7199998497963, + 
"p99": 123.32800030708313 + }, + "combine": { + "p50": 67.23199784755707, + "p90": 77.60000228881836, + "p95": 81.34400099515915, + "p99": 91.51999652385712 + }, + "roundtrip": { + "p50": 129.18399274349213, + "p90": 138.3039951324463, + "p95": 145.85599303245544, + "p99": 152.99199521541595 + }, + "isolatedSum": { + "p50": 144.896000623703, + "p90": 174.01599884033203, + "p95": 184.06400084495544, + "p99": 214.84799683094025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.92799717187881, + "p90": 89.15200084447861, + "p95": 98.27200323343277, + "p99": 103.04000228643417 + }, + "combine": { + "p50": 68.86400282382965, + "p90": 78.65600287914276, + "p95": 82.5280025601387, + "p99": 92.25600212812424 + }, + "roundtrip": { + "p50": 129.7599971294403, + "p90": 146.7519998550415, + "p95": 150.751993060112, + "p99": 164.63999450206757 + }, + "isolatedSum": { + "p50": 145.79199999570847, + "p90": 167.80800372362137, + "p95": 180.80000579357147, + "p99": 195.2960044145584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.11199873685837, + "p90": 87.55200356245041, + "p95": 93.88799965381622, + "p99": 100.76799988746643 + }, + "combine": { + "p50": 77.11999863386154, + "p90": 79.39200103282928, + "p95": 83.20000022649765, + "p99": 92.51199662685394 + }, + "roundtrip": { + "p50": 129.34400141239166, + "p90": 143.19999516010284, + "p95": 148.95999431610107, + "p99": 156.67200088500977 + }, + "isolatedSum": { + "p50": 155.2319973707199, + "p90": 166.9440045952797, + 
"p95": 177.08799988031387, + "p99": 193.27999651432037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.5600021481514, + "p90": 88.44800293445587, + "p95": 96.79999947547913, + "p99": 104.25599664449692 + }, + "combine": { + "p50": 77.82399654388428, + "p90": 81.4720019698143, + "p95": 83.71199667453766, + "p99": 94.68799829483032 + }, + "roundtrip": { + "p50": 132.35199451446533, + "p90": 145.82400023937225, + "p95": 151.2320041656494, + "p99": 162.7199947834015 + }, + "isolatedSum": { + "p50": 156.38399869203568, + "p90": 169.92000490427017, + "p95": 180.51199615001678, + "p99": 198.94399493932724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.92799913883209, + "p90": 95.58399766683578, + "p95": 100.12800246477127, + "p99": 116.48000031709671 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 81.63200318813324, + "p95": 88.92799913883209, + "p99": 94.52799707651138 + }, + "roundtrip": { + "p50": 141.6960060596466, + "p90": 152.319997549057, + "p95": 158.720001578331, + "p99": 169.3439930677414 + }, + "isolatedSum": { + "p50": 167.03999787569046, + "p90": 177.21600085496902, + "p95": 189.05600160360336, + "p99": 211.0079973936081 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + 
"p50": 91.45600348711014, + "p90": 108.8000014424324, + "p95": 127.00800597667694, + "p99": 137.08800077438354 + }, + "combine": { + "p50": 90.91199934482574, + "p90": 100.3199964761734, + "p95": 105.24799674749374, + "p99": 113.95200341939926 + }, + "roundtrip": { + "p50": 158.49600732326508, + "p90": 173.21600019931793, + "p95": 183.77600610256195, + "p99": 209.1519981622696 + }, + "isolatedSum": { + "p50": 182.36800283193588, + "p90": 209.1199979186058, + "p95": 232.25600272417068, + "p99": 251.0400041937828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.33599692583084, + "p90": 114.14399743080139, + "p95": 122.01599776744843, + "p99": 128.4479945898056 + }, + "combine": { + "p50": 104.44799810647964, + "p90": 117.53600090742111, + "p95": 118.9119964838028, + "p99": 127.29600071907043 + }, + "roundtrip": { + "p50": 190.46400487422943, + "p90": 198.0160027742386, + "p95": 201.63199305534363, + "p99": 208.19200575351715 + }, + "isolatedSum": { + "p50": 206.7839950323105, + "p90": 231.6799983382225, + "p95": 240.92799425125122, + "p99": 255.74399530887604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-38d6d30b", + "identity": "b200|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b200_790e9497", + "comparisonKey": "99c9eb3331abf3e7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:28:51.288387+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + 
"sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 84.44800227880478, + "p90": 90.36800265312195, + "p95": 96.3520035147667, + "p99": 104.70400005578995 + }, + "combine": { + "p50": 67.45599955320358, + "p90": 75.96799731254578, + "p95": 79.71200346946716, + "p99": 84.73599702119827 + }, + "roundtrip": { + "p50": 127.58399546146393, 
+ "p90": 137.63199746608734, + "p95": 144.76799964904785, + "p99": 153.6960005760193 + }, + "isolatedSum": { + "p50": 151.90400183200836, + "p90": 166.33599996566772, + "p95": 176.06400698423386, + "p99": 189.43999707698822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.05599880218506, + "p90": 92.28800237178802, + "p95": 96.28800302743912, + "p99": 103.29599678516388 + }, + "combine": { + "p50": 75.19999891519547, + "p90": 78.36800068616867, + "p95": 79.83999699354172, + "p99": 82.62400329113007 + }, + "roundtrip": { + "p50": 130.65600395202637, + "p90": 146.464005112648, + "p95": 150.07999539375305, + "p99": 161.69600188732147 + }, + "isolatedSum": { + "p50": 156.25599771738052, + "p90": 170.6560030579567, + "p95": 176.12800002098083, + "p99": 185.92000007629395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.33600044250488, + "p90": 87.2960016131401, + "p95": 91.90399944782257, + "p99": 98.4639972448349 + }, + "combine": { + "p50": 77.82399654388428, + "p90": 83.23200047016144, + "p95": 86.14400029182434, + "p99": 94.94400024414062 + }, + "roundtrip": { + "p50": 129.92000579833984, + "p90": 140.86399972438812, + "p95": 145.85599303245544, + "p99": 156.63999319076538 + }, + "isolatedSum": { + "p50": 156.15999698638916, + "p90": 170.52800208330154, + "p95": 178.0479997396469, + "p99": 193.40799748897552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.39200103282928, + "p90": 88.70399743318558, + "p95": 92.28800237178802, + "p99": 101.43999755382538 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 82.87999778985977, + "p95": 87.20000088214874, + "p99": 94.40000355243683 + }, + "roundtrip": { + "p50": 136.31999492645264, + "p90": 147.5840061903, + "p95": 152.76800096035004, + "p99": 160.3199988603592 + }, + "isolatedSum": { + "p50": 157.9200029373169, + "p90": 171.58399522304535, + "p95": 179.48800325393677, + "p99": 195.8400011062622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.83999699354172, + "p90": 89.53599631786346, + "p95": 93.75999867916107, + "p99": 104.70400005578995 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 82.68799632787704, + "p95": 90.30400216579437, + "p99": 94.65599805116653 + }, + "roundtrip": { + "p50": 138.36799561977386, + "p90": 153.79199385643005, + "p95": 158.9760035276413, + "p99": 172.2240000963211 + }, + "isolatedSum": { + "p50": 158.36799889802933, + "p90": 172.2239926457405, + "p95": 184.06400084495544, + "p99": 199.35999810695648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 93.66399794816971, + "p90": 97.79199957847595, + "p95": 100.38399696350098, + "p99": 108.83200168609619 + }, + "combine": { + "p50": 80.64000308513641, + "p90": 90.17600119113922, + "p95": 91.48799628019333, + 
"p99": 95.71199864149094 + }, + "roundtrip": { + "p50": 142.20799505710602, + "p90": 151.2320041656494, + "p95": 155.61600029468536, + "p99": 162.27200627326965 + }, + "isolatedSum": { + "p50": 174.30400103330612, + "p90": 187.96800076961517, + "p95": 191.8719932436943, + "p99": 204.54400032758713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.07200121879578, + "p90": 107.07200318574905, + "p95": 112.44799941778183, + "p99": 119.45600062608719 + }, + "combine": { + "p50": 94.97600048780441, + "p90": 103.71199995279312, + "p95": 106.97600245475769, + "p99": 115.00799655914307 + }, + "roundtrip": { + "p50": 167.1680063009262, + "p90": 175.80799758434296, + "p95": 180.16000092029572, + "p99": 194.20799612998962 + }, + "isolatedSum": { + "p50": 190.0480017066002, + "p90": 210.78400313854218, + "p95": 219.42400187253952, + "p99": 234.46399718523026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.17599701881409, + "p90": 119.19999867677689, + "p95": 128.35200130939484, + "p99": 138.14400136470795 + }, + "combine": { + "p50": 115.64800143241882, + "p90": 123.74400347471237, + "p95": 127.48800218105316, + "p99": 141.56800508499146 + }, + "roundtrip": { + "p50": 200.76799392700195, + "p90": 210.04800498485565, + "p95": 213.05599808692932, + "p99": 220.5120027065277 + }, + "isolatedSum": { + "p50": 225.8239984512329, + "p90": 242.94400215148926, + "p95": 255.840003490448, + "p99": 279.7120064496994 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bba0474e", + "identity": "b200|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_790e9497", + "comparisonKey": "099906222c908a21", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:31:21.801975+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 80.89599758386612, + "p90": 87.99999952316284, + "p95": 90.08000046014786, + "p99": 93.98400038480759 + }, + "combine": { + "p50": 76.06399804353714, + "p90": 78.78399640321732, + "p95": 80.35200089216232, + "p99": 88.51200342178345 + }, + "roundtrip": { + "p50": 125.2799928188324, + "p90": 133.5040032863617, + "p95": 137.60000467300415, + "p99": 141.37600362300873 + }, + "isolatedSum": { + "p50": 156.95999562740326, + "p90": 166.78399592638016, + "p95": 170.43200135231018, + "p99": 182.49600380659103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 83.83999764919281, + "p90": 97.4079966545105, + "p95": 100.63999891281128, + "p99": 122.20799922943115 + }, + "combine": { + "p50": 77.72800326347351, + "p90": 81.56800270080566, + "p95": 83.10399949550629, + "p99": 91.26400202512741 + }, + "roundtrip": { + "p50": 127.71199643611908, + "p90": 143.8400000333786, + "p95": 151.45599842071533, + "p99": 159.93599593639374 + }, + "isolatedSum": { + "p50": 161.56800091266632, + "p90": 178.97599935531616, + "p95": 183.74399840831757, + "p99": 213.47200125455856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.99199765920639, + "p90": 
83.99999886751175, + "p95": 88.99199962615967, + "p99": 91.48799628019333 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 81.11999928951263, + "p95": 82.30400085449219, + "p99": 90.4960036277771 + }, + "roundtrip": { + "p50": 137.08800077438354, + "p90": 143.96800100803375, + "p95": 146.97599411010742, + "p99": 152.6080071926117 + }, + "isolatedSum": { + "p50": 155.10399639606476, + "p90": 165.11999815702438, + "p95": 171.29600048065186, + "p99": 181.98399990797043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.11199873685837, + "p90": 88.35200220346451, + "p95": 93.47199648618698, + "p99": 104.12800312042236 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 88.99199962615967, + "p95": 90.52799642086029, + "p99": 100.22400319576263 + }, + "roundtrip": { + "p50": 141.9840008020401, + "p90": 147.5840061903, + "p95": 149.6960073709488, + "p99": 158.4639996290207 + }, + "isolatedSum": { + "p50": 157.69600123167038, + "p90": 177.34400182962418, + "p95": 183.99999290704727, + "p99": 204.352006316185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.25600016117096, + "p90": 92.8959995508194, + "p95": 97.95200079679489, + "p99": 102.68799960613251 + }, + "combine": { + "p50": 81.63200318813324, + "p90": 90.36800265312195, + "p95": 93.18400174379349, + "p99": 99.67999905347824 + }, + "roundtrip": { + "p50": 141.34399592876434, + "p90": 150.36800503730774, + "p95": 153.43999862670898, + "p99": 161.47199273109436 + }, + "isolatedSum": { + 
"p50": 161.8880033493042, + "p90": 183.26400220394135, + "p95": 191.13600254058838, + "p99": 202.36799865961075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.29600292444229, + "p90": 99.20000284910202, + "p95": 100.73599964380264, + "p99": 105.27999699115753 + }, + "combine": { + "p50": 89.21600133180618, + "p90": 91.71199798583984, + "p95": 94.30400282144547, + "p99": 101.6319990158081 + }, + "roundtrip": { + "p50": 152.319997549057, + "p90": 157.24800527095795, + "p95": 161.53599321842194, + "p99": 174.72000420093536 + }, + "isolatedSum": { + "p50": 184.51200425624847, + "p90": 190.91200083494186, + "p95": 195.0400024652481, + "p99": 206.91199600696564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.8880016207695, + "p90": 158.49600732326508, + "p95": 166.143998503685, + "p99": 192.9599940776825 + }, + "combine": { + "p50": 102.59199887514114, + "p90": 121.40800058841705, + "p95": 128.7360042333603, + "p99": 137.69599795341492 + }, + "roundtrip": { + "p50": 173.6000031232834, + "p90": 185.2799952030182, + "p95": 189.4720047712326, + "p99": 224.83199834823608 + }, + "isolatedSum": { + "p50": 208.48000049591064, + "p90": 279.90400791168213, + "p95": 294.8800027370453, + "p99": 330.6559920310974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.7440015077591, + "p90": 118.52800101041794, + "p95": 120.41600048542023, + "p99": 124.54400211572647 + }, + "combine": { + "p50": 117.37599968910217, + "p90": 125.50400197505951, + "p95": 127.32799351215363, + "p99": 131.00799918174744 + }, + "roundtrip": { + "p50": 201.9519954919815, + "p90": 208.15999805927277, + "p95": 211.10400557518005, + "p99": 217.47200191020966 + }, + "isolatedSum": { + "p50": 229.12000119686127, + "p90": 244.03200298547745, + "p95": 247.74399399757385, + "p99": 255.5520012974739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-53c82f51", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_d95ab2b3", + "comparisonKey": "725807966742f594", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:15:06.288125+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.87999713420868, + "p90": 86.65599673986435, + "p95": 89.47200328111649, + "p99": 97.82399982213974 + }, + "combine": { + "p50": 77.91999727487564, + "p90": 80.79999685287476, + "p95": 81.7599967122078, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 131.77600502967834, + "p90": 138.43199610710144, + "p95": 143.39199662208557, + "p99": 146.91199362277985 + }, + "isolatedSum": { + "p50": 156.79999440908432, + "p90": 167.4559935927391, + "p95": 171.23199999332428, + "p99": 187.77599930763245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.7040029168129, + "p90": 85.05599945783615, + "p95": 88.79999816417694, + "p99": 92.96000003814697 + }, + "combine": { + "p50": 78.94399762153625, + "p90": 84.99199897050858, + "p95": 87.87199854850769, + "p99": 
92.06400066614151 + }, + "roundtrip": { + "p50": 139.0399932861328, + "p90": 147.8399932384491, + "p95": 152.96000242233276, + "p99": 162.52799332141876 + }, + "isolatedSum": { + "p50": 155.64800053834915, + "p90": 170.04799842834473, + "p95": 176.67199671268463, + "p99": 185.02400070428848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.80000364780426, + "p90": 85.37600189447403, + "p95": 90.08000046014786, + "p99": 98.24000298976898 + }, + "combine": { + "p50": 79.13599908351898, + "p90": 86.84799820184708, + "p95": 88.79999816417694, + "p99": 96.6079980134964 + }, + "roundtrip": { + "p50": 141.05600118637085, + "p90": 150.11200308799744, + "p95": 155.87200224399567, + "p99": 172.89599776268005 + }, + "isolatedSum": { + "p50": 155.93600273132324, + "p90": 172.2240000963211, + "p95": 178.8799986243248, + "p99": 194.84800100326538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.20799946784973, + "p90": 84.41600203514099, + "p95": 89.37600255012512, + "p99": 100.63999891281128 + }, + "combine": { + "p50": 87.36000210046768, + "p90": 90.4960036277771, + "p95": 93.31200271844864, + "p99": 99.7759997844696 + }, + "roundtrip": { + "p50": 138.14400136470795, + "p90": 147.67999947071075, + "p95": 153.56799960136414, + "p99": 162.6880019903183 + }, + "isolatedSum": { + "p50": 165.5680015683174, + "p90": 174.9120056629181, + "p95": 182.68800526857376, + "p99": 200.41599869728088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + 
"combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.57600259780884, + "p90": 90.55999666452408, + "p95": 95.58399766683578, + "p99": 102.88000106811523 + }, + "combine": { + "p50": 89.63199704885483, + "p90": 94.84799951314926, + "p95": 97.02400118112564, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 146.17599546909332, + "p90": 155.7759940624237, + "p95": 160.47999262809753, + "p99": 170.30400037765503 + }, + "isolatedSum": { + "p50": 170.20799964666367, + "p90": 185.40799617767334, + "p95": 192.60799884796143, + "p99": 206.33599907159805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.94400024414062, + "p90": 99.96800124645233, + "p95": 104.60799932479858, + "p99": 113.82400244474411 + }, + "combine": { + "p50": 90.84799885749817, + "p90": 99.23200309276581, + "p95": 101.53599828481674, + "p99": 106.97600245475769 + }, + "roundtrip": { + "p50": 151.87199413776398, + "p90": 161.5999937057495, + "p95": 164.2879992723465, + "p99": 173.98400604724884 + }, + "isolatedSum": { + "p50": 185.7919991016388, + "p90": 199.20000433921814, + "p95": 206.14399760961533, + "p99": 220.8000048995018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.01600325107574, + "p90": 137.7599984407425, + "p95": 139.64800536632538, + "p99": 147.0080018043518 + }, + 
"combine": { + "p50": 106.27199709415436, + "p90": 130.3039938211441, + "p95": 136.35200262069702, + "p99": 141.12000167369843 + }, + "roundtrip": { + "p50": 181.536003947258, + "p90": 212.351992726326, + "p95": 215.7440036535263, + "p99": 225.8239984512329 + }, + "isolatedSum": { + "p50": 216.2880003452301, + "p90": 268.0639922618866, + "p95": 276.0000079870224, + "p99": 288.12800347805023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.80800330638885, + "p90": 126.91199779510498, + "p95": 130.5599957704544, + "p99": 143.00799369812012 + }, + "combine": { + "p50": 127.16799974441528, + "p90": 130.78400492668152, + "p95": 132.28799402713776, + "p99": 140.09599387645721 + }, + "roundtrip": { + "p50": 216.0000056028366, + "p90": 225.600004196167, + "p95": 228.7680059671402, + "p99": 233.69599878787994 + }, + "isolatedSum": { + "p50": 246.97600305080414, + "p90": 257.6960027217865, + "p95": 262.84798979759216, + "p99": 283.10398757457733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c99c0779", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_790e9497", + "comparisonKey": "9d2ea5938ebe18c7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:17:31.978039+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.68000322580338, + "p90": 88.639996945858, + "p95": 93.63199770450592, + "p99": 102.52799838781357 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 83.71199667453766, + "p95": 90.55999666452408, + "p99": 95.51999717950821 + }, + "roundtrip": { + "p50": 132.28799402713776, + "p90": 144.0960019826889, + "p95": 151.07199549674988, + "p99": 161.18399798870087 + }, + "isolatedSum": { + "p50": 
158.27200561761856, + "p90": 172.35199362039566, + "p95": 184.19199436903, + "p99": 198.04799556732178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.14399898052216, + "p90": 86.07999980449677, + "p95": 91.45600348711014, + "p99": 98.7199991941452 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 87.71199733018875, + "p95": 90.52799642086029, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 140.28799533843994, + "p90": 150.14399588108063, + "p95": 154.23999726772308, + "p99": 165.79200327396393 + }, + "isolatedSum": { + "p50": 157.6320007443428, + "p90": 173.79199713468552, + "p95": 181.98399990797043, + "p99": 195.42399793863297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.20799946784973, + "p90": 86.17600053548813, + "p95": 92.38400310277939, + "p99": 98.91200065612793 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 89.75999802350998, + "p95": 91.71199798583984, + "p99": 95.2640026807785 + }, + "roundtrip": { + "p50": 143.23200285434723, + "p90": 149.24800395965576, + "p95": 153.79199385643005, + "p99": 162.56000101566315 + }, + "isolatedSum": { + "p50": 159.29599851369858, + "p90": 175.9359985589981, + "p95": 184.09600108861923, + "p99": 194.17600333690643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 79.23199981451035, + "p90": 88.19200098514557, + "p95": 93.05600076913834, + "p99": 108.70400071144104 + }, + "combine": { + "p50": 88.28800171613693, + "p90": 91.20000153779984, + "p95": 94.2080020904541, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 141.05600118637085, + "p90": 157.8879952430725, + "p95": 162.1759980916977, + "p99": 172.4800020456314 + }, + "isolatedSum": { + "p50": 167.52000153064728, + "p90": 179.3920025229454, + "p95": 187.26400285959244, + "p99": 214.07999843358994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.31200075149536, + "p90": 90.62399715185165, + "p95": 95.87199985980988, + "p99": 105.05600273609161 + }, + "combine": { + "p50": 90.2400016784668, + "p90": 95.29600292444229, + "p95": 98.84800016880035, + "p99": 107.90400207042694 + }, + "roundtrip": { + "p50": 147.39200472831726, + "p90": 165.12000560760498, + "p95": 169.79199647903442, + "p99": 186.36800348758698 + }, + "isolatedSum": { + "p50": 171.55200242996216, + "p90": 185.92000007629395, + "p95": 194.72000002861023, + "p99": 212.96000480651855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.51999717950821, + "p90": 100.44799745082855, + "p95": 103.58399897813797, + "p99": 115.7120019197464 + }, + "combine": { + "p50": 91.07200056314468, + "p90": 95.77599912881851, + "p95": 100.8640006184578, + "p99": 106.27199709415436 + }, + "roundtrip": { + "p50": 154.11199629306793, + "p90": 165.53600132465363, + 
"p95": 171.00800573825836, + "p99": 180.09600043296814 + }, + "isolatedSum": { + "p50": 186.5919977426529, + "p90": 196.22399657964706, + "p95": 204.44799959659576, + "p99": 221.98399901390076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 109.40799862146378, + "p90": 134.91199910640717, + "p95": 139.16799426078796, + "p99": 147.20000326633453 + }, + "combine": { + "p50": 104.2879968881607, + "p90": 113.27999830245972, + "p95": 116.41599982976913, + "p99": 124.86399710178375 + }, + "roundtrip": { + "p50": 180.92800676822662, + "p90": 187.74400651454926, + "p95": 190.08000195026398, + "p99": 198.7520009279251 + }, + "isolatedSum": { + "p50": 213.69599550962448, + "p90": 248.19199740886688, + "p95": 255.5839940905571, + "p99": 272.0640003681183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.35199999809265, + "p90": 127.36000120639801, + "p95": 132.35199451446533, + "p99": 144.83200013637543 + }, + "combine": { + "p50": 127.71199643611908, + "p90": 131.04000687599182, + "p95": 132.7359974384308, + "p99": 142.84799993038177 + }, + "roundtrip": { + "p50": 218.75199675559998, + "p90": 227.48799622058868, + "p95": 230.17600178718567, + "p99": 245.40799856185913 + }, + "isolatedSum": { + "p50": 248.06399643421173, + "p90": 258.40000808238983, + "p95": 265.0879919528961, + "p99": 287.6800000667572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 
723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d13aa92c", + "identity": "b200|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b200_790e9497", + "comparisonKey": "361ef1f1dbb7aaa6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:23:52.674259+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.727996468544, + "p90": 85.69599688053131, + "p95": 88.16000074148178, + "p99": 94.94400024414062 + }, + "combine": { + "p50": 77.91999727487564, + "p90": 79.8719972372055, + "p95": 82.14399963617325, + "p99": 88.51200342178345 + }, + "roundtrip": { + "p50": 126.17599964141846, + "p90": 135.00800728797913, + "p95": 138.14400136470795, + "p99": 144.0960019826889 + }, + "isolatedSum": { + "p50": 159.64799374341965, + "p90": 165.56799411773682, + "p95": 170.30400037765503, + "p99": 183.45600366592407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 86.75199747085571, + "p90": 97.9200005531311, + "p95": 103.39199751615524, + "p99": 120.31999975442886 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 83.99999886751175, + "p95": 86.59200370311737, + "p99": 91.67999774217606 + }, + "roundtrip": { + "p50": 137.85600662231445, + "p90": 143.48800480365753, + "p95": 146.43199741840363, + "p99": 152.319997549057 + }, + "isolatedSum": { + "p50": 165.50400108098984, + "p90": 181.91999942064285, + "p95": 189.9840012192726, + "p99": 211.99999749660492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.80000364780426, + "p90": 85.15200018882751, + "p95": 88.70399743318558, + "p99": 91.90399944782257 + }, + "combine": { + 
"p50": 78.72000336647034, + "p90": 82.68799632787704, + "p95": 88.8959988951683, + "p99": 92.6399976015091 + }, + "roundtrip": { + "p50": 140.1280015707016, + "p90": 147.96799421310425, + "p95": 151.48800611495972, + "p99": 163.10399770736694 + }, + "isolatedSum": { + "p50": 155.5200070142746, + "p90": 167.83999651670456, + "p95": 177.59999632835388, + "p99": 184.54399704933167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.60000228881836, + "p90": 84.95999872684479, + "p95": 89.28000181913376, + "p99": 95.0080007314682 + }, + "combine": { + "p50": 80.19199967384338, + "p90": 89.28000181913376, + "p95": 90.71999788284302, + "p99": 94.08000111579895 + }, + "roundtrip": { + "p50": 142.56000518798828, + "p90": 149.24800395965576, + "p95": 151.5520066022873, + "p99": 158.9439958333969 + }, + "isolatedSum": { + "p50": 157.79200196266174, + "p90": 174.24000054597855, + "p95": 179.99999970197678, + "p99": 189.08800184726715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.13599908351898, + "p90": 88.32000195980072, + "p95": 91.67999774217606, + "p99": 97.6639986038208 + }, + "combine": { + "p50": 86.97599917650223, + "p90": 90.81599861383438, + "p95": 93.12000125646591, + "p99": 98.9760011434555 + }, + "roundtrip": { + "p50": 141.27999544143677, + "p90": 149.27999675273895, + "p95": 151.61600708961487, + "p99": 156.00000321865082 + }, + "isolatedSum": { + "p50": 166.1119982600212, + "p90": 179.1360005736351, + "p95": 184.79999899864197, + "p99": 
196.6399997472763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.10400146245956, + "p90": 100.60799866914749, + "p95": 102.36799716949463, + "p99": 108.73600095510483 + }, + "combine": { + "p50": 90.65599739551544, + "p90": 96.41599655151367, + "p95": 101.69599950313568, + "p99": 103.67999970912933 + }, + "roundtrip": { + "p50": 152.8320014476776, + "p90": 163.71199488639832, + "p95": 166.33599996566772, + "p99": 174.81599748134613 + }, + "isolatedSum": { + "p50": 185.759998857975, + "p90": 197.02399522066116, + "p95": 204.0639966726303, + "p99": 212.41600066423416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.0000028014183, + "p90": 111.7120012640953, + "p95": 114.94400352239609, + "p99": 119.10399794578552 + }, + "combine": { + "p50": 103.74400019645691, + "p90": 111.93600296974182, + "p95": 113.88800293207169, + "p99": 117.15199798345566 + }, + "roundtrip": { + "p50": 182.0800006389618, + "p90": 187.58399784564972, + "p95": 189.43999707698822, + "p99": 194.240003824234 + }, + "isolatedSum": { + "p50": 211.7440029978752, + "p90": 223.64800423383713, + "p95": 228.83200645446777, + "p99": 236.25599592924118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
120.67200243473053, + "p90": 127.96799838542938, + "p95": 131.52000308036804, + "p99": 136.25599443912506 + }, + "combine": { + "p50": 127.36000120639801, + "p90": 131.23199343681335, + "p95": 133.2480013370514, + "p99": 137.472003698349 + }, + "roundtrip": { + "p50": 217.1200066804886, + "p90": 222.6880043745041, + "p95": 224.92800652980804, + "p99": 230.84799945354462 + }, + "isolatedSum": { + "p50": 248.03200364112854, + "p90": 259.19999182224274, + "p95": 264.76800441741943, + "p99": 273.72799813747406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-694aeeb8", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b200_ec1724fd", + "comparisonKey": "7e01b7f88092784d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:32:34.527657+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + 
"deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.8159966468811, + "p90": 94.71999853849411, + "p95": 107.77600109577179, + "p99": 119.84000355005264 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 82.75199681520462, + "p95": 87.64799684286118, + "p99": 91.45600348711014 + }, + "roundtrip": { + "p50": 138.11199367046356, + "p90": 146.2399959564209, + "p95": 150.11200308799744, + "p99": 154.94400262832642 + }, + "isolatedSum": { + "p50": 157.56800025701523, + "p90": 177.47199535369873, + "p95": 195.42399793863297, + "p99": 211.29600703716278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.34400033950806, + "p90": 87.5839963555336, + "p95": 90.55999666452408, + "p99": 97.6639986038208 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 89.28000181913376, + "p95": 91.48799628019333, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 138.87999951839447, 
+ "p90": 147.2959965467453, + "p95": 150.2400040626526, + "p99": 154.55999970436096 + }, + "isolatedSum": { + "p50": 157.85600244998932, + "p90": 176.86399817466736, + "p95": 182.0479929447174, + "p99": 199.5519995689392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.89599692821503, + "p90": 83.42400193214417, + "p95": 87.2960016131401, + "p99": 91.839998960495 + }, + "combine": { + "p50": 82.24000036716461, + "p90": 90.27200192213058, + "p95": 90.81599861383438, + "p99": 94.52799707651138 + }, + "roundtrip": { + "p50": 139.13600146770477, + "p90": 146.33600413799286, + "p95": 150.176003575325, + "p99": 155.20000457763672 + }, + "isolatedSum": { + "p50": 159.13599729537964, + "p90": 173.69600385427475, + "p95": 178.1120002269745, + "p99": 186.36799603700638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.49600166082382, + "p90": 87.13600039482117, + "p95": 90.2400016784668, + "p99": 98.91200065612793 + }, + "combine": { + "p50": 89.79199826717377, + "p90": 92.16000139713287, + "p95": 93.82399916648865, + "p99": 99.87200051546097 + }, + "roundtrip": { + "p50": 144.76799964904785, + "p90": 151.74399316310883, + "p95": 154.55999970436096, + "p99": 163.00800442695618 + }, + "isolatedSum": { + "p50": 168.2879999279976, + "p90": 179.29600179195404, + "p95": 184.06400084495544, + "p99": 198.7840011715889 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 
1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.74399691820145, + "p90": 90.52799642086029, + "p95": 93.31200271844864, + "p99": 99.39199686050415 + }, + "combine": { + "p50": 90.2400016784668, + "p90": 93.50399672985077, + "p95": 94.91200000047684, + "p99": 102.65599936246872 + }, + "roundtrip": { + "p50": 153.34400534629822, + "p90": 161.21600568294525, + "p95": 164.89599645137787, + "p99": 175.77600479125977 + }, + "isolatedSum": { + "p50": 173.98399859666824, + "p90": 184.03199315071106, + "p95": 188.22400271892548, + "p99": 202.04799622297287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.50400352478027, + "p90": 96.57599776983261, + "p95": 100.63999891281128, + "p99": 104.22399640083313 + }, + "combine": { + "p50": 91.96799993515015, + "p90": 100.16000270843506, + "p95": 101.27999633550644, + "p99": 103.04000228643417 + }, + "roundtrip": { + "p50": 166.30400717258453, + "p90": 170.56000232696533, + "p95": 172.7679967880249, + "p99": 177.824005484581 + }, + "isolatedSum": { + "p50": 181.47200345993042, + "p90": 196.73600047826767, + "p95": 201.91999524831772, + "p99": 207.2639986872673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 103.42399775981903, + "p90": 110.52799969911575, + "p95": 115.29599875211716, + "p99": 119.4240003824234 + }, + "combine": { + "p50": 111.48799955844879, + "p90": 115.61600118875504, + "p95": 118.04799735546112, + "p99": 
123.90399724245071 + }, + "roundtrip": { + "p50": 189.02400135993958, + "p90": 194.75199282169342, + "p95": 198.4959989786148, + "p99": 202.81599462032318 + }, + "isolatedSum": { + "p50": 214.91199731826782, + "p90": 226.1440008878708, + "p95": 233.34399610757828, + "p99": 243.32799762487411 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.91199779510498, + "p90": 135.13599336147308, + "p95": 137.69599795341492, + "p99": 143.16800236701965 + }, + "combine": { + "p50": 148.15999567508698, + "p90": 153.08800339698792, + "p95": 155.32800555229187, + "p99": 161.28000617027283 + }, + "roundtrip": { + "p50": 253.08799743652344, + "p90": 259.0720057487488, + "p95": 262.36799359321594, + "p99": 266.01600646972656 + }, + "isolatedSum": { + "p50": 275.07199347019196, + "p90": 288.223996758461, + "p95": 293.0240035057068, + "p99": 304.4480085372925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-abb80b71", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b200_8e2c81ec", + "comparisonKey": "a24b8a1a20bfaddc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:33:48.401942+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.31200075149536, + "p90": 91.00800007581711, + "p95": 97.37599641084671, + "p99": 108.73600095510483 + }, + "combine": { + "p50": 65.24799764156342, + "p90": 68.25599819421768, + "p95": 79.19999957084656, + "p99": 83.67999643087387 + }, + "roundtrip": { + "p50": 114.27199840545654, + "p90": 134.43200290203094, + "p95": 140.28799533843994, + "p99": 149.34399724006653 + }, + "isolatedSum": { + "p50": 146.55999839305878, + "p90": 
159.2639982700348, + "p95": 176.57599598169327, + "p99": 192.4159973859787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.02399790287018, + "p90": 92.54399687051773, + "p95": 101.15200281143188, + "p99": 110.17599701881409 + }, + "combine": { + "p50": 67.391999065876, + "p90": 77.2479996085167, + "p95": 82.40000158548355, + "p99": 92.47999638319016 + }, + "roundtrip": { + "p50": 130.3039938211441, + "p90": 141.85599982738495, + "p95": 148.6400067806244, + "p99": 156.70399367809296 + }, + "isolatedSum": { + "p50": 144.41599696874619, + "p90": 169.79199647903442, + "p95": 183.55200439691544, + "p99": 202.65599340200424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.38399630784988, + "p90": 104.38399761915207, + "p95": 113.08799684047699, + "p99": 120.54400146007538 + }, + "combine": { + "p50": 77.91999727487564, + "p90": 81.05599880218506, + "p95": 87.52000331878662, + "p99": 94.24000233411789 + }, + "roundtrip": { + "p50": 139.3599957227707, + "p90": 155.008003115654, + "p95": 160.3199988603592, + "p99": 174.27200078964233 + }, + "isolatedSum": { + "p50": 174.30399358272552, + "p90": 185.43999642133713, + "p95": 200.6080001592636, + "p99": 214.78400379419327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
88.99199962615967, + "p90": 103.2319962978363, + "p95": 110.23999750614166, + "p99": 115.48800021409988 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 91.20000153779984, + "p95": 94.7519987821579, + "p99": 104.35199737548828 + }, + "roundtrip": { + "p50": 152.92799472808838, + "p90": 166.20799899101257, + "p95": 177.0240068435669, + "p99": 201.82399451732635 + }, + "isolatedSum": { + "p50": 169.28000003099442, + "p90": 194.43199783563614, + "p95": 204.99199628829956, + "p99": 219.83999758958817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-505719ac", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b200_6bd76b01", + "comparisonKey": "2d93c340bede5da4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:39:52.418589+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + 
"configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 83.29600095748901, + "p90": 88.79999816417694, + "p95": 94.27200257778168, + "p99": 106.81600123643875 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 69.92000341415405, + "p95": 76.1599987745285, + "p99": 82.75199681520462 + }, + "roundtrip": { + "p50": 126.46399438381195, + "p90": 133.7919980287552, + "p95": 137.7280056476593, + "p99": 147.0080018043518 + }, + "isolatedSum": { + "p50": 149.53599870204926, + "p90": 158.720001578331, + "p95": 170.43200135231018, + "p99": 189.56799805164337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 84.1279998421669, + "p90": 95.8079993724823, + "p95": 97.72799909114838, + "p99": 104.12800312042236 + }, + "combine": { + "p50": 67.71200150251389, + "p90": 76.22399926185608, + "p95": 77.82399654388428, + "p99": 81.50400221347809 + }, + "roundtrip": { + "p50": 
131.1040073633194, + "p90": 136.73600554466248, + "p95": 139.3280029296875, + "p99": 143.8400000333786 + }, + "isolatedSum": { + "p50": 151.8400013446808, + "p90": 172.03199863433838, + "p95": 175.55199563503265, + "p99": 185.63200533390045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.72800326347351, + "p90": 87.16800063848495, + "p95": 91.90399944782257, + "p99": 100.35199671983719 + }, + "combine": { + "p50": 69.05599683523178, + "p90": 78.23999971151352, + "p95": 80.12799918651581, + "p99": 87.87199854850769 + }, + "roundtrip": { + "p50": 127.74400413036346, + "p90": 139.80799913406372, + "p95": 143.19999516010284, + "p99": 164.2560064792633 + }, + "isolatedSum": { + "p50": 146.7840000987053, + "p90": 165.40800034999847, + "p95": 172.03199863433838, + "p99": 188.22399526834488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.65600287914276, + "p90": 85.21600067615509, + "p95": 90.43200314044952, + "p99": 100.28800368309021 + }, + "combine": { + "p50": 75.93599706888199, + "p90": 78.75200361013412, + "p95": 80.73599636554718, + "p99": 85.60000360012054 + }, + "roundtrip": { + "p50": 127.87200510501862, + "p90": 141.6960060596466, + "p95": 149.08799529075623, + "p99": 153.50399911403656 + }, + "isolatedSum": { + "p50": 154.59199994802475, + "p90": 163.96800428628922, + "p95": 171.1679995059967, + "p99": 185.88800728321075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 
24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.4720019698143, + "p90": 90.91199934482574, + "p95": 97.15200215578079, + "p99": 115.80800265073776 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 81.50400221347809, + "p95": 83.39200168848038, + "p99": 97.63199836015701 + }, + "roundtrip": { + "p50": 136.9599997997284, + "p90": 143.13599467277527, + "p95": 148.19200336933136, + "p99": 163.90399634838104 + }, + "isolatedSum": { + "p50": 159.42399948835373, + "p90": 172.41600155830383, + "p95": 180.54400384426117, + "p99": 213.44000101089478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.216000020504, + "p90": 88.95999938249588, + "p95": 91.45600348711014, + "p99": 98.27200323343277 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 81.02399855852127, + "p95": 82.36800134181976, + "p99": 91.20000153779984 + }, + "roundtrip": { + "p50": 139.3599957227707, + "p90": 144.86399292945862, + "p95": 147.90399372577667, + "p99": 152.319997549057 + }, + "isolatedSum": { + "p50": 159.16799753904343, + "p90": 169.98399794101715, + "p95": 173.8240048289299, + "p99": 189.4720047712326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 92.32000261545181, + "p90": 97.18400239944458, + "p95": 100.51199793815613, + "p99": 112.06399649381638 + }, + "combine": { + "p50": 79.80799674987793, + "p90": 88.54400366544724, + "p95": 91.13600105047226, + "p99": 
95.71199864149094 + }, + "roundtrip": { + "p50": 143.99999380111694, + "p90": 155.39200603961945, + "p95": 163.32800686359406, + "p99": 174.78400468826294 + }, + "isolatedSum": { + "p50": 172.12799936532974, + "p90": 185.72800606489182, + "p95": 191.6479989886284, + "p99": 207.7759951353073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 97.56799787282944, + "p90": 103.26399654150009, + "p95": 105.69600015878677, + "p99": 112.5119999051094 + }, + "combine": { + "p50": 99.87200051546097, + "p90": 106.39999806880951, + "p95": 109.53599959611893, + "p99": 125.37600100040436 + }, + "roundtrip": { + "p50": 168.03200542926788, + "p90": 175.52000284194946, + "p95": 178.97599935531616, + "p99": 188.1600022315979 + }, + "isolatedSum": { + "p50": 197.4399983882904, + "p90": 209.6639946103096, + "p95": 215.2319997549057, + "p99": 237.88800090551376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4d5dc167", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b200_86a142fb", + "comparisonKey": "2fcdec5539f3638f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:37:23.321040+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 80.09599894285202, + "p90": 86.01599931716919, + "p95": 87.93599903583527, + "p99": 92.73599833250046 + }, + "combine": { + "p50": 77.7600035071373, + "p90": 80.12799918651581, + "p95": 82.07999914884567, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 126.8479973077774, + "p90": 140.00000059604645, + "p95": 144.19199526309967, + "p99": 150.33599734306335 + }, + "isolatedSum": { + "p50": 157.85600244998932, + "p90": 166.143998503685, + "p95": 170.01599818468094, 
+ "p99": 181.2800019979477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 76.12799853086472, + "p90": 84.927998483181, + "p95": 88.57599645853043, + "p99": 93.63199770450592 + }, + "combine": { + "p50": 78.20799946784973, + "p90": 81.60000294446945, + "p95": 83.23200047016144, + "p99": 91.71199798583984 + }, + "roundtrip": { + "p50": 137.11999356746674, + "p90": 142.62400567531586, + "p95": 146.17599546909332, + "p99": 152.48000621795654 + }, + "isolatedSum": { + "p50": 154.33599799871445, + "p90": 166.52800142765045, + "p95": 171.80799692869186, + "p99": 185.34399569034576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 82.97599852085114, + "p90": 96.28800302743912, + "p95": 98.94400089979172, + "p99": 108.38399827480316 + }, + "combine": { + "p50": 82.43200182914734, + "p90": 89.53599631786346, + "p95": 93.1520015001297, + "p99": 100.80000013113022 + }, + "roundtrip": { + "p50": 145.05599439144135, + "p90": 154.84799444675446, + "p95": 159.55199301242828, + "p99": 171.10399901866913 + }, + "isolatedSum": { + "p50": 165.40800034999847, + "p90": 185.82399934530258, + "p95": 192.09600239992142, + "p99": 209.18399840593338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.94399762153625, + "p90": 
86.27200126647949, + "p95": 89.9839997291565, + "p99": 97.08800166845322 + }, + "combine": { + "p50": 81.50400221347809, + "p90": 89.56799656152725, + "p95": 90.4960036277771, + "p99": 92.47999638319016 + }, + "roundtrip": { + "p50": 142.65599846839905, + "p90": 151.36000514030457, + "p95": 154.7520011663437, + "p99": 163.03999722003937 + }, + "isolatedSum": { + "p50": 160.44799983501434, + "p90": 175.83999782800674, + "p95": 180.4800033569336, + "p99": 189.56799805164337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.02399855852127, + "p90": 88.95999938249588, + "p95": 92.57599711418152, + "p99": 98.36799651384354 + }, + "combine": { + "p50": 89.40800279378891, + "p90": 92.12800115346909, + "p95": 93.91999989748001, + "p99": 100.5759984254837 + }, + "roundtrip": { + "p50": 146.55999839305878, + "p90": 156.22399747371674, + "p95": 160.0320041179657, + "p99": 168.83200407028198 + }, + "isolatedSum": { + "p50": 170.43200135231018, + "p90": 181.08800053596497, + "p95": 186.49599701166153, + "p99": 198.94399493932724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.32800316810608, + "p90": 104.99200224876404, + "p95": 107.80800133943558, + "p99": 114.23999816179276 + }, + "combine": { + "p50": 91.13600105047226, + "p90": 95.07200121879578, + "p95": 100.00000149011612, + "p99": 103.39199751615524 + }, + "roundtrip": { + "p50": 157.82399475574493, + "p90": 164.0319973230362, + "p95": 167.77600347995758, + "p99": 173.8239973783493 + }, + 
"isolatedSum": { + "p50": 186.46400421857834, + "p90": 200.06400346755981, + "p95": 207.8080028295517, + "p99": 217.631995677948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.02399635314941, + "p90": 122.43200093507767, + "p95": 126.36800110340118, + "p99": 135.8720064163208 + }, + "combine": { + "p50": 108.86400192975998, + "p90": 114.88000303506851, + "p95": 116.89600348472595, + "p99": 121.0239976644516 + }, + "roundtrip": { + "p50": 186.39999628067017, + "p90": 195.6160068511963, + "p95": 199.93600249290466, + "p99": 219.52000260353088 + }, + "isolatedSum": { + "p50": 221.8879982829094, + "p90": 237.31200397014618, + "p95": 243.26400458812714, + "p99": 256.8960040807724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.64000284671783, + "p90": 133.215993642807, + "p95": 136.9599997997284, + "p99": 144.19199526309967 + }, + "combine": { + "p50": 143.10400187969208, + "p90": 150.9760022163391, + "p95": 153.4080058336258, + "p99": 164.32000696659088 + }, + "roundtrip": { + "p50": 243.3280050754547, + "p90": 253.50400805473328, + "p95": 256.5760016441345, + "p99": 269.567996263504 + }, + "isolatedSum": { + "p50": 267.7440047264099, + "p90": 284.1919958591461, + "p95": 290.3680056333542, + "p99": 308.51200222969055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + } + ] + }, + { + "id": "cx-201b3e94", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b200_163689e3", + "comparisonKey": "887b692608382c34", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:48:25.089807+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 83.90399813652039, + "p90": 90.2400016784668, + "p95": 96.00000083446503, + "p99": 105.47199845314026 + }, + "combine": { + "p50": 78.49600166082382, + "p90": 83.48800241947174, + "p95": 90.30400216579437, + "p99": 96.3520035147667 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 142.39999651908875, + "p95": 148.3840048313141, + "p99": 156.70399367809296 + }, + "isolatedSum": { + "p50": 162.3999997973442, + "p90": 173.72800409793854, + "p95": 186.3040030002594, + "p99": 201.82400196790695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.75200361013412, + "p90": 88.99199962615967, + "p95": 94.01600062847137, + "p99": 102.4319976568222 + }, + "combine": { + "p50": 78.97599786520004, + "p90": 84.06399935483932, + "p95": 92.41600334644318, + "p99": 101.72799974679947 + }, + "roundtrip": { + "p50": 138.2720023393631, + "p90": 152.67199277877808, + "p95": 158.36800634860992, + "p99": 178.43200266361237 + }, + "isolatedSum": { + "p50": 157.72800147533417, + "p90": 173.055998980999, + "p95": 186.43200397491455, + "p99": 204.15999740362167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 79.26400005817413, + "p90": 92.67199784517288, + "p95": 98.1760025024414, + "p99": 108.47999900579453 + }, + "combine": { + 
"p50": 82.07999914884567, + "p90": 90.55999666452408, + "p95": 94.04800087213516, + "p99": 105.47199845314026 + }, + "roundtrip": { + "p50": 144.44799721240997, + "p90": 153.53600680828094, + "p95": 161.56800091266632, + "p99": 168.2559996843338 + }, + "isolatedSum": { + "p50": 161.3439992070198, + "p90": 183.23199450969696, + "p95": 192.22400337457657, + "p99": 213.95199745893478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.16799932718277, + "p90": 91.10400080680847, + "p95": 96.44799679517746, + "p99": 102.30399668216705 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 95.96800059080124, + "p95": 101.75999999046326, + "p99": 110.52799969911575 + }, + "roundtrip": { + "p50": 142.84799993038177, + "p90": 161.56800091266632, + "p95": 165.3759926557541, + "p99": 187.04000115394592 + }, + "isolatedSum": { + "p50": 167.10399836301804, + "p90": 187.0720013976097, + "p95": 198.20799678564072, + "p99": 212.8319963812828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.07999914884567, + "p90": 97.24800288677216, + "p95": 101.75999999046326, + "p99": 114.17599767446518 + }, + "combine": { + "p50": 90.59199690818787, + "p90": 97.75999933481216, + "p95": 103.4879982471466, + "p99": 113.82400244474411 + }, + "roundtrip": { + "p50": 151.13599598407745, + "p90": 166.4000004529953, + "p95": 176.54399573802948, + "p99": 190.94400107860565 + }, + "isolatedSum": { + "p50": 172.67199605703354, + "p90": 195.00800222158432, + "p95": 205.24799823760986, + 
"p99": 228.0000001192093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.88800030946732, + "p90": 105.79200088977814, + "p95": 111.68000102043152, + "p99": 130.2720010280609 + }, + "combine": { + "p50": 91.77599847316742, + "p90": 101.21600329875946, + "p95": 106.04800283908844, + "p99": 115.77600240707397 + }, + "roundtrip": { + "p50": 154.36799824237823, + "p90": 165.92000424861908, + "p95": 170.68800330162048, + "p99": 177.0240068435669 + }, + "isolatedSum": { + "p50": 189.66399878263474, + "p90": 207.0080041885376, + "p95": 217.72800385951996, + "p99": 246.0480034351349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.17599701881409, + "p90": 117.60000139474869, + "p95": 123.9359974861145, + "p99": 129.98400628566742 + }, + "combine": { + "p50": 104.5759990811348, + "p90": 115.77600240707397, + "p95": 119.32799965143204, + "p99": 128.28800082206726 + }, + "roundtrip": { + "p50": 183.03999304771423, + "p90": 190.65600633621216, + "p95": 194.46399807929993, + "p99": 204.48000729084015 + }, + "isolatedSum": { + "p50": 214.75199609994888, + "p90": 233.37600380182266, + "p95": 243.26399713754654, + "p99": 258.2720071077347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
120.7360029220581, + "p90": 128.12800705432892, + "p95": 134.783998131752, + "p99": 141.24800264835358 + }, + "combine": { + "p50": 128.67200374603271, + "p90": 139.1039937734604, + "p95": 143.36000382900238, + "p99": 152.99199521541595 + }, + "roundtrip": { + "p50": 223.36000204086304, + "p90": 237.69600689411163, + "p95": 242.65600740909576, + "p99": 256.76798820495605 + }, + "isolatedSum": { + "p50": 249.40800666809082, + "p90": 267.2320008277893, + "p95": 278.1440019607544, + "p99": 294.23999786376953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4d0ef195", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b200_fdf61e6b", + "comparisonKey": "3fd622724202d259", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:38:39.129444+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + 
"configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.71200346946716, + "p90": 89.88799899816513, + "p95": 99.29600358009338, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 77.34400033950806, + "p90": 79.71200346946716, + "p95": 82.49600231647491, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 129.92000579833984, + "p90": 149.63200688362122, + "p95": 152.319997549057, + "p99": 174.5920032262802 + }, + "isolatedSum": { + "p50": 157.05600380897522, + "p90": 169.6000024676323, + "p95": 181.7920058965683, + "p99": 197.63199985027313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.27999985218048, + "p90": 91.26400202512741, + "p95": 101.3759970664978, + "p99": 105.59999942779541 + }, + "combine": { + "p50": 78.14399898052216, + "p90": 83.74399691820145, + "p95": 91.96799993515015, + "p99": 94.94400024414062 + }, + 
"roundtrip": { + "p50": 138.20800185203552, + "p90": 150.56000649929047, + "p95": 156.95999562740326, + "p99": 164.12800550460815 + }, + "isolatedSum": { + "p50": 155.42399883270264, + "p90": 175.00799894332886, + "p95": 193.34399700164795, + "p99": 200.54399967193604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.504001557827, + "p90": 87.2960016131401, + "p95": 91.67999774217606, + "p99": 99.90400075912476 + }, + "combine": { + "p50": 78.43200117349625, + "p90": 87.10400015115738, + "p95": 91.74399822950363, + "p99": 99.48799759149551 + }, + "roundtrip": { + "p50": 140.25600254535675, + "p90": 150.91200172901154, + "p95": 157.72800147533417, + "p99": 164.99200463294983 + }, + "isolatedSum": { + "p50": 155.93600273132324, + "p90": 174.40000176429749, + "p95": 183.4239959716797, + "p99": 199.39199835062027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.23199981451035, + "p90": 89.12000060081482, + "p95": 98.08000177145004, + "p99": 104.70400005578995 + }, + "combine": { + "p50": 86.33600175380707, + "p90": 92.41600334644318, + "p95": 95.61599791049957, + "p99": 105.40799796581268 + }, + "roundtrip": { + "p50": 140.73599874973297, + "p90": 157.4079990386963, + "p95": 162.27200627326965, + "p99": 171.29600048065186 + }, + "isolatedSum": { + "p50": 165.5680015683174, + "p90": 181.536003947258, + "p95": 193.69599968194962, + "p99": 210.11199802160263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + 
"fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.37600123882294, + "p90": 90.46400338411331, + "p95": 95.20000219345093, + "p99": 102.91200131177902 + }, + "combine": { + "p50": 88.06400001049042, + "p90": 93.40800344944, + "p95": 99.58399832248688, + "p99": 111.26399785280228 + }, + "roundtrip": { + "p50": 142.752006649971, + "p90": 156.54399991035461, + "p95": 162.59199380874634, + "p99": 168.64000260829926 + }, + "isolatedSum": { + "p50": 169.44000124931335, + "p90": 183.8720068335533, + "p95": 194.7840005159378, + "p99": 214.1759991645813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.88800030946732, + "p90": 112.86400258541107, + "p95": 120.2239990234375, + "p99": 133.08799266815186 + }, + "combine": { + "p50": 91.13600105047226, + "p90": 103.00800204277039, + "p95": 106.59199953079224, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 155.07200360298157, + "p90": 169.0559983253479, + "p95": 177.05599963665009, + "p99": 192.57600605487823 + }, + "isolatedSum": { + "p50": 189.02400135993958, + "p90": 215.87200462818146, + "p95": 226.81599855422974, + "p99": 248.35199117660522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.67200046777725, + "p90": 113.37599903345108, + "p95": 116.60800129175186, + "p99": 125.2799928188324 + }, + "combine": { + "p50": 
103.39199751615524, + "p90": 112.86400258541107, + "p95": 116.19199812412262, + "p99": 127.58399546146393 + }, + "roundtrip": { + "p50": 180.95999956130981, + "p90": 189.7280067205429, + "p95": 194.5279985666275, + "p99": 205.59999346733093 + }, + "isolatedSum": { + "p50": 212.0639979839325, + "p90": 226.24000161886215, + "p95": 232.79999941587448, + "p99": 252.86398828029633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.18399888277054, + "p90": 131.8719983100891, + "p95": 136.57599687576294, + "p99": 144.99199390411377 + }, + "combine": { + "p50": 127.10399925708771, + "p90": 130.78400492668152, + "p95": 131.9040060043335, + "p99": 141.24800264835358 + }, + "roundtrip": { + "p50": 218.4319943189621, + "p90": 228.0000001192093, + "p95": 232.4800044298172, + "p99": 240.09600281715393 + }, + "isolatedSum": { + "p50": 248.28799813985825, + "p90": 262.65600323677063, + "p95": 268.48000288009644, + "p99": 286.23999655246735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-22ed8d97", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_b6f90576", + "comparisonKey": "7ca472c4842bcac7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:34:53.316379+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 83.99999886751175, + "p90": 99.87200051546097, + "p95": 107.96800255775452, + "p99": 119.80800330638885 + }, + "combine": { + "p50": 76.4160007238388, + "p90": 82.20800012350082, + "p95": 87.3280018568039, + "p99": 97.75999933481216 + }, + "roundtrip": { + "p50": 134.33599472045898, + "p90": 158.4639996290207, + "p95": 169.08800601959229, + "p99": 188.1919950246811 + }, + "isolatedSum": { + 
"p50": 160.41599959135056, + "p90": 182.0800006389618, + "p95": 195.2960044145584, + "p99": 217.56800264120102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 86.496002972126, + "p90": 101.98400169610977, + "p95": 111.93600296974182, + "p99": 126.14400684833527 + }, + "combine": { + "p50": 77.72800326347351, + "p90": 88.51200342178345, + "p95": 92.76799857616425, + "p99": 104.3199971318245 + }, + "roundtrip": { + "p50": 135.51999628543854, + "p90": 155.90399503707886, + "p95": 167.29600727558136, + "p99": 188.60800564289093 + }, + "isolatedSum": { + "p50": 164.22400623559952, + "p90": 190.49600511789322, + "p95": 204.70400154590607, + "p99": 230.46400398015976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 83.26400071382523, + "p90": 99.16800260543823, + "p95": 111.29599809646606, + "p99": 121.2799996137619 + }, + "combine": { + "p50": 80.83199709653854, + "p90": 90.27200192213058, + "p95": 97.18400239944458, + "p99": 105.47199845314026 + }, + "roundtrip": { + "p50": 138.20800185203552, + "p90": 157.85600244998932, + "p95": 168.70400309562683, + "p99": 182.01600015163422 + }, + "isolatedSum": { + "p50": 164.09599781036377, + "p90": 189.44000452756882, + "p95": 208.48000049591064, + "p99": 226.75199806690216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, 
+ "globalTokens": 64, + "dispatch": { + "p50": 88.83199840784073, + "p90": 112.57600039243698, + "p95": 120.92799693346024, + "p99": 134.91199910640717 + }, + "combine": { + "p50": 84.86399799585342, + "p90": 94.46399658918381, + "p95": 103.42399775981903, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 144.48000490665436, + "p90": 176.06399953365326, + "p95": 185.82400679588318, + "p99": 197.7279931306839 + }, + "isolatedSum": { + "p50": 173.69599640369415, + "p90": 207.0399969816208, + "p95": 224.35199469327927, + "p99": 244.159996509552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 84.99199897050858, + "p90": 98.75199943780899, + "p95": 109.21599715948105, + "p99": 124.76799637079239 + }, + "combine": { + "p50": 88.22400122880936, + "p90": 94.2080020904541, + "p95": 104.60799932479858, + "p99": 110.84800213575363 + }, + "roundtrip": { + "p50": 150.11200308799744, + "p90": 172.92800545692444, + "p95": 186.75200641155243, + "p99": 197.1839964389801 + }, + "isolatedSum": { + "p50": 173.21600019931793, + "p90": 192.9600015282631, + "p95": 213.82399648427963, + "p99": 235.61599850654602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.58399766683578, + "p90": 104.86400127410889, + "p95": 112.64000087976456, + "p99": 121.88799679279327 + }, + "combine": { + "p50": 89.75999802350998, + "p90": 99.04000163078308, + "p95": 105.82400113344193, + "p99": 116.89600348472595 + }, + "roundtrip": { + "p50": 154.7199934720993, + "p90": 
175.58400332927704, + "p95": 180.06399273872375, + "p99": 192.6400065422058 + }, + "isolatedSum": { + "p50": 185.34399569034576, + "p90": 203.90400290489197, + "p95": 218.46400201320648, + "p99": 238.78400027751923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.54399883747101, + "p90": 121.63200229406357, + "p95": 130.8159977197647, + "p99": 144.41600441932678 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 117.79200285673141, + "p95": 123.3920007944107, + "p99": 129.88799810409546 + }, + "roundtrip": { + "p50": 184.86399948596954, + "p90": 204.44799959659576, + "p95": 211.13599836826324, + "p99": 230.3680032491684 + }, + "isolatedSum": { + "p50": 210.81599593162537, + "p90": 239.42400515079498, + "p95": 254.20799851417542, + "p99": 274.30400252342224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.54400146007538, + "p90": 131.00799918174744, + "p95": 138.11199367046356, + "p99": 148.8640010356903 + }, + "combine": { + "p50": 138.75199854373932, + "p90": 149.02399480342865, + "p95": 154.33600544929504, + "p99": 165.18400609493256 + }, + "roundtrip": { + "p50": 235.80799996852875, + "p90": 247.00799584388733, + "p95": 254.20799851417542, + "p99": 265.9839987754822 + }, + "isolatedSum": { + "p50": 259.2960000038147, + "p90": 280.0319939851761, + "p95": 292.4479991197586, + "p99": 314.04800713062286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 
3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fa272864", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b200_10983e54", + "comparisonKey": "5c89ff4fcdf3f78b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:08.536662+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + 
"id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.28000050783157, + "p90": 86.20800077915192, + "p95": 88.22400122880936, + "p99": 95.0080007314682 + }, + "combine": { + "p50": 67.23199784755707, + "p90": 76.09599828720093, + "p95": 77.31200009584427, + "p99": 83.45600217580795 + }, + "roundtrip": { + "p50": 127.03999876976013, + "p90": 136.48000359535217, + "p95": 141.76000654697418, + "p99": 153.6960005760193 + }, + "isolatedSum": { + "p50": 148.51199835538864, + "p90": 162.30399906635284, + "p95": 165.53600132465363, + "p99": 178.46400290727615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 82.91199803352356, + "p90": 88.22400122880936, + "p95": 90.04800021648407, + "p99": 104.51199859380722 + }, + "combine": { + "p50": 67.84000247716904, + "p90": 76.12799853086472, + "p95": 77.2159993648529, + "p99": 80.09599894285202 + }, + "roundtrip": { + "p50": 128.63999605178833, + "p90": 138.49599659442902, + "p95": 145.34400403499603, + "p99": 152.76800096035004 + }, + "isolatedSum": { + "p50": 150.7520005106926, + "p90": 164.35199975967407, + "p95": 167.26399958133698, + "p99": 184.60799753665924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.69600367546082, + "p90": 93.53599697351456, + "p95": 96.09600156545639, + "p99": 
103.74400019645691 + }, + "combine": { + "p50": 67.23199784755707, + "p90": 76.09599828720093, + "p95": 77.2159993648529, + "p99": 82.71999657154083 + }, + "roundtrip": { + "p50": 128.76799702644348, + "p90": 134.65599715709686, + "p95": 137.66400516033173, + "p99": 143.327996134758 + }, + "isolatedSum": { + "p50": 148.92800152301788, + "p90": 169.63199526071548, + "p95": 173.3120009303093, + "p99": 186.46399676799774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.58400249481201, + "p90": 96.70399874448776, + "p95": 98.68799895048141, + "p99": 106.75200074911118 + }, + "combine": { + "p50": 77.7600035071373, + "p90": 80.25600016117096, + "p95": 82.97599852085114, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 131.71200454235077, + "p90": 138.14400136470795, + "p95": 143.71199905872345, + "p99": 151.19999647140503 + }, + "isolatedSum": { + "p50": 157.3440060019493, + "p90": 176.95999890565872, + "p95": 181.66399747133255, + "p99": 194.7840005159378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.84799689054489, + "p90": 86.81599795818329, + "p95": 90.97599983215332, + "p99": 94.33600306510925 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 81.85599744319916, + "p95": 83.8719978928566, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 139.20000195503235, + "p90": 145.02400159835815, + "p95": 147.13600277900696, + "p99": 152.41600573062897 + }, + "isolatedSum": { + "p50": 157.47199952602386, + "p90": 168.67199540138245, + 
"p95": 174.84799772500992, + "p99": 185.248002409935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.28000181913376, + "p90": 96.03200107812881, + "p95": 100.12800246477127, + "p99": 111.90400272607803 + }, + "combine": { + "p50": 87.3280018568039, + "p90": 92.79999881982803, + "p95": 95.71199864149094, + "p99": 98.94400089979172 + }, + "roundtrip": { + "p50": 151.16800367832184, + "p90": 159.61599349975586, + "p95": 162.33600676059723, + "p99": 167.84000396728516 + }, + "isolatedSum": { + "p50": 176.60800367593765, + "p90": 188.83199989795685, + "p95": 195.8400011062622, + "p99": 210.84800362586975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.2640026807785, + "p90": 102.11200267076492, + "p95": 103.5199984908104, + "p99": 107.64800012111664 + }, + "combine": { + "p50": 100.25600343942642, + "p90": 103.10400277376175, + "p95": 104.38399761915207, + "p99": 109.95200276374817 + }, + "roundtrip": { + "p50": 174.52800273895264, + "p90": 178.9119988679886, + "p95": 181.95199966430664, + "p99": 187.58399784564972 + }, + "isolatedSum": { + "p50": 195.52000612020493, + "p90": 205.21600544452667, + "p95": 207.90399610996246, + "p99": 217.6000028848648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 113.3119985461235, + "p90": 120.03199756145477, + "p95": 123.90399724245071, + "p99": 128.28800082206726 + }, + "combine": { + "p50": 131.48799538612366, + "p90": 139.55199718475342, + "p95": 140.47999680042267, + "p99": 149.21599626541138 + }, + "roundtrip": { + "p50": 226.68799757957458, + "p90": 231.51999711990356, + "p95": 234.3679964542389, + "p99": 241.4720058441162 + }, + "isolatedSum": { + "p50": 244.79999393224716, + "p90": 259.5839947462082, + "p95": 264.3839940428734, + "p99": 277.50399708747864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5d0176bc", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b200_ad0e927a", + "comparisonKey": "0082c53b2a93d494", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:47:11.364121+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 82.65600353479385, + "p90": 87.45600283145905, + "p95": 89.59999680519104, + "p99": 96.44799679517746 + }, + "combine": { + "p50": 77.31200009584427, + "p90": 79.13599908351898, + "p95": 80.83199709653854, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 128.25599312782288, + "p90": 138.94400000572205, + "p95": 142.5279974937439, + "p99": 151.36000514030457 + }, + "isolatedSum": { + "p50": 159.96800363063812, + "p90": 166.59200191497803, + "p95": 170.43199390172958, + "p99": 180.38399517536163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.5600021481514, + "p90": 88.83199840784073, + "p95": 92.32000261545181, + "p99": 99.64799880981445 + }, + "combine": { + "p50": 78.33600044250488, + "p90": 81.79199695587158, + "p95": 83.45600217580795, + 
"p99": 92.06400066614151 + }, + "roundtrip": { + "p50": 138.2399946451187, + "p90": 144.44799721240997, + "p95": 147.2640037536621, + "p99": 154.7199934720993 + }, + "isolatedSum": { + "p50": 156.89600259065628, + "p90": 170.6239953637123, + "p95": 175.77600479125977, + "p99": 191.71199947595596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.14399898052216, + "p90": 97.4079966545105, + "p95": 100.44799745082855, + "p99": 122.14399874210358 + }, + "combine": { + "p50": 79.93599772453308, + "p90": 88.41600269079208, + "p95": 90.7519981265068, + "p99": 94.11200135946274 + }, + "roundtrip": { + "p50": 142.94399321079254, + "p90": 152.0320028066635, + "p95": 156.3519984483719, + "p99": 182.43199586868286 + }, + "isolatedSum": { + "p50": 158.07999670505524, + "p90": 185.82399934530258, + "p95": 191.19999557733536, + "p99": 216.25600010156631 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.45600152015686, + "p90": 90.40000289678574, + "p95": 94.24000233411789, + "p99": 99.93600100278854 + }, + "combine": { + "p50": 87.61599659919739, + "p90": 91.39200299978256, + "p95": 92.99200028181076, + "p99": 98.14400225877762 + }, + "roundtrip": { + "p50": 143.71199905872345, + "p90": 161.05599701404572, + "p95": 165.21599888801575, + "p99": 174.49599504470825 + }, + "isolatedSum": { + "p50": 167.07199811935425, + "p90": 181.7920058965683, + "p95": 187.23200261592865, + "p99": 198.08000326156616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + 
"combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.0159986615181, + "p90": 118.27199906110764, + "p95": 124.70400333404541, + "p99": 137.2479945421219 + }, + "combine": { + "p50": 88.639996945858, + "p90": 102.4319976568222, + "p95": 106.49599879980087, + "p99": 112.41599917411804 + }, + "roundtrip": { + "p50": 146.43199741840363, + "p90": 202.2400051355362, + "p95": 208.80000293254852, + "p99": 218.87999773025513 + }, + "isolatedSum": { + "p50": 170.6559956073761, + "p90": 220.70399671792984, + "p95": 231.20000213384628, + "p99": 249.66399371623993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.47199714183807, + "p90": 130.2720010280609, + "p95": 134.91199910640717, + "p99": 147.48799800872803 + }, + "combine": { + "p50": 91.5519967675209, + "p90": 98.75199943780899, + "p95": 101.34399682283401, + "p99": 112.35199868679047 + }, + "roundtrip": { + "p50": 155.68000078201294, + "p90": 167.42399334907532, + "p95": 170.43200135231018, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 189.02399390935898, + "p90": 229.0240004658699, + "p95": 236.25599592924118, + "p99": 259.8399966955185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.55199939012527, + "p90": 115.9679964184761, + "p95": 118.01599711179733, + "p99": 123.36000055074692 + }, + 
"combine": { + "p50": 104.47999835014343, + "p90": 113.98400366306305, + "p95": 115.74400216341019, + "p99": 120.51200121641159 + }, + "roundtrip": { + "p50": 179.61600422859192, + "p90": 185.85599958896637, + "p95": 190.36799669265747, + "p99": 194.75199282169342 + }, + "isolatedSum": { + "p50": 212.0319977402687, + "p90": 229.95200008153915, + "p95": 233.75999927520752, + "p99": 243.8720017671585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.50400131940842, + "p90": 132.25600123405457, + "p95": 134.68800485134125, + "p99": 145.6640064716339 + }, + "combine": { + "p50": 127.42400169372559, + "p90": 131.16799294948578, + "p95": 132.47999548912048, + "p99": 139.71200585365295 + }, + "roundtrip": { + "p50": 217.47200191020966, + "p90": 223.4559953212738, + "p95": 225.600004196167, + "p99": 230.78399896621704 + }, + "isolatedSum": { + "p50": 248.928003013134, + "p90": 263.42399418354034, + "p95": 267.16800034046173, + "p99": 285.37601232528687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3c212caf", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b200_11e714df", + "comparisonKey": "2524364d19db3983", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:42:15.183915+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", 
+ "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.84799689054489, + "p90": 85.56800335645676, + "p95": 88.22400122880936, + "p99": 95.77599912881851 + }, + "combine": { + "p50": 77.05599814653397, + "p90": 78.94399762153625, + "p95": 80.86399734020233, + "p99": 88.3840024471283 + }, + "roundtrip": { + "p50": 127.93600559234619, + "p90": 136.19199395179749, + "p95": 139.67999815940857, + "p99": 142.84799993038177 + 
}, + "isolatedSum": { + "p50": 155.90399503707886, + "p90": 164.512000977993, + "p95": 169.0879985690117, + "p99": 184.1600015759468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.18399912118912, + "p90": 85.37600189447403, + "p95": 88.128000497818, + "p99": 91.36000275611877 + }, + "combine": { + "p50": 78.17599922418594, + "p90": 81.02399855852127, + "p95": 82.84799754619598, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 138.72000575065613, + "p90": 152.8960019350052, + "p95": 194.7840005159378, + "p99": 204.8639953136444 + }, + "isolatedSum": { + "p50": 155.35999834537506, + "p90": 166.4000004529953, + "p95": 170.97599804401398, + "p99": 181.63200467824936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.08799839019775, + "p90": 84.63999629020691, + "p95": 89.31200206279755, + "p99": 94.33600306510925 + }, + "combine": { + "p50": 79.3600007891655, + "p90": 89.56799656152725, + "p95": 91.67999774217606, + "p99": 105.82400113344193 + }, + "roundtrip": { + "p50": 142.36800372600555, + "p90": 152.79999375343323, + "p95": 159.58400070667267, + "p99": 176.92799866199493 + }, + "isolatedSum": { + "p50": 156.44799917936325, + "p90": 174.20799285173416, + "p95": 180.9919998049736, + "p99": 200.16000419855118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.19199967384338, + "p90": 124.25599992275238, + "p95": 128.1599998474121, + "p99": 137.2479945421219 + }, + "combine": { + "p50": 87.26400136947632, + "p90": 105.69600015878677, + "p95": 108.09600353240967, + "p99": 111.7440015077591 + }, + "roundtrip": { + "p50": 142.11200177669525, + "p90": 172.2559928894043, + "p95": 202.55999267101288, + "p99": 210.87999641895294 + }, + "isolatedSum": { + "p50": 167.4560010433197, + "p90": 229.95200008153915, + "p95": 236.25600337982178, + "p99": 248.99199604988098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.6160027384758, + "p90": 85.21600067615509, + "p95": 90.2400016784668, + "p99": 93.79199892282486 + }, + "combine": { + "p50": 87.10400015115738, + "p90": 89.9839997291565, + "p95": 91.5519967675209, + "p99": 99.07200187444687 + }, + "roundtrip": { + "p50": 141.31200313568115, + "p90": 149.63200688362122, + "p95": 153.08800339698792, + "p99": 157.50400722026825 + }, + "isolatedSum": { + "p50": 166.72000288963318, + "p90": 175.20000040531158, + "p95": 181.7919984459877, + "p99": 192.86400079727173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.53599828481674, + "p90": 111.16799712181091, + "p95": 116.7680025100708, + "p99": 128.25599312782288 + }, + "combine": { + "p50": 91.90399944782257, + "p90": 110.68800091743469, + "p95": 114.23999816179276, + "p99": 121.79200351238251 + }, + "roundtrip": { + "p50": 160.25599837303162, + "p90": 
204.76800203323364, + "p95": 212.92799711227417, + "p99": 220.57600319385529 + }, + "isolatedSum": { + "p50": 193.4399977326393, + "p90": 221.8559980392456, + "p95": 231.00800067186356, + "p99": 250.04799664020538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.38399827480316, + "p90": 138.14400136470795, + "p95": 139.93600010871887, + "p99": 145.05599439144135 + }, + "combine": { + "p50": 110.49599945545197, + "p90": 130.43199479579926, + "p95": 136.99199259281158, + "p99": 140.06400108337402 + }, + "roundtrip": { + "p50": 187.19999492168427, + "p90": 220.44800221920013, + "p95": 225.69599747657776, + "p99": 234.592005610466 + }, + "isolatedSum": { + "p50": 218.87999773025513, + "p90": 268.5759961605072, + "p95": 276.92799270153046, + "p99": 285.11999547481537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.08799946308136, + "p90": 164.8000031709671, + "p95": 168.44800114631653, + "p99": 174.3679940700531 + }, + "combine": { + "p50": 139.64800536632538, + "p90": 152.0639955997467, + "p95": 158.07999670505524, + "p99": 165.72800278663635 + }, + "roundtrip": { + "p50": 238.94399404525757, + "p90": 246.14399671554565, + "p95": 249.5039999485016, + "p99": 262.2399926185608 + }, + "isolatedSum": { + "p50": 268.73600482940674, + "p90": 316.8639987707138, + "p95": 326.52799785137177, + "p99": 340.09599685668945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 
4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ac1b9295", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b200_1df41dff", + "comparisonKey": "b060b411a1aab755", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:42:47.950916+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.6160027384758, + "p90": 91.48799628019333, + "p95": 126.27199292182922, + "p99": 160.96000373363495 + }, + "combine": { + "p50": 78.04799824953079, + "p90": 84.35200154781342, + "p95": 91.5519967675209, + "p99": 129.08799946308136 + }, + "roundtrip": { + "p50": 130.65600395202637, + "p90": 141.56800508499146, + "p95": 144.76799964904785, + "p99": 151.48800611495972 + }, + "isolatedSum": { + "p50": 157.6640009880066, + "p90": 175.83999782800674, + "p95": 217.82398968935013, + "p99": 290.0480031967163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.07999849319458, + "p90": 110.6560006737709, + "p95": 124.9919980764389, + "p99": 132.9919993877411 + }, + "combine": { + "p50": 78.8159966468811, + "p90": 86.17600053548813, + "p95": 88.32000195980072, + "p99": 91.839998960495 + }, + "roundtrip": { + "p50": 138.43199610710144, + "p90": 144.9279934167862, + "p95": 148.47999811172485, + "p99": 156.0640037059784 + }, + "isolatedSum": { + "p50": 156.89599514007568, + "p90": 196.83200120925903, + "p95": 213.31200003623962, + "p99": 224.83199834823608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.68800312280655, + "p90": 92.6079973578453, + 
"p95": 97.50399738550186, + "p99": 101.82400047779083 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 87.55200356245041, + "p95": 88.79999816417694, + "p99": 93.37600320577621 + }, + "roundtrip": { + "p50": 142.65599846839905, + "p90": 152.76800096035004, + "p95": 157.53600001335144, + "p99": 165.6000018119812 + }, + "isolatedSum": { + "p50": 157.98400342464447, + "p90": 180.16000092029572, + "p95": 186.3039955496788, + "p99": 195.20000368356705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.00799810886383, + "p90": 84.1279998421669, + "p95": 89.31200206279755, + "p99": 93.91999989748001 + }, + "combine": { + "p50": 82.30400085449219, + "p90": 99.61599856615067, + "p95": 102.11200267076492, + "p99": 107.35999792814255 + }, + "roundtrip": { + "p50": 141.92000031471252, + "p90": 151.74399316310883, + "p95": 156.00000321865082, + "p99": 238.81599307060242 + }, + "isolatedSum": { + "p50": 161.31199896335602, + "p90": 183.74399840831757, + "p95": 191.42400473356247, + "p99": 201.27999782562256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.9599980711937, + "p90": 88.0960002541542, + "p95": 90.65599739551544, + "p99": 94.91200000047684 + }, + "combine": { + "p50": 88.92799913883209, + "p90": 92.28800237178802, + "p95": 93.82399916648865, + "p99": 105.05600273609161 + }, + "roundtrip": { + "p50": 148.3519971370697, + "p90": 164.06400501728058, + "p95": 168.19199919700623, + "p99": 184.09599363803864 + }, + "isolatedSum": { + "p50": 
169.8879972100258, + "p90": 180.38400262594223, + "p95": 184.4799965620041, + "p99": 199.96800273656845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.22400254011154, + "p90": 105.40799796581268, + "p95": 109.76000130176544, + "p99": 117.34399944543839 + }, + "combine": { + "p50": 91.93599969148636, + "p90": 104.06400263309479, + "p95": 111.00800335407257, + "p99": 146.40000462532043 + }, + "roundtrip": { + "p50": 155.29599785804749, + "p90": 164.67200219631195, + "p95": 166.59200191497803, + "p99": 173.15199971199036 + }, + "isolatedSum": { + "p50": 188.1600022315979, + "p90": 209.47200059890747, + "p95": 220.768004655838, + "p99": 263.7440040707588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.55199939012527, + "p90": 111.90400272607803, + "p95": 114.78400230407715, + "p99": 118.97599697113037 + }, + "combine": { + "p50": 103.96800190210342, + "p90": 112.19199746847153, + "p95": 113.69600147008896, + "p99": 116.83200299739838 + }, + "roundtrip": { + "p50": 181.63199722766876, + "p90": 189.95200097560883, + "p95": 193.82399320602417, + "p99": 200.6080001592636 + }, + "isolatedSum": { + "p50": 211.5200012922287, + "p90": 224.09600019454956, + "p95": 228.4800037741661, + "p99": 235.80799996852875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.93599683046341, + "p90": 129.02399897575378, + "p95": 131.9359987974167, + "p99": 137.472003698349 + }, + "combine": { + "p50": 127.20000743865967, + "p90": 130.14400005340576, + "p95": 131.48799538612366, + "p99": 136.80000603199005 + }, + "roundtrip": { + "p50": 218.72000396251678, + "p90": 226.33600234985352, + "p95": 230.56000471115112, + "p99": 238.46399784088135 + }, + "isolatedSum": { + "p50": 247.13600426912308, + "p90": 259.16799902915955, + "p95": 263.42399418354034, + "p99": 274.27200973033905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a1a41f77", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_1c58745c", + "comparisonKey": "0875ed4a755c0160", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:44:42.724854+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.69600367546082, + "p90": 86.81599795818329, + "p95": 88.95999938249588, + "p99": 93.82399916648865 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 78.68800312280655, + "p95": 80.51200211048126, + "p99": 90.2400016784668 + }, + "roundtrip": { + "p50": 128.83199751377106, + "p90": 144.86399292945862, + "p95": 149.3760049343109, + "p99": 154.7520011663437 + }, + "isolatedSum": { + "p50": 157.72800147533417, + "p90": 165.50400108098984, + "p95": 169.47200149297714, + "p99": 184.06400084495544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.98399776220322, + "p90": 88.83199840784073, + "p95": 91.93599969148636, + "p99": 98.52799773216248 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 
84.79999750852585, + "p95": 88.128000497818, + "p99": 94.40000355243683 + }, + "roundtrip": { + "p50": 129.37599420547485, + "p90": 141.53599739074707, + "p95": 145.02400159835815, + "p99": 150.56000649929047 + }, + "isolatedSum": { + "p50": 156.38399869203568, + "p90": 173.63199591636658, + "p95": 180.06400018930435, + "p99": 192.9280012845993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.37600058317184, + "p90": 85.9839990735054, + "p95": 89.31200206279755, + "p99": 95.58399766683578 + }, + "combine": { + "p50": 78.5600021481514, + "p90": 84.25600081682205, + "p95": 87.42400258779526, + "p99": 93.24800223112106 + }, + "roundtrip": { + "p50": 137.5039964914322, + "p90": 147.77599275112152, + "p95": 150.65599977970123, + "p99": 156.80000185966492 + }, + "isolatedSum": { + "p50": 155.93600273132324, + "p90": 170.23999989032745, + "p95": 176.7360046505928, + "p99": 188.83199989795685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.94399762153625, + "p90": 92.79999881982803, + "p95": 97.9200005531311, + "p99": 104.86400127410889 + }, + "combine": { + "p50": 80.09599894285202, + "p90": 88.16000074148178, + "p95": 89.85599875450134, + "p99": 92.3520028591156 + }, + "roundtrip": { + "p50": 143.8080072402954, + "p90": 151.296004652977, + "p95": 153.72799336910248, + "p99": 162.1440052986145 + }, + "isolatedSum": { + "p50": 159.03999656438828, + "p90": 180.95999956130981, + "p95": 187.77599930763245, + "p99": 197.2160041332245 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.74400371313095, + "p90": 86.36800199747086, + "p95": 90.33600240945816, + "p99": 96.6079980134964 + }, + "combine": { + "p50": 80.44800162315369, + "p90": 88.83199840784073, + "p95": 90.40000289678574, + "p99": 93.02400052547455 + }, + "roundtrip": { + "p50": 142.0159935951233, + "p90": 151.10400319099426, + "p95": 152.73599326610565, + "p99": 161.50400042533875 + }, + "isolatedSum": { + "p50": 160.19200533628464, + "p90": 175.20000040531158, + "p95": 180.7360053062439, + "p99": 189.63199853897095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.63999825716019, + "p90": 100.51199793815613, + "p95": 102.27199643850327, + "p99": 105.79200088977814 + }, + "combine": { + "p50": 91.39200299978256, + "p90": 98.33600372076035, + "p95": 100.96000134944916, + "p99": 104.89600151777267 + }, + "roundtrip": { + "p50": 155.90399503707886, + "p90": 165.47200083732605, + "p95": 168.19199919700623, + "p99": 177.2480010986328 + }, + "isolatedSum": { + "p50": 188.03200125694275, + "p90": 198.84800165891647, + "p95": 203.23199778795242, + "p99": 210.6880024075508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.54399818181992, + "p90": 112.28799819946289, + "p95": 
116.03199690580368, + "p99": 120.83200365304947 + }, + "combine": { + "p50": 103.67999970912933, + "p90": 107.87200182676315, + "p95": 110.52799969911575, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 178.14399302005768, + "p90": 184.4480037689209, + "p95": 187.99999356269836, + "p99": 191.67999923229218 + }, + "isolatedSum": { + "p50": 204.22399789094925, + "p90": 220.16000002622604, + "p95": 226.55999660491943, + "p99": 236.67200654745102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.31199985742569, + "p90": 126.24000012874603, + "p95": 127.93600559234619, + "p99": 132.9600065946579 + }, + "combine": { + "p50": 139.48799669742584, + "p90": 142.20799505710602, + "p95": 143.8080072402954, + "p99": 150.62400698661804 + }, + "roundtrip": { + "p50": 229.88800704479218, + "p90": 237.40799725055695, + "p95": 239.74399268627167, + "p99": 244.09599602222443 + }, + "isolatedSum": { + "p50": 260.79999655485153, + "p90": 268.44799518585205, + "p95": 271.7440128326416, + "p99": 283.58401358127594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-af9c4535", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_5f6ee132", + "comparisonKey": "35e91165089ce2ad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:45:15.562046+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", 
+ "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 84.19200032949448, + "p90": 99.10400211811066, + "p95": 109.0880036354065, + "p99": 126.0479986667633 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 84.79999750852585, + "p95": 90.11200070381165, + "p99": 102.78400033712387 + }, + 
"roundtrip": { + "p50": 127.96799838542938, + "p90": 145.28000354766846, + "p95": 152.12799608707428, + "p99": 161.50400042533875 + }, + "isolatedSum": { + "p50": 162.1439978480339, + "p90": 183.9039996266365, + "p95": 199.20000433921814, + "p99": 228.83199900388718 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.98399776220322, + "p90": 92.12800115346909, + "p95": 98.9760011434555, + "p99": 104.86400127410889 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 89.24800157546997, + "p95": 94.59199756383896, + "p99": 104.76800054311752 + }, + "roundtrip": { + "p50": 140.06400108337402, + "p90": 155.03999590873718, + "p95": 161.24799847602844, + "p99": 176.5120029449463 + }, + "isolatedSum": { + "p50": 157.27999806404114, + "p90": 181.37600272893906, + "p95": 193.56799870729446, + "p99": 209.6320018172264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.88799703121185, + "p90": 92.67199784517288, + "p95": 98.59199821949005, + "p99": 105.15200346708298 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 91.0400003194809, + "p95": 95.23200243711472, + "p99": 104.44799810647964 + }, + "roundtrip": { + "p50": 142.7839994430542, + "p90": 155.10399639606476, + "p95": 161.18399798870087, + "p99": 168.41599345207214 + }, + "isolatedSum": { + "p50": 158.1759974360466, + "p90": 183.71199816465378, + "p95": 193.82400065660477, + "p99": 209.60000157356262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + 
"fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.29600030183792, + "p90": 90.65599739551544, + "p95": 95.90400010347366, + "p99": 107.2319969534874 + }, + "combine": { + "p50": 87.96799927949905, + "p90": 93.75999867916107, + "p95": 99.39199686050415, + "p99": 110.11199653148651 + }, + "roundtrip": { + "p50": 141.95199310779572, + "p90": 158.78400206565857, + "p95": 164.000004529953, + "p99": 171.39199376106262 + }, + "isolatedSum": { + "p50": 167.26399958133698, + "p90": 184.4159960746765, + "p95": 195.2959969639778, + "p99": 217.3439934849739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.88799768686295, + "p90": 91.23200178146362, + "p95": 95.77599912881851, + "p99": 106.01600259542465 + }, + "combine": { + "p50": 89.34400230646133, + "p90": 92.44800359010696, + "p95": 94.71999853849411, + "p99": 105.50399869680405 + }, + "roundtrip": { + "p50": 145.60000598430634, + "p90": 161.15200519561768, + "p95": 166.72000288963318, + "p99": 174.46400225162506 + }, + "isolatedSum": { + "p50": 171.23199999332428, + "p90": 183.6800053715706, + "p95": 190.49599766731262, + "p99": 211.5200012922287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.57599776983261, + "p90": 104.22399640083313, + "p95": 112.15999722480774, + "p99": 123.16799908876419 + }, + "combine": { + "p50": 91.26400202512741, + 
"p90": 102.9760017991066, + "p95": 108.0000028014183, + "p99": 117.27999895811081 + }, + "roundtrip": { + "p50": 154.27200496196747, + "p90": 172.09599912166595, + "p95": 179.71199750900269, + "p99": 206.4639925956726 + }, + "isolatedSum": { + "p50": 187.83999979496002, + "p90": 207.19999819993973, + "p95": 220.16000002622604, + "p99": 240.447998046875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.73600095510483, + "p90": 114.04799669981003, + "p95": 119.71200257539749, + "p99": 129.88799810409546 + }, + "combine": { + "p50": 104.54399883747101, + "p90": 114.88000303506851, + "p95": 118.94399672746658, + "p99": 128.89599800109863 + }, + "roundtrip": { + "p50": 180.7039976119995, + "p90": 189.05599415302277, + "p95": 191.39200448989868, + "p99": 201.53599977493286 + }, + "isolatedSum": { + "p50": 213.27999979257584, + "p90": 228.92799973487854, + "p95": 238.65599930286407, + "p99": 258.7839961051941 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.66400253772736, + "p90": 130.3360015153885, + "p95": 136.35200262069702, + "p99": 146.7839926481247 + }, + "combine": { + "p50": 127.6479959487915, + "p90": 134.8160058259964, + "p95": 138.36799561977386, + "p99": 152.8639942407608 + }, + "roundtrip": { + "p50": 219.13599967956543, + "p90": 229.21599447727203, + "p95": 234.592005610466, + "p99": 244.03199553489685 + }, + "isolatedSum": { + "p50": 249.31199848651886, + "p90": 265.1520073413849, + "p95": 274.7199982404709, + "p99": 
299.6479868888855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e819853f", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_6cd406b8", + "comparisonKey": "a967fd3b36cb19a4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:41:01.887520+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": 
"2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 82.43200182914734, + "p90": 87.71199733018875, + "p95": 90.43200314044952, + "p99": 94.11200135946274 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 79.19999957084656, + "p95": 81.08799904584885, + "p99": 88.60799670219421 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 137.2160017490387, + "p95": 139.93600010871887, + "p99": 144.86399292945862 + }, + "isolatedSum": { + "p50": 160.22399812936783, + "p90": 166.9119969010353, + "p95": 171.52000218629837, + "p99": 182.71999806165695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.3040001988411, + "p90": 85.85599809885025, + "p95": 89.08800035715103, + "p99": 93.59999746084213 + }, + "combine": { + "p50": 78.91199737787247, + "p90": 82.43200182914734, + "p95": 87.3280018568039, + "p99": 90.43200314044952 + }, + "roundtrip": { + "p50": 139.71200585365295, + "p90": 149.56800639629364, + "p95": 152.16000378131866, + "p99": 164.70399498939514 + }, + "isolatedSum": { + "p50": 157.21599757671356, + "p90": 168.2879999279976, + "p95": 176.41600221395493, + "p99": 184.03200060129166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.14399898052216, + "p90": 86.07999980449677, + "p95": 90.40000289678574, + "p99": 93.63199770450592 + }, + "combine": { + "p50": 81.50400221347809, + "p90": 89.34400230646133, + "p95": 90.43200314044952, + "p99": 97.69599884748459 + }, + "roundtrip": { + "p50": 142.7839994430542, + "p90": 147.87200093269348, + "p95": 151.0400027036667, + "p99": 157.8879952430725 + }, + "isolatedSum": { + "p50": 159.64800119400024, + "p90": 175.4240021109581, + "p95": 180.83200603723526, + "p99": 191.3279965519905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.48800176382065, + "p90": 86.43200248479843, + "p95": 90.94399958848953, + "p99": 94.30400282144547 + }, + "combine": { + "p50": 87.55200356245041, + "p90": 90.62399715185165, + "p95": 91.64799749851227, + "p99": 94.43199634552002 + }, + "roundtrip": { + "p50": 142.68800616264343, + "p90": 157.1200042963028, + "p95": 162.08000481128693, + "p99": 171.2000072002411 + }, + "isolatedSum": { + "p50": 167.04000532627106, + "p90": 177.05599963665009, + "p95": 182.5919970870018, + "p99": 188.73599916696548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.13599973917007, + "p90": 97.72799909114838, + "p95": 103.10400277376175, + "p99": 115.35999923944473 + }, + "combine": { + "p50": 89.50400352478027, + "p90": 92.06400066614151, + "p95": 94.30400282144547, + "p99": 100.0640019774437 + }, + "roundtrip": { + "p50": 145.53600549697876, + "p90": 
156.19200468063354, + "p95": 161.50400042533875, + "p99": 172.83199727535248 + }, + "isolatedSum": { + "p50": 172.64000326395035, + "p90": 189.7919997572899, + "p95": 197.40800559520721, + "p99": 215.42400121688843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.67199850082397, + "p90": 102.08000242710114, + "p95": 104.2879968881607, + "p99": 110.75200140476227 + }, + "combine": { + "p50": 90.81599861383438, + "p90": 94.24000233411789, + "p95": 96.51199728250504, + "p99": 103.13600301742554 + }, + "roundtrip": { + "p50": 152.99199521541595, + "p90": 161.82400286197662, + "p95": 165.50399363040924, + "p99": 170.68800330162048 + }, + "isolatedSum": { + "p50": 187.48799711465836, + "p90": 196.32000476121902, + "p95": 200.79999417066574, + "p99": 213.8880044221878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.70400071144104, + "p90": 112.28799819946289, + "p95": 113.27999830245972, + "p99": 117.21599847078323 + }, + "combine": { + "p50": 104.92800176143646, + "p90": 113.15199732780457, + "p95": 115.9679964184761, + "p99": 119.77600306272507 + }, + "roundtrip": { + "p50": 180.4800033569336, + "p90": 187.58399784564972, + "p95": 190.75199961662292, + "p99": 197.2160041332245 + }, + "isolatedSum": { + "p50": 213.6320024728775, + "p90": 225.43999552726746, + "p95": 229.24799472093582, + "p99": 236.9920015335083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, 
+ "recvTokensMax": 350, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.4800009727478, + "p90": 126.08000636100769, + "p95": 127.3919939994812, + "p99": 137.95199990272522 + }, + "combine": { + "p50": 127.83999741077423, + "p90": 131.99999928474426, + "p95": 136.28800213336945, + "p99": 139.64800536632538 + }, + "roundtrip": { + "p50": 219.61599588394165, + "p90": 226.9120067358017, + "p95": 231.7119985818863, + "p99": 237.5359982252121 + }, + "isolatedSum": { + "p50": 248.31999838352203, + "p90": 258.08000564575195, + "p95": 263.67999613285065, + "p99": 277.6000052690506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f32fa2a5", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_16a8f46b", + "comparisonKey": "5478e152816f3a17", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:16:19.025426+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": 
"v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 65.40799885988235, + "p90": 77.02399790287018, + "p95": 79.39200103282928, + "p99": 84.83199775218964 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 81.60000294446945, + "p95": 83.0719992518425, + "p99": 90.7519981265068 + }, + "roundtrip": { + "p50": 120.83200365304947, + "p90": 128.4160017967224, + "p95": 131.99999928474426, + "p99": 137.53600418567657 + }, + "isolatedSum": { + "p50": 143.51999759674072, + "p90": 158.62400084733963, + "p95": 162.46400028467178, + "p99": 175.58399587869644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 64.35199826955795, + "p90": 70.88000327348709, + "p95": 75.16799867153168, + "p99": 80.73599636554718 
+ }, + "combine": { + "p50": 78.78399640321732, + "p90": 82.84799754619598, + "p95": 87.45600283145905, + "p99": 90.81599861383438 + }, + "roundtrip": { + "p50": 127.93600559234619, + "p90": 131.77600502967834, + "p95": 132.89600610733032, + "p99": 137.34400272369385 + }, + "isolatedSum": { + "p50": 143.13599467277527, + "p90": 153.72800081968307, + "p95": 162.62400150299072, + "p99": 171.55199497938156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 65.50399959087372, + "p90": 73.15199822187424, + "p95": 76.57600194215775, + "p99": 82.07999914884567 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 88.48000317811966, + "p95": 91.07200056314468, + "p99": 93.1520015001297 + }, + "roundtrip": { + "p50": 130.2720010280609, + "p90": 139.55199718475342, + "p95": 143.26399564743042, + "p99": 154.7199934720993 + }, + "isolatedSum": { + "p50": 145.02400159835815, + "p90": 161.6320013999939, + "p95": 167.64800250530243, + "p99": 175.23200064897537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.58400052785873, + "p90": 72.76800274848938, + "p95": 76.28799974918365, + "p99": 80.6720033288002 + }, + "combine": { + "p50": 82.14399963617325, + "p90": 89.9519994854927, + "p95": 90.59199690818787, + "p99": 92.70399808883667 + }, + "roundtrip": { + "p50": 129.69599664211273, + "p90": 136.48000359535217, + "p95": 138.62399756908417, + "p99": 147.48799800872803 + }, + "isolatedSum": { + "p50": 149.72800016403198, + "p90": 162.7200022339821, + "p95": 
166.87999665737152, + "p99": 173.37600141763687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.80000299215317, + "p90": 79.13599908351898, + "p95": 81.98399841785431, + "p99": 88.95999938249588 + }, + "combine": { + "p50": 89.6959975361824, + "p90": 92.12800115346909, + "p95": 94.14400160312653, + "p99": 100.92800110578537 + }, + "roundtrip": { + "p50": 136.57599687576294, + "p90": 146.97599411010742, + "p95": 150.176003575325, + "p99": 156.47999942302704 + }, + "isolatedSum": { + "p50": 162.49600052833557, + "p90": 171.26400023698807, + "p95": 176.12800002098083, + "p99": 189.88800048828125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.02399790287018, + "p90": 85.60000360012054, + "p95": 87.74399757385254, + "p99": 92.6079973578453 + }, + "combine": { + "p50": 90.62399715185165, + "p90": 94.30400282144547, + "p95": 98.27200323343277, + "p99": 102.49599814414978 + }, + "roundtrip": { + "p50": 142.65599846839905, + "p90": 148.92800152301788, + "p95": 150.62400698661804, + "p99": 158.9760035276413 + }, + "isolatedSum": { + "p50": 167.64799505472183, + "p90": 179.904006421566, + "p95": 186.0160008072853, + "p99": 195.1039955019951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
96.25600278377533, + "p90": 102.46399790048599, + "p95": 104.70400005578995, + "p99": 110.944002866745 + }, + "combine": { + "p50": 104.96000200510025, + "p90": 113.27999830245972, + "p95": 114.84800279140472, + "p99": 118.367999792099 + }, + "roundtrip": { + "p50": 172.19200730323792, + "p90": 178.65599691867828, + "p95": 181.63199722766876, + "p99": 190.3039962053299 + }, + "isolatedSum": { + "p50": 201.21600478887558, + "p90": 215.7439962029457, + "p95": 219.55200284719467, + "p99": 229.312002658844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.04000228643417, + "p90": 112.5440001487732, + "p95": 115.68000167608261, + "p99": 134.46399569511414 + }, + "combine": { + "p50": 127.3919939994812, + "p90": 132.89600610733032, + "p95": 135.51999628543854, + "p99": 140.8960074186325 + }, + "roundtrip": { + "p50": 206.14400506019592, + "p90": 213.76000344753265, + "p95": 215.96799790859222, + "p99": 221.88800573349 + }, + "isolatedSum": { + "p50": 230.43199628591537, + "p90": 245.44000625610352, + "p95": 251.19999796152115, + "p99": 275.36000311374664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-22eb5751", + "identity": "b200|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b200_033ebcb6", + "comparisonKey": "cd5b4967b41bf30b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:25:07.372971+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": 
"b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 180.41600286960602, + "p90": 200.1280039548874, + "p95": 207.07200467586517, + "p99": 243.16799640655518 + }, + "combine": { + "p50": 61.15199998021126, + "p90": 64.06400352716446, + "p95": 65.79200178384781, + "p99": 71.99999690055847 + }, + "roundtrip": { + "p50": 231.1999946832657, + "p90": 
248.86399507522583, + "p95": 274.9119997024536, + "p99": 301.12001299858093 + }, + "isolatedSum": { + "p50": 241.56800284981728, + "p90": 264.19200748205185, + "p95": 272.864006459713, + "p99": 315.16799330711365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 180.31999468803406, + "p90": 201.56799256801605, + "p95": 228.2239943742752, + "p99": 253.88801097869873 + }, + "combine": { + "p50": 63.77600133419037, + "p90": 66.49599969387054, + "p95": 67.9360032081604, + "p99": 74.5600014925003 + }, + "roundtrip": { + "p50": 234.52800512313843, + "p90": 249.56800043582916, + "p95": 253.34399938583374, + "p99": 265.56798815727234 + }, + "isolatedSum": { + "p50": 244.09599602222443, + "p90": 268.0639922618866, + "p95": 296.1599975824356, + "p99": 328.44801247119904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 180.7039976119995, + "p90": 198.0160027742386, + "p95": 201.56799256801605, + "p99": 211.93599700927734 + }, + "combine": { + "p50": 65.72800129652023, + "p90": 68.80000233650208, + "p95": 70.8480030298233, + "p99": 76.22399926185608 + }, + "roundtrip": { + "p50": 236.54399812221527, + "p90": 251.45599246025085, + "p95": 254.59200143814087, + "p99": 260.99199056625366 + }, + "isolatedSum": { + "p50": 246.43199890851974, + "p90": 266.81600511074066, + "p95": 272.41599559783936, + "p99": 288.1599962711334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 182.52800405025482, + "p90": 199.39200580120087, + "p95": 201.6959935426712, + "p99": 213.56800198554993 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 71.96799665689468, + "p95": 74.68800246715546, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 240.83200097084045, + "p90": 255.264014005661, + "p95": 257.56800174713135, + "p99": 262.65600323677063 + }, + "isolatedSum": { + "p50": 251.2960061430931, + "p90": 271.36000245809555, + "p95": 276.38399600982666, + "p99": 294.7840020060539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 181.5679967403412, + "p90": 204.19199764728546, + "p95": 229.8240065574646, + "p99": 255.5840015411377 + }, + "combine": { + "p50": 69.72800195217133, + "p90": 73.02399724721909, + "p95": 74.81600344181061, + "p99": 80.44800162315369 + }, + "roundtrip": { + "p50": 240.54400622844696, + "p90": 254.84800338745117, + "p95": 257.1519911289215, + "p99": 269.50401067733765 + }, + "isolatedSum": { + "p50": 251.2959986925125, + "p90": 277.21599489450455, + "p95": 304.6400099992752, + "p99": 336.0320031642914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 180.83199858665466, + "p90": 198.0160027742386, + "p95": 201.63199305534363, + "p99": 226.9439995288849 + }, + "combine": { + "p50": 72.15999811887741, + "p90": 75.87199658155441, + "p95": 78.62400263547897, + 
"p99": 83.83999764919281 + }, + "roundtrip": { + "p50": 242.91199445724487, + "p90": 259.0399980545044, + "p95": 266.4960026741028, + "p99": 312.4479949474335 + }, + "isolatedSum": { + "p50": 252.99199670553207, + "p90": 273.887999355793, + "p95": 280.2559956908226, + "p99": 310.7839971780777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 186.49600446224213, + "p90": 220.32000124454498, + "p95": 235.71200668811798, + "p99": 290.20801186561584 + }, + "combine": { + "p50": 84.06399935483932, + "p90": 87.16800063848495, + "p95": 88.70399743318558, + "p99": 92.8959995508194 + }, + "roundtrip": { + "p50": 257.3759853839874, + "p90": 272.19200134277344, + "p95": 274.7519910335541, + "p99": 281.792014837265 + }, + "isolatedSum": { + "p50": 270.56000381708145, + "p90": 307.48800188302994, + "p95": 324.41600412130356, + "p99": 383.10401141643524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 201.6959935426712, + "p90": 219.16800737380981, + "p95": 236.76800727844238, + "p99": 281.76000714302063 + }, + "combine": { + "p50": 102.11200267076492, + "p90": 106.62399977445602, + "p95": 109.6000000834465, + "p99": 115.68000167608261 + }, + "roundtrip": { + "p50": 289.6000146865845, + "p90": 305.7920038700104, + "p95": 321.0879862308502, + "p99": 341.40801429748535 + }, + "isolatedSum": { + "p50": 303.8079962134361, + "p90": 325.79200714826584, + "p95": 346.3680073618889, + "p99": 397.44000881910324 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bbe6fd92", + "identity": "b200|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b200_033ebcb6", + "comparisonKey": "af89287231bec2c3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:27:36.106357+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 196.57599925994873, + "p90": 232.89600014686584, + "p95": 252.06398963928223, + "p99": 339.35999870300293 + }, + "combine": { + "p50": 64.15999680757523, + "p90": 68.09599697589874, + "p95": 75.74400305747986, + "p99": 80.35200089216232 + }, + "roundtrip": { + "p50": 235.45600473880768, + "p90": 273.79199862480164, + "p95": 282.49600529670715, + "p99": 312.3840093612671 + }, + "isolatedSum": { + "p50": 260.73599606752396, + "p90": 300.9919971227646, + "p95": 327.8079926967621, + "p99": 419.71199959516525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 180.9920072555542, + "p90": 212.99199759960175, + "p95": 219.13599967956543, + "p99": 234.52800512313843 + }, + "combine": { + "p50": 67.48799979686737, + "p90": 73.11999797821045, + "p95": 78.62400263547897, + "p99": 85.95199882984161 + }, + "roundtrip": { + "p50": 237.08799481391907, + "p90": 268.2560086250305, + "p95": 273.6319899559021, + "p99": 286.72000765800476 + }, + "isolatedSum": { + "p50": 248.48000705242157, + "p90": 286.1119955778122, + "p95": 297.7600023150444, + "p99": 320.48000395298004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 184.76800620555878, + "p90": 
217.18400716781616, + "p95": 224.2240011692047, + "p99": 392.67200231552124 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 75.42400062084198, + "p95": 80.70400357246399, + "p99": 125.91999769210815 + }, + "roundtrip": { + "p50": 240.9919947385788, + "p90": 269.0559923648834, + "p95": 275.58401226997375, + "p99": 286.9119942188263 + }, + "isolatedSum": { + "p50": 254.14400547742844, + "p90": 292.60800778865814, + "p95": 304.9280047416687, + "p99": 518.5920000076294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 181.37599527835846, + "p90": 217.69599616527557, + "p95": 224.48000311851501, + "p99": 234.17599499225616 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 80.76799660921097, + "p95": 86.40000224113464, + "p99": 92.86399930715561 + }, + "roundtrip": { + "p50": 244.159996509552, + "p90": 277.2800028324127, + "p95": 282.0799946784973, + "p99": 293.5679852962494 + }, + "isolatedSum": { + "p50": 253.4399926662445, + "p90": 298.46399277448654, + "p95": 310.88000535964966, + "p99": 327.0399942994118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 181.88799917697906, + "p90": 217.631995677948, + "p95": 225.5679965019226, + "p99": 238.46399784088135 + }, + "combine": { + "p50": 73.08799773454666, + "p90": 78.3040001988411, + "p95": 85.15200018882751, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 247.74399399757385, + "p90": 285.504013299942, + "p95": 292.1600043773651, + "p99": 337.66400814056396 + }, + "isolatedSum": { + 
"p50": 254.97599691152573, + "p90": 295.9359958767891, + "p95": 310.7199966907501, + "p99": 327.9999941587448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 183.9359998703003, + "p90": 223.68000447750092, + "p95": 236.00000143051147, + "p99": 282.52801299095154 + }, + "combine": { + "p50": 75.96799731254578, + "p90": 81.85599744319916, + "p95": 88.76799792051315, + "p99": 93.47199648618698 + }, + "roundtrip": { + "p50": 248.3839988708496, + "p90": 279.10399436950684, + "p95": 285.8240008354187, + "p99": 296.25600576400757 + }, + "isolatedSum": { + "p50": 259.90399718284607, + "p90": 305.5360019207001, + "p95": 324.7679993510246, + "p99": 376.0000094771385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 190.14400243759155, + "p90": 226.1119931936264, + "p95": 236.35199666023254, + "p99": 277.98399329185486 + }, + "combine": { + "p50": 90.36800265312195, + "p90": 96.96000069379807, + "p95": 104.70400005578995, + "p99": 111.7120012640953 + }, + "roundtrip": { + "p50": 267.0400142669678, + "p90": 300.54399371147156, + "p95": 309.9519908428192, + "p99": 349.3120074272156 + }, + "isolatedSum": { + "p50": 280.5120050907135, + "p90": 323.07199388742447, + "p95": 341.0559967160225, + "p99": 389.69599455595016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 210.40000021457672, + "p90": 242.23999679088593, + "p95": 248.03200364112854, + "p99": 260.19200682640076 + }, + "combine": { + "p50": 108.89600217342377, + "p90": 116.2559986114502, + "p95": 122.40000069141388, + "p99": 128.1919926404953 + }, + "roundtrip": { + "p50": 305.4719865322113, + "p90": 338.49599957466125, + "p95": 344.31999921798706, + "p99": 369.6320056915283 + }, + "isolatedSum": { + "p50": 319.2960023880005, + "p90": 358.4959954023361, + "p95": 370.4320043325424, + "p99": 388.38399946689606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1d153f35", + "identity": "b200|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_033ebcb6", + "comparisonKey": "e0e5d6c3472fb11e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:30:05.226091+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 186.5919977426529, + "p90": 223.7440049648285, + "p95": 234.52800512313843, + "p99": 265.0560140609741 + }, + "combine": { + "p50": 68.89600306749344, + "p90": 76.48000121116638, + "p95": 80.79999685287476, + "p99": 86.75199747085571 + }, + "roundtrip": { + "p50": 246.24000489711761, + "p90": 273.6000120639801, + "p95": 280.5759906768799, + "p99": 301.91999673843384 + }, + "isolatedSum": { + "p50": 255.48800081014633, + "p90": 300.2240061759949, + "p95": 315.3280019760132, + "p99": 351.80801153182983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 186.68800592422485, + "p90": 216.92800521850586, + "p95": 221.79199755191803, + "p99": 231.77599906921387 + }, + "combine": { + "p50": 70.68800181150436, + "p90": 76.64000242948532, + "p95": 84.32000130414963, + "p99": 
88.128000497818 + }, + "roundtrip": { + "p50": 247.74399399757385, + "p90": 274.01599287986755, + "p95": 279.2640030384064, + "p99": 296.2239980697632 + }, + "isolatedSum": { + "p50": 257.3760077357292, + "p90": 293.5680076479912, + "p95": 306.11199885606766, + "p99": 319.90399956703186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 197.11999595165253, + "p90": 239.23200368881226, + "p95": 253.85600328445435, + "p99": 276.3200104236603 + }, + "combine": { + "p50": 72.12799787521362, + "p90": 75.74400305747986, + "p95": 82.30400085449219, + "p99": 86.56000345945358 + }, + "roundtrip": { + "p50": 249.79199469089508, + "p90": 279.35999631881714, + "p95": 295.1039969921112, + "p99": 327.64801383018494 + }, + "isolatedSum": { + "p50": 269.24799382686615, + "p90": 314.9760067462921, + "p95": 336.16000413894653, + "p99": 362.88001388311386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 187.48800456523895, + "p90": 228.96000742912292, + "p95": 235.45600473880768, + "p99": 274.97598528862 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 82.62400329113007, + "p95": 89.08800035715103, + "p99": 92.44800359010696 + }, + "roundtrip": { + "p50": 254.91198897361755, + "p90": 284.31999683380127, + "p95": 289.98398780822754, + "p99": 305.5360019207001 + }, + "isolatedSum": { + "p50": 263.8080045580864, + "p90": 311.584010720253, + "p95": 324.5440050959587, + "p99": 367.42398887872696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + 
"combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 186.62400543689728, + "p90": 219.58400309085846, + "p95": 224.19199347496033, + "p99": 243.0720031261444 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 85.66399663686752, + "p95": 90.7839983701706, + "p99": 105.18400371074677 + }, + "roundtrip": { + "p50": 255.77598810195923, + "p90": 289.0239953994751, + "p95": 302.2719919681549, + "p99": 349.63199496269226 + }, + "isolatedSum": { + "p50": 264.41600173711777, + "p90": 305.247999727726, + "p95": 314.9759918451309, + "p99": 348.2560068368912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 186.27199530601501, + "p90": 216.2880003452301, + "p95": 223.64799678325653, + "p99": 239.1359955072403 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 86.65599673986435, + "p95": 93.53599697351456, + "p99": 97.34400361776352 + }, + "roundtrip": { + "p50": 261.50399446487427, + "p90": 290.3999984264374, + "p95": 303.48798632621765, + "p99": 335.32801270484924 + }, + "isolatedSum": { + "p50": 267.35999435186386, + "p90": 302.94399708509445, + "p95": 317.1839937567711, + "p99": 336.4799991250038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 192.3840045928955, + "p90": 232.67200589179993, + "p95": 241.56799912452698, + "p99": 298.4960079193115 + }, + "combine": { + 
"p50": 94.04800087213516, + "p90": 100.0640019774437, + "p95": 107.51999914646149, + "p99": 113.3119985461235 + }, + "roundtrip": { + "p50": 275.2000093460083, + "p90": 301.05599761009216, + "p95": 305.7920038700104, + "p99": 314.91199135780334 + }, + "isolatedSum": { + "p50": 286.43200546503067, + "p90": 332.7360078692436, + "p95": 349.08799827098846, + "p99": 411.808006465435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 215.58399498462677, + "p90": 235.74399948120117, + "p95": 242.8160011768341, + "p99": 251.55198574066162 + }, + "combine": { + "p50": 115.4559999704361, + "p90": 122.91199713945389, + "p95": 128.9920061826706, + "p99": 137.40800321102142 + }, + "roundtrip": { + "p50": 317.7280128002167, + "p90": 343.1999981403351, + "p95": 354.0799915790558, + "p99": 378.36799025535583 + }, + "isolatedSum": { + "p50": 331.03999495506287, + "p90": 358.65599662065506, + "p95": 371.8080073595047, + "p99": 388.95998895168304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-555f186c", + "identity": "b200|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_d0ca9eea", + "comparisonKey": "b368617b2aed0cab", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:18:45.289044+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", 
+ "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 77.82399654388428, + "p90": 89.79199826717377, + "p95": 97.47199714183807, + "p99": 109.53599959611893 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 77.53600180149078, + "p95": 82.17599987983704, + "p99": 88.22400122880936 + }, + "roundtrip": { + "p50": 165.95199704170227, + "p90": 194.14399564266205, + "p95": 203.71200144290924, + "p99": 226.52800381183624 + }, + "isolatedSum": { + "p50": 
150.81599354743958, + "p90": 167.32800006866455, + "p95": 179.6479970216751, + "p99": 197.76000082492828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 78.3040001988411, + "p90": 87.55200356245041, + "p95": 92.70399808883667, + "p99": 100.99200159311295 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 79.32800054550171, + "p95": 88.128000497818, + "p99": 94.04800087213516 + }, + "roundtrip": { + "p50": 167.13599860668182, + "p90": 193.7599927186966, + "p95": 202.33599841594696, + "p99": 221.76000475883484 + }, + "isolatedSum": { + "p50": 152.96000242233276, + "p90": 166.88000410795212, + "p95": 180.83199858665466, + "p99": 195.0400024652481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.35200089216232, + "p90": 86.81599795818329, + "p95": 94.36800330877304, + "p99": 102.55999863147736 + }, + "combine": { + "p50": 76.25599950551987, + "p90": 79.58400249481201, + "p95": 83.13599973917007, + "p99": 93.05600076913834 + }, + "roundtrip": { + "p50": 173.43999445438385, + "p90": 205.47200739383698, + "p95": 213.31200003623962, + "p99": 236.25600337982178 + }, + "isolatedSum": { + "p50": 156.6080003976822, + "p90": 166.4000004529953, + "p95": 177.50400304794312, + "p99": 195.6159994006157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 81.79199695587158, + "p90": 92.57599711418152, + "p95": 99.55199807882309, + "p99": 106.49599879980087 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 84.95999872684479, + "p95": 92.0960009098053, + "p99": 97.4079966545105 + }, + "roundtrip": { + "p50": 170.81600427627563, + "p90": 194.30400431156158, + "p95": 201.60000026226044, + "p99": 213.15200626850128 + }, + "isolatedSum": { + "p50": 162.07999736070633, + "p90": 177.5359958410263, + "p95": 191.6479989886284, + "p99": 203.90399545431137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.36000144481659, + "p90": 93.98400038480759, + "p95": 101.85600072145462, + "p99": 109.8560020327568 + }, + "combine": { + "p50": 82.14399963617325, + "p90": 86.5280032157898, + "p95": 93.50399672985077, + "p99": 99.20000284910202 + }, + "roundtrip": { + "p50": 173.43999445438385, + "p90": 194.5279985666275, + "p95": 200.28799772262573, + "p99": 209.05600488185883 + }, + "isolatedSum": { + "p50": 165.50400108098984, + "p90": 180.51200360059738, + "p95": 195.3599974513054, + "p99": 209.05600488185883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.69599688053131, + "p90": 97.72799909114838, + "p95": 103.10400277376175, + "p99": 116.12799763679504 + }, + "combine": { + "p50": 86.7839977145195, + "p90": 94.84799951314926, + "p95": 99.16800260543823, + "p99": 109.8880022764206 + }, + "roundtrip": { + "p50": 176.96000635623932, + "p90": 194.240003824234, + "p95": 
201.53599977493286, + "p99": 211.42399311065674 + }, + "isolatedSum": { + "p50": 172.4799945950508, + "p90": 192.57599860429764, + "p95": 202.27200537919998, + "p99": 226.01599991321564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.61599791049957, + "p90": 106.23999685049057, + "p95": 111.93600296974182, + "p99": 120.92799693346024 + }, + "combine": { + "p50": 102.04800218343735, + "p90": 113.66400122642517, + "p95": 116.73600226640701, + "p99": 125.2799928188324 + }, + "roundtrip": { + "p50": 198.81600141525269, + "p90": 221.79199755191803, + "p95": 231.83999955654144, + "p99": 257.1839988231659 + }, + "isolatedSum": { + "p50": 197.66400009393692, + "p90": 219.90399807691574, + "p95": 228.67200523614883, + "p99": 246.20798975229263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.09600287675858, + "p90": 109.8880022764206, + "p95": 119.48800086975098, + "p99": 128.31999361515045 + }, + "combine": { + "p50": 122.17599898576736, + "p90": 128.00000607967377, + "p95": 132.86399841308594, + "p99": 140.06400108337402 + }, + "roundtrip": { + "p50": 243.45600605010986, + "p90": 254.33599948883057, + "p95": 261.6960108280182, + "p99": 269.6959972381592 + }, + "isolatedSum": { + "p50": 226.27200186252594, + "p90": 237.88800835609436, + "p95": 252.3519992828369, + "p99": 268.3839946985245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-89124c58", + "identity": "b200|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_033ebcb6", + "comparisonKey": "4ad08f4f6a87de99", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:21:16.078436+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 184.4799965620041, + "p90": 202.33599841594696, + "p95": 205.21600544452667, + "p99": 213.18399906158447 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 76.76800340414047, + "p95": 79.45600152015686, + "p99": 85.91999858617783 + }, + "roundtrip": { + "p50": 243.93600225448608, + "p90": 259.2639923095703, + "p95": 262.9759907722473, + "p99": 276.095986366272 + }, + "isolatedSum": { + "p50": 257.7279955148697, + "p90": 279.10400182008743, + "p95": 284.67200696468353, + "p99": 299.1039976477623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 185.63200533390045, + "p90": 213.31200003623962, + "p95": 231.10400140285492, + "p99": 256.00001215934753 + }, + "combine": { + "p50": 74.72000271081924, + "p90": 77.85599678754807, + "p95": 79.8719972372055, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 248.73599410057068, + "p90": 268.12800765037537, + "p95": 291.9040024280548, + "p99": 344.9920117855072 + }, + "isolatedSum": { + "p50": 260.3520080447197, + "p90": 291.1679968237877, + "p95": 310.9759986400604, + "p99": 341.44001454114914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 184.2239946126938, + "p90": 201.6959935426712, + "p95": 205.6639939546585, + "p99": 218.46400201320648 + }, + "combine": { + 
"p50": 76.35200023651123, + "p90": 79.68000322580338, + "p95": 81.40800148248672, + "p99": 88.48000317811966 + }, + "roundtrip": { + "p50": 253.91998887062073, + "p90": 293.69598627090454, + "p95": 307.20001459121704, + "p99": 332.99198746681213 + }, + "isolatedSum": { + "p50": 260.575994849205, + "p90": 281.3759967684746, + "p95": 287.07199543714523, + "p99": 306.94400519132614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 184.32000279426575, + "p90": 201.4400064945221, + "p95": 206.27200603485107, + "p99": 227.32800245285034 + }, + "combine": { + "p50": 80.25600016117096, + "p90": 83.55200290679932, + "p95": 85.4720026254654, + "p99": 92.92799979448318 + }, + "roundtrip": { + "p50": 254.20799851417542, + "p90": 292.80000925064087, + "p95": 303.16799879074097, + "p99": 324.6079981327057 + }, + "isolatedSum": { + "p50": 264.5760029554367, + "p90": 284.9920094013214, + "p95": 291.74400866031647, + "p99": 320.2560022473335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 185.12000143527985, + "p90": 220.99199891090393, + "p95": 235.00800132751465, + "p99": 260.19200682640076 + }, + "combine": { + "p50": 81.88799768686295, + "p90": 85.31200140714645, + "p95": 87.26400136947632, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 263.64800333976746, + "p90": 294.14400458335876, + "p95": 317.79199838638306, + "p99": 363.74399065971375 + }, + "isolatedSum": { + "p50": 267.0079991221428, + "p90": 306.3040003180504, + "p95": 322.27200269699097, + "p99": 
352.06400603055954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 184.4480037689209, + "p90": 200.70399343967438, + "p95": 203.36000621318817, + "p99": 211.16800606250763 + }, + "combine": { + "p50": 86.2400010228157, + "p90": 89.40800279378891, + "p95": 91.20000153779984, + "p99": 95.8079993724823 + }, + "roundtrip": { + "p50": 261.82401180267334, + "p90": 286.8480086326599, + "p95": 310.5599880218506, + "p99": 341.0240113735199 + }, + "isolatedSum": { + "p50": 270.6880047917366, + "p90": 290.1119962334633, + "p95": 294.560007750988, + "p99": 306.97600543498993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 195.77600061893463, + "p90": 207.45599269866943, + "p95": 211.32799983024597, + "p99": 216.44799411296844 + }, + "combine": { + "p50": 100.63999891281128, + "p90": 104.41599786281586, + "p95": 106.08000308275223, + "p99": 110.20799726247787 + }, + "roundtrip": { + "p50": 283.90398621559143, + "p90": 308.19201469421387, + "p95": 320.16000151634216, + "p99": 335.07201075553894 + }, + "isolatedSum": { + "p50": 296.4159995317459, + "p90": 311.8719905614853, + "p95": 317.4080029129982, + "p99": 326.6559913754463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 219.80799734592438, + "p90": 
236.06400191783905, + "p95": 244.51200664043427, + "p99": 262.0159983634949 + }, + "combine": { + "p50": 121.85599654912949, + "p90": 126.14400684833527, + "p95": 128.7360042333603, + "p99": 134.46399569511414 + }, + "roundtrip": { + "p50": 326.911985874176, + "p90": 340.9599959850311, + "p95": 356.9279909133911, + "p99": 380.47999143600464 + }, + "isolatedSum": { + "p50": 341.66399389505386, + "p90": 362.2080087661743, + "p95": 373.24801087379456, + "p99": 396.479994058609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-72f8dab9", + "identity": "b200|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b200_033ebcb6", + "comparisonKey": "98149a71a9583aa0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:22:35.122254+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 189.95200097560883, + "p90": 210.91200411319733, + "p95": 256.3199996948242, + "p99": 386.0799968242645 + }, + "combine": { + "p50": 71.10399752855301, + "p90": 75.32799988985062, + "p95": 77.02399790287018, + "p99": 81.727996468544 + }, + "roundtrip": { + "p50": 254.20799851417542, + "p90": 435.68000197410583, + "p95": 445.248007774353, + "p99": 456.35199546813965 + }, + "isolatedSum": { + "p50": 261.05599850416183, + "p90": 286.24000400304794, + "p95": 333.3439975976944, + "p99": 467.80799329280853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 190.88000059127808, + "p90": 214.20800685882568, + "p95": 238.5600060224533, + "p99": 263.10399174690247 + }, + "combine": { + "p50": 73.05599749088287, + "p90": 75.68000257015228, + "p95": 77.2479996085167, + "p99": 83.0719992518425 + }, + "roundtrip": { + "p50": 252.6400089263916, + "p90": 268.3199942111969, + "p95": 275.9999930858612, + 
"p99": 292.60799288749695 + }, + "isolatedSum": { + "p50": 263.93599808216095, + "p90": 289.88800942897797, + "p95": 315.80800563097, + "p99": 346.17599099874496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 193.53599846363068, + "p90": 353.95199060440063, + "p95": 367.1039938926697, + "p99": 383.83999466896057 + }, + "combine": { + "p50": 76.12799853086472, + "p90": 81.53600245714188, + "p95": 84.16000008583069, + "p99": 92.03200042247772 + }, + "roundtrip": { + "p50": 256.8320035934448, + "p90": 279.6480059623718, + "p95": 295.0719892978668, + "p99": 381.3439905643463 + }, + "isolatedSum": { + "p50": 269.6639969944954, + "p90": 435.4879930615425, + "p95": 451.26399397850037, + "p99": 475.8719950914383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 190.17599523067474, + "p90": 208.639994263649, + "p95": 211.4879935979843, + "p99": 217.24799275398254 + }, + "combine": { + "p50": 79.74400371313095, + "p90": 86.46400272846222, + "p95": 87.87199854850769, + "p99": 96.79999947547913 + }, + "roundtrip": { + "p50": 259.39199328422546, + "p90": 272.5119888782501, + "p95": 276.12799406051636, + "p99": 285.3119969367981 + }, + "isolatedSum": { + "p50": 269.9199989438057, + "p90": 295.1039969921112, + "p95": 299.359992146492, + "p99": 314.04799222946167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 190.17599523067474, + "p90": 207.90399610996246, + "p95": 210.78400313854218, + "p99": 221.27999365329742 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 83.39200168848038, + "p95": 85.60000360012054, + "p99": 92.51199662685394 + }, + "roundtrip": { + "p50": 262.719988822937, + "p90": 283.84000062942505, + "p95": 291.1680042743683, + "p99": 308.9280128479004 + }, + "isolatedSum": { + "p50": 270.4639956355095, + "p90": 291.29599779844284, + "p95": 296.3840067386627, + "p99": 313.79199028015137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 192.54399836063385, + "p90": 218.33600103855133, + "p95": 240.22400379180908, + "p99": 259.0720057487488 + }, + "combine": { + "p50": 85.88799834251404, + "p90": 89.4400030374527, + "p95": 92.16000139713287, + "p99": 101.53599828481674 + }, + "roundtrip": { + "p50": 265.6959891319275, + "p90": 278.1760096549988, + "p95": 280.19198775291443, + "p99": 288.63999247550964 + }, + "isolatedSum": { + "p50": 278.4319967031479, + "p90": 307.776004076004, + "p95": 332.38400518894196, + "p99": 360.6080040335655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.77599382400513, + "p90": 212.3199999332428, + "p95": 216.06400609016418, + "p99": 226.84800624847412 + }, + "combine": { + "p50": 100.3199964761734, + "p90": 103.39199751615524, + "p95": 104.8320010304451, + "p99": 111.1999973654747 + }, + "roundtrip": { + "p50": 
284.7039997577667, + "p90": 297.34399914741516, + "p95": 300.31999945640564, + "p99": 308.03200602531433 + }, + "isolatedSum": { + "p50": 300.0959903001785, + "p90": 315.71199744939804, + "p95": 320.8960071206093, + "p99": 338.0480036139488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 225.75999796390533, + "p90": 240.09600281715393, + "p95": 244.03199553489685, + "p99": 252.16001272201538 + }, + "combine": { + "p50": 121.34400010108948, + "p90": 125.15200674533844, + "p95": 126.91199779510498, + "p99": 132.7359974384308 + }, + "roundtrip": { + "p50": 333.95200967788696, + "p90": 348.09601306915283, + "p95": 352.83198952674866, + "p99": 366.36799573898315 + }, + "isolatedSum": { + "p50": 347.1039980649948, + "p90": 365.24800956249237, + "p95": 370.94399333000183, + "p99": 384.89601016044617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7fbb0004", + "identity": "b200|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_b10b6c62", + "comparisonKey": "63f9399182d73133", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:20:00.128716+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": 
"b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.80000168085098, + "p90": 67.23199784755707, + "p95": 69.24799829721451, + "p99": 74.52800124883652 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 74.72000271081924, + "p95": 76.4480009675026, + "p99": 84.48000252246857 + }, + "roundtrip": { + "p50": 152.79999375343323, + "p90": 168.73599588871002, + "p95": 172.15999960899353, + "p99": 185.63200533390045 + }, + "isolatedSum": { + "p50": 136.83199882507324, + "p90": 141.9520005583763, + "p95": 145.6959992647171, + "p99": 159.00800377130508 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 66.14399701356888, + "p90": 69.05599683523178, + "p95": 71.48800045251846, + "p99": 77.95199751853943 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 77.47200131416321, + "p95": 79.13599908351898, + "p99": 83.55200290679932 + }, + "roundtrip": { + "p50": 154.23999726772308, + "p90": 176.32000148296356, + "p95": 180.51199615001678, + "p99": 194.5600062608719 + }, + "isolatedSum": { + "p50": 140.09599387645721, + "p90": 146.527998149395, + "p95": 150.62399953603745, + "p99": 161.50400042533875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 67.90400296449661, + "p90": 70.91200351715088, + "p95": 72.09599763154984, + "p99": 80.57600259780884 + }, + "combine": { + "p50": 75.77600330114365, + "p90": 78.52800190448761, + "p95": 81.11999928951263, + "p99": 86.17600053548813 + }, + "roundtrip": { + "p50": 155.7759940624237, + "p90": 165.3439998626709, + "p95": 169.47199404239655, + "p99": 176.86399817466736 + }, + "isolatedSum": { + "p50": 143.68000626564026, + "p90": 149.4400054216385, + "p95": 153.21599692106247, + "p99": 166.75200313329697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.37599927186966, + "p90": 72.38399982452393, + "p95": 74.30399954319, + "p99": 
82.36800134181976 + }, + "combine": { + "p50": 79.99999821186066, + "p90": 83.3280012011528, + "p95": 84.35200154781342, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 161.15200519561768, + "p90": 184.12800133228302, + "p95": 190.11199474334717, + "p99": 201.6959935426712 + }, + "isolatedSum": { + "p50": 149.37599748373032, + "p90": 155.71200102567673, + "p95": 158.65600109100342, + "p99": 173.98399859666824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 70.01599669456482, + "p90": 73.85600358247757, + "p95": 77.34400033950806, + "p99": 83.74399691820145 + }, + "combine": { + "p50": 82.04799890518188, + "p90": 87.26400136947632, + "p95": 88.54400366544724, + "p99": 94.65599805116653 + }, + "roundtrip": { + "p50": 161.6320013999939, + "p90": 170.3999936580658, + "p95": 173.69599640369415, + "p99": 181.8239986896515 + }, + "isolatedSum": { + "p50": 152.0639955997467, + "p90": 161.1200049519539, + "p95": 165.8880040049553, + "p99": 178.39999496936798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 73.15199822187424, + "p90": 76.19199901819229, + "p95": 78.27199995517731, + "p99": 84.41600203514099 + }, + "combine": { + "p50": 86.11200004816055, + "p90": 89.05600011348724, + "p95": 91.13600105047226, + "p99": 96.38399630784988 + }, + "roundtrip": { + "p50": 166.27199947834015, + "p90": 174.78400468826294, + "p95": 178.97599935531616, + "p99": 185.95199286937714 + }, + "isolatedSum": { + "p50": 159.2639982700348, + "p90": 165.24799913167953, + 
"p95": 169.40800100564957, + "p99": 180.79999834299088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 81.44000172615051, + "p90": 84.63999629020691, + "p95": 87.93599903583527, + "p99": 95.2640026807785 + }, + "combine": { + "p50": 100.19200295209885, + "p90": 103.74400019645691, + "p95": 105.3759977221489, + "p99": 110.84800213575363 + }, + "roundtrip": { + "p50": 185.92000007629395, + "p90": 195.19999623298645, + "p95": 199.3280053138733, + "p99": 211.19999885559082 + }, + "isolatedSum": { + "p50": 181.63200467824936, + "p90": 188.38399648666382, + "p95": 193.31199675798416, + "p99": 206.11200481653214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.96000003814697, + "p90": 101.27999633550644, + "p95": 103.07200253009796, + "p99": 109.18399691581726 + }, + "combine": { + "p50": 121.5360015630722, + "p90": 127.36000120639801, + "p95": 129.69599664211273, + "p99": 134.91199910640717 + }, + "roundtrip": { + "p50": 230.9119999408722, + "p90": 235.23199558258057, + "p95": 236.80000007152557, + "p99": 239.96800184249878 + }, + "isolatedSum": { + "p50": 214.49600160121918, + "p90": 228.63999754190445, + "p95": 232.7679991722107, + "p99": 244.09599602222443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fc0b9bf6", + "identity": 
"b200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b200_f0618d20", + "comparisonKey": "c097dcff2ef5c770", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:27:21.167629+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 99.93600100278854, + "p90": 109.72800105810165, + "p95": 112.03200370073318, + "p99": 117.08799749612808 + }, + "combine": { + "p50": 101.95200145244598, + "p90": 109.72800105810165, + "p95": 111.42399907112122, + "p99": 114.17599767446518 + }, + "roundtrip": { + "p50": 183.45600366592407, + "p90": 193.56800615787506, + "p95": 197.05599546432495, + "p99": 209.4080001115799 + }, + "isolatedSum": { + "p50": 201.88800245523453, + "p90": 219.4560021162033, + "p95": 223.4560027718544, + "p99": 231.26399517059326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 135.903999209404, + "p90": 143.48800480365753, + "p95": 145.53600549697876, + "p99": 149.27999675273895 + }, + "combine": { + "p50": 138.65600526332855, + "p90": 146.91199362277985, + "p95": 148.25600385665894, + "p99": 151.61600708961487 + }, + "roundtrip": { + "p50": 251.93598866462708, + "p90": 257.4400007724762, + "p95": 259.74398851394653, + "p99": 265.82399010658264 + }, + "isolatedSum": { + "p50": 274.56000447273254, + "p90": 290.3999984264374, + "p95": 293.7920093536377, + "p99": 300.8960038423538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.95199418067932, + "p90": 198.91199469566345, + "p95": 201.4079988002777, + "p99": 209.21599864959717 + }, + "combine": { + "p50": 203.23200523853302, + "p90": 211.2320065498352, + "p95": 213.0880057811737, + "p99": 221.79199755191803 + }, + "roundtrip": { + "p50": 
365.9200072288513, + "p90": 373.6959993839264, + "p95": 377.79200077056885, + "p99": 385.76000928878784 + }, + "isolatedSum": { + "p50": 397.18399941921234, + "p90": 410.14400124549866, + "p95": 414.4960045814514, + "p99": 431.0079962015152 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 308.4160089492798, + "p90": 318.2399868965149, + "p95": 322.62399792671204, + "p99": 333.9200019836426 + }, + "combine": { + "p50": 397.21599221229553, + "p90": 406.97601437568665, + "p95": 409.56801176071167, + "p99": 413.5040044784546 + }, + "roundtrip": { + "p50": 628.928005695343, + "p90": 638.6240124702454, + "p95": 642.7199840545654, + "p99": 649.3759751319885 + }, + "isolatedSum": { + "p50": 705.6320011615753, + "p90": 725.2160012722015, + "p95": 732.1920096874237, + "p99": 747.4240064620972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 538.3999943733215, + "p90": 547.6160049438477, + "p95": 549.888014793396, + "p99": 559.6479773521423 + }, + "combine": { + "p50": 766.3040161132812, + "p90": 772.6399898529053, + "p95": 777.2160172462463, + "p99": 780.4160118103027 + }, + "roundtrip": { + "p50": 1274.4640111923218, + "p90": 1283.3280563354492, + "p95": 1285.9519720077515, + "p99": 1293.887972831726 + }, + "isolatedSum": { + "p50": 1304.7040104866028, + "p90": 1320.255994796753, + "p95": 1327.1040320396423, + "p99": 1340.063989162445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 
716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1006.9119930267334, + "p90": 1015.5199766159058, + "p95": 1018.9440250396729, + "p99": 1027.4239778518677 + }, + "combine": { + "p50": 1450.7839679718018, + "p90": 1457.4079513549805, + "p95": 1459.1039419174194, + "p99": 1467.9679870605469 + }, + "roundtrip": { + "p50": 2416.5120124816895, + "p90": 2427.776098251343, + "p95": 2432.192087173462, + "p99": 2478.111982345581 + }, + "isolatedSum": { + "p50": 2457.695960998535, + "p90": 2472.9279279708862, + "p95": 2478.0479669570923, + "p99": 2495.3919649124146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e9bde214", + "identity": "b200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b200_f0618d20", + "comparisonKey": "2a52697ab2c7509b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:29:12.112905+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.39999741315842, + "p90": 112.22399771213531, + "p95": 117.0239970088005, + "p99": 126.88000500202179 + }, + "combine": { + "p50": 112.89600282907486, + "p90": 116.64000153541565, + "p95": 118.27199906110764, + "p99": 126.68800354003906 + }, + "roundtrip": { + "p50": 192.4159973859787, + "p90": 200.80000162124634, + "p95": 204.0960043668747, + "p99": 209.08799767494202 + }, + "isolatedSum": { + "p50": 215.29600024223328, + "p90": 228.86399924755096, + "p95": 235.29599606990814, + "p99": 253.56800854206085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.19999516010284, + "p90": 
149.9519944190979, + "p95": 155.03999590873718, + "p99": 162.56000101566315 + }, + "combine": { + "p50": 149.50400590896606, + "p90": 152.96000242233276, + "p95": 155.20000457763672, + "p99": 164.32000696659088 + }, + "roundtrip": { + "p50": 263.5200023651123, + "p90": 271.07200026512146, + "p95": 275.10398626327515, + "p99": 282.3359966278076 + }, + "isolatedSum": { + "p50": 292.7040010690689, + "p90": 302.91199684143066, + "p95": 310.2400004863739, + "p99": 326.880007982254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.1839977502823, + "p90": 210.7519954442978, + "p95": 212.5760018825531, + "p99": 220.22399306297302 + }, + "combine": { + "p50": 227.26400196552277, + "p90": 236.76800727844238, + "p95": 237.85600066184998, + "p99": 246.5279996395111 + }, + "roundtrip": { + "p50": 395.7119882106781, + "p90": 408.4160029888153, + "p95": 412.7359986305237, + "p99": 424.44801330566406 + }, + "isolatedSum": { + "p50": 432.44799971580505, + "p90": 447.5200027227402, + "p95": 450.4320025444031, + "p99": 466.75199270248413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 315.2639865875244, + "p90": 324.12800192832947, + "p95": 329.72800731658936, + "p99": 340.86400270462036 + }, + "combine": { + "p50": 445.72800397872925, + "p90": 453.7599980831146, + "p95": 457.88800716400146, + "p99": 460.671991109848 + }, + "roundtrip": { + "p50": 714.7840261459351, + "p90": 724.8640060424805, + "p95": 729.5359969139099, + "p99": 
737.9519939422607 + }, + "isolatedSum": { + "p50": 760.9919905662537, + "p90": 777.8880000114441, + "p95": 787.6160144805908, + "p99": 801.5359938144684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 562.8479719161987, + "p90": 572.8319883346558, + "p95": 576.7999887466431, + "p99": 583.840012550354 + }, + "combine": { + "p50": 791.487991809845, + "p90": 800.2880215644836, + "p95": 803.0400276184082, + "p99": 807.0080280303955 + }, + "roundtrip": { + "p50": 1322.8800296783447, + "p90": 1331.7439556121826, + "p95": 1334.8159790039062, + "p99": 1343.008041381836 + }, + "isolatedSum": { + "p50": 1354.3359637260437, + "p90": 1373.1200098991394, + "p95": 1379.8400163650513, + "p99": 1390.8480405807495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1046.463966369629, + "p90": 1055.0719499588013, + "p95": 1057.695984840393, + "p99": 1063.4560585021973 + }, + "combine": { + "p50": 1491.2320375442505, + "p90": 1496.0960149765015, + "p95": 1502.7519464492798, + "p99": 1506.432056427002 + }, + "roundtrip": { + "p50": 2500.767946243286, + "p90": 2509.0880393981934, + "p95": 2512.0320320129395, + "p99": 2516.1919593811035 + }, + "isolatedSum": { + "p50": 2537.6960039138794, + "p90": 2551.1679649353027, + "p95": 2560.447931289673, + "p99": 2569.888114929199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1a20456", + "identity": "b200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_f0618d20", + "comparisonKey": "1ee628ccb7ec7df4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:31:05.328709+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.04000359773636, + "p90": 117.98399686813354, + "p95": 120.12799829244614, + "p99": 122.49600142240524 + }, + "combine": { + "p50": 114.56000059843063, + "p90": 122.5920021533966, + "p95": 124.35200065374374, + "p99": 126.75200402736664 + }, + "roundtrip": { + "p50": 200.28799772262573, + "p90": 206.7520022392273, + "p95": 210.4319930076599, + "p99": 215.03999829292297 + }, + "isolatedSum": { + "p50": 225.600004196167, + "p90": 240.57599902153015, + "p95": 244.47999894618988, + "p99": 249.24800544977188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 139.5840048789978, + "p90": 146.65600657463074, + "p95": 149.47199821472168, + "p99": 153.34400534629822 + }, + "combine": { + "p50": 153.76000106334686, + "p90": 162.08000481128693, + "p95": 163.455992937088, + "p99": 168.19199919700623 + }, + "roundtrip": { + "p50": 276.5440046787262, + "p90": 284.31999683380127, + "p95": 287.00798749923706, + "p99": 293.2159900665283 + }, + "isolatedSum": { + "p50": 293.34400594234467, + "p90": 308.73601138591766, + "p95": 312.9279911518097, + "p99": 321.53600454330444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 214.04799818992615, + "p90": 219.39200162887573, + "p95": 
220.86399793624878, + "p99": 224.41600263118744 + }, + "combine": { + "p50": 250.11199712753296, + "p90": 258.2719922065735, + "p95": 260.2880001068115, + "p99": 262.30400800704956 + }, + "roundtrip": { + "p50": 425.7279932498932, + "p90": 434.6559941768646, + "p95": 438.1119906902313, + "p99": 444.92799043655396 + }, + "isolatedSum": { + "p50": 464.1599953174591, + "p90": 477.6639938354492, + "p95": 481.1519980430603, + "p99": 486.720010638237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.13600993156433, + "p90": 340.2239978313446, + "p95": 343.6799943447113, + "p99": 350.9120047092438 + }, + "combine": { + "p50": 455.1039934158325, + "p90": 460.1919949054718, + "p95": 464.383989572525, + "p99": 470.97599506378174 + }, + "roundtrip": { + "p50": 760.09601354599, + "p90": 766.9439911842346, + "p95": 769.1199779510498, + "p99": 772.8000283241272 + }, + "isolatedSum": { + "p50": 782.2400033473969, + "p90": 800.4159927368164, + "p95": 808.0639839172363, + "p99": 821.8879997730255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 574.9120116233826, + "p90": 582.1120142936707, + "p95": 584.6080183982849, + "p99": 590.4639959335327 + }, + "combine": { + "p50": 807.6480031013489, + "p90": 816.6720271110535, + "p95": 817.40802526474, + "p99": 825.439989566803 + }, + "roundtrip": { + "p50": 1358.8800430297852, + "p90": 1366.7839765548706, + "p95": 1368.6399459838867, + "p99": 1376.4159679412842 + }, + "isolatedSum": { + "p50": 
1382.5600147247314, + "p90": 1398.7840414047241, + "p95": 1402.016043663025, + "p99": 1415.9039855003357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1066.7519569396973, + "p90": 1072.7039575576782, + "p95": 1074.6879577636719, + "p99": 1079.8720121383667 + }, + "combine": { + "p50": 1507.5520277023315, + "p90": 1517.024040222168, + "p95": 1518.2080268859863, + "p99": 1523.584008216858 + }, + "roundtrip": { + "p50": 2552.7360439300537, + "p90": 2561.6960525512695, + "p95": 2564.0640258789062, + "p99": 2568.608045578003 + }, + "isolatedSum": { + "p50": 2574.303984642029, + "p90": 2589.727997779846, + "p95": 2592.895984649658, + "p99": 2603.4560203552246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-120e29ab", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_7c871228", + "comparisonKey": "7b516faca15d8465", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:18:54.732967+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.5680011510849, + "p90": 124.22399967908859, + "p95": 131.3599944114685, + "p99": 141.85599982738495 + }, + "combine": { + "p50": 125.40799379348755, + "p90": 130.62399625778198, + "p95": 137.56799697875977, + "p99": 143.36000382900238 + }, + "roundtrip": { + "p50": 211.35999262332916, + "p90": 221.40799462795258, + "p95": 226.30399465560913, + "p99": 233.95200073719025 + }, + "isolatedSum": { + "p50": 242.97599494457245, + "p90": 254.84799593687057, + "p95": 268.92799139022827, + "p99": 285.21600365638733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + 
"recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.008003115654, + "p90": 160.8320027589798, + "p95": 164.95999693870544, + "p99": 174.3360012769699 + }, + "combine": { + "p50": 166.36799275875092, + "p90": 176.32000148296356, + "p95": 178.20799350738525, + "p99": 184.92799997329712 + }, + "roundtrip": { + "p50": 293.37599873542786, + "p90": 300.6399869918823, + "p95": 305.7920038700104, + "p99": 313.82399797439575 + }, + "isolatedSum": { + "p50": 321.3759958744049, + "p90": 337.15200424194336, + "p95": 343.1679904460907, + "p99": 359.26400125026703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 217.95199811458588, + "p90": 225.43999552726746, + "p95": 229.95199263095856, + "p99": 239.71199989318848 + }, + "combine": { + "p50": 282.8800082206726, + "p90": 288.4159982204437, + "p95": 290.71998596191406, + "p99": 298.0479896068573 + }, + "roundtrip": { + "p50": 465.2479887008667, + "p90": 474.43199157714844, + "p95": 477.63198614120483, + "p99": 485.6959879398346 + }, + "isolatedSum": { + "p50": 500.8320063352585, + "p90": 513.8559937477112, + "p95": 520.6719785928726, + "p99": 537.7599895000458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 340.4479920864105, + "p90": 348.57600927352905, + "p95": 351.99999809265137, + "p99": 359.3280017375946 + }, + "combine": { + "p50": 470.0480103492737, 
+ "p90": 475.6160080432892, + "p95": 482.4639856815338, + "p99": 489.4079864025116 + }, + "roundtrip": { + "p50": 781.0559868812561, + "p90": 791.0400032997131, + "p95": 795.4879999160767, + "p99": 803.1039834022522 + }, + "isolatedSum": { + "p50": 810.4960024356842, + "p90": 824.1920173168182, + "p95": 834.4639837741852, + "p99": 848.7359881401062 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 583.296000957489, + "p90": 591.0080075263977, + "p95": 593.7280058860779, + "p99": 597.5679755210876 + }, + "combine": { + "p50": 831.2000036239624, + "p90": 841.7279720306396, + "p95": 843.2000279426575, + "p99": 850.7519960403442 + }, + "roundtrip": { + "p50": 1390.6240463256836, + "p90": 1398.7840414047241, + "p95": 1402.1120071411133, + "p99": 1409.4719886779785 + }, + "isolatedSum": { + "p50": 1414.4960045814514, + "p90": 1432.7359795570374, + "p95": 1436.9280338287354, + "p99": 1448.3199715614319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1079.7439813613892, + "p90": 1085.6000185012817, + "p95": 1089.2479419708252, + "p99": 1094.655990600586 + }, + "combine": { + "p50": 1539.3600463867188, + "p90": 1543.67995262146, + "p95": 1548.192024230957, + "p99": 1553.920030593872 + }, + "roundtrip": { + "p50": 2592.992067337036, + "p90": 2602.7519702911377, + "p95": 2606.623888015747, + "p99": 2617.151975631714 + }, + "isolatedSum": { + "p50": 2619.104027748108, + "p90": 2629.2799711227417, + "p95": 2637.439966201782, + 
"p99": 2648.576021194458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-54fe7033", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_f0618d20", + "comparisonKey": "269b8176e86edd37", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:20:42.866865+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.6240017414093, + "p90": 123.26399981975555, + "p95": 124.86399710178375, + "p99": 132.38400220870972 + }, + "combine": { + "p50": 126.08000636100769, + "p90": 129.2479932308197, + "p95": 130.3360015153885, + "p99": 136.83199882507324 + }, + "roundtrip": { + "p50": 213.18399906158447, + "p90": 219.13599967956543, + "p95": 222.97599911689758, + "p99": 227.39200294017792 + }, + "isolatedSum": { + "p50": 244.704008102417, + "p90": 252.51199305057526, + "p95": 255.19999861717224, + "p99": 269.21600103378296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.32800555229187, + "p90": 159.4880074262619, + "p95": 161.3759994506836, + "p99": 165.24800658226013 + }, + "combine": { + "p50": 166.72000288963318, + "p90": 174.6239960193634, + "p95": 175.7120043039322, + "p99": 178.94400656223297 + }, + "roundtrip": { + "p50": 294.8800027370453, + "p90": 305.184006690979, + "p95": 308.83198976516724, + "p99": 320.0640082359314 + }, + "isolatedSum": { + "p50": 322.04800844192505, + "p90": 334.1120034456253, + "p95": 337.0880037546158, + "p99": 344.1920131444931 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 219.7120040655136, + "p90": 225.21600127220154, + "p95": 229.21599447727203, + "p99": 243.3920055627823 + }, + "combine": { + "p50": 283.1999957561493, + "p90": 287.58400678634644, + "p95": 288.7359857559204, + "p99": 292.35199093818665 + }, + "roundtrip": { + "p50": 466.2080109119415, + "p90": 476.99201107025146, + "p95": 480.22401332855225, + "p99": 492.41599440574646 + }, + "isolatedSum": { + "p50": 502.9119998216629, + "p90": 512.800008058548, + "p95": 517.9519802331924, + "p99": 535.7439965009689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 341.5040075778961, + "p90": 347.3280072212219, + "p95": 348.9280045032501, + "p99": 354.2720079421997 + }, + "combine": { + "p50": 469.4719910621643, + "p90": 473.91998767852783, + "p95": 477.60000824928284, + "p99": 482.4320077896118 + }, + "roundtrip": { + "p50": 780.7999849319458, + "p90": 789.3120050430298, + "p95": 793.8560247421265, + "p99": 804.1599988937378 + }, + "isolatedSum": { + "p50": 810.9759986400604, + "p90": 821.2479948997498, + "p95": 826.528012752533, + "p99": 836.7040157318115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 585.0239992141724, + "p90": 591.7119979858398, + "p95": 594.2400097846985, + "p99": 603.2959818840027 + }, + "combine": { + "p50": 830.5919766426086, + "p90": 840.831995010376, + "p95": 842.5920009613037, + "p99": 847.7439880371094 + }, + "roundtrip": { + 
"p50": 1390.3679847717285, + "p90": 1398.1759548187256, + "p95": 1400.6400108337402, + "p99": 1406.559944152832 + }, + "isolatedSum": { + "p50": 1415.615975856781, + "p90": 1432.5439929962158, + "p95": 1436.8320107460022, + "p99": 1451.039969921112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1082.2720527648926, + "p90": 1088.5759592056274, + "p95": 1091.4239883422852, + "p99": 1095.7119464874268 + }, + "combine": { + "p50": 1540.8320426940918, + "p90": 1546.0799932479858, + "p95": 1551.3919591903687, + "p99": 1555.232048034668 + }, + "roundtrip": { + "p50": 2592.992067337036, + "p90": 2602.8800010681152, + "p95": 2605.952024459839, + "p99": 2613.823890686035 + }, + "isolatedSum": { + "p50": 2623.1040954589844, + "p90": 2634.6559524536133, + "p95": 2642.815947532654, + "p99": 2650.9439945220947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3b692930", + "identity": "b200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b200_f0618d20", + "comparisonKey": "f17c4c7d5bf71c2e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:25:31.692633+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.9119964838028, + "p90": 124.12799894809723, + "p95": 128.12800705432892, + "p99": 134.91199910640717 + }, + "combine": { + "p50": 125.37600100040436, + "p90": 131.16799294948578, + "p95": 137.02400028705597, + "p99": 148.19200336933136 + }, + "roundtrip": { + "p50": 211.96800470352173, + "p90": 220.64000368118286, + "p95": 226.46400332450867, + "p99": 233.98399353027344 + }, + "isolatedSum": { + "p50": 244.28799748420715, + "p90": 255.295991897583, + "p95": 265.1520073413849, + "p99": 283.1040024757385 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.7759940624237, + "p90": 161.47199273109436, + "p95": 164.92800414562225, + "p99": 178.52799594402313 + }, + "combine": { + "p50": 165.27999937534332, + "p90": 174.75199699401855, + "p95": 177.47199535369873, + "p99": 187.23200261592865 + }, + "roundtrip": { + "p50": 293.40800642967224, + "p90": 303.0720055103302, + "p95": 308.03200602531433, + "p99": 316.44800305366516 + }, + "isolatedSum": { + "p50": 321.05599343776703, + "p90": 336.2239897251129, + "p95": 342.399999499321, + "p99": 365.7599985599518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 218.52800250053406, + "p90": 232.7360063791275, + "p95": 239.51999843120575, + "p99": 252.83199548721313 + }, + "combine": { + "p50": 282.5919985771179, + "p90": 288.4159982204437, + "p95": 291.9999957084656, + "p99": 300.3840148448944 + }, + "roundtrip": { + "p50": 463.48801255226135, + "p90": 475.8079946041107, + "p95": 480.19200563430786, + "p99": 491.8400049209595 + }, + "isolatedSum": { + "p50": 501.120001077652, + "p90": 521.1520045995712, + "p95": 531.5199941396713, + "p99": 553.2160103321075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 336.95998787879944, + 
"p90": 348.06400537490845, + "p95": 352.512001991272, + "p99": 361.2799942493439 + }, + "combine": { + "p50": 470.97599506378174, + "p90": 481.79200291633606, + "p95": 483.99999737739563, + "p99": 493.27999353408813 + }, + "roundtrip": { + "p50": 785.7599854469299, + "p90": 795.3280210494995, + "p95": 799.0080118179321, + "p99": 805.184006690979 + }, + "isolatedSum": { + "p50": 807.9359829425812, + "p90": 829.8560082912445, + "p95": 836.5119993686676, + "p99": 854.559987783432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 583.7439894676208, + "p90": 589.3440246582031, + "p95": 590.7840132713318, + "p99": 595.9039926528931 + }, + "combine": { + "p50": 820.032000541687, + "p90": 831.7760229110718, + "p95": 837.9200100898743, + "p99": 845.3760147094727 + }, + "roundtrip": { + "p50": 1378.81600856781, + "p90": 1401.0560512542725, + "p95": 1406.272053718567, + "p99": 1417.0880317687988 + }, + "isolatedSum": { + "p50": 1403.7759900093079, + "p90": 1421.120047569275, + "p95": 1428.704023361206, + "p99": 1441.2800073623657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1072.767972946167, + "p90": 1081.1200141906738, + "p95": 1083.1040143966675, + "p99": 1095.1039791107178 + }, + "combine": { + "p50": 1532.4159860610962, + "p90": 1542.4000024795532, + "p95": 1544.0959930419922, + "p99": 1552.7679920196533 + }, + "roundtrip": { + "p50": 2581.727981567383, + "p90": 2589.440107345581, + "p95": 2591.808080673218, + "p99": 
2595.936059951782 + }, + "isolatedSum": { + "p50": 2605.183959007263, + "p90": 2623.520016670227, + "p95": 2627.2000074386597, + "p99": 2647.871971130371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-df1b38b6", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b200_027514d0", + "comparisonKey": "8f47a04926fe7757", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:32:02.441815+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.44800138473511, + "p90": 134.0479999780655, + "p95": 138.7840062379837, + "p99": 144.3839967250824 + }, + "combine": { + "p50": 141.184002161026, + "p90": 150.9760022163391, + "p95": 152.99199521541595, + "p99": 156.54399991035461 + }, + "roundtrip": { + "p50": 249.5039999485016, + "p90": 254.94399666786194, + "p95": 259.74398851394653, + "p99": 266.55998826026917 + }, + "isolatedSum": { + "p50": 265.6320035457611, + "p90": 285.0240021944046, + "p95": 291.77600145339966, + "p99": 300.927996635437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 174.01599884033203, + "p90": 181.21600151062012, + "p95": 186.27199530601501, + "p99": 196.6720074415207 + }, + "combine": { + "p50": 212.5760018825531, + "p90": 222.1439927816391, + "p95": 227.58400440216064, + "p99": 239.32799696922302 + }, + "roundtrip": { + "p50": 356.54398798942566, + "p90": 370.88000774383545, + "p95": 379.2639970779419, + "p99": 399.80798959732056 + }, + "isolatedSum": { + "p50": 386.59200072288513, + "p90": 403.3599942922592, + "p95": 413.85599970817566, + "p99": 436.0000044107437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, 
+ "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 249.5039999485016, + "p90": 258.5279941558838, + "p95": 263.96799087524414, + "p99": 270.7520127296448 + }, + "combine": { + "p50": 350.7840037345886, + "p90": 361.02399230003357, + "p95": 362.94400691986084, + "p99": 371.5839982032776 + }, + "roundtrip": { + "p50": 573.1520056724548, + "p90": 581.9839835166931, + "p95": 587.4239802360535, + "p99": 596.0320234298706 + }, + "isolatedSum": { + "p50": 600.2880036830902, + "p90": 619.5519864559174, + "p95": 626.911997795105, + "p99": 642.3360109329224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 425.8880019187927, + "p90": 434.87998843193054, + "p95": 438.81601095199585, + "p99": 447.1040070056915 + }, + "combine": { + "p50": 606.8800091743469, + "p90": 612.3520135879517, + "p95": 616.9919967651367, + "p99": 621.9519972801208 + }, + "roundtrip": { + "p50": 1004.7999620437622, + "p90": 1015.0719881057739, + "p95": 1018.3360576629639, + "p99": 1028.0640125274658 + }, + "isolatedSum": { + "p50": 1032.7680110931396, + "p90": 1047.2320020198822, + "p95": 1055.8080077171326, + "p99": 1069.0560042858124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 771.2960243225098, + "p90": 782.912015914917, + "p95": 785.3119969367981, + "p99": 792.6719784736633 + }, + "combine": { + "p50": 1124.4479417800903, + 
"p90": 1132.256031036377, + "p95": 1135.167956352234, + "p99": 1140.1280164718628 + }, + "roundtrip": { + "p50": 1866.752028465271, + "p90": 1878.1119585037231, + "p95": 1881.216049194336, + "p99": 1889.8240327835083 + }, + "isolatedSum": { + "p50": 1895.7439661026, + "p90": 1915.168046951294, + "p95": 1920.479953289032, + "p99": 1932.7999949455261 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1471.776008605957, + "p90": 1487.936019897461, + "p95": 1492.576003074646, + "p99": 1502.8159618377686 + }, + "combine": { + "p50": 2145.7600593566895, + "p90": 2154.560089111328, + "p95": 2157.6321125030518, + "p99": 2166.5279865264893 + }, + "roundtrip": { + "p50": 3590.9759998321533, + "p90": 3606.3361167907715, + "p95": 3610.975980758667, + "p99": 3626.7518997192383 + }, + "isolatedSum": { + "p50": 3617.5360679626465, + "p90": 3642.496109008789, + "p95": 3650.2081155776978, + "p99": 3669.343948364258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b320af1c", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b200_d85136b3", + "comparisonKey": "f8048eb2c4851355", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:32:47.229661+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.47199648618698, + "p90": 106.46399855613708, + "p95": 110.78400164842606, + "p99": 129.56799566745758 + }, + "combine": { + "p50": 78.5600021481514, + "p90": 86.62399649620056, + "p95": 87.87199854850769, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 147.13600277900696, + "p90": 153.28000485897064, + "p95": 155.7759940624237, + "p99": 161.31199896335602 + }, + 
"isolatedSum": { + "p50": 172.03199863433838, + "p90": 193.08799505233765, + "p95": 198.65600019693375, + "p99": 219.7439968585968 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 118.20799857378006, + "p90": 122.3360002040863, + "p95": 124.22399967908859, + "p99": 131.67999684810638 + }, + "combine": { + "p50": 125.72799623012543, + "p90": 129.2160004377365, + "p95": 130.36799430847168, + "p99": 138.20800185203552 + }, + "roundtrip": { + "p50": 211.5200012922287, + "p90": 216.73600375652313, + "p95": 219.55199539661407, + "p99": 225.79200565814972 + }, + "isolatedSum": { + "p50": 243.9359948039055, + "p90": 251.55200064182281, + "p95": 254.59199398756027, + "p99": 269.8879987001419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 211.87199652194977, + "p90": 216.0000056028366, + "p95": 217.40800142288208, + "p99": 221.5680032968521 + }, + "combine": { + "p50": 260.5440020561218, + "p90": 263.808012008667, + "p95": 264.73599672317505, + "p99": 266.975998878479 + }, + "roundtrip": { + "p50": 451.6479969024658, + "p90": 457.8239917755127, + "p95": 460.5120122432709, + "p99": 470.880001783371 + }, + "isolatedSum": { + "p50": 472.4159985780716, + "p90": 479.8080176115036, + "p95": 482.14399814605713, + "p99": 488.5440021753311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } 
+ ] + }, + { + "id": "cx-3e60c90d", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b200_df12795e", + "comparisonKey": "3781e0544954723f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:37:22.280489+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": 
"2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.14400225877762, + "p90": 106.36799782514572, + "p95": 108.96000266075134, + "p99": 116.92799627780914 + }, + "combine": { + "p50": 91.2960022687912, + "p90": 100.19200295209885, + "p95": 101.27999633550644, + "p99": 104.19200360774994 + }, + "roundtrip": { + "p50": 163.7759953737259, + "p90": 170.33599317073822, + "p95": 172.5119948387146, + "p99": 179.03999984264374 + }, + "isolatedSum": { + "p50": 189.44000452756882, + "p90": 206.56000077724457, + "p95": 210.23999899625778, + "p99": 221.11999988555908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 113.79200220108032, + "p90": 122.04799801111221, + "p95": 126.49600207805634, + "p99": 132.76800513267517 + }, + "combine": { + "p50": 114.97599631547928, + "p90": 122.52800166606903, + "p95": 124.57600235939026, + "p99": 129.08799946308136 + }, + "roundtrip": { + "p50": 209.75999534130096, + "p90": 216.76799654960632, + "p95": 221.21599316596985, + "p99": 228.32000255584717 + }, + "isolatedSum": { + "p50": 228.7679985165596, + "p90": 244.57599967718124, + "p95": 251.0720044374466, + "p99": 261.85600459575653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 149.21599626541138, + "p90": 154.91199493408203, + "p95": 156.99200332164764, + "p99": 162.84799575805664 + }, + "combine": { + "p50": 153.6960005760193, + "p90": 162.33600676059723, + 
"p95": 163.32800686359406, + "p99": 170.52799463272095 + }, + "roundtrip": { + "p50": 281.40801191329956, + "p90": 288.5119915008545, + "p95": 291.6480004787445, + "p99": 300.1280128955841 + }, + "isolatedSum": { + "p50": 302.91199684143066, + "p90": 317.24800169467926, + "p95": 320.3200101852417, + "p99": 333.3759903907776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 214.49600160121918, + "p90": 221.21599316596985, + "p95": 224.63999688625336, + "p99": 231.87200725078583 + }, + "combine": { + "p50": 271.58400416374207, + "p90": 275.84001421928406, + "p95": 276.7679989337921, + "p99": 282.30398893356323 + }, + "roundtrip": { + "p50": 456.928014755249, + "p90": 464.1920030117035, + "p95": 466.68800711631775, + "p99": 475.13601183891296 + }, + "isolatedSum": { + "p50": 486.08000576496124, + "p90": 497.0560073852539, + "p95": 501.40799582004547, + "p99": 514.1759961843491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 359.48801040649414, + "p90": 367.7760064601898, + "p95": 370.4319894313812, + "p99": 374.55999851226807 + }, + "combine": { + "p50": 472.6080000400543, + "p90": 478.0479967594147, + "p95": 481.440007686615, + "p99": 485.8880043029785 + }, + "roundtrip": { + "p50": 802.9119968414307, + "p90": 809.8239898681641, + "p95": 812.3520016670227, + "p99": 817.8880214691162 + }, + "isolatedSum": { + "p50": 832.0960104465485, + "p90": 845.8240032196045, + "p95": 851.8719971179962, + "p99": 860.4480028152466 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 665.3439998626709, + "p90": 674.8160123825073, + "p95": 678.6559820175171, + "p99": 692.192018032074 + }, + "combine": { + "p50": 843.1040048599243, + "p90": 851.3919711112976, + "p95": 852.7039885520935, + "p99": 855.9039831161499 + }, + "roundtrip": { + "p50": 1480.512022972107, + "p90": 1487.231969833374, + "p95": 1489.8240566253662, + "p99": 1494.431972503662 + }, + "isolatedSum": { + "p50": 1508.4480047225952, + "p90": 1526.207983493805, + "p95": 1531.3599705696106, + "p99": 1548.0960011482239 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6a3315e6", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b200_f1e6dd2a", + "comparisonKey": "7e38ffec0114bd44", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:35:36.889599+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.49600142240524, + "p90": 129.95199859142303, + "p95": 132.7359974384308, + "p99": 137.85600662231445 + }, + "combine": { + "p50": 138.08000087738037, + "p90": 141.50400459766388, + "p95": 143.0400013923645, + "p99": 149.59999918937683 + }, + "roundtrip": { + "p50": 236.25600337982178, + "p90": 241.31199717521667, + "p95": 244.00000274181366, + "p99": 249.08800423145294 + }, + "isolatedSum": { + "p50": 260.5760022997856, + "p90": 271.4560031890869, + "p95": 275.7759988307953, + "p99": 287.4560058116913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + 
"globalTokens": 2048, + "dispatch": { + "p50": 160.5439931154251, + "p90": 172.15999960899353, + "p95": 174.6239960193634, + "p99": 184.92799997329712 + }, + "combine": { + "p50": 199.072003364563, + "p90": 202.78400182724, + "p95": 203.5519927740097, + "p99": 210.65600216388702 + }, + "roundtrip": { + "p50": 336.8639945983887, + "p90": 343.51998567581177, + "p95": 345.8560109138489, + "p99": 352.7680039405823 + }, + "isolatedSum": { + "p50": 359.6159964799881, + "p90": 374.9440014362335, + "p95": 378.1759887933731, + "p99": 395.58400213718414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.9040013551712, + "p90": 250.33599138259888, + "p95": 254.88001108169556, + "p99": 258.91199707984924 + }, + "combine": { + "p50": 344.09600496292114, + "p90": 349.11999106407166, + "p95": 350.3679931163788, + "p99": 356.3840091228485 + }, + "roundtrip": { + "p50": 557.3440194129944, + "p90": 563.8719797134399, + "p95": 565.4079914093018, + "p99": 570.6560015678406 + }, + "isolatedSum": { + "p50": 584.0000063180923, + "p90": 599.4559824466705, + "p95": 605.2480041980743, + "p99": 615.2960062026978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 390.6559944152832, + "p90": 397.66401052474976, + "p95": 401.98400616645813, + "p99": 410.6239974498749 + }, + "combine": { + "p50": 583.840012550354, + "p90": 592.9920077323914, + "p95": 594.0160155296326, + "p99": 598.6239910125732 + }, + "roundtrip": { + "p50": 957.3119878768921, + "p90": 
964.0960097312927, + "p95": 966.9439792633057, + "p99": 971.8400239944458 + }, + "isolatedSum": { + "p50": 974.4960069656372, + "p90": 990.6560182571411, + "p95": 996.0000216960907, + "p99": 1009.2479884624481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 706.7520022392273, + "p90": 713.5679721832275, + "p95": 717.631995677948, + "p99": 724.4799733161926 + }, + "combine": { + "p50": 1089.408040046692, + "p90": 1096.832036972046, + "p95": 1098.912000656128, + "p99": 1101.1199951171875 + }, + "roundtrip": { + "p50": 1768.4799432754517, + "p90": 1775.0719785690308, + "p95": 1777.8240442276, + "p99": 1792.7680015563965 + }, + "isolatedSum": { + "p50": 1796.1600422859192, + "p90": 1810.4000091552734, + "p95": 1816.543996334076, + "p99": 1825.5999684333801 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1342.1119451522827, + "p90": 1350.208044052124, + "p95": 1353.983998298645, + "p99": 1360.3520393371582 + }, + "combine": { + "p50": 2080.2559852600098, + "p90": 2084.06400680542, + "p95": 2084.991931915283, + "p99": 2091.7439460754395 + }, + "roundtrip": { + "p50": 3397.952079772949, + "p90": 3404.2880535125732, + "p95": 3406.208038330078, + "p99": 3413.439989089966 + }, + "isolatedSum": { + "p50": 3422.3679304122925, + "p90": 3434.272050857544, + "p95": 3438.975930213928, + "p99": 3452.0959854125977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + 
"fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-73d627fd", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b200_44d733a4", + "comparisonKey": "a1b17727d8497481", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:43:49.428266+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.0239976644516, + "p90": 128.80000472068787, + "p95": 132.25600123405457, + "p99": 139.96799290180206 + }, + "combine": { + "p50": 126.17599964141846, + "p90": 129.7920048236847, + "p95": 133.215993642807, + "p99": 139.39200341701508 + }, + "roundtrip": { + "p50": 215.45599400997162, + "p90": 224.8000055551529, + "p95": 228.7359982728958, + "p99": 238.14399540424347 + }, + "isolatedSum": { + "p50": 247.19999730587006, + "p90": 258.59200954437256, + "p95": 265.4719948768616, + "p99": 279.35999631881714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.07999670505524, + "p90": 161.53599321842194, + "p95": 163.35999965667725, + "p99": 169.95200514793396 + }, + "combine": { + "p50": 172.38399386405945, + "p90": 177.95200645923615, + "p95": 179.1359931230545, + "p99": 184.28799510002136 + }, + "roundtrip": { + "p50": 296.09599709510803, + "p90": 304.22401428222656, + "p95": 307.5839877128601, + "p99": 316.0640001296997 + }, + "isolatedSum": { + "p50": 330.4639905691147, + "p90": 339.4879996776581, + "p95": 342.49599277973175, + "p99": 354.2400002479553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 219.42399442195892, + "p90": 224.09600019454956, + "p95": 225.3440022468567, + "p99": 230.30400276184082 + }, + "combine": { + "p50": 277.3439884185791, + "p90": 285.91999411582947, + "p95": 287.3919904232025, + "p99": 293.5679852962494 + }, + "roundtrip": { + "p50": 463.3280038833618, + "p90": 471.807986497879, + "p95": 475.0399887561798, + "p99": 483.0720126628876 + }, + "isolatedSum": { + "p50": 496.767982840538, + "p90": 510.015994310379, + "p95": 512.7359926700592, + "p99": 523.8719880580902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 338.9120101928711, + "p90": 346.6239869594574, + "p95": 348.35198521614075, + "p99": 353.05601358413696 + }, + "combine": { + "p50": 472.79998660087585, + "p90": 480.99198937416077, + "p95": 482.7199876308441, + "p99": 485.9519898891449 + }, + "roundtrip": { + "p50": 786.6560220718384, + "p90": 794.1120266914368, + "p95": 796.3520288467407, + "p99": 808.2879781723022 + }, + "isolatedSum": { + "p50": 811.711996793747, + "p90": 827.6159763336182, + "p95": 831.0719728469849, + "p99": 839.0080034732819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.5440030097961, + "p90": 589.3120169639587, + "p95": 590.9759998321533, + "p99": 596.6399908065796 + }, + "combine": { + "p50": 819.8080062866211, + "p90": 828.607976436615, + "p95": 829.695999622345, + "p99": 838.0799889564514 + }, + "roundtrip": { + 
"p50": 1374.9120235443115, + "p90": 1382.0159435272217, + "p95": 1384.7999572753906, + "p99": 1390.6559944152832 + }, + "isolatedSum": { + "p50": 1404.3520092964172, + "p90": 1417.9199934005737, + "p95": 1420.6719994544983, + "p99": 1434.719979763031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1073.472023010254, + "p90": 1080.9919834136963, + "p95": 1083.5520029067993, + "p99": 1095.4560041427612 + }, + "combine": { + "p50": 1528.864026069641, + "p90": 1534.6239805221558, + "p95": 1539.8399829864502, + "p99": 1544.2559719085693 + }, + "roundtrip": { + "p50": 2575.103998184204, + "p90": 2582.751989364624, + "p95": 2585.024118423462, + "p99": 2590.912103652954 + }, + "isolatedSum": { + "p50": 2602.336049079895, + "p90": 2615.615963935852, + "p95": 2623.3919858932495, + "p99": 2639.7119760513306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ca4d4aca", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b200_e7d2ef86", + "comparisonKey": "d9a7a2538ba6ec6f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:32.118052+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.61600184440613, + "p90": 129.40800189971924, + "p95": 134.91199910640717, + "p99": 145.1839953660965 + }, + "combine": { + "p50": 125.72799623012543, + "p90": 130.46400249004364, + "p95": 136.83199882507324, + "p99": 142.43200421333313 + }, + "roundtrip": { + "p50": 214.62400257587433, + "p90": 226.4000028371811, + "p95": 232.03200101852417, + "p99": 242.0479953289032 + }, + "isolatedSum": { + "p50": 245.34399807453156, + "p90": 
259.8720043897629, + "p95": 271.7439979314804, + "p99": 287.6159995794296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.3439985513687, + "p90": 165.21599888801575, + "p95": 170.84799706935883, + "p99": 182.3039948940277 + }, + "combine": { + "p50": 166.49599373340607, + "p90": 175.84000527858734, + "p95": 178.68800461292267, + "p99": 186.27199530601501 + }, + "roundtrip": { + "p50": 293.7279939651489, + "p90": 303.6159873008728, + "p95": 307.96799063682556, + "p99": 315.744012594223 + }, + "isolatedSum": { + "p50": 323.8399922847748, + "p90": 341.0560041666031, + "p95": 349.5360016822815, + "p99": 368.5759902000427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 220.67199647426605, + "p90": 234.047994017601, + "p95": 240.28800427913666, + "p99": 252.86400318145752 + }, + "combine": { + "p50": 277.7920067310333, + "p90": 286.78399324417114, + "p95": 287.9039943218231, + "p99": 292.57598519325256 + }, + "roundtrip": { + "p50": 465.05600214004517, + "p90": 471.9359874725342, + "p95": 474.8480021953583, + "p99": 484.16000604629517 + }, + "isolatedSum": { + "p50": 498.4640032052994, + "p90": 520.8319872617722, + "p95": 528.1919986009598, + "p99": 545.4399883747101 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
1024, + "globalTokens": 8192, + "dispatch": { + "p50": 339.9359881877899, + "p90": 348.5119938850403, + "p95": 351.83998942375183, + "p99": 358.3039939403534 + }, + "combine": { + "p50": 471.1039960384369, + "p90": 480.80000281333923, + "p95": 483.2319915294647, + "p99": 493.3759868144989 + }, + "roundtrip": { + "p50": 783.3279967308044, + "p90": 790.7199859619141, + "p95": 793.2159900665283, + "p99": 799.3919849395752 + }, + "isolatedSum": { + "p50": 811.0399842262268, + "p90": 829.3119966983795, + "p95": 835.0719809532166, + "p99": 851.6799807548523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.4799876213074, + "p90": 591.6159749031067, + "p95": 595.4239964485168, + "p99": 602.3359894752502 + }, + "combine": { + "p50": 817.6000118255615, + "p90": 827.8719782829285, + "p95": 829.3759822845459, + "p99": 837.2160196304321 + }, + "roundtrip": { + "p50": 1374.4319677352905, + "p90": 1383.3600282669067, + "p95": 1388.2240056991577, + "p99": 1402.0800590515137 + }, + "isolatedSum": { + "p50": 1402.079999446869, + "p90": 1419.4879531860352, + "p95": 1424.7999787330627, + "p99": 1439.5520091056824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1074.0480422973633, + "p90": 1081.984043121338, + "p95": 1084.928035736084, + "p99": 1093.1199789047241 + }, + "combine": { + "p50": 1530.8159589767456, + "p90": 1539.423942565918, + "p95": 1542.0160293579102, + "p99": 1546.015977859497 + }, + "roundtrip": { + "p50": 
2580.2559852600098, + "p90": 2589.087963104248, + "p95": 2591.808080673218, + "p99": 2596.735954284668 + }, + "isolatedSum": { + "p50": 2604.864001274109, + "p90": 2621.407985687256, + "p95": 2626.944065093994, + "p99": 2639.135956764221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a5da5159", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_1439be5b", + "comparisonKey": "0d0bc2794edacd5c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:33:43.581543+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.79200220108032, + "p90": 122.04799801111221, + "p95": 126.01600587368011, + "p99": 134.20799374580383 + }, + "combine": { + "p50": 137.11999356746674, + "p90": 141.08799397945404, + "p95": 142.87999272346497, + "p99": 152.6080071926117 + }, + "roundtrip": { + "p50": 229.18400168418884, + "p90": 240.86399376392365, + "p95": 246.87999486923218, + "p99": 254.07999753952026 + }, + "isolatedSum": { + "p50": 250.91199576854706, + "p90": 263.13599199056625, + "p95": 268.8959985971451, + "p99": 286.8160009384155 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 148.95999431610107, + "p90": 156.63999319076538, + "p95": 159.9999964237213, + "p99": 167.58400201797485 + }, + "combine": { + "p50": 187.99999356269836, + "p90": 196.86399400234222, + "p95": 200.47999918460846, + "p99": 210.59200167655945 + }, + "roundtrip": { + "p50": 321.1199939250946, + "p90": 331.4880132675171, + "p95": 334.4320058822632, + "p99": 341.5359854698181 + }, + "isolatedSum": { + "p50": 336.95998787879944, + "p90": 353.5039871931076, + "p95": 360.4799956083298, + "p99": 378.1760036945343 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 228.57600450515747, + "p90": 233.5360050201416, + "p95": 236.41599714756012, + "p99": 241.43999814987183 + }, + "combine": { + "p50": 335.87199449539185, + "p90": 344.83200311660767, + "p95": 348.9280045032501, + "p99": 353.3119857311249 + }, + "roundtrip": { + "p50": 538.4320020675659, + "p90": 551.584005355835, + "p95": 556.22398853302, + "p99": 570.1119899749756 + }, + "isolatedSum": { + "p50": 564.4479990005493, + "p90": 578.3680081367493, + "p95": 585.3440016508102, + "p99": 594.7519838809967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 364.44801092147827, + "p90": 370.65601348876953, + "p95": 372.9279935359955, + "p99": 381.056010723114 + }, + "combine": { + "p50": 595.3919887542725, + "p90": 604.7040224075317, + "p95": 606.4640283584595, + "p99": 614.7519946098328 + }, + "roundtrip": { + "p50": 931.6480159759521, + "p90": 941.8240189552307, + "p95": 944.383978843689, + "p99": 954.6239972114563 + }, + "isolatedSum": { + "p50": 959.8399996757507, + "p90": 975.3600358963013, + "p95": 979.392021894455, + "p99": 995.8080053329468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 659.0719819068909, + "p90": 668.2239770889282, + 
"p95": 671.6160178184509, + "p99": 677.8560280799866 + }, + "combine": { + "p50": 1073.8879442214966, + "p90": 1077.3439407348633, + "p95": 1079.7120332717896, + "p99": 1087.1039628982544 + }, + "roundtrip": { + "p50": 1712.2880220413208, + "p90": 1720.9919691085815, + "p95": 1725.2800464630127, + "p99": 1733.728051185608 + }, + "isolatedSum": { + "p50": 1732.9599261283875, + "p90": 1745.5679178237915, + "p95": 1751.3280510902405, + "p99": 1764.959990978241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1271.4879512786865, + "p90": 1280.5440425872803, + "p95": 1282.912015914917, + "p99": 1290.94398021698 + }, + "combine": { + "p50": 2047.7759838104248, + "p90": 2058.8479042053223, + "p95": 2064.2240047454834, + "p99": 2087.199926376343 + }, + "roundtrip": { + "p50": 3303.071975708008, + "p90": 3314.4640922546387, + "p95": 3317.8560733795166, + "p99": 3329.3440341949463 + }, + "isolatedSum": { + "p50": 3319.2639350891113, + "p90": 3339.3919467926025, + "p95": 3347.1360206604004, + "p99": 3378.1439065933228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-48353377", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b200_e357f621", + "comparisonKey": "5c72e4315a810520", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:34:39.773966+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + 
"backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.46399986743927, + "p90": 119.48800086975098, + "p95": 121.69600278139114, + "p99": 126.30400061607361 + }, + "combine": { + "p50": 127.48800218105316, + "p90": 137.02400028705597, + "p95": 138.047993183136, + "p99": 140.25600254535675 + }, + "roundtrip": { + "p50": 
221.8559980392456, + "p90": 228.35199534893036, + "p95": 230.46399652957916, + "p99": 236.41599714756012 + }, + "isolatedSum": { + "p50": 241.95200204849243, + "p90": 256.51200115680695, + "p95": 259.74399596452713, + "p99": 266.56000316143036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 141.2159949541092, + "p90": 147.20000326633453, + "p95": 149.59999918937683, + "p99": 152.3520052433014 + }, + "combine": { + "p50": 183.3920031785965, + "p90": 188.54400515556335, + "p95": 191.13600254058838, + "p99": 197.50399887561798 + }, + "roundtrip": { + "p50": 304.8959970474243, + "p90": 313.50401043891907, + "p95": 315.96800684928894, + "p99": 319.2000091075897 + }, + "isolatedSum": { + "p50": 324.6079981327057, + "p90": 335.7440084218979, + "p95": 340.7360017299652, + "p99": 349.8560041189194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.36000621318817, + "p90": 208.67200195789337, + "p95": 210.207998752594, + "p99": 215.10399878025055 + }, + "combine": { + "p50": 320.0959861278534, + "p90": 325.0879943370819, + "p95": 326.3680040836334, + "p99": 335.07201075553894 + }, + "roundtrip": { + "p50": 500.7359981536865, + "p90": 507.29602575302124, + "p95": 509.11998748779297, + "p99": 515.0719881057739 + }, + "isolatedSum": { + "p50": 523.4559923410416, + "p90": 533.7599962949753, + "p95": 536.5760028362274, + "p99": 550.1760095357895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 
91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 332.12798833847046, + "p90": 340.86400270462036, + "p95": 345.15199065208435, + "p99": 349.727988243103 + }, + "combine": { + "p50": 595.52001953125, + "p90": 598.7200140953064, + "p95": 602.5279760360718, + "p99": 606.8159937858582 + }, + "roundtrip": { + "p50": 905.7599902153015, + "p90": 912.7680063247681, + "p95": 915.1039719581604, + "p99": 922.976016998291 + }, + "isolatedSum": { + "p50": 927.6480078697205, + "p90": 939.5840167999268, + "p95": 947.6799666881561, + "p99": 956.5439820289612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 593.7600135803223, + "p90": 604.9919724464417, + "p95": 616.5440082550049, + "p99": 633.6640119552612 + }, + "combine": { + "p50": 1103.3600568771362, + "p90": 1112.5760078430176, + "p95": 1113.7919425964355, + "p99": 1120.9280490875244 + }, + "roundtrip": { + "p50": 1629.6319961547852, + "p90": 1636.8000507354736, + "p95": 1638.8800144195557, + "p99": 1642.9120302200317 + }, + "isolatedSum": { + "p50": 1697.1200704574585, + "p90": 1717.5679802894592, + "p95": 1730.3359508514404, + "p99": 1754.5920610427856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1132.7999830245972, + "p90": 1145.5999612808228, + "p95": 1150.8159637451172, + "p99": 1164.512038230896 
+ }, + "combine": { + "p50": 2062.7200603485107, + "p90": 2071.3601112365723, + "p95": 2072.4799633026123, + "p99": 2075.1678943634033 + }, + "roundtrip": { + "p50": 3144.7041034698486, + "p90": 3153.887987136841, + "p95": 3156.8961143493652, + "p99": 3169.248104095459 + }, + "isolatedSum": { + "p50": 3195.520043373108, + "p90": 3216.960072517395, + "p95": 3223.2959270477295, + "p99": 3239.6799325942993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5d118cda", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b200_5ec7faad", + "comparisonKey": "610c49df9e35a929", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:42:54.027073+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.10399794578552, + "p90": 126.52799487113953, + "p95": 129.7920048236847, + "p99": 134.75200533866882 + }, + "combine": { + "p50": 125.34399330615997, + "p90": 128.22400033473969, + "p95": 129.4720023870468, + "p99": 137.15200126171112 + }, + "roundtrip": { + "p50": 212.44800090789795, + "p90": 219.90400552749634, + "p95": 223.23200106620789, + "p99": 238.94399404525757 + }, + "isolatedSum": { + "p50": 244.4479912519455, + "p90": 254.7519952058792, + "p95": 259.2640072107315, + "p99": 271.90400660037994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 156.5759927034378, + "p90": 161.82400286197662, + "p95": 163.71199488639832, + "p99": 168.03200542926788 + }, + "combine": { + "p50": 168.12799870967865, + "p90": 175.29599368572235, + "p95": 177.05599963665009, + "p99": 185.7919991016388 + }, + "roundtrip": { + "p50": 297.63200879096985, + 
"p90": 307.9040050506592, + "p95": 310.8479976654053, + "p99": 320.3519880771637 + }, + "isolatedSum": { + "p50": 324.70399141311646, + "p90": 337.119996547699, + "p95": 340.7679945230484, + "p99": 353.8240045309067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 220.47999501228333, + "p90": 227.9680073261261, + "p95": 230.71999847888947, + "p99": 236.28799617290497 + }, + "combine": { + "p50": 283.61600637435913, + "p90": 288.2240116596222, + "p95": 289.5359992980957, + "p99": 296.03201150894165 + }, + "roundtrip": { + "p50": 469.92000937461853, + "p90": 479.36001420021057, + "p95": 481.56800866127014, + "p99": 489.1839921474457 + }, + "isolatedSum": { + "p50": 504.09600138664246, + "p90": 516.1920189857483, + "p95": 520.2559977769852, + "p99": 532.3200076818466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 341.0559892654419, + "p90": 347.8719890117645, + "p95": 350.0159978866577, + "p99": 362.0480000972748 + }, + "combine": { + "p50": 474.91198778152466, + "p90": 484.44798588752747, + "p95": 487.36000061035156, + "p99": 494.9119985103607 + }, + "roundtrip": { + "p50": 789.4719839096069, + "p90": 797.4399924278259, + "p95": 799.8080253601074, + "p99": 804.2880296707153 + }, + "isolatedSum": { + "p50": 815.9679770469666, + "p90": 832.319974899292, + "p95": 837.3759984970093, + "p99": 856.9599986076355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + 
"fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 592.2560095787048, + "p90": 600.1600027084351, + "p95": 603.551983833313, + "p99": 614.8480176925659 + }, + "combine": { + "p50": 848.7679958343506, + "p90": 854.5920252799988, + "p95": 857.0560216903687, + "p99": 863.7120127677917 + }, + "roundtrip": { + "p50": 1414.5280122756958, + "p90": 1422.8479862213135, + "p95": 1425.439953804016, + "p99": 1428.7999868392944 + }, + "isolatedSum": { + "p50": 1441.0240054130554, + "p90": 1454.7520279884338, + "p95": 1460.6080055236816, + "p99": 1478.5600304603577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1099.2319583892822, + "p90": 1107.7760457992554, + "p95": 1109.9519729614258, + "p99": 1112.768054008484 + }, + "combine": { + "p50": 1592.2880172729492, + "p90": 1601.6960144042969, + "p95": 1603.935956954956, + "p99": 1616.096019744873 + }, + "roundtrip": { + "p50": 2663.1999015808105, + "p90": 2671.7119216918945, + "p95": 2674.3040084838867, + "p99": 2682.687997817993 + }, + "isolatedSum": { + "p50": 2691.5199756622314, + "p90": 2709.4720602035522, + "p95": 2713.887929916382, + "p99": 2728.864073753357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c9d20c1d", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + 
"colorKey": "b200_49d52a3c", + "comparisonKey": "9d085c2feda6b57e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:39:37.580881+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.6240023970604, + 
"p90": 135.3279948234558, + "p95": 139.74399864673615, + "p99": 151.45599842071533 + }, + "combine": { + "p50": 137.7599984407425, + "p90": 141.34399592876434, + "p95": 144.70399916172028, + "p99": 150.91200172901154 + }, + "roundtrip": { + "p50": 234.55999791622162, + "p90": 242.78399348258972, + "p95": 245.4719990491867, + "p99": 251.8720030784607 + }, + "isolatedSum": { + "p50": 260.3840008378029, + "p90": 276.67199075222015, + "p95": 284.4479978084564, + "p99": 302.36800014972687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.50400042533875, + "p90": 172.2559928894043, + "p95": 177.34399437904358, + "p99": 188.89600038528442 + }, + "combine": { + "p50": 190.40000438690186, + "p90": 199.5519995689392, + "p95": 200.54399967193604, + "p99": 203.77600193023682 + }, + "roundtrip": { + "p50": 327.2640109062195, + "p90": 337.18401193618774, + "p95": 341.5040075778961, + "p99": 347.6479947566986 + }, + "isolatedSum": { + "p50": 351.9040048122406, + "p90": 371.8079924583435, + "p95": 377.8879940509796, + "p99": 392.67200231552124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.46399784088135, + "p90": 252.03201174736023, + "p95": 257.4720084667206, + "p99": 274.59201216697693 + }, + "combine": { + "p50": 336.92800998687744, + "p90": 344.57600116729736, + "p95": 347.4879860877991, + "p99": 350.46398639678955 + }, + "roundtrip": { + "p50": 542.7200198173523, + "p90": 550.6560206413269, + "p95": 552.9919862747192, + "p99": 
559.8080158233643 + }, + "isolatedSum": { + "p50": 575.3920078277588, + "p90": 596.6080129146576, + "p95": 604.9599945545197, + "p99": 625.0559985637665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 381.47199153900146, + "p90": 397.43998646736145, + "p95": 404.1920006275177, + "p99": 409.9839925765991 + }, + "combine": { + "p50": 578.3039927482605, + "p90": 583.3280086517334, + "p95": 584.447979927063, + "p99": 591.264009475708 + }, + "roundtrip": { + "p50": 937.3120069503784, + "p90": 944.5440173149109, + "p95": 948.3839869499207, + "p99": 952.3199796676636 + }, + "isolatedSum": { + "p50": 959.775984287262, + "p90": 980.7679951190948, + "p95": 988.6399805545807, + "p99": 1001.2480020523071 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 682.0160150527954, + "p90": 691.4880275726318, + "p95": 695.7119703292847, + "p99": 706.6559791564941 + }, + "combine": { + "p50": 1053.1200170516968, + "p90": 1061.087965965271, + "p95": 1063.1359815597534, + "p99": 1069.0560340881348 + }, + "roundtrip": { + "p50": 1706.976056098938, + "p90": 1715.6800031661987, + "p95": 1718.559980392456, + "p99": 1725.5359888076782 + }, + "isolatedSum": { + "p50": 1735.1360321044922, + "p90": 1752.5759935379028, + "p95": 1758.847951889038, + "p99": 1775.712013244629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1307.3920011520386, + "p90": 1319.1039562225342, + "p95": 1322.9440450668335, + "p99": 1333.3120346069336 + }, + "combine": { + "p50": 2018.5279846191406, + "p90": 2023.3919620513916, + "p95": 2027.359962463379, + "p99": 2035.2959632873535 + }, + "roundtrip": { + "p50": 3305.216073989868, + "p90": 3318.3040618896484, + "p95": 3321.3119506835938, + "p99": 3331.648111343384 + }, + "isolatedSum": { + "p50": 3325.919985771179, + "p90": 3342.495918273926, + "p95": 3350.3040075302124, + "p99": 3368.607997894287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e4cd7bb", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b200_75d4e512", + "comparisonKey": "cd3f59a7109481d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:40:08.829773+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + 
"kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.95999652147293, + "p90": 126.49600207805634, + "p95": 132.38400220870972, + "p99": 139.3280029296875 + }, + "combine": { + "p50": 125.82400441169739, + "p90": 131.71200454235077, + "p95": 140.06400108337402, + "p99": 141.66399836540222 + }, + "roundtrip": { + "p50": 213.919997215271, + "p90": 224.57599639892578, + "p95": 229.98400032520294, + "p99": 235.6799989938736 + }, + "isolatedSum": { + "p50": 242.78400093317032, + "p90": 258.2080066204071, + "p95": 272.44800329208374, + "p99": 280.9920012950897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.49600732326508, + "p90": 
178.847998380661, + "p95": 184.9920004606247, + "p99": 196.60800695419312 + }, + "combine": { + "p50": 172.06400632858276, + "p90": 181.05599284172058, + "p95": 186.24000251293182, + "p99": 198.5280066728592 + }, + "roundtrip": { + "p50": 295.8720028400421, + "p90": 305.4400086402893, + "p95": 309.53601002693176, + "p99": 325.1520097255707 + }, + "isolatedSum": { + "p50": 330.56001365184784, + "p90": 359.9039912223816, + "p95": 371.2320029735565, + "p99": 395.1360136270523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 218.04800629615784, + "p90": 227.4239957332611, + "p95": 230.94399273395538, + "p99": 238.304004073143 + }, + "combine": { + "p50": 274.52799677848816, + "p90": 283.9359939098358, + "p95": 288.03199529647827, + "p99": 290.6560003757477 + }, + "roundtrip": { + "p50": 464.2240107059479, + "p90": 472.7360010147095, + "p95": 475.67999362945557, + "p99": 482.7840030193329 + }, + "isolatedSum": { + "p50": 492.576003074646, + "p90": 511.3599896430969, + "p95": 518.9759880304337, + "p99": 528.9600044488907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 342.0799970626831, + "p90": 353.40800881385803, + "p95": 357.2160005569458, + "p99": 370.94399333000183 + }, + "combine": { + "p50": 470.97599506378174, + "p90": 478.9760112762451, + "p95": 482.81601071357727, + "p99": 486.27200722694397 + }, + "roundtrip": { + "p50": 784.6720218658447, + "p90": 796.9920039176941, + "p95": 801.4400005340576, + "p99": 811.0719919204712 + 
}, + "isolatedSum": { + "p50": 813.0559921264648, + "p90": 832.3840200901031, + "p95": 840.0320112705231, + "p99": 857.2160005569458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.2239856719971, + "p90": 593.0240154266357, + "p95": 596.671998500824, + "p99": 614.1440272331238 + }, + "combine": { + "p50": 827.7119994163513, + "p90": 833.8559865951538, + "p95": 839.4560217857361, + "p99": 847.2319841384888 + }, + "roundtrip": { + "p50": 1383.3279609680176, + "p90": 1392.6399946212769, + "p95": 1395.967960357666, + "p99": 1401.695966720581 + }, + "isolatedSum": { + "p50": 1411.9359850883484, + "p90": 1426.8800020217896, + "p95": 1436.12802028656, + "p99": 1461.3760113716125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1078.495979309082, + "p90": 1086.0799551010132, + "p95": 1089.5999670028687, + "p99": 1106.97603225708 + }, + "combine": { + "p50": 1541.4719581604004, + "p90": 1545.6639528274536, + "p95": 1551.9039630889893, + "p99": 1554.4320344924927 + }, + "roundtrip": { + "p50": 2595.2320098876953, + "p90": 2605.407953262329, + "p95": 2608.2239151000977, + "p99": 2613.9841079711914 + }, + "isolatedSum": { + "p50": 2619.9679374694824, + "p90": 2631.743907928467, + "p95": 2641.503930091858, + "p99": 2661.4080667495728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 4, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-44fb9e2b", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_e0f6e15b", + "comparisonKey": "af0cf73b098dc1b5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:41:28.471096+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.77600306272507, + "p90": 131.04000687599182, + "p95": 135.13599336147308, + "p99": 154.7199934720993 + }, + "combine": { + "p50": 138.17599415779114, + "p90": 151.96800231933594, + "p95": 154.59200739860535, + "p99": 161.82400286197662 + }, + "roundtrip": { + "p50": 229.95199263095856, + "p90": 243.1039959192276, + "p95": 245.1840043067932, + "p99": 255.13601303100586 + }, + "isolatedSum": { + "p50": 257.9519972205162, + "p90": 283.00800919532776, + "p95": 289.72800076007843, + "p99": 316.5439963340759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 151.64799988269806, + "p90": 161.76000237464905, + "p95": 164.5440012216568, + "p99": 172.8000044822693 + }, + "combine": { + "p50": 188.28800320625305, + "p90": 196.4160054922104, + "p95": 199.072003364563, + "p99": 203.10400426387787 + }, + "roundtrip": { + "p50": 318.56000423431396, + "p90": 325.9519934654236, + "p95": 329.0559947490692, + "p99": 339.2319977283478 + }, + "isolatedSum": { + "p50": 339.9360030889511, + "p90": 358.17600786685944, + "p95": 363.6160045862198, + "p99": 375.90400874614716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 227.29599475860596, + "p90": 234.17599499225616, + "p95": 237.05600202083588, + 
"p99": 245.15199661254883 + }, + "combine": { + "p50": 335.80800890922546, + "p90": 340.1600122451782, + "p95": 344.543993473053, + "p99": 349.8559892177582 + }, + "roundtrip": { + "p50": 534.0160131454468, + "p90": 541.3119792938232, + "p95": 544.3519949913025, + "p99": 552.5439977645874 + }, + "isolatedSum": { + "p50": 563.1040036678314, + "p90": 574.3360072374344, + "p95": 581.5999954938889, + "p99": 595.007985830307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 366.5600121021271, + "p90": 373.76001477241516, + "p95": 376.8320083618164, + "p99": 382.4000060558319 + }, + "combine": { + "p50": 594.6879982948303, + "p90": 599.3599891662598, + "p95": 603.8720011711121, + "p99": 610.8480095863342 + }, + "roundtrip": { + "p50": 931.4879775047302, + "p90": 941.2800073623657, + "p95": 944.2880153656006, + "p99": 952.2560238838196 + }, + "isolatedSum": { + "p50": 961.2480103969574, + "p90": 973.1200039386749, + "p95": 980.7040095329285, + "p99": 993.2480156421661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 660.0959897041321, + "p90": 670.304000377655, + "p95": 672.864019870758, + "p99": 677.0560145378113 + }, + "combine": { + "p50": 1073.3120441436768, + "p90": 1078.112006187439, + "p95": 1085.919976234436, + "p99": 1112.5760078430176 + }, + "roundtrip": { + "p50": 1711.2959623336792, + "p90": 1719.6799516677856, + "p95": 1723.1040000915527, + "p99": 1733.3439588546753 + }, + "isolatedSum": { + "p50": 
1733.4080338478088, + "p90": 1748.416006565094, + "p95": 1758.783996105194, + "p99": 1789.6320223808289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1271.83997631073, + "p90": 1281.9839715957642, + "p95": 1284.608006477356, + "p99": 1290.9120321273804 + }, + "combine": { + "p50": 2046.0801124572754, + "p90": 2050.112009048462, + "p95": 2056.1599731445312, + "p99": 2072.3519325256348 + }, + "roundtrip": { + "p50": 3301.7280101776123, + "p90": 3312.256097793579, + "p95": 3315.9360885620117, + "p99": 3347.615957260132 + }, + "isolatedSum": { + "p50": 3317.9200887680054, + "p90": 3332.095980644226, + "p95": 3340.767979621887, + "p99": 3363.263964653015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1ce9907", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_7d7375c3", + "comparisonKey": "28287fd2b4b87fcf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:41:59.538822+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · 
zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.9119964838028, + "p90": 125.5359947681427, + "p95": 128.67200374603271, + "p99": 139.52000439167023 + }, + "combine": { + "p50": 125.791996717453, + "p90": 129.85600531101227, + "p95": 137.63199746608734, + "p99": 141.82400703430176 + }, + "roundtrip": { + "p50": 212.2880071401596, + "p90": 223.07200729846954, + "p95": 229.44000363349915, + "p99": 243.83999407291412 + }, + "isolatedSum": { + "p50": 244.7039932012558, + "p90": 255.39200007915497, + "p95": 266.30400121212006, + "p99": 281.344011425972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.83999454975128, + "p90": 160.92799603939056, + "p95": 163.68000209331512, + "p99": 173.0239987373352 + }, + "combine": { + "p50": 170.8800047636032, + "p90": 179.36000227928162, + "p95": 184.09599363803864, + "p99": 191.9039934873581 + }, + "roundtrip": { + "p50": 294.5919930934906, + "p90": 305.7279884815216, + "p95": 309.7279965877533, + "p99": 319.8080062866211 + }, + "isolatedSum": { + "p50": 326.7199993133545, + "p90": 340.2879983186722, + "p95": 347.77599573135376, + "p99": 364.9279922246933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 218.6560034751892, + "p90": 225.8239984512329, + "p95": 229.08799350261688, + "p99": 237.8239929676056 + }, + "combine": { + "p50": 283.1679880619049, + "p90": 289.247989654541, + "p95": 295.52000761032104, + "p99": 308.0640137195587 + }, + "roundtrip": { + "p50": 461.0239863395691, + "p90": 469.6959853172302, + "p95": 473.1200039386749, + "p99": 479.8719882965088 + }, + "isolatedSum": { + "p50": 501.8239915370941, + "p90": 515.0719881057739, + "p95": 524.6080011129379, + "p99": 545.8880066871643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 333.6640000343323, + "p90": 343.51998567581177, + "p95": 346.9119966030121, + "p99": 
359.2959940433502 + }, + "combine": { + "p50": 467.23198890686035, + "p90": 473.1839895248413, + "p95": 474.2400050163269, + "p99": 483.71198773384094 + }, + "roundtrip": { + "p50": 776.9920229911804, + "p90": 786.4000201225281, + "p95": 792.3200130462646, + "p99": 803.5839796066284 + }, + "isolatedSum": { + "p50": 800.8959889411926, + "p90": 816.7039752006531, + "p95": 821.152001619339, + "p99": 843.0079817771912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 588.8640284538269, + "p90": 596.7680215835571, + "p95": 598.3359813690186, + "p99": 605.7599782943726 + }, + "combine": { + "p50": 837.440013885498, + "p90": 844.0639972686768, + "p95": 849.9199748039246, + "p99": 859.1679930686951 + }, + "roundtrip": { + "p50": 1400.2560377120972, + "p90": 1410.1439714431763, + "p95": 1414.1440391540527, + "p99": 1423.2640266418457 + }, + "isolatedSum": { + "p50": 1426.304042339325, + "p90": 1440.832018852234, + "p95": 1448.2559561729431, + "p99": 1464.9279713630676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1081.663966178894, + "p90": 1087.1039628982544, + "p95": 1089.311957359314, + "p99": 1095.0720310211182 + }, + "combine": { + "p50": 1542.3680543899536, + "p90": 1552.1279573440552, + "p95": 1554.3999671936035, + "p99": 1558.4319829940796 + }, + "roundtrip": { + "p50": 2599.32804107666, + "p90": 2609.7919940948486, + "p95": 2612.5760078430176, + "p99": 2618.016004562378 + }, + "isolatedSum": { + "p50": 
2624.0320205688477, + "p90": 2639.2319202423096, + "p95": 2643.7119245529175, + "p99": 2653.5040140151978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ff1a0aa7", + "identity": "b200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_3facccc3", + "comparisonKey": "90f79cfd5c964081", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:38:17.558644+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.56000125408173, + "p90": 126.3359934091568, + "p95": 132.79999792575836, + "p99": 139.52000439167023 + }, + "combine": { + "p50": 125.76000392436981, + "p90": 132.76800513267517, + "p95": 139.26400244235992, + "p99": 142.65599846839905 + }, + "roundtrip": { + "p50": 215.58399498462677, + "p90": 226.72000527381897, + "p95": 230.9119999408722, + "p99": 242.71999299526215 + }, + "isolatedSum": { + "p50": 244.32000517845154, + "p90": 259.10399854183197, + "p95": 272.0640003681183, + "p99": 282.1760028600693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.29599785804749, + "p90": 162.49600052833557, + "p95": 168.96000504493713, + "p99": 183.87199938297272 + }, + "combine": { + "p50": 166.84800386428833, + "p90": 177.44000256061554, + "p95": 179.1040003299713, + "p99": 190.0160014629364 + }, + "roundtrip": { + "p50": 292.928010225296, + "p90": 302.43200063705444, + "p95": 307.8719973564148, + "p99": 313.4079873561859 + }, + "isolatedSum": { + "p50": 322.1440017223358, + "p90": 339.9360030889511, + "p95": 348.06400537490845, + "p99": 373.8880008459091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + 
"fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 218.72000396251678, + "p90": 231.23200237751007, + "p95": 238.11200261116028, + "p99": 248.76800179481506 + }, + "combine": { + "p50": 278.1760096549988, + "p90": 287.55199909210205, + "p95": 288.8000011444092, + "p99": 299.77598786354065 + }, + "roundtrip": { + "p50": 460.57599782943726, + "p90": 472.351998090744, + "p95": 476.83200240135193, + "p99": 483.68000984191895 + }, + "isolatedSum": { + "p50": 496.89601361751556, + "p90": 518.7840014696121, + "p95": 526.9120037555695, + "p99": 548.5439896583557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 332.67199993133545, + "p90": 344.92799639701843, + "p95": 347.9999899864197, + "p99": 356.6719889640808 + }, + "combine": { + "p50": 467.3280119895935, + "p90": 472.76800870895386, + "p95": 475.23200511932373, + "p99": 484.0640127658844 + }, + "roundtrip": { + "p50": 778.8479924201965, + "p90": 791.8720245361328, + "p95": 796.7680096626282, + "p99": 808.1279993057251 + }, + "isolatedSum": { + "p50": 800.000011920929, + "p90": 817.6960051059723, + "p95": 823.2319951057434, + "p99": 840.7360017299652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 587.4559879302979, + "p90": 595.0719714164734, + "p95": 596.9600081443787, + "p99": 601.855993270874 + }, + "combine": { 
+ "p50": 834.3039751052856, + "p90": 842.464029788971, + "p95": 846.2079763412476, + "p99": 852.3839712142944 + }, + "roundtrip": { + "p50": 1396.7039585113525, + "p90": 1404.0000438690186, + "p95": 1406.272053718567, + "p99": 1412.5440120697021 + }, + "isolatedSum": { + "p50": 1421.7599630355835, + "p90": 1437.5360012054443, + "p95": 1443.1679844856262, + "p99": 1454.2399644851685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1081.2159776687622, + "p90": 1087.5200033187866, + "p95": 1090.1440382003784, + "p99": 1095.3279733657837 + }, + "combine": { + "p50": 1541.0560369491577, + "p90": 1552.575945854187, + "p95": 1555.8719635009766, + "p99": 1588.7999534606934 + }, + "roundtrip": { + "p50": 2595.263957977295, + "p90": 2606.1758995056152, + "p95": 2610.7840538024902, + "p99": 2615.072011947632 + }, + "isolatedSum": { + "p50": 2622.27201461792, + "p90": 2640.0959491729736, + "p95": 2646.016001701355, + "p99": 2684.127926826477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5f9a92df", + "identity": "b200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_81246364", + "comparisonKey": "b44bda5bbfe07633", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:19:48.575174+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.14400291442871, + "p90": 110.88000237941742, + "p95": 113.6000007390976, + "p99": 123.3920007944107 + }, + "combine": { + "p50": 125.2799928188324, + "p90": 128.86400520801544, + "p95": 132.7040046453476, + "p99": 139.90400731563568 + }, + "roundtrip": { + "p50": 198.7520009279251, + "p90": 205.28000593185425, + "p95": 208.22399854660034, + "p99": 
214.39999341964722 + }, + "isolatedSum": { + "p50": 227.4239957332611, + "p90": 239.74400758743286, + "p95": 246.3040053844452, + "p99": 263.2960081100464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.96800100803375, + "p90": 148.54399859905243, + "p95": 151.64799988269806, + "p99": 159.4880074262619 + }, + "combine": { + "p50": 165.47200083732605, + "p90": 174.84800517559052, + "p95": 176.9919991493225, + "p99": 186.11200153827667 + }, + "roundtrip": { + "p50": 283.1360101699829, + "p90": 290.97598791122437, + "p95": 295.0719892978668, + "p99": 302.2400140762329 + }, + "isolatedSum": { + "p50": 309.4400018453598, + "p90": 323.39200377464294, + "p95": 328.63999903202057, + "p99": 345.6000089645386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.50400018692017, + "p90": 212.0639979839325, + "p95": 215.03999829292297, + "p99": 223.1999933719635 + }, + "combine": { + "p50": 283.29598903656006, + "p90": 288.03199529647827, + "p95": 290.6239926815033, + "p99": 299.29599165916443 + }, + "roundtrip": { + "p50": 453.8240134716034, + "p90": 461.08800172805786, + "p95": 464.4159972667694, + "p99": 471.16801142692566 + }, + "isolatedSum": { + "p50": 488.7999892234802, + "p90": 500.09599328041077, + "p95": 505.66399097442627, + "p99": 522.4959850311279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 330.84800839424133, + "p90": 337.18401193618774, + "p95": 340.9920036792755, + "p99": 350.0480055809021 + }, + "combine": { + "p50": 469.7279930114746, + "p90": 476.79999470710754, + "p95": 483.10399055480957, + "p99": 518.3359980583191 + }, + "roundtrip": { + "p50": 768.5120105743408, + "p90": 776.8639922142029, + "p95": 780.0959944725037, + "p99": 784.1920256614685 + }, + "isolatedSum": { + "p50": 800.5760014057159, + "p90": 813.9840066432953, + "p95": 824.0959942340851, + "p99": 868.3840036392212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 566.0160183906555, + "p90": 572.7360248565674, + "p95": 575.872004032135, + "p99": 579.9040198326111 + }, + "combine": { + "p50": 829.695999622345, + "p90": 839.8399949073792, + "p95": 841.3119912147522, + "p99": 843.6800241470337 + }, + "roundtrip": { + "p50": 1373.5040426254272, + "p90": 1380.6719779968262, + "p95": 1383.520007133484, + "p99": 1390.6879425048828 + }, + "isolatedSum": { + "p50": 1395.7120180130005, + "p90": 1412.5760197639465, + "p95": 1417.1839952468872, + "p99": 1423.5840439796448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1048.5440492630005, + "p90": 1056.9599866867065, + "p95": 1060.7999563217163, + "p99": 1068.511962890625 + }, + "combine": { + "p50": 1540.3200387954712, + "p90": 1544.160008430481, + "p95": 
1548.9599704742432, + "p99": 1554.6239614486694 + }, + "roundtrip": { + "p50": 2560.8959197998047, + "p90": 2571.808099746704, + "p95": 2575.392007827759, + "p99": 2584.6400260925293 + }, + "isolatedSum": { + "p50": 2588.8640880584717, + "p90": 2601.1199951171875, + "p95": 2609.7599267959595, + "p99": 2623.1359243392944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4bee4a65", + "identity": "b200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b200_216f4a3f", + "comparisonKey": "8addb4fa01a9b241", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:26:29.064138+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 202.2400051355362, + "p90": 231.1359941959381, + "p95": 236.4480048418045, + "p99": 244.47999894618988 + }, + "combine": { + "p50": 98.94400089979172, + "p90": 107.07200318574905, + "p95": 112.92800307273865, + "p99": 122.75200337171555 + }, + "roundtrip": { + "p50": 285.8240008354187, + "p90": 311.6160035133362, + "p95": 315.8720135688782, + "p99": 325.28001070022583 + }, + "isolatedSum": { + "p50": 301.1840060353279, + "p90": 338.20799738168716, + "p95": 349.37600791454315, + "p99": 367.2320023179054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 244.00000274181366, + "p90": 265.855997800827, + "p95": 275.7120132446289, + "p99": 287.77599334716797 + }, + "combine": { + "p50": 134.20799374580383, + "p90": 138.87999951839447, + "p95": 142.33599603176117, + "p99": 149.24800395965576 + }, + "roundtrip": { + "p50": 365.02400040626526, + "p90": 388.7999951839447, + "p95": 395.26399970054626, + "p99": 415.1360094547272 + }, + "isolatedSum": { + "p50": 378.2079964876175, + "p90": 404.7359973192215, + "p95": 
418.0480092763901, + "p99": 437.02399730682373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 325.24800300598145, + "p90": 351.26399993896484, + "p95": 357.9519987106323, + "p99": 401.5040099620819 + }, + "combine": { + "p50": 199.16799664497375, + "p90": 204.28800582885742, + "p95": 206.94400370121002, + "p99": 212.44800090789795 + }, + "roundtrip": { + "p50": 513.2480263710022, + "p90": 533.1199765205383, + "p95": 541.5999889373779, + "p99": 555.9679865837097 + }, + "isolatedSum": { + "p50": 524.4159996509552, + "p90": 555.5520057678223, + "p95": 564.8960024118423, + "p99": 613.9520108699799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 498.30400943756104, + "p90": 511.29597425460815, + "p95": 515.9680247306824, + "p99": 528.8959741592407 + }, + "combine": { + "p50": 411.48799657821655, + "p90": 421.7599928379059, + "p95": 425.6959855556488, + "p99": 431.36000633239746 + }, + "roundtrip": { + "p50": 899.7759819030762, + "p90": 912.8640294075012, + "p95": 918.8799858093262, + "p99": 927.9680252075195 + }, + "isolatedSum": { + "p50": 909.7920060157776, + "p90": 933.055967092514, + "p95": 941.6640102863312, + "p99": 960.2559804916382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
16384, + "dispatch": { + "p50": 856.544017791748, + "p90": 869.5039749145508, + "p95": 877.7599930763245, + "p99": 906.7839980125427 + }, + "combine": { + "p50": 762.9439830780029, + "p90": 770.5600261688232, + "p95": 774.1119861602783, + "p99": 780.4800271987915 + }, + "roundtrip": { + "p50": 1592.8319692611694, + "p90": 1610.2399826049805, + "p95": 1616.2559986114502, + "p99": 1639.1680240631104 + }, + "isolatedSum": { + "p50": 1619.488000869751, + "p90": 1640.064001083374, + "p95": 1651.8719792366028, + "p99": 1687.2640252113342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1632.9599618911743, + "p90": 1646.8479633331299, + "p95": 1650.015950202942, + "p99": 1659.4239473342896 + }, + "combine": { + "p50": 1447.9039907455444, + "p90": 1456.544041633606, + "p95": 1458.8160514831543, + "p99": 1466.0160541534424 + }, + "roundtrip": { + "p50": 3043.3599948883057, + "p90": 3054.271936416626, + "p95": 3057.2800636291504, + "p99": 3066.6239261627197 + }, + "isolatedSum": { + "p50": 3080.8639526367188, + "p90": 3103.392004966736, + "p95": 3108.832001686096, + "p99": 3125.440001487732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4cade7bd", + "identity": "b200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b200_216f4a3f", + "comparisonKey": "aa5f8e4b5f18985e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:28:18.748313+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 205.82400262355804, + "p90": 230.43200373649597, + "p95": 240.03200232982635, + "p99": 251.39200687408447 + }, + "combine": { + "p50": 105.82400113344193, + "p90": 115.87200313806534, + "p95": 121.21599912643433, + "p99": 
128.57599556446075 + }, + "roundtrip": { + "p50": 297.69599437713623, + "p90": 333.6000144481659, + "p95": 340.831995010376, + "p99": 356.1600148677826 + }, + "isolatedSum": { + "p50": 311.64800375699997, + "p90": 346.3040068745613, + "p95": 361.2480014562607, + "p99": 379.9680024385452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 256.1599910259247, + "p90": 280.8000147342682, + "p95": 286.0479950904846, + "p99": 296.86400294303894 + }, + "combine": { + "p50": 142.2400027513504, + "p90": 148.70400726795197, + "p95": 152.8639942407608, + "p99": 158.24000537395477 + }, + "roundtrip": { + "p50": 389.055997133255, + "p90": 409.280002117157, + "p95": 414.94399309158325, + "p99": 433.6639940738678 + }, + "isolatedSum": { + "p50": 398.3999937772751, + "p90": 429.50402200222015, + "p95": 438.9119893312454, + "p99": 455.1040083169937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 349.8240113258362, + "p90": 372.70399928092957, + "p95": 378.01599502563477, + "p99": 399.80798959732056 + }, + "combine": { + "p50": 217.47200191020966, + "p90": 225.3440022468567, + "p95": 228.67199778556824, + "p99": 234.49599742889404 + }, + "roundtrip": { + "p50": 579.4879794120789, + "p90": 607.0079803466797, + "p95": 617.2159910202026, + "p99": 647.487998008728 + }, + "isolatedSum": { + "p50": 567.2960132360458, + "p90": 598.0480015277863, + "p95": 606.687992811203, + "p99": 634.3039870262146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 559.6799850463867, + "p90": 604.1920185089111, + "p95": 626.4960169792175, + "p99": 656.5120220184326 + }, + "combine": { + "p50": 442.6240026950836, + "p90": 449.72801208496094, + "p95": 452.7359902858734, + "p99": 461.2480103969574 + }, + "roundtrip": { + "p50": 980.0320267677307, + "p90": 990.4959797859192, + "p95": 997.2800016403198, + "p99": 1018.2080268859863 + }, + "isolatedSum": { + "p50": 1002.3039877414703, + "p90": 1053.920030593872, + "p95": 1079.232007265091, + "p99": 1117.76003241539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 991.9360280036926, + "p90": 1002.4319887161255, + "p95": 1007.1359872817993, + "p99": 1034.559965133667 + }, + "combine": { + "p50": 785.2799892425537, + "p90": 791.3920283317566, + "p95": 794.2399978637695, + "p99": 799.2640137672424 + }, + "roundtrip": { + "p50": 1755.3600072860718, + "p90": 1765.663981437683, + "p95": 1770.0480222702026, + "p99": 1786.1440181732178 + }, + "isolatedSum": { + "p50": 1777.2160172462463, + "p90": 1793.824017047882, + "p95": 1801.3759851455688, + "p99": 1833.8239789009094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1901.6640186309814, + "p90": 1911.520004272461, + "p95": 
1915.1999950408936, + "p99": 1926.4639616012573 + }, + "combine": { + "p50": 1486.5280389785767, + "p90": 1493.407964706421, + "p95": 1495.4559803009033, + "p99": 1503.6159753799438 + }, + "roundtrip": { + "p50": 3361.151933670044, + "p90": 3371.840000152588, + "p95": 3375.0720024108887, + "p99": 3387.712001800537 + }, + "isolatedSum": { + "p50": 3388.192057609558, + "p90": 3404.927968978882, + "p95": 3410.655975341797, + "p99": 3430.079936981201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1b841215", + "identity": "b200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_216f4a3f", + "comparisonKey": "4f22b8cee12729c7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:30:10.872947+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 213.15200626850128, + "p90": 225.79200565814972, + "p95": 229.08799350261688, + "p99": 233.50399732589722 + }, + "combine": { + "p50": 112.12799698114395, + "p90": 116.12799763679504, + "p95": 118.20799857378006, + "p99": 122.5920021533966 + }, + "roundtrip": { + "p50": 313.9199912548065, + "p90": 325.0240087509155, + "p95": 336.2559974193573, + "p99": 356.31999373435974 + }, + "isolatedSum": { + "p50": 325.28000324964523, + "p90": 341.92000329494476, + "p95": 347.29599207639694, + "p99": 356.0959994792938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 267.8079903125763, + "p90": 279.7439992427826, + "p95": 284.5759987831116, + "p99": 302.623987197876 + }, + "combine": { + "p50": 151.32799744606018, + "p90": 154.91199493408203, + "p95": 156.15999698638916, + "p99": 159.10400450229645 + }, + "roundtrip": { + "p50": 414.68799114227295, + "p90": 428.6719858646393, + "p95": 
441.5999948978424, + "p99": 461.34400367736816 + }, + "isolatedSum": { + "p50": 419.1359877586365, + "p90": 434.6559941768646, + "p95": 440.73599576950073, + "p99": 461.7279917001724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 380.8639943599701, + "p90": 391.03999733924866, + "p95": 394.23999190330505, + "p99": 398.17601442337036 + }, + "combine": { + "p50": 247.77600169181824, + "p90": 253.05598974227905, + "p95": 255.48800826072693, + "p99": 259.90399718284607 + }, + "roundtrip": { + "p50": 627.5200247764587, + "p90": 647.3919749259949, + "p95": 659.9680185317993, + "p99": 686.4640116691589 + }, + "isolatedSum": { + "p50": 628.6399960517883, + "p90": 644.0959870815277, + "p95": 649.728000164032, + "p99": 658.0800116062164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 606.9759726524353, + "p90": 618.9759969711304, + "p95": 625.8879899978638, + "p99": 650.3360271453857 + }, + "combine": { + "p50": 449.3120014667511, + "p90": 454.6560049057007, + "p95": 457.18398690223694, + "p99": 462.5599980354309 + }, + "roundtrip": { + "p50": 1040.38405418396, + "p90": 1053.760051727295, + "p95": 1059.5519542694092, + "p99": 1094.1439867019653 + }, + "isolatedSum": { + "p50": 1056.2879741191864, + "p90": 1073.632001876831, + "p95": 1083.0719769001007, + "p99": 1112.8960251808167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 
5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1103.935956954956, + "p90": 1109.9200248718262, + "p95": 1112.9920482635498, + "p99": 1119.6160316467285 + }, + "combine": { + "p50": 807.4560165405273, + "p90": 814.303994178772, + "p95": 816.7679905891418, + "p99": 828.607976436615 + }, + "roundtrip": { + "p50": 1892.2239542007446, + "p90": 1900.8640050888062, + "p95": 1904.1279554367065, + "p99": 1909.4079732894897 + }, + "isolatedSum": { + "p50": 1911.3919734954834, + "p90": 1924.2240190505981, + "p95": 1929.7600388526917, + "p99": 1948.2240080833435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2110.81600189209, + "p90": 2118.97611618042, + "p95": 2123.4240531921387, + "p99": 2147.3920345306396 + }, + "combine": { + "p50": 1501.08802318573, + "p90": 1508.1599950790405, + "p95": 1509.9519491195679, + "p99": 1517.4399614334106 + }, + "roundtrip": { + "p50": 3597.0559120178223, + "p90": 3605.7920455932617, + "p95": 3608.736038208008, + "p99": 3627.0079612731934 + }, + "isolatedSum": { + "p50": 3611.90402507782, + "p90": 3627.1361112594604, + "p95": 3633.3760023117065, + "p99": 3664.8319959640503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b1c830c2", + "identity": "b200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_bb944e8b", + "comparisonKey": 
"15bb47899d290b5c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:21:39.218426+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.1200025677681, + "p90": 107.13600367307663, + "p95": 109.53599959611893, + "p99": 
115.74400216341019 + }, + "combine": { + "p50": 118.17599833011627, + "p90": 122.079998254776, + "p95": 123.90399724245071, + "p99": 130.40000200271606 + }, + "roundtrip": { + "p50": 239.80799317359924, + "p90": 249.08800423145294, + "p95": 256.3199996948242, + "p99": 306.36799335479736 + }, + "isolatedSum": { + "p50": 219.29600089788437, + "p90": 229.21600192785263, + "p95": 233.43999683856964, + "p99": 246.14400416612625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.17600095272064, + "p90": 139.45600390434265, + "p95": 141.34399592876434, + "p99": 148.3519971370697 + }, + "combine": { + "p50": 163.2000058889389, + "p90": 168.86399686336517, + "p95": 170.6559956073761, + "p99": 175.135999917984 + }, + "roundtrip": { + "p50": 346.0800051689148, + "p90": 353.92001271247864, + "p95": 357.7600121498108, + "p99": 365.5039966106415 + }, + "isolatedSum": { + "p50": 297.37600684165955, + "p90": 308.3200007677078, + "p95": 311.99999153614044, + "p99": 323.4879970550537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 197.60000705718994, + "p90": 206.04799687862396, + "p95": 209.56799387931824, + "p99": 216.0319983959198 + }, + "combine": { + "p50": 277.6640057563782, + "p90": 282.4319899082184, + "p95": 284.2240035533905, + "p99": 289.216011762619 + }, + "roundtrip": { + "p50": 575.4879713058472, + "p90": 582.6560258865356, + "p95": 585.2159857749939, + "p99": 593.0879712104797 + }, + "isolatedSum": { + "p50": 475.2640128135681, + 
"p90": 488.47998678684235, + "p95": 493.79199743270874, + "p99": 505.2480101585388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.4319951534271, + "p90": 322.6560056209564, + "p95": 326.24000310897827, + "p99": 333.3120048046112 + }, + "combine": { + "p50": 463.55199813842773, + "p90": 469.7920083999634, + "p95": 472.03201055526733, + "p99": 477.56800055503845 + }, + "roundtrip": { + "p50": 1000.9599924087524, + "p90": 1008.512020111084, + "p95": 1013.152003288269, + "p99": 1018.8159942626953 + }, + "isolatedSum": { + "p50": 777.9839932918549, + "p90": 792.4480140209198, + "p95": 798.2720136642456, + "p99": 810.8800053596497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 552.1600246429443, + "p90": 557.9519867897034, + "p95": 560.0640177726746, + "p99": 568.1279897689819 + }, + "combine": { + "p50": 827.1039724349976, + "p90": 834.7200155258179, + "p95": 837.2799754142761, + "p99": 852.1280288696289 + }, + "roundtrip": { + "p50": 1844.480037689209, + "p90": 1854.1439771652222, + "p95": 1857.151985168457, + "p99": 1882.1120262145996 + }, + "isolatedSum": { + "p50": 1379.263997077942, + "p90": 1392.6720023155212, + "p95": 1397.3439931869507, + "p99": 1420.2560186386108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1026.5920162200928, + "p90": 1034.6879959106445, + "p95": 1036.6400480270386, + "p99": 1041.5359735488892 + }, + "combine": { + "p50": 1533.2800149917603, + "p90": 1540.8960580825806, + "p95": 1542.7199602127075, + "p99": 1569.1200494766235 + }, + "roundtrip": { + "p50": 3513.5040283203125, + "p90": 3521.7599868774414, + "p95": 3524.4479179382324, + "p99": 3530.4319858551025 + }, + "isolatedSum": { + "p50": 2559.872031211853, + "p90": 2575.584053993225, + "p95": 2579.360008239746, + "p99": 2610.6560230255127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e5bcc6f2", + "identity": "b200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_216f4a3f", + "comparisonKey": "e9cb1dda4fdf66f8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:23:35.984810+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, 
+ "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 218.84800493717194, + "p90": 250.2399981021881, + "p95": 254.30399179458618, + "p99": 261.82401180267334 + }, + "combine": { + "p50": 119.61600184440613, + "p90": 125.95200538635254, + "p95": 130.20800054073334, + "p99": 135.29600203037262 + }, + "roundtrip": { + "p50": 327.13600993156433, + "p90": 348.7359881401062, + "p95": 353.7920117378235, + "p99": 363.5840117931366 + }, + "isolatedSum": { + "p50": 338.46400678157806, + "p90": 376.19200348854065, + "p95": 384.5119923353195, + "p99": 397.12001383304596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 276.5760123729706, + "p90": 308.9599907398224, + "p95": 317.02399253845215, + "p99": 341.72800183296204 + }, + "combine": { + "p50": 163.03999722003937, + "p90": 169.72799599170685, + "p95": 
173.21600019931793, + "p99": 178.94400656223297 + }, + "roundtrip": { + "p50": 437.24799156188965, + "p90": 457.0240080356598, + "p95": 463.8719856739044, + "p99": 484.5440089702606 + }, + "isolatedSum": { + "p50": 439.61600959300995, + "p90": 478.68798673152924, + "p95": 490.2399927377701, + "p99": 520.672008395195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 405.9840142726898, + "p90": 424.0640103816986, + "p95": 432.9279959201813, + "p99": 460.2240025997162 + }, + "combine": { + "p50": 275.1680016517639, + "p90": 281.15200996398926, + "p95": 283.488005399704, + "p99": 289.792001247406 + }, + "roundtrip": { + "p50": 673.6000180244446, + "p90": 691.1360025405884, + "p95": 699.9040246009827, + "p99": 717.7600264549255 + }, + "isolatedSum": { + "p50": 681.1520159244537, + "p90": 705.2160203456879, + "p95": 716.4160013198853, + "p99": 750.0160038471222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 648.8320231437683, + "p90": 655.7440161705017, + "p95": 665.1840209960938, + "p99": 705.0880193710327 + }, + "combine": { + "p50": 464.7679924964905, + "p90": 472.76800870895386, + "p95": 476.25601291656494, + "p99": 485.9200119972229 + }, + "roundtrip": { + "p50": 1098.7839698791504, + "p90": 1118.5280084609985, + "p95": 1130.303978919983, + "p99": 1166.5600538253784 + }, + "isolatedSum": { + "p50": 1113.6000156402588, + "p90": 1128.5120248794556, + "p95": 1141.4400339126587, + "p99": 1191.0080313682556 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1202.3680210113525, + "p90": 1208.4800004959106, + "p95": 1210.368037223816, + "p99": 1216.9280052185059 + }, + "combine": { + "p50": 828.2880187034607, + "p90": 835.1680040359497, + "p95": 837.664008140564, + "p99": 843.0079817771912 + }, + "roundtrip": { + "p50": 2009.7599029541016, + "p90": 2018.1119441986084, + "p95": 2020.7040309906006, + "p99": 2030.303955078125 + }, + "isolatedSum": { + "p50": 2030.6560397148132, + "p90": 2043.6480045318604, + "p95": 2048.03204536438, + "p99": 2059.935986995697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2298.527956008911, + "p90": 2306.4959049224854, + "p95": 2309.1518878936768, + "p99": 2315.392017364502 + }, + "combine": { + "p50": 1535.5199575424194, + "p90": 1543.455958366394, + "p95": 1546.7840433120728, + "p99": 1553.5039901733398 + }, + "roundtrip": { + "p50": 3824.3839740753174, + "p90": 3835.0400924682617, + "p95": 3838.4320735931396, + "p99": 3852.031946182251 + }, + "isolatedSum": { + "p50": 3834.0479135513306, + "p90": 3849.9518632888794, + "p95": 3855.9359312057495, + "p99": 3868.896007537842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-464f6a25", + "identity": 
"b200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b200_216f4a3f", + "comparisonKey": "2dc72d345cacaf19", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:24:36.465077+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 216.99200570583344, + "p90": 244.25600469112396, + "p95": 250.36799907684326, + "p99": 270.4319953918457 + }, + "combine": { + "p50": 118.52800101041794, + "p90": 125.2799928188324, + "p95": 131.3599944114685, + "p99": 136.9280070066452 + }, + "roundtrip": { + "p50": 325.3119885921478, + "p90": 350.68801045417786, + "p95": 358.62401127815247, + "p99": 383.87200236320496 + }, + "isolatedSum": { + "p50": 335.5200067162514, + "p90": 369.53599750995636, + "p95": 381.72799348831177, + "p99": 407.3600023984909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 272.5439965724945, + "p90": 296.86400294303894, + "p95": 302.68800258636475, + "p99": 313.6959969997406 + }, + "combine": { + "p50": 163.00800442695618, + "p90": 169.21600699424744, + "p95": 173.2800006866455, + "p99": 179.23200130462646 + }, + "roundtrip": { + "p50": 435.2959990501404, + "p90": 460.1599872112274, + "p95": 467.4240052700043, + "p99": 478.11201214790344 + }, + "isolatedSum": { + "p50": 435.5520009994507, + "p90": 466.0800099372864, + "p95": 475.96800327301025, + "p99": 492.92799830436707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 407.3599874973297, + "p90": 439.13599848747253, + "p95": 455.23199439048767, + "p99": 520.1600193977356 + }, + "combine": { + "p50": 276.5760123729706, + "p90": 284.09600257873535, + "p95": 286.49601340293884, + "p99": 292.38399863243103 + }, + "roundtrip": { + 
"p50": 667.9040193557739, + "p90": 685.5360269546509, + "p95": 698.5599994659424, + "p99": 715.9680128097534 + }, + "isolatedSum": { + "p50": 683.9359998703003, + "p90": 723.2320010662079, + "p95": 741.7280077934265, + "p99": 812.5440180301666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 650.1439809799194, + "p90": 662.2080206871033, + "p95": 670.3680157661438, + "p99": 696.4160203933716 + }, + "combine": { + "p50": 466.46401286125183, + "p90": 472.6400077342987, + "p95": 476.00001096725464, + "p99": 480.2879989147186 + }, + "roundtrip": { + "p50": 1098.2719659805298, + "p90": 1115.007996559143, + "p95": 1120.6079721450806, + "p99": 1134.6240043640137 + }, + "isolatedSum": { + "p50": 1116.6079938411713, + "p90": 1134.848028421402, + "p95": 1146.3680267333984, + "p99": 1176.7040193080902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1195.4879760742188, + "p90": 1202.720046043396, + "p95": 1205.9839963912964, + "p99": 1215.3279781341553 + }, + "combine": { + "p50": 818.880021572113, + "p90": 826.528012752533, + "p95": 829.2160034179688, + "p99": 833.6640000343323 + }, + "roundtrip": { + "p50": 1995.6799745559692, + "p90": 2004.8000812530518, + "p95": 2007.4560642242432, + "p99": 2018.4640884399414 + }, + "isolatedSum": { + "p50": 2014.3679976463318, + "p90": 2029.248058795929, + "p95": 2035.1999998092651, + "p99": 2048.9919781684875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + 
"combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2288.383960723877, + "p90": 2295.2001094818115, + "p95": 2298.3999252319336, + "p99": 2304.447889328003 + }, + "combine": { + "p50": 1527.9359817504883, + "p90": 1535.4880094528198, + "p95": 1537.343978881836, + "p99": 1540.7999753952026 + }, + "roundtrip": { + "p50": 3799.8719215393066, + "p90": 3808.9919090270996, + "p95": 3811.0721111297607, + "p99": 3814.847946166992 + }, + "isolatedSum": { + "p50": 3816.3199424743652, + "p90": 3830.6881189346313, + "p95": 3835.7439041137695, + "p99": 3845.2478647232056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c101e15e", + "identity": "b200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_e415a083", + "comparisonKey": "99186b2f10e79b7c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:22:35.456880+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_01", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.43200314044952, + "p90": 94.97600048780441, + "p95": 99.87200051546097, + "p99": 106.75200074911118 + }, + "combine": { + "p50": 118.97599697113037, + "p90": 123.99999797344208, + "p95": 126.56000256538391, + "p99": 141.15199446678162 + }, + "roundtrip": { + "p50": 226.81599855422974, + "p90": 235.48799753189087, + "p95": 239.87199366092682, + "p99": 250.07998943328857 + }, + "isolatedSum": { + "p50": 209.4080001115799, + "p90": 218.9759984612465, + "p95": 226.43200308084488, + "p99": 247.9039952158928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
124.03199821710587, + "p90": 131.1360001564026, + "p95": 134.8479986190796, + "p99": 142.91200041770935 + }, + "combine": { + "p50": 164.41600024700165, + "p90": 169.91999745368958, + "p95": 172.92800545692444, + "p99": 179.4240027666092 + }, + "roundtrip": { + "p50": 335.29600501060486, + "p90": 341.2480056285858, + "p95": 343.6479866504669, + "p99": 351.4240086078644 + }, + "isolatedSum": { + "p50": 288.4479984641075, + "p90": 301.05599761009216, + "p95": 307.776004076004, + "p99": 322.33600318431854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 186.78399920463562, + "p90": 192.44800508022308, + "p95": 195.19999623298645, + "p99": 203.5840004682541 + }, + "combine": { + "p50": 278.4639894962311, + "p90": 284.67199206352234, + "p95": 286.9440019130707, + "p99": 292.86399483680725 + }, + "roundtrip": { + "p50": 564.2560124397278, + "p90": 572.0319747924805, + "p95": 575.5519866943359, + "p99": 583.7759971618652 + }, + "isolatedSum": { + "p50": 465.2479887008667, + "p90": 477.1199971437454, + "p95": 482.14399814605713, + "p99": 496.44799530506134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 305.7920038700104, + "p90": 311.42398715019226, + "p95": 313.9519989490509, + "p99": 318.9440071582794 + }, + "combine": { + "p50": 464.03199434280396, + "p90": 470.2399969100952, + "p95": 473.82399439811707, + "p99": 482.40000009536743 + }, + "roundtrip": { + "p50": 990.1120066642761, + "p90": 996.9599843025208, + "p95": 
999.4879961013794, + "p99": 1004.1919946670532 + }, + "isolatedSum": { + "p50": 769.8239982128143, + "p90": 781.6639840602875, + "p95": 787.775993347168, + "p99": 801.3440072536469 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 535.9359979629517, + "p90": 541.4080023765564, + "p95": 544.704020023346, + "p99": 549.4400262832642 + }, + "combine": { + "p50": 827.1679878234863, + "p90": 834.6560001373291, + "p95": 836.9920253753662, + "p99": 842.9120182991028 + }, + "roundtrip": { + "p50": 1832.319974899292, + "p90": 1842.3680067062378, + "p95": 1846.1439609527588, + "p99": 1854.2720079421997 + }, + "isolatedSum": { + "p50": 1363.103985786438, + "p90": 1376.0640025138855, + "p95": 1381.6960453987122, + "p99": 1392.352044582367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 997.376024723053, + "p90": 1004.1279792785645, + "p95": 1006.7839622497559, + "p99": 1014.1439437866211 + }, + "combine": { + "p50": 1534.432053565979, + "p90": 1541.8239831924438, + "p95": 1543.8719987869263, + "p99": 1547.6160049438477 + }, + "roundtrip": { + "p50": 3482.1760654449463, + "p90": 3491.391897201538, + "p95": 3495.5201148986816, + "p99": 3501.9519329071045 + }, + "isolatedSum": { + "p50": 2531.808078289032, + "p90": 2545.9519624710083, + "p95": 2550.655961036682, + "p99": 2561.7599487304688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + 
"recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4f860a6b", + "identity": "b200|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b200_790e9497", + "comparisonKey": "d1fe4ee070e67fd9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:27:01.668980+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.85600072145462, + "p90": 117.91999638080597, + "p95": 120.60800194740295, + "p99": 127.23200023174286 + }, + "combine": { + "p50": 105.15200346708298, + "p90": 113.43999952077866, + "p95": 115.90400338172913, + "p99": 123.61600250005722 + }, + "roundtrip": { + "p50": 189.11999464035034, + "p90": 194.84800100326538, + "p95": 196.16000354290009, + "p99": 201.34399831295013 + }, + "isolatedSum": { + "p50": 207.0080041885376, + "p90": 231.35999590158463, + "p95": 236.51200532913208, + "p99": 250.84800273180008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.96800100803375, + "p90": 148.8959938287735, + "p95": 150.81599354743958, + "p99": 154.6880006790161 + }, + "combine": { + "p50": 141.56800508499146, + "p90": 150.56000649929047, + "p95": 152.70400047302246, + "p99": 158.33599865436554 + }, + "roundtrip": { + "p50": 257.1839988231659, + "p90": 264.44798707962036, + "p95": 269.6639895439148, + "p99": 274.27199482917786 + }, + "isolatedSum": { + "p50": 285.5360060930252, + "p90": 299.45600032806396, + "p95": 303.51999402046204, + "p99": 313.02399933338165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.50400567054749, + "p90": 199.2959976196289, + "p95": 201.79200172424316, 
+ "p99": 205.72799444198608 + }, + "combine": { + "p50": 210.40000021457672, + "p90": 215.00800549983978, + "p95": 216.41600131988525, + "p99": 222.9440063238144 + }, + "roundtrip": { + "p50": 377.1519958972931, + "p90": 387.1679902076721, + "p95": 392.767995595932, + "p99": 418.2719886302948 + }, + "isolatedSum": { + "p50": 403.9040058851242, + "p90": 414.3040031194687, + "p95": 418.2080030441284, + "p99": 428.6720007658005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.62401151657104, + "p90": 323.4559893608093, + "p95": 325.8560001850128, + "p99": 332.2240114212036 + }, + "combine": { + "p50": 407.6479971408844, + "p90": 413.34399580955505, + "p95": 415.16798734664917, + "p99": 422.6880073547363 + }, + "roundtrip": { + "p50": 629.8879981040955, + "p90": 638.2079720497131, + "p95": 642.2719955444336, + "p99": 648.8639712333679 + }, + "isolatedSum": { + "p50": 722.2720086574554, + "p90": 736.7999851703644, + "p95": 741.023987531662, + "p99": 754.9120187759399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 547.7120280265808, + "p90": 552.6720285415649, + "p95": 555.2319884300232, + "p99": 559.5200061798096 + }, + "combine": { + "p50": 770.3040242195129, + "p90": 779.0399789810181, + "p95": 780.3199887275696, + "p99": 782.6560139656067 + }, + "roundtrip": { + "p50": 1287.2320413589478, + "p90": 1294.5599555969238, + "p95": 1296.8000173568726, + "p99": 1301.3759851455688 + }, + "isolatedSum": { + "p50": 
1318.0160522460938, + "p90": 1331.712007522583, + "p95": 1335.5519771575928, + "p99": 1342.1760201454163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1018.8479423522949, + "p90": 1024.9279737472534, + "p95": 1027.359962463379, + "p99": 1035.3280305862427 + }, + "combine": { + "p50": 1458.400011062622, + "p90": 1467.5519466400146, + "p95": 1476.0960340499878, + "p99": 1529.952049255371 + }, + "roundtrip": { + "p50": 2438.2400512695312, + "p90": 2447.648048400879, + "p95": 2450.0160217285156, + "p99": 2455.519914627075 + }, + "isolatedSum": { + "p50": 2477.247953414917, + "p90": 2492.479920387268, + "p95": 2503.4559965133667, + "p99": 2565.2800798416138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-93e49d71", + "identity": "b200|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b200_790e9497", + "comparisonKey": "c6b4dcd123b3a23f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:29:30.946299+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "shape 
5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.35199737548828, + "p90": 116.15999788045883, + "p95": 120.99199742078781, + "p99": 129.5360028743744 + }, + "combine": { + "p50": 114.56000059843063, + "p90": 119.61600184440613, + "p95": 124.25599992275238, + "p99": 129.95199859142303 + }, + "roundtrip": { + "p50": 198.68800044059753, + "p90": 206.33600652217865, + "p95": 209.6640020608902, + "p99": 217.66400337219238 + }, + "isolatedSum": { + "p50": 218.91199797391891, + "p90": 235.77599972486496, + "p95": 245.2479973435402, + "p99": 259.4880014657974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + 
"recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.61600577831268, + "p90": 151.2320041656494, + "p95": 155.96799552440643, + "p99": 162.52799332141876 + }, + "combine": { + "p50": 151.67999267578125, + "p90": 156.3519984483719, + "p95": 159.04000401496887, + "p99": 168.41599345207214 + }, + "roundtrip": { + "p50": 266.52801036834717, + "p90": 275.519996881485, + "p95": 280.4799973964691, + "p99": 286.3360047340393 + }, + "isolatedSum": { + "p50": 295.29599845409393, + "p90": 307.5840026140213, + "p95": 315.0079995393753, + "p99": 330.9439867734909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.0080041885376, + "p90": 215.96799790859222, + "p95": 219.13599967956543, + "p99": 226.01599991321564 + }, + "combine": { + "p50": 218.55999529361725, + "p90": 227.1679937839508, + "p95": 228.99200022220612, + "p99": 234.592005610466 + }, + "roundtrip": { + "p50": 397.98399806022644, + "p90": 405.3120017051697, + "p95": 409.1840088367462, + "p99": 414.40001130104065 + }, + "isolatedSum": { + "p50": 425.56799948215485, + "p90": 443.13599169254303, + "p95": 448.12799990177155, + "p99": 460.60800552368164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 322.9439854621887, + "p90": 332.99198746681213, + "p95": 336.5119993686676, + "p99": 349.5360016822815 + }, + "combine": { + "p50": 
451.35998725891113, + "p90": 461.34400367736816, + "p95": 468.6720073223114, + "p99": 522.271990776062 + }, + "roundtrip": { + "p50": 732.5119972229004, + "p90": 741.8879866600037, + "p95": 748.4480142593384, + "p99": 815.1040077209473 + }, + "isolatedSum": { + "p50": 774.3039727210999, + "p90": 794.3359911441803, + "p95": 805.184006690979, + "p99": 871.8079924583435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 564.191997051239, + "p90": 572.767972946167, + "p95": 574.6560096740723, + "p99": 583.0079913139343 + }, + "combine": { + "p50": 794.8799729347229, + "p90": 804.5759797096252, + "p95": 806.4000010490417, + "p99": 816.0960078239441 + }, + "roundtrip": { + "p50": 1334.5600366592407, + "p90": 1341.8240547180176, + "p95": 1344.5119857788086, + "p99": 1351.5839576721191 + }, + "isolatedSum": { + "p50": 1359.071969985962, + "p90": 1377.3439526557922, + "p95": 1381.056010723114, + "p99": 1399.1039991378784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1055.7119846343994, + "p90": 1061.5359544754028, + "p95": 1064.6079778671265, + "p99": 1069.0560340881348 + }, + "combine": { + "p50": 1495.4240322113037, + "p90": 1506.1759948730469, + "p95": 1507.7120065689087, + "p99": 1520.6400156021118 + }, + "roundtrip": { + "p50": 2514.143943786621, + "p90": 2523.3280658721924, + "p95": 2525.599956512451, + "p99": 2530.9441089630127 + }, + "isolatedSum": { + "p50": 2551.136016845703, + "p90": 2567.7119493484497, + "p95": 
2572.319984436035, + "p99": 2589.6960496902466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ffbf0e43", + "identity": "b200|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_790e9497", + "comparisonKey": "9048ebeac16ddfdb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:32:01.522320+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.41599917411804, + "p90": 120.35199999809265, + "p95": 122.56000190973282, + "p99": 127.71199643611908 + }, + "combine": { + "p50": 117.34399944543839, + "p90": 126.24000012874603, + "p95": 127.45599448680878, + "p99": 129.92000579833984 + }, + "roundtrip": { + "p50": 202.2079974412918, + "p90": 212.25599944591522, + "p95": 216.0319983959198, + "p99": 222.30400145053864 + }, + "isolatedSum": { + "p50": 229.75999861955643, + "p90": 246.59200012683868, + "p95": 250.0159963965416, + "p99": 257.6320022344589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.64799857139587, + "p90": 158.07999670505524, + "p95": 162.75200247764587, + "p99": 170.23999989032745 + }, + "combine": { + "p50": 161.31199896335602, + "p90": 165.3439998626709, + "p95": 167.32800006866455, + "p99": 174.0799993276596 + }, + "roundtrip": { + "p50": 278.75199913978577, + "p90": 285.6000065803528, + "p95": 288.03199529647827, + "p99": 295.0719892978668 + }, + "isolatedSum": { + "p50": 304.9599975347519, + "p90": 323.42399656772614, + "p95": 330.0800025463104, + "p99": 344.31999921798706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 216.51199460029602, + "p90": 222.78399765491486, + "p95": 225.50399601459503, + "p99": 231.04000091552734 + }, + "combine": { + "p50": 259.2320144176483, + "p90": 264.3519937992096, + "p95": 265.9519910812378, + "p99": 274.2080092430115 + }, + "roundtrip": { + "p50": 428.44799160957336, + "p90": 436.383992433548, + "p95": 438.87999653816223, + "p99": 446.24000787734985 + }, + "isolatedSum": { + "p50": 475.74400901794434, + "p90": 487.13599145412445, + "p95": 491.4559870958328, + "p99": 505.2480101585388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 329.6000063419342, + "p90": 338.81598711013794, + "p95": 343.48800778388977, + "p99": 357.15198516845703 + }, + "combine": { + "p50": 457.18398690223694, + "p90": 461.7919921875, + "p95": 463.03999423980713, + "p99": 471.6159999370575 + }, + "roundtrip": { + "p50": 765.7920122146606, + "p90": 774.4960188865662, + "p95": 777.7600288391113, + "p99": 785.0239872932434 + }, + "isolatedSum": { + "p50": 786.7839932441711, + "p90": 800.6079792976379, + "p95": 806.5280020236969, + "p99": 828.7679851055145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 577.4719715118408, + "p90": 585.6320261955261, + "p95": 587.1679782867432, + "p99": 590.9759998321533 + }, + "combine": { + "p50": 817.5680041313171, + "p90": 829.9520015716553, + "p95": 837.4720215797424, + "p99": 
859.391987323761 + }, + "roundtrip": { + "p50": 1365.7920360565186, + "p90": 1373.9839792251587, + "p95": 1377.6320219039917, + "p99": 1385.151982307434 + }, + "isolatedSum": { + "p50": 1395.039975643158, + "p90": 1415.5840277671814, + "p95": 1424.6399998664856, + "p99": 1450.3679871559143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1068.2560205459595, + "p90": 1073.6960172653198, + "p95": 1076.416015625, + "p99": 1082.6239585876465 + }, + "combine": { + "p50": 1518.7519788742065, + "p90": 1525.6320238113403, + "p95": 1529.6319723129272, + "p99": 1536.0959768295288 + }, + "roundtrip": { + "p50": 2557.728052139282, + "p90": 2567.296028137207, + "p95": 2570.6560611724854, + "p99": 2588.736057281494 + }, + "isolatedSum": { + "p50": 2587.007999420166, + "p90": 2599.32804107666, + "p95": 2606.0479879379272, + "p99": 2618.7199354171753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-29adbfae", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_d95ab2b3", + "comparisonKey": "02d3d33b72456cd5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:15:46.720805+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.92799693346024, + "p90": 132.35199451446533, + "p95": 137.28000223636627, + "p99": 148.25600385665894 + }, + "combine": { + "p50": 127.6479959487915, + "p90": 131.52000308036804, + "p95": 134.75200533866882, + "p99": 143.77599954605103 + }, + "roundtrip": { + "p50": 218.20800006389618, + "p90": 226.8799990415573, + "p95": 232.86400735378265, + "p99": 241.60000681877136 + }, + "isolatedSum": { + "p50": 248.57599288225174, + "p90": 263.8719975948334, + "p95": 
272.0320075750351, + "p99": 292.03200340270996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.44000673294067, + "p90": 163.80800306797028, + "p95": 168.7680035829544, + "p99": 174.30399358272552 + }, + "combine": { + "p50": 174.43199455738068, + "p90": 179.9039989709854, + "p95": 180.92800676822662, + "p99": 187.29600310325623 + }, + "roundtrip": { + "p50": 297.21599817276, + "p90": 308.0959916114807, + "p95": 313.4079873561859, + "p99": 325.1200020313263 + }, + "isolatedSum": { + "p50": 331.87200129032135, + "p90": 343.7120020389557, + "p95": 349.69601035118103, + "p99": 361.59999668598175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 220.57600319385529, + "p90": 230.81600666046143, + "p95": 235.07200181484222, + "p99": 249.05599653720856 + }, + "combine": { + "p50": 284.86400842666626, + "p90": 291.20001196861267, + "p95": 293.3120131492615, + "p99": 304.4160008430481 + }, + "roundtrip": { + "p50": 464.86398577690125, + "p90": 476.3520061969757, + "p95": 480.1279902458191, + "p99": 498.78400564193726 + }, + "isolatedSum": { + "p50": 505.44001162052155, + "p90": 522.0160186290741, + "p95": 528.3840149641037, + "p99": 553.4719973802567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + 
"globalTokens": 8192, + "dispatch": { + "p50": 343.80799531936646, + "p90": 352.09599137306213, + "p95": 357.31199383735657, + "p99": 367.48799681663513 + }, + "combine": { + "p50": 472.03201055526733, + "p90": 478.65599393844604, + "p95": 482.9440116882324, + "p99": 487.13600635528564 + }, + "roundtrip": { + "p50": 785.5679988861084, + "p90": 793.8240170478821, + "p95": 797.5999712944031, + "p99": 807.968020439148 + }, + "isolatedSum": { + "p50": 815.8400058746338, + "p90": 830.7519853115082, + "p95": 840.256005525589, + "p99": 854.6240031719208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.1280221939087, + "p90": 594.43199634552, + "p95": 604.0319800376892, + "p99": 618.1439757347107 + }, + "combine": { + "p50": 836.1279964447021, + "p90": 849.9199748039246, + "p95": 867.5199747085571, + "p99": 897.4400162696838 + }, + "roundtrip": { + "p50": 1393.9520120620728, + "p90": 1403.167963027954, + "p95": 1406.1119556427002, + "p99": 1416.991949081421 + }, + "isolatedSum": { + "p50": 1420.2560186386108, + "p90": 1444.3519711494446, + "p95": 1471.5519547462463, + "p99": 1515.5839920043945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1081.2480449676514, + "p90": 1088.4159803390503, + "p95": 1090.9440517425537, + "p99": 1096.351981163025 + }, + "combine": { + "p50": 1546.8800067901611, + "p90": 1556.831955909729, + "p95": 1560.479998588562, + "p99": 1569.216012954712 + }, + "roundtrip": { + "p50": 
2601.5679836273193, + "p90": 2610.975980758667, + "p95": 2613.663911819458, + "p99": 2620.512008666992 + }, + "isolatedSum": { + "p50": 2628.1280517578125, + "p90": 2645.2479362487793, + "p95": 2651.4240503311157, + "p99": 2665.567994117737 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a3926746", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_790e9497", + "comparisonKey": "7f1c5d1e5b4d2e19", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:18:12.354624+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.90399658679962, + "p90": 124.83199685811996, + "p95": 127.32799351215363, + "p99": 133.34399461746216 + }, + "combine": { + "p50": 127.03999876976013, + "p90": 130.49599528312683, + "p95": 131.26400113105774, + "p99": 139.00800049304962 + }, + "roundtrip": { + "p50": 215.2000069618225, + "p90": 221.47199511528015, + "p95": 224.12799298763275, + "p99": 227.39200294017792 + }, + "isolatedSum": { + "p50": 246.94399535655975, + "p90": 255.3279921412468, + "p95": 258.59199464321136, + "p99": 272.3519951105118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 156.80000185966492, + "p90": 161.56800091266632, + "p95": 164.19200599193573, + "p99": 174.68799650669098 + }, + "combine": { + "p50": 174.01599884033203, + "p90": 178.01600694656372, + "p95": 180.4479956626892, + "p99": 186.94399297237396 + }, + "roundtrip": { + "p50": 296.60800099372864, + "p90": 306.97599053382874, + "p95": 309.59999561309814, + "p99": 315.71200489997864 + }, + "isolatedSum": { + "p50": 330.81600069999695, + "p90": 339.58400785923004, + "p95": 344.64000165462494, + "p99": 361.63198947906494 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 218.49599480628967, + "p90": 226.43199563026428, + "p95": 231.90400004386902, + "p99": 243.48799884319305 + }, + "combine": { + "p50": 279.1360020637512, + "p90": 287.23201155662537, + "p95": 288.83200883865356, + "p99": 292.38399863243103 + }, + "roundtrip": { + "p50": 462.72000670433044, + "p90": 472.4160134792328, + "p95": 475.0399887561798, + "p99": 482.14399814605713 + }, + "isolatedSum": { + "p50": 497.6319968700409, + "p90": 513.6640071868896, + "p95": 520.7360088825226, + "p99": 535.8719974756241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 343.26401352882385, + "p90": 348.4160006046295, + "p95": 350.5280017852783, + "p99": 354.91201281547546 + }, + "combine": { + "p50": 472.1600115299225, + "p90": 478.2080054283142, + "p95": 482.40000009536743, + "p99": 485.152006149292 + }, + "roundtrip": { + "p50": 785.3760123252869, + "p90": 792.4799919128418, + "p95": 794.975996017456, + "p99": 800.383985042572 + }, + "isolatedSum": { + "p50": 815.4240250587463, + "p90": 826.6240060329437, + "p95": 832.9280018806458, + "p99": 840.0640189647675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 582.5920104980469, + 
"p90": 588.8320207595825, + "p95": 591.6799902915955, + "p99": 598.8799929618835 + }, + "combine": { + "p50": 835.2000117301941, + "p90": 844.7999954223633, + "p95": 850.7199883460999, + "p99": 863.8719916343689 + }, + "roundtrip": { + "p50": 1393.280029296875, + "p90": 1402.8480052947998, + "p95": 1406.816005706787, + "p99": 1424.8319864273071 + }, + "isolatedSum": { + "p50": 1417.792022228241, + "p90": 1433.6320161819458, + "p95": 1442.3999786376953, + "p99": 1462.7519845962524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1081.6320180892944, + "p90": 1087.3279571533203, + "p95": 1090.3680324554443, + "p99": 1095.7759618759155 + }, + "combine": { + "p50": 1544.927954673767, + "p90": 1555.2959442138672, + "p95": 1556.3839673995972, + "p99": 1560.4480504989624 + }, + "roundtrip": { + "p50": 2599.0400314331055, + "p90": 2608.7679862976074, + "p95": 2612.544059753418, + "p99": 2622.27201461792 + }, + "isolatedSum": { + "p50": 2626.5599727630615, + "p90": 2642.6239013671875, + "p95": 2646.7519998550415, + "p99": 2656.224012374878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d9610365", + "identity": "b200|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b200_790e9497", + "comparisonKey": "b3fc0919836c9614", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:24:33.865117+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + 
"sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.57600170373917, + "p90": 128.83199751377106, + "p95": 134.17600095272064, + "p99": 143.93599331378937 + }, + "combine": { + "p50": 127.29600071907043, + "p90": 130.75199723243713, + "p95": 131.9359987974167, + "p99": 142.36800372600555 + }, + "roundtrip": { + "p50": 
218.23999285697937, + "p90": 226.9439995288849, + "p95": 230.78399896621704, + "p99": 237.59999871253967 + }, + "isolatedSum": { + "p50": 247.8720024228096, + "p90": 259.5839947462082, + "p95": 266.11199975013733, + "p99": 286.3039970397949 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.62399339675903, + "p90": 165.92000424861908, + "p95": 169.3439930677414, + "p99": 177.63200402259827 + }, + "combine": { + "p50": 173.2800006866455, + "p90": 177.85599827766418, + "p95": 180.00000715255737, + "p99": 189.98399376869202 + }, + "roundtrip": { + "p50": 299.9039888381958, + "p90": 309.6959888935089, + "p95": 312.8319978713989, + "p99": 323.2640027999878 + }, + "isolatedSum": { + "p50": 331.90399408340454, + "p90": 343.77600252628326, + "p95": 349.34400022029877, + "p99": 367.6159977912903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 220.768004655838, + "p90": 230.01599311828613, + "p95": 234.3679964542389, + "p99": 245.2480047941208 + }, + "combine": { + "p50": 286.6879999637604, + "p90": 291.1680042743683, + "p95": 298.0479896068573, + "p99": 303.48798632621765 + }, + "roundtrip": { + "p50": 461.760014295578, + "p90": 469.6640074253082, + "p95": 472.7039933204651, + "p99": 477.1200120449066 + }, + "isolatedSum": { + "p50": 507.4560046195984, + "p90": 521.1839973926544, + "p95": 532.4159860610962, + "p99": 548.7359911203384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 
309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 343.77598762512207, + "p90": 352.1600067615509, + "p95": 357.4720025062561, + "p99": 365.7599985599518 + }, + "combine": { + "p50": 474.047988653183, + "p90": 484.3200147151947, + "p95": 486.7520034313202, + "p99": 494.9440062046051 + }, + "roundtrip": { + "p50": 790.7840013504028, + "p90": 798.2400059700012, + "p95": 800.383985042572, + "p99": 805.2480220794678 + }, + "isolatedSum": { + "p50": 817.823976278305, + "p90": 836.4800214767456, + "p95": 844.2240059375763, + "p99": 860.7040047645569 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 583.9999914169312, + "p90": 591.1039710044861, + "p95": 593.0240154266357, + "p99": 601.5999913215637 + }, + "combine": { + "p50": 828.5120129585266, + "p90": 833.952009677887, + "p95": 839.3920063972473, + "p99": 843.6800241470337 + }, + "roundtrip": { + "p50": 1382.2720050811768, + "p90": 1390.0480270385742, + "p95": 1392.1279907226562, + "p99": 1398.3360528945923 + }, + "isolatedSum": { + "p50": 1412.5120043754578, + "p90": 1425.055980682373, + "p95": 1432.416021823883, + "p99": 1445.2800154685974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1079.584002494812, + "p90": 1085.5679512023926, + "p95": 1087.7439975738525, + "p99": 1093.664050102234 + }, 
+ "combine": { + "p50": 1541.2800312042236, + "p90": 1553.5039901733398, + "p95": 1564.255952835083, + "p99": 1602.9759645462036 + }, + "roundtrip": { + "p50": 2588.063955307007, + "p90": 2596.1599349975586, + "p95": 2599.5519161224365, + "p99": 2605.5359840393066 + }, + "isolatedSum": { + "p50": 2620.8640336990356, + "p90": 2639.0719413757324, + "p95": 2651.9999504089355, + "p99": 2696.6400146484375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ec3527fd", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b200_ec1724fd", + "comparisonKey": "8bc9261457089d1d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:33:17.092885+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.07200646400452, + "p90": 135.19999384880066, + "p95": 138.62399756908417, + "p99": 144.896000623703 + }, + "combine": { + "p50": 149.82399344444275, + "p90": 153.1520038843155, + "p95": 155.8080017566681, + "p99": 164.67200219631195 + }, + "roundtrip": { + "p50": 254.07999753952026, + "p90": 260.51199436187744, + "p95": 264.3519937992096, + "p99": 273.4079957008362 + }, + "isolatedSum": { + "p50": 276.89599990844727, + "p90": 288.35199773311615, + "p95": 294.43199932575226, + "p99": 309.56800282001495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 169.50400173664093, + "p90": 177.59999632835388, + "p95": 180.28800189495087, + "p99": 185.56800484657288 + }, + "combine": { + "p50": 215.13600647449493, + "p90": 222.01600670814514, + "p95": 224.89599883556366, + "p99": 228.5120040178299 + }, + "roundtrip": { + "p50": 355.9359908103943, + "p90": 361.85601353645325, + "p95": 363.8080060482025, + "p99": 
370.88000774383545 + }, + "isolatedSum": { + "p50": 384.64000821113586, + "p90": 399.616003036499, + "p95": 405.1840007305145, + "p99": 414.08000886440277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 253.28001379966736, + "p90": 262.7840042114258, + "p95": 269.27998661994934, + "p99": 274.7519910335541 + }, + "combine": { + "p50": 359.935998916626, + "p90": 364.76799845695496, + "p95": 372.8320002555847, + "p99": 384.6080005168915 + }, + "roundtrip": { + "p50": 567.5519704818726, + "p90": 577.0559906959534, + "p95": 580.5439949035645, + "p99": 589.5360112190247 + }, + "isolatedSum": { + "p50": 613.2160127162933, + "p90": 627.5520026683807, + "p95": 642.1119868755341, + "p99": 659.3599915504456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 415.0719940662384, + "p90": 421.82400822639465, + "p95": 423.39199781417847, + "p99": 427.3279905319214 + }, + "combine": { + "p50": 609.2479825019836, + "p90": 611.3920211791992, + "p95": 615.2960062026978, + "p99": 620.8959817886353 + }, + "roundtrip": { + "p50": 996.1599707603455, + "p90": 1004.4800043106079, + "p95": 1010.9119415283203, + "p99": 1042.9760217666626 + }, + "isolatedSum": { + "p50": 1024.319976568222, + "p90": 1033.2160294055939, + "p95": 1038.6880040168762, + "p99": 1048.2239723205566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 2, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 736.2880110740662, + "p90": 745.855987071991, + "p95": 751.7439723014832, + "p99": 774.4320034980774 + }, + "combine": { + "p50": 1127.7120113372803, + "p90": 1136.7679834365845, + "p95": 1138.208031654358, + "p99": 1140.1280164718628 + }, + "roundtrip": { + "p50": 1838.4959697723389, + "p90": 1846.9120264053345, + "p95": 1853.3439636230469, + "p99": 1865.18394947052 + }, + "isolatedSum": { + "p50": 1864.0000224113464, + "p90": 1882.6239705085754, + "p95": 1889.952003955841, + "p99": 1914.5600199699402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1385.5680227279663, + "p90": 1392.9920196533203, + "p95": 1396.2880373001099, + "p99": 1457.152009010315 + }, + "combine": { + "p50": 2150.0160694122314, + "p90": 2158.4320068359375, + "p95": 2160.640001296997, + "p99": 2207.360029220581 + }, + "roundtrip": { + "p50": 3511.7759704589844, + "p90": 3522.6879119873047, + "p95": 3532.7680110931396, + "p99": 3585.792064666748 + }, + "isolatedSum": { + "p50": 3535.5840921401978, + "p90": 3551.424026489258, + "p95": 3556.928038597107, + "p99": 3664.512038230896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a0cd2009", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b200_8e2c81ec", + "comparisonKey": "9ce6043da2e5c613", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T13:34:20.332185+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.46400338411331, + "p90": 100.70399940013885, + "p95": 104.16000336408615, + "p99": 
111.61600053310394 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 89.37600255012512, + "p95": 90.94399958848953, + "p99": 94.55999732017517 + }, + "roundtrip": { + "p50": 152.63999998569489, + "p90": 163.03999722003937, + "p95": 169.5999950170517, + "p99": 199.23199713230133 + }, + "isolatedSum": { + "p50": 171.1680069565773, + "p90": 190.08000195026398, + "p95": 195.10400295257568, + "p99": 206.1759978532791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 122.27199971675873, + "p90": 132.9600065946579, + "p95": 140.28799533843994, + "p99": 150.4639983177185 + }, + "combine": { + "p50": 128.22400033473969, + "p90": 132.64000415802002, + "p95": 139.0720009803772, + "p99": 145.34400403499603 + }, + "roundtrip": { + "p50": 220.2879935503006, + "p90": 232.12799429893494, + "p95": 235.20000278949738, + "p99": 244.47999894618988 + }, + "isolatedSum": { + "p50": 250.4960000514984, + "p90": 265.6000107526779, + "p95": 279.35999631881714, + "p99": 295.80800235271454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 213.21600675582886, + "p90": 227.55199670791626, + "p95": 243.8720017671585, + "p99": 288.1920039653778 + }, + "combine": { + "p50": 273.27999472618103, + "p90": 279.4879972934723, + "p95": 284.5120131969452, + "p99": 290.46401381492615 + }, + "roundtrip": { + "p50": 466.623991727829, + "p90": 476.639986038208, + "p95": 483.6159944534302, + "p99": 490.5279874801636 + }, + "isolatedSum": { + "p50": 486.4960014820099, + "p90": 
507.03999400138855, + "p95": 528.3840149641037, + "p99": 578.656017780304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c45baa60", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b200_6bd76b01", + "comparisonKey": "f9fd2cdc47fb6a8f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:40:29.154953+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + 
"eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.11200201511383, + "p90": 105.18400371074677, + "p95": 111.96800321340561, + "p99": 121.21599912643433 + }, + "combine": { + "p50": 99.16800260543823, + "p90": 103.87200117111206, + "p95": 106.88000172376633, + "p99": 115.68000167608261 + }, + "roundtrip": { + "p50": 170.84799706935883, + "p90": 182.40000307559967, + "p95": 188.25599551200867, + "p99": 199.23199713230133 + }, + "isolatedSum": { + "p50": 197.28000462055206, + "p90": 209.05600488185883, + "p95": 218.84800493717194, + "p99": 236.89600080251694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 118.59200149774551, + "p90": 126.68800354003906, + "p95": 132.06399977207184, + "p99": 147.5519984960556 + }, + "combine": { + "p50": 117.0559972524643, + "p90": 128.22400033473969, + "p95": 131.23199343681335, + "p99": 140.99200069904327 + }, + "roundtrip": { + "p50": 212.76800334453583, + "p90": 220.99199891090393, + "p95": 226.55999660491943, + "p99": 238.87999355793 + }, + "isolatedSum": { + "p50": 235.6479987502098, + "p90": 254.91200387477875, + "p95": 263.2959932088852, + "p99": 288.5439991950989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 
2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 154.4319987297058, + "p90": 163.93600404262543, + "p95": 169.5680022239685, + "p99": 183.67999792099 + }, + "combine": { + "p50": 159.90400314331055, + "p90": 169.91999745368958, + "p95": 176.1920005083084, + "p99": 184.4480037689209 + }, + "roundtrip": { + "p50": 285.504013299942, + "p90": 297.0559895038605, + "p95": 300.6080090999603, + "p99": 308.47999453544617 + }, + "isolatedSum": { + "p50": 314.33600187301636, + "p90": 333.856001496315, + "p95": 345.7600027322769, + "p99": 368.1280016899109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 219.58400309085846, + "p90": 230.335995554924, + "p95": 241.69600009918213, + "p99": 253.9519965648651 + }, + "combine": { + "p50": 274.944007396698, + "p90": 283.4239900112152, + "p95": 289.18400406837463, + "p99": 298.7520098686218 + }, + "roundtrip": { + "p50": 463.00798654556274, + "p90": 479.48798537254333, + "p95": 489.4079864025116, + "p99": 496.832013130188 + }, + "isolatedSum": { + "p50": 494.52801048755646, + "p90": 513.7599855661392, + "p95": 530.8800041675568, + "p99": 552.7040064334869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 372.0960021018982, + "p90": 396.95999026298523, + "p95": 410.4959964752197, + "p99": 454.43201065063477 + }, + "combine": { + "p50": 475.6479859352112, + "p90": 488.6400103569031, + "p95": 490.62401056289673, + "p99": 
499.1680085659027 + }, + "roundtrip": { + "p50": 813.9200210571289, + "p90": 820.7359910011292, + "p95": 822.8800296783447, + "p99": 826.9760012626648 + }, + "isolatedSum": { + "p50": 847.7439880371094, + "p90": 885.6000006198883, + "p95": 901.1200070381165, + "p99": 953.6000192165375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 671.7759966850281, + "p90": 682.5600266456604, + "p95": 684.9279999732971, + "p99": 694.9120163917542 + }, + "combine": { + "p50": 853.2800078392029, + "p90": 856.7360043525696, + "p95": 859.5520257949829, + "p99": 867.6480054855347 + }, + "roundtrip": { + "p50": 1490.4320240020752, + "p90": 1498.9440441131592, + "p95": 1501.2160539627075, + "p99": 1509.6640586853027 + }, + "isolatedSum": { + "p50": 1525.056004524231, + "p90": 1539.29603099823, + "p95": 1544.48002576828, + "p99": 1562.5600218772888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-62afdc71", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b200_86a142fb", + "comparisonKey": "e6b6d1ce3c0c1d0a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:38:06.135658+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.88000500202179, + "p90": 136.06399297714233, + "p95": 139.615997672081, + "p99": 147.93600142002106 + }, + "combine": { + "p50": 141.56800508499146, + "p90": 152.48000621795654, + "p95": 155.68000078201294, + "p99": 166.07999801635742 + }, + "roundtrip": { + "p50": 241.40800535678864, + "p90": 251.74400210380554, + "p95": 260.8320116996765, + "p99": 276.44801139831543 + }, + "isolatedSum": { + "p50": 268.44801008701324, + "p90": 288.5439991950989, + "p95": 
295.29599845409393, + "p99": 314.0159994363785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.0319973230362, + "p90": 171.03999853134155, + "p95": 175.04000663757324, + "p99": 181.11999332904816 + }, + "combine": { + "p50": 200.06400346755981, + "p90": 206.81600272655487, + "p95": 213.50400149822235, + "p99": 221.0559993982315 + }, + "roundtrip": { + "p50": 340.4479920864105, + "p90": 349.69601035118103, + "p95": 353.40800881385803, + "p99": 360.79999804496765 + }, + "isolatedSum": { + "p50": 364.096000790596, + "p90": 377.8560012578964, + "p95": 388.5440081357956, + "p99": 402.17599272727966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 241.72799289226532, + "p90": 251.583993434906, + "p95": 257.1200132369995, + "p99": 265.4719948768616 + }, + "combine": { + "p50": 348.86398911476135, + "p90": 355.6160032749176, + "p95": 361.2160086631775, + "p99": 368.51200461387634 + }, + "roundtrip": { + "p50": 562.1119737625122, + "p90": 571.3919997215271, + "p95": 575.5519866943359, + "p99": 592.4479961395264 + }, + "isolatedSum": { + "p50": 590.5919820070267, + "p90": 607.1999967098236, + "p95": 618.336021900177, + "p99": 633.9839994907379 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + 
"dispatch": { + "p50": 393.0239975452423, + "p90": 400.7680118083954, + "p95": 404.7040045261383, + "p99": 414.0160083770752 + }, + "combine": { + "p50": 585.856020450592, + "p90": 595.6479907035828, + "p95": 597.3119735717773, + "p99": 607.4240207672119 + }, + "roundtrip": { + "p50": 961.7919921875, + "p90": 973.5999703407288, + "p95": 979.423999786377, + "p99": 989.2479777336121 + }, + "isolatedSum": { + "p50": 978.8800179958344, + "p90": 996.4160025119781, + "p95": 1002.0159780979156, + "p99": 1021.4400291442871 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 708.1279754638672, + "p90": 719.4240093231201, + "p95": 723.3920097351074, + "p99": 735.2319955825806 + }, + "combine": { + "p50": 1089.9840593338013, + "p90": 1098.5599756240845, + "p95": 1102.1440029144287, + "p99": 1111.680030822754 + }, + "roundtrip": { + "p50": 1771.1679935455322, + "p90": 1780.2239656448364, + "p95": 1785.696029663086, + "p99": 1799.2639541625977 + }, + "isolatedSum": { + "p50": 1798.1120347976685, + "p90": 1817.9839849472046, + "p95": 1825.5360126495361, + "p99": 1846.9120264053345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1345.4400300979614, + "p90": 1354.2720079421997, + "p95": 1357.151985168457, + "p99": 1363.5200262069702 + }, + "combine": { + "p50": 2075.295925140381, + "p90": 2085.5040550231934, + "p95": 2091.16792678833, + "p99": 2098.367929458618 + }, + "roundtrip": { + "p50": 3397.3119258880615, + "p90": 
3406.048059463501, + "p95": 3410.3360176086426, + "p99": 3420.6080436706543 + }, + "isolatedSum": { + "p50": 3420.7359552383423, + "p90": 3439.776062965393, + "p95": 3448.319911956787, + "p99": 3461.8879556655884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2f5a9c90", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b200_163689e3", + "comparisonKey": "12d02d36e1e855ad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:49:05.816690+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.0239976644516, + "p90": 125.31200051307678, + "p95": 127.80800461769104, + "p99": 137.40800321102142 + }, + "combine": { + "p50": 128.31999361515045, + "p90": 134.91199910640717, + "p95": 137.37599551677704, + "p99": 141.2159949541092 + }, + "roundtrip": { + "p50": 219.93599832057953, + "p90": 226.68799757957458, + "p95": 228.89600694179535, + "p99": 233.40800404548645 + }, + "isolatedSum": { + "p50": 249.34399127960205, + "p90": 260.22399961948395, + "p95": 265.1840001344681, + "p99": 278.6239981651306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.75199437141418, + "p90": 163.00800442695618, + "p95": 164.51199352741241, + "p99": 172.19200730323792 + }, + "combine": { + "p50": 171.48800194263458, + "p90": 176.7680048942566, + "p95": 177.76000499725342, + "p99": 180.41600286960602 + }, + "roundtrip": { + "p50": 299.1360127925873, + "p90": 306.5280020236969, + "p95": 309.2159926891327, + "p99": 316.19200110435486 + }, + "isolatedSum": { + "p50": 330.23999631404877, + "p90": 339.77600932121277, + 
"p95": 342.27199852466583, + "p99": 352.60801017284393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 220.60799598693848, + "p90": 229.44000363349915, + "p95": 233.43999683856964, + "p99": 237.5359982252121 + }, + "combine": { + "p50": 278.0480086803436, + "p90": 286.624014377594, + "p95": 287.6160144805908, + "p99": 291.0720109939575 + }, + "roundtrip": { + "p50": 464.6719992160797, + "p90": 471.6799855232239, + "p95": 473.66398572921753, + "p99": 485.0560128688812 + }, + "isolatedSum": { + "p50": 498.6560046672821, + "p90": 516.0640180110931, + "p95": 521.0560113191605, + "p99": 528.6080092191696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 344.2240059375763, + "p90": 351.29600763320923, + "p95": 353.983998298645, + "p99": 360.54399609565735 + }, + "combine": { + "p50": 480.80000281333923, + "p90": 487.10399866104126, + "p95": 491.2959933280945, + "p99": 497.21598625183105 + }, + "roundtrip": { + "p50": 798.367977142334, + "p90": 806.4320087432861, + "p95": 809.0559840202332, + "p99": 815.1999711990356 + }, + "isolatedSum": { + "p50": 825.0240087509155, + "p90": 838.4000062942505, + "p95": 845.2799916267395, + "p99": 857.7599823474884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 585.4399800300598, + "p90": 592.4479961395264, + "p95": 601.0559797286987, + "p99": 617.2800064086914 + }, + "combine": { + "p50": 827.9039859771729, + "p90": 832.3519825935364, + "p95": 834.6560001373291, + "p99": 842.0159816741943 + }, + "roundtrip": { + "p50": 1383.7440013885498, + "p90": 1390.9759521484375, + "p95": 1393.2160139083862, + "p99": 1399.5200395584106 + }, + "isolatedSum": { + "p50": 1413.3439660072327, + "p90": 1424.7999787330627, + "p95": 1435.7119798660278, + "p99": 1459.2959880828857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1078.976035118103, + "p90": 1083.9999914169312, + "p95": 1086.4319801330566, + "p99": 1091.871976852417 + }, + "combine": { + "p50": 1533.5999727249146, + "p90": 1543.67995262146, + "p95": 1545.408010482788, + "p99": 1551.967978477478 + }, + "roundtrip": { + "p50": 2584.6080780029297, + "p90": 2592.9598808288574, + "p95": 2597.0559120178223, + "p99": 2602.400064468384 + }, + "isolatedSum": { + "p50": 2612.5760078430176, + "p90": 2627.679944038391, + "p95": 2631.8399906158447, + "p99": 2643.839955329895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f00f8eb5", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b200_fdf61e6b", + "comparisonKey": "17fc799b554342ce", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:39:19.759714+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.51200187206268, + "p90": 135.3279948234558, + "p95": 142.43200421333313, + "p99": 161.1199975013733 + }, + "combine": { + "p50": 
127.42400169372559, + "p90": 132.4159950017929, + "p95": 136.63999736309052, + "p99": 143.5520052909851 + }, + "roundtrip": { + "p50": 218.27200055122375, + "p90": 224.35200214385986, + "p95": 227.84000635147095, + "p99": 233.43999683856964 + }, + "isolatedSum": { + "p50": 251.93600356578827, + "p90": 267.7439898252487, + "p95": 279.07200157642365, + "p99": 304.6720027923584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.21599757671356, + "p90": 161.56800091266632, + "p95": 162.9440039396286, + "p99": 168.73599588871002 + }, + "combine": { + "p50": 168.41599345207214, + "p90": 177.95200645923615, + "p95": 180.12799322605133, + "p99": 191.5840059518814 + }, + "roundtrip": { + "p50": 297.791987657547, + "p90": 308.47999453544617, + "p95": 311.45599484443665, + "p99": 332.8000009059906 + }, + "isolatedSum": { + "p50": 325.6319910287857, + "p90": 339.52000737190247, + "p95": 343.07199716567993, + "p99": 360.32000184059143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 214.62400257587433, + "p90": 223.13599288463593, + "p95": 225.63199698925018, + "p99": 231.1680018901825 + }, + "combine": { + "p50": 276.0320007801056, + "p90": 281.1200022697449, + "p95": 285.8240008354187, + "p99": 289.0560030937195 + }, + "roundtrip": { + "p50": 466.39999747276306, + "p90": 473.85600209236145, + "p95": 477.2160053253174, + "p99": 493.98401379585266 + }, + "isolatedSum": { + "p50": 490.6560033559799, + "p90": 504.2559951543808, + "p95": 
511.4559978246689, + "p99": 520.224004983902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 340.09599685668945, + "p90": 346.2719917297363, + "p95": 348.06400537490845, + "p99": 353.85599732398987 + }, + "combine": { + "p50": 474.07999634742737, + "p90": 483.5520088672638, + "p95": 485.0879907608032, + "p99": 491.16799235343933 + }, + "roundtrip": { + "p50": 787.0720028877258, + "p90": 793.6959862709045, + "p95": 795.0400114059448, + "p99": 799.0080118179321 + }, + "isolatedSum": { + "p50": 814.1759932041168, + "p90": 829.8240005970001, + "p95": 833.1519961357117, + "p99": 845.0239896774292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.4799876213074, + "p90": 590.2720093727112, + "p95": 592.8639769554138, + "p99": 598.7840294837952 + }, + "combine": { + "p50": 819.6160197257996, + "p90": 830.3040266036987, + "p95": 831.8719863891602, + "p99": 835.8719944953918 + }, + "roundtrip": { + "p50": 1377.5360584259033, + "p90": 1386.8160247802734, + "p95": 1390.5600309371948, + "p99": 1405.5039882659912 + }, + "isolatedSum": { + "p50": 1404.096007347107, + "p90": 1420.57603597641, + "p95": 1424.735963344574, + "p99": 1434.656023979187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + 
"globalTokens": 32768, + "dispatch": { + "p50": 1075.103998184204, + "p90": 1083.7119817733765, + "p95": 1085.2160453796387, + "p99": 1088.1600379943848 + }, + "combine": { + "p50": 1541.4079427719116, + "p90": 1544.9919700622559, + "p95": 1547.3599433898926, + "p99": 1555.6800365447998 + }, + "roundtrip": { + "p50": 2584.6080780029297, + "p90": 2593.4720039367676, + "p95": 2597.8240966796875, + "p99": 2610.0800037384033 + }, + "isolatedSum": { + "p50": 2616.5119409561157, + "p90": 2628.7039518356323, + "p95": 2632.5759887695312, + "p99": 2643.8400745391846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b295e4b6", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_b6f90576", + "comparisonKey": "e5e5af23c7e9f14c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:35:35.577616+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.60000139474869, + "p90": 123.58400225639343, + "p95": 125.63200294971466, + "p99": 133.2480013370514 + }, + "combine": { + "p50": 139.3280029296875, + "p90": 143.19999516010284, + "p95": 144.9279934167862, + "p99": 154.78399395942688 + }, + "roundtrip": { + "p50": 232.54400491714478, + "p90": 242.78399348258972, + "p95": 246.72000110149384, + "p99": 258.30399990081787 + }, + "isolatedSum": { + "p50": 256.9280043244362, + "p90": 266.7839974164963, + "p95": 270.55999636650085, + "p99": 288.03199529647827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 151.58399939537048, + "p90": 161.28000617027283, + "p95": 163.90399634838104, + "p99": 171.2000072002411 + }, + "combine": { + "p50": 190.62399864196777, + "p90": 198.30399751663208, + "p95": 
199.71199333667755, + "p99": 205.08800446987152 + }, + "roundtrip": { + "p50": 321.24799489974976, + "p90": 329.18399572372437, + "p95": 332.2559893131256, + "p99": 339.55198526382446 + }, + "isolatedSum": { + "p50": 342.20799803733826, + "p90": 359.5840036869049, + "p95": 363.6159896850586, + "p99": 376.2880116701126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 227.07200050354004, + "p90": 234.0800017118454, + "p95": 237.0239943265915, + "p99": 241.7919933795929 + }, + "combine": { + "p50": 337.5360071659088, + "p90": 341.0879969596863, + "p95": 343.26401352882385, + "p99": 349.40800070762634 + }, + "roundtrip": { + "p50": 539.4560098648071, + "p90": 548.255980014801, + "p95": 550.6240129470825, + "p99": 559.4559907913208 + }, + "isolatedSum": { + "p50": 564.6080076694489, + "p90": 575.1679986715317, + "p95": 580.2880078554153, + "p99": 591.1999940872192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 366.8479919433594, + "p90": 374.6879994869232, + "p95": 377.75999307632446, + "p99": 384.0320110321045 + }, + "combine": { + "p50": 594.0799713134766, + "p90": 598.7840294837952, + "p95": 601.3759970664978, + "p99": 608.7039709091187 + }, + "roundtrip": { + "p50": 933.791995048523, + "p90": 941.6959881782532, + "p95": 945.4399943351746, + "p99": 952.351987361908 + }, + "isolatedSum": { + "p50": 960.9279632568359, + "p90": 973.4720289707184, + "p95": 979.1359901428223, + "p99": 992.7359819412231 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 662.4320149421692, + "p90": 671.8720197677612, + "p95": 674.6240258216858, + "p99": 681.6959977149963 + }, + "combine": { + "p50": 1074.4320154190063, + "p90": 1078.5599946975708, + "p95": 1080.3519487380981, + "p99": 1087.5519514083862 + }, + "roundtrip": { + "p50": 1713.8240337371826, + "p90": 1723.647952079773, + "p95": 1726.6240119934082, + "p99": 1737.5680208206177 + }, + "isolatedSum": { + "p50": 1736.8640303611755, + "p90": 1750.432014465332, + "p95": 1754.975974559784, + "p99": 1769.2479491233826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1279.744029045105, + "p90": 1288.3520126342773, + "p95": 1291.808009147644, + "p99": 1298.9439964294434 + }, + "combine": { + "p50": 2045.0561046600342, + "p90": 2057.1839809417725, + "p95": 2086.6239070892334, + "p99": 2155.9998989105225 + }, + "roundtrip": { + "p50": 3298.271894454956, + "p90": 3308.1281185150146, + "p95": 3312.0319843292236, + "p99": 3315.48810005188 + }, + "isolatedSum": { + "p50": 3324.800133705139, + "p90": 3345.53599357605, + "p95": 3378.4319162368774, + "p99": 3454.943895339966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d0493cc4", + "identity": 
"b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b200_10983e54", + "comparisonKey": "83ce37f03e9002d0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:50.567056+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.84800279140472, + "p90": 122.78400361537933, + "p95": 127.10399925708771, + "p99": 133.18400084972382 + }, + "combine": { + "p50": 130.5599957704544, + "p90": 139.16799426078796, + "p95": 141.4719969034195, + "p99": 153.1199961900711 + }, + "roundtrip": { + "p50": 227.6799976825714, + "p90": 235.6799989938736, + "p95": 239.68000710010529, + "p99": 248.76800179481506 + }, + "isolatedSum": { + "p50": 245.40799856185913, + "p90": 261.9519978761673, + "p95": 268.5759961605072, + "p99": 286.3039970397949 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.10399508476257, + "p90": 152.48000621795654, + "p95": 155.61600029468536, + "p99": 163.96799683570862 + }, + "combine": { + "p50": 187.9359930753708, + "p90": 193.6960071325302, + "p95": 197.66399264335632, + "p99": 206.2080055475235 + }, + "roundtrip": { + "p50": 311.0719919204712, + "p90": 320.3839957714081, + "p95": 323.2960104942322, + "p99": 328.7679851055145 + }, + "isolatedSum": { + "p50": 335.03998816013336, + "p90": 346.17601335048676, + "p95": 353.2799929380417, + "p99": 370.1760023832321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.5520008802414, + "p90": 212.351992726326, + "p95": 214.81600403785706, + "p99": 222.04799950122833 + }, + "combine": { + "p50": 323.2640027999878, + "p90": 328.0639946460724, + "p95": 
331.4880132675171, + "p99": 340.256005525589 + }, + "roundtrip": { + "p50": 508.2240104675293, + "p90": 518.6240077018738, + "p95": 522.6240158081055, + "p99": 538.9440059661865 + }, + "isolatedSum": { + "p50": 530.8160036802292, + "p90": 540.4159873723984, + "p95": 546.3040173053741, + "p99": 562.3040050268173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 338.4320139884949, + "p90": 347.3599851131439, + "p95": 351.23199224472046, + "p99": 360.22400856018066 + }, + "combine": { + "p50": 594.9119925498962, + "p90": 599.2640256881714, + "p95": 604.3199896812439, + "p99": 618.4639930725098 + }, + "roundtrip": { + "p50": 909.1519713401794, + "p90": 918.2080030441284, + "p95": 923.7440228462219, + "p99": 933.3760142326355 + }, + "isolatedSum": { + "p50": 933.3440065383911, + "p90": 946.6240108013153, + "p95": 955.5519819259644, + "p99": 978.6880016326904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 595.9680080413818, + "p90": 606.2399744987488, + "p95": 611.6160154342651, + "p99": 644.2880034446716 + }, + "combine": { + "p50": 1109.7279787063599, + "p90": 1115.9039735794067, + "p95": 1121.5360164642334, + "p99": 1131.7119598388672 + }, + "roundtrip": { + "p50": 1641.8880224227905, + "p90": 1658.30397605896, + "p95": 1667.199969291687, + "p99": 1689.9839639663696 + }, + "isolatedSum": { + "p50": 1705.6959867477417, + "p90": 1722.1439480781555, + "p95": 1733.1520318984985, + "p99": 1775.9999632835388 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1144.544005393982, + "p90": 1156.0959815979004, + "p95": 1160.1279973983765, + "p99": 1229.248046875 + }, + "combine": { + "p50": 2060.5759620666504, + "p90": 2072.3841190338135, + "p95": 2085.024118423462, + "p99": 2131.711959838867 + }, + "roundtrip": { + "p50": 3152.415990829468, + "p90": 3162.65606880188, + "p95": 3166.208028793335, + "p99": 3180.608034133911 + }, + "isolatedSum": { + "p50": 3205.1199674606323, + "p90": 3228.480100631714, + "p95": 3245.1521158218384, + "p99": 3360.960006713867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-089fe90f", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b200_ad0e927a", + "comparisonKey": "7ab57c2891107e7a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:47:51.947765+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + 
"routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.18399888277054, + "p90": 129.4720023870468, + "p95": 133.08799266815186, + "p99": 139.48799669742584 + }, + "combine": { + "p50": 127.48800218105316, + "p90": 130.72000443935394, + "p95": 133.2480013370514, + "p99": 142.7839994430542 + }, + "roundtrip": { + "p50": 217.6000028848648, + "p90": 229.0239930152893, + "p95": 234.75199937820435, + "p99": 242.65600740909576 + }, + "isolatedSum": { + "p50": 248.6720010638237, + "p90": 260.19200682640076, + "p95": 266.33599400520325, + "p99": 282.27199614048004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 0, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.55199301242828, + "p90": 165.75999557971954, + "p95": 173.0239987373352, + "p99": 182.46400356292725 + }, + "combine": { + "p50": 175.77600479125977, + "p90": 181.40800297260284, + "p95": 187.23200261592865, + "p99": 193.50400567054749 + }, + "roundtrip": { + "p50": 300.1280128955841, + "p90": 310.36800146102905, + "p95": 314.303994178772, + "p99": 327.32799649238586 + }, + "isolatedSum": { + "p50": 335.32799780368805, + "p90": 347.1679985523224, + "p95": 360.25600135326385, + "p99": 375.96800923347473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 221.47199511528015, + "p90": 231.23200237751007, + "p95": 237.18400299549103, + "p99": 249.63200092315674 + }, + "combine": { + "p50": 285.40799021720886, + "p90": 290.71998596191406, + "p95": 292.80000925064087, + "p99": 302.94400453567505 + }, + "roundtrip": { + "p50": 471.8720018863678, + "p90": 483.39200019836426, + "p95": 487.5200092792511, + "p99": 497.21598625183105 + }, + "isolatedSum": { + "p50": 506.879985332489, + "p90": 521.9519883394241, + "p95": 529.9840122461319, + "p99": 552.5760054588318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 346.5920090675354, + "p90": 357.4399948120117, + "p95": 361.60001158714294, + "p99": 370.4639971256256 + }, + "combine": { + "p50": 483.2319915294647, + "p90": 487.93599009513855, + "p95": 
489.8560047149658, + "p99": 498.23999404907227 + }, + "roundtrip": { + "p50": 798.0480194091797, + "p90": 806.4640164375305, + "p95": 809.0879917144775, + "p99": 820.5119967460632 + }, + "isolatedSum": { + "p50": 829.8240005970001, + "p90": 845.3759849071503, + "p95": 851.4560163021088, + "p99": 868.7039911746979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 595.7440137863159, + "p90": 606.2399744987488, + "p95": 610.5920076370239, + "p99": 618.2399988174438 + }, + "combine": { + "p50": 854.3679714202881, + "p90": 862.7200126647949, + "p95": 865.5040264129639, + "p99": 872.160017490387 + }, + "roundtrip": { + "p50": 1417.3439741134644, + "p90": 1426.1120557785034, + "p95": 1430.0800561904907, + "p99": 1440.4799938201904 + }, + "isolatedSum": { + "p50": 1450.111985206604, + "p90": 1468.9599871635437, + "p95": 1476.0960340499878, + "p99": 1490.4000163078308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1105.1520109176636, + "p90": 1113.2160425186157, + "p95": 1115.9039735794067, + "p99": 1122.5279569625854 + }, + "combine": { + "p50": 1596.2560176849365, + "p90": 1605.631947517395, + "p95": 1607.200026512146, + "p99": 1610.368013381958 + }, + "roundtrip": { + "p50": 2672.640085220337, + "p90": 2682.719945907593, + "p95": 2685.728073120117, + "p99": 2697.7601051330566 + }, + "isolatedSum": { + "p50": 2701.4080286026, + "p90": 2718.8479900360107, + "p95": 2723.1040000915527, + "p99": 2732.8959703445435 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-86af299c", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b200_11e714df", + "comparisonKey": "82d9487311c03196", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:43:30.129896+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.38400089740753, + "p90": 135.8720064163208, + "p95": 139.42399621009827, + "p99": 149.50400590896606 + }, + "combine": { + "p50": 139.93600010871887, + "p90": 148.92800152301788, + "p95": 153.47200632095337, + "p99": 161.28000617027283 + }, + "roundtrip": { + "p50": 239.9040013551712, + "p90": 247.16800451278687, + "p95": 251.6160011291504, + "p99": 259.2960000038147 + }, + "isolatedSum": { + "p50": 264.3200010061264, + "p90": 284.8000079393387, + "p95": 292.89600253105164, + "p99": 310.7840120792389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.8880033493042, + "p90": 169.40799355506897, + "p95": 175.10400712490082, + "p99": 179.74400520324707 + }, + "combine": { + "p50": 191.64800643920898, + "p90": 200.95999538898468, + "p95": 203.61599326133728, + "p99": 213.34399282932281 + }, + "roundtrip": { + "p50": 333.1199884414673, + "p90": 343.55199337005615, + "p95": 348.09601306915283, + "p99": 355.679988861084 + }, + "isolatedSum": { + "p50": 353.5360097885132, + "p90": 370.36798894405365, + "p95": 378.7200003862381, + "p99": 393.0879980325699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 242.5280064344406, + "p90": 254.2400062084198, + "p95": 262.08001375198364, + "p99": 279.1999876499176 + }, + "combine": { + "p50": 342.81599521636963, + "p90": 351.1039912700653, + "p95": 352.86399722099304, + "p99": 362.0480000972748 + }, + "roundtrip": { + "p50": 544.7360277175903, + "p90": 554.3360114097595, + "p95": 559.7440004348755, + "p99": 566.6559934616089 + }, + "isolatedSum": { + "p50": 585.3440016508102, + "p90": 605.3439974784851, + "p95": 614.9440109729767, + "p99": 641.2479877471924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 380.3839981555939, + "p90": 393.0880129337311, + "p95": 397.37600088119507, + "p99": 409.2159867286682 + }, + "combine": { + "p50": 583.5520029067993, + "p90": 591.5520191192627, + "p95": 595.8080291748047, + "p99": 598.9760160446167 + }, + "roundtrip": { + "p50": 944.5120096206665, + "p90": 955.1680088043213, + "p95": 959.4879746437073, + "p99": 966.2719964981079 + }, + "isolatedSum": { + "p50": 963.9360010623932, + "p90": 984.6400320529938, + "p95": 993.1840300559998, + "p99": 1008.1920027732849 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 680.2880167961121, + "p90": 687.0399713516235, + "p95": 689.631998538971, + "p99": 697.376012802124 + }, + "combine": { + "p50": 1053.760051727295, + "p90": 1063.647985458374, + "p95": 1065.3120279312134, + "p99": 1071.679949760437 + }, + "roundtrip": { + 
"p50": 1708.7680101394653, + "p90": 1723.0720520019531, + "p95": 1728.8960218429565, + "p99": 1755.0400495529175 + }, + "isolatedSum": { + "p50": 1734.048068523407, + "p90": 1750.6879568099976, + "p95": 1754.9440264701843, + "p99": 1769.055962562561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1309.2800378799438, + "p90": 1321.0879564285278, + "p95": 1323.5520124435425, + "p99": 1330.5920362472534 + }, + "combine": { + "p50": 2013.1199359893799, + "p90": 2023.5838890075684, + "p95": 2026.1120796203613, + "p99": 2065.824031829834 + }, + "roundtrip": { + "p50": 3302.97589302063, + "p90": 3315.295934677124, + "p95": 3319.9360370635986, + "p99": 3330.9121131896973 + }, + "isolatedSum": { + "p50": 3322.3999738693237, + "p90": 3344.671845436096, + "p95": 3349.664092063904, + "p99": 3396.4160680770874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7737a4e4", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b200_1df41dff", + "comparisonKey": "ba288e03ad605e6f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:44:10.301965+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.93599683046341, + "p90": 124.92799758911133, + "p95": 127.10399925708771, + "p99": 135.3279948234558 + }, + "combine": { + "p50": 127.10399925708771, + "p90": 130.8159977197647, + "p95": 132.9919993877411, + "p99": 138.5599970817566 + }, + "roundtrip": { + "p50": 218.4000015258789, + "p90": 226.9439995288849, + "p95": 230.68800568580627, + "p99": 242.75200068950653 + }, + "isolatedSum": { + "p50": 247.03999608755112, + "p90": 
255.74399530887604, + "p95": 260.0959986448288, + "p99": 273.8879919052124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.36800634860992, + "p90": 162.81600296497345, + "p95": 164.89599645137787, + "p99": 170.46399414539337 + }, + "combine": { + "p50": 173.43999445438385, + "p90": 177.05599963665009, + "p95": 178.68800461292267, + "p99": 182.27200210094452 + }, + "roundtrip": { + "p50": 298.7839877605438, + "p90": 306.43200874328613, + "p95": 308.4479868412018, + "p99": 312.51201033592224 + }, + "isolatedSum": { + "p50": 331.8080008029938, + "p90": 339.87200260162354, + "p95": 343.58400106430054, + "p99": 352.7359962463379 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 220.38400173187256, + "p90": 231.55200481414795, + "p95": 235.03999412059784, + "p99": 246.36800587177277 + }, + "combine": { + "p50": 275.9360074996948, + "p90": 283.7759852409363, + "p95": 285.66399216651917, + "p99": 288.32000494003296 + }, + "roundtrip": { + "p50": 465.92000126838684, + "p90": 471.6799855232239, + "p95": 473.66398572921753, + "p99": 478.59200835227966 + }, + "isolatedSum": { + "p50": 496.3200092315674, + "p90": 515.3279900550842, + "p95": 520.703986287117, + "p99": 534.6880108118057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 340.9920036792755, + "p90": 347.77599573135376, + "p95": 349.5039939880371, + "p99": 353.5679876804352 + }, + "combine": { + "p50": 472.7360010147095, + "p90": 479.74398732185364, + "p95": 483.45598578453064, + "p99": 490.5279874801636 + }, + "roundtrip": { + "p50": 787.7119779586792, + "p90": 795.4239845275879, + "p95": 797.8240251541138, + "p99": 804.7999739646912 + }, + "isolatedSum": { + "p50": 813.728004693985, + "p90": 827.5199830532074, + "p95": 832.9599797725677, + "p99": 844.0959751605988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 583.2319855690002, + "p90": 588.5120034217834, + "p95": 590.7520055770874, + "p99": 597.4720120429993 + }, + "combine": { + "p50": 831.2000036239624, + "p90": 840.6720161437988, + "p95": 842.7199721336365, + "p99": 844.2239761352539 + }, + "roundtrip": { + "p50": 1389.855980873108, + "p90": 1398.2080221176147, + "p95": 1400.6719589233398, + "p99": 1405.6639671325684 + }, + "isolatedSum": { + "p50": 1414.4319891929626, + "p90": 1429.1840195655823, + "p95": 1433.4719777107239, + "p99": 1441.6959881782532 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1079.4559717178345, + "p90": 1086.8799686431885, + "p95": 1090.432047843933, + "p99": 1107.6159477233887 + }, + "combine": { + "p50": 1545.151948928833, + "p90": 1555.7440519332886, + "p95": 1556.3520193099976, + "p99": 1560.2240562438965 + }, + 
"roundtrip": { + "p50": 2605.7279109954834, + "p90": 2614.527940750122, + "p95": 2617.216110229492, + "p99": 2621.5360164642334 + }, + "isolatedSum": { + "p50": 2624.6079206466675, + "p90": 2642.624020576477, + "p95": 2646.7840671539307, + "p99": 2667.840003967285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-86cea496", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_1c58745c", + "comparisonKey": "3b7b5bf336b488a9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:45:57.663304+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.19999867677689, + "p90": 126.27199292182922, + "p95": 128.9920061826706, + "p99": 137.34400272369385 + }, + "combine": { + "p50": 139.20000195503235, + "p90": 143.0719941854477, + "p95": 145.1839953660965, + "p99": 153.21600437164307 + }, + "roundtrip": { + "p50": 230.56000471115112, + "p90": 239.04000222682953, + "p95": 241.60000681877136, + "p99": 247.29600548744202 + }, + "isolatedSum": { + "p50": 258.40000063180923, + "p90": 269.3439871072769, + "p95": 274.1760015487671, + "p99": 290.5600070953369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 151.74399316310883, + "p90": 161.50400042533875, + "p95": 164.70399498939514, + "p99": 171.1679995059967 + }, + "combine": { + "p50": 190.08000195026398, + "p90": 199.74400103092194, + "p95": 201.92000269889832, + "p99": 212.25599944591522 + }, + "roundtrip": { + "p50": 320.8959996700287, + "p90": 328.64001393318176, + "p95": 331.2320113182068, + "p99": 341.12000465393066 + }, + "isolatedSum": { + "p50": 341.8239951133728, + "p90": 361.2480014562607, + 
"p95": 366.62399768829346, + "p99": 383.4239989519119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 230.880007147789, + "p90": 237.63200640678406, + "p95": 239.68000710010529, + "p99": 247.67999351024628 + }, + "combine": { + "p50": 337.40800619125366, + "p90": 344.09600496292114, + "p95": 346.9119966030121, + "p99": 350.71998834609985 + }, + "roundtrip": { + "p50": 540.448009967804, + "p90": 547.6160049438477, + "p95": 551.4879822731018, + "p99": 556.8320155143738 + }, + "isolatedSum": { + "p50": 568.2880133390427, + "p90": 581.7280113697052, + "p95": 586.5920037031174, + "p99": 598.3999818563461 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 367.16800928115845, + "p90": 374.9760091304779, + "p95": 379.35999035835266, + "p99": 385.79198718070984 + }, + "combine": { + "p50": 593.887984752655, + "p90": 598.3039736747742, + "p95": 600.3199815750122, + "p99": 607.9040169715881 + }, + "roundtrip": { + "p50": 935.6160163879395, + "p90": 942.8480267524719, + "p95": 947.5839734077454, + "p99": 960.1920247077942 + }, + "isolatedSum": { + "p50": 961.0559940338135, + "p90": 973.2799828052521, + "p95": 979.6799719333649, + "p99": 993.696004152298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 661.8239879608154, + "p90": 671.0079908370972, + "p95": 673.1839776039124, + "p99": 681.7920207977295 + }, + "combine": { + "p50": 1074.7840404510498, + "p90": 1078.6240100860596, + "p95": 1081.536054611206, + "p99": 1089.184045791626 + }, + "roundtrip": { + "p50": 1713.055968284607, + "p90": 1720.352053642273, + "p95": 1723.0720520019531, + "p99": 1732.319951057434 + }, + "isolatedSum": { + "p50": 1736.6080284118652, + "p90": 1749.6320009231567, + "p95": 1754.7200322151184, + "p99": 1770.9760665893555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1278.5279750823975, + "p90": 1287.9680395126343, + "p95": 1290.9120321273804, + "p99": 1296.92804813385 + }, + "combine": { + "p50": 2045.8240509033203, + "p90": 2049.1199493408203, + "p95": 2051.487922668457, + "p99": 2058.0480098724365 + }, + "roundtrip": { + "p50": 3300.960063934326, + "p90": 3311.552047729492, + "p95": 3315.743923187256, + "p99": 3323.2638835906982 + }, + "isolatedSum": { + "p50": 3324.352025985718, + "p90": 3337.0879888534546, + "p95": 3342.3999547958374, + "p99": 3354.9760580062866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7eb7a034", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_5f6ee132", + "comparisonKey": "113fc6a951365f7a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:46:38.219916+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.68800222873688, + "p90": 126.43200159072876, + "p95": 129.08799946308136, + "p99": 135.45599579811096 + }, + "combine": 
{ + "p50": 127.16799974441528, + "p90": 130.5920034646988, + "p95": 131.6480040550232, + "p99": 139.48799669742584 + }, + "roundtrip": { + "p50": 218.46400201320648, + "p90": 227.23199427127838, + "p95": 230.46399652957916, + "p99": 239.1040027141571 + }, + "isolatedSum": { + "p50": 245.85600197315216, + "p90": 257.02400505542755, + "p95": 260.73600351810455, + "p99": 274.9439924955368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.56799280643463, + "p90": 162.6559942960739, + "p95": 167.39200055599213, + "p99": 177.66399681568146 + }, + "combine": { + "p50": 168.89600455760956, + "p90": 177.50400304794312, + "p95": 179.48800325393677, + "p99": 185.47199666500092 + }, + "roundtrip": { + "p50": 296.00000381469727, + "p90": 302.8799891471863, + "p95": 304.60798740386963, + "p99": 310.8159899711609 + }, + "isolatedSum": { + "p50": 326.4639973640442, + "p90": 340.15999734401703, + "p95": 346.8800038099289, + "p99": 363.1359934806824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 218.9760059118271, + "p90": 225.98400712013245, + "p95": 228.70400547981262, + "p99": 234.3679964542389 + }, + "combine": { + "p50": 283.7119996547699, + "p90": 288.7359857559204, + "p95": 290.0480031967163, + "p99": 299.23200607299805 + }, + "roundtrip": { + "p50": 463.51999044418335, + "p90": 471.3920056819916, + "p95": 474.5599925518036, + "p99": 485.6959879398346 + }, + "isolatedSum": { + "p50": 502.688005566597, + "p90": 514.7199928760529, + "p95": 
518.7520086765289, + "p99": 533.6000025272369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 333.5359990596771, + "p90": 345.69600224494934, + "p95": 352.57598757743835, + "p99": 379.0079951286316 + }, + "combine": { + "p50": 471.42401337623596, + "p90": 483.2000136375427, + "p95": 487.5200092792511, + "p99": 495.87199091911316 + }, + "roundtrip": { + "p50": 781.440019607544, + "p90": 789.9199724197388, + "p95": 792.959988117218, + "p99": 799.8719811439514 + }, + "isolatedSum": { + "p50": 804.9600124359131, + "p90": 828.8960158824921, + "p95": 840.0959968566895, + "p99": 874.8799860477448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 587.6479744911194, + "p90": 596.127986907959, + "p95": 598.8799929618835, + "p99": 605.4400205612183 + }, + "combine": { + "p50": 841.4080142974854, + "p90": 845.0239896774292, + "p95": 849.0239977836609, + "p99": 852.9919981956482 + }, + "roundtrip": { + "p50": 1401.6640186309814, + "p90": 1411.0080003738403, + "p95": 1414.0479564666748, + "p99": 1422.4319458007812 + }, + "isolatedSum": { + "p50": 1429.0559887886047, + "p90": 1441.1519765853882, + "p95": 1447.9039907455444, + "p99": 1458.4320187568665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + 
"globalTokens": 32768, + "dispatch": { + "p50": 1084.8000049591064, + "p90": 1092.3839807510376, + "p95": 1094.4639444351196, + "p99": 1099.8079776763916 + }, + "combine": { + "p50": 1545.8879470825195, + "p90": 1555.9040307998657, + "p95": 1557.8880310058594, + "p99": 1564.352035522461 + }, + "roundtrip": { + "p50": 2605.1840782165527, + "p90": 2614.527940750122, + "p95": 2616.895914077759, + "p99": 2625.216007232666 + }, + "isolatedSum": { + "p50": 2630.687952041626, + "p90": 2648.2880115509033, + "p95": 2652.351975440979, + "p99": 2664.1600131988525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5294656a", + "identity": "b200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_6cd406b8", + "comparisonKey": "75b945ab9b507a8d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:41:42.197550+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.25599926710129, + "p90": 128.76799702644348, + "p95": 136.35200262069702, + "p99": 145.50399780273438 + }, + "combine": { + "p50": 127.16799974441528, + "p90": 130.5599957704544, + "p95": 132.03200697898865, + "p99": 140.32000303268433 + }, + "roundtrip": { + "p50": 217.47200191020966, + "p90": 226.33600234985352, + "p95": 231.7119985818863, + "p99": 240.03200232982635 + }, + "isolatedSum": { + "p50": 247.42399901151657, + "p90": 259.3279927968979, + "p95": 268.38400959968567, + "p99": 285.8240008354187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.1200042963028, + "p90": 161.85599565505981, + "p95": 164.76799547672272, + "p99": 173.18400740623474 + }, + "combine": { + "p50": 
172.12800681591034, + "p90": 177.59999632835388, + "p95": 180.09600043296814, + "p99": 191.3599967956543 + }, + "roundtrip": { + "p50": 297.08799719810486, + "p90": 303.74398827552795, + "p95": 306.08001351356506, + "p99": 317.4720108509064 + }, + "isolatedSum": { + "p50": 329.24801111221313, + "p90": 339.4559919834137, + "p95": 344.86399590969086, + "p99": 364.54400420188904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 219.2319929599762, + "p90": 225.11999309062958, + "p95": 228.19200158119202, + "p99": 236.7040067911148 + }, + "combine": { + "p50": 278.9759933948517, + "p90": 287.9680097103119, + "p95": 288.92800211906433, + "p99": 296.06398940086365 + }, + "roundtrip": { + "p50": 463.51999044418335, + "p90": 473.1520116329193, + "p95": 476.99201107025146, + "p99": 486.07999086380005 + }, + "isolatedSum": { + "p50": 498.2079863548279, + "p90": 513.0880028009415, + "p95": 517.1200037002563, + "p99": 532.7679961919785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 332.44800567626953, + "p90": 338.75200152397156, + "p95": 341.0240113735199, + "p99": 345.8879888057709 + }, + "combine": { + "p50": 468.7359929084778, + "p90": 473.85600209236145, + "p95": 475.13601183891296, + "p99": 483.7439954280853 + }, + "roundtrip": { + "p50": 780.128002166748, + "p90": 788.2239818572998, + "p95": 791.1679744720459, + "p99": 804.6720027923584 + }, + "isolatedSum": { + "p50": 801.1839985847473, + "p90": 812.608003616333, + "p95": 
816.1600232124329, + "p99": 829.6319842338562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 589.24800157547, + "p90": 597.6639986038208, + "p95": 599.4560122489929, + "p99": 606.2719821929932 + }, + "combine": { + "p50": 840.9600257873535, + "p90": 844.7039723396301, + "p95": 847.8720188140869, + "p99": 855.4559946060181 + }, + "roundtrip": { + "p50": 1400.607943534851, + "p90": 1410.3679656982422, + "p95": 1414.720058441162, + "p99": 1427.9680252075195 + }, + "isolatedSum": { + "p50": 1430.2080273628235, + "p90": 1442.367970943451, + "p95": 1447.3280310630798, + "p99": 1461.7279767990112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1083.616018295288, + "p90": 1090.9759998321533, + "p95": 1092.8640365600586, + "p99": 1096.832036972046 + }, + "combine": { + "p50": 1545.5039739608765, + "p90": 1555.7440519332886, + "p95": 1557.088017463684, + "p99": 1565.727949142456 + }, + "roundtrip": { + "p50": 2607.583999633789, + "p90": 2617.1200275421143, + "p95": 2620.2239990234375, + "p99": 2637.5999450683594 + }, + "isolatedSum": { + "p50": 2629.1199922561646, + "p90": 2646.720051765442, + "p95": 2649.9520540237427, + "p99": 2662.559986114502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-aaeb1c12", + "identity": "b200|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_16a8f46b", + "comparisonKey": "7eed8c7946123cf6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:16:59.105673+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.52799904346466, + "p90": 112.96000331640244, + "p95": 115.35999923944473, + "p99": 122.81599640846252 + }, + "combine": { + "p50": 127.48800218105316, + "p90": 133.5040032863617, + "p95": 135.80800592899323, + "p99": 142.91200041770935 + }, + "roundtrip": { + "p50": 203.2960057258606, + "p90": 208.639994263649, + "p95": 210.87999641895294, + "p99": 214.78399634361267 + }, + "isolatedSum": { + "p50": 234.01600122451782, + "p90": 246.46400660276413, + "p95": 251.16800516843796, + "p99": 265.7279968261719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.56799829006195, + "p90": 150.14399588108063, + "p95": 152.38399803638458, + "p99": 158.81599485874176 + }, + "combine": { + "p50": 174.23999309539795, + "p90": 177.85599827766418, + "p95": 179.51999604701996, + "p99": 187.29600310325623 + }, + "roundtrip": { + "p50": 287.200003862381, + "p90": 296.09599709510803, + "p95": 298.335999250412, + "p99": 309.4399869441986 + }, + "isolatedSum": { + "p50": 319.8079913854599, + "p90": 327.9999941587448, + "p95": 331.90399408340454, + "p99": 346.111997961998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.85599541664124, + "p90": 215.07200598716736, + "p95": 219.32800114154816, + "p99": 223.7440049648285 + }, + "combine": { + "p50": 278.75199913978577, + "p90": 287.58400678634644, + "p95": 
290.0159955024719, + "p99": 297.1520125865936 + }, + "roundtrip": { + "p50": 451.9999921321869, + "p90": 459.1040015220642, + "p95": 462.20800280570984, + "p99": 470.335990190506 + }, + "isolatedSum": { + "p50": 484.607994556427, + "p90": 502.6560127735138, + "p95": 509.3439966440201, + "p99": 520.8960175514221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 332.28799700737, + "p90": 336.8000090122223, + "p95": 338.49599957466125, + "p99": 344.7360098361969 + }, + "combine": { + "p50": 472.03201055526733, + "p90": 480.6399941444397, + "p95": 484.25599932670593, + "p99": 491.7120039463043 + }, + "roundtrip": { + "p50": 773.5040187835693, + "p90": 781.0879945755005, + "p95": 784.0960025787354, + "p99": 790.5600070953369 + }, + "isolatedSum": { + "p50": 804.3200075626373, + "p90": 817.440003156662, + "p95": 822.7519989013672, + "p99": 836.4480137825012 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 565.8559799194336, + "p90": 572.1279978752136, + "p95": 574.0799903869629, + "p99": 580.0960063934326 + }, + "combine": { + "p50": 833.9840173721313, + "p90": 843.3279991149902, + "p95": 845.7279801368713, + "p99": 852.4799942970276 + }, + "roundtrip": { + "p50": 1377.3759603500366, + "p90": 1385.2800130844116, + "p95": 1388.64004611969, + "p99": 1395.6799507141113 + }, + "isolatedSum": { + "p50": 1399.839997291565, + "p90": 1415.4559969902039, + "p95": 1419.8079705238342, + "p99": 1432.5760006904602 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1052.0000457763672, + "p90": 1059.9039793014526, + "p95": 1063.2959604263306, + "p99": 1070.4319477081299 + }, + "combine": { + "p50": 1544.0319776535034, + "p90": 1555.0400018692017, + "p95": 1556.607961654663, + "p99": 1577.9839754104614 + }, + "roundtrip": { + "p50": 2569.6640014648438, + "p90": 2582.0798873901367, + "p95": 2586.1120223999023, + "p99": 2632.8001022338867 + }, + "isolatedSum": { + "p50": 2596.0320234298706, + "p90": 2614.9439811706543, + "p95": 2619.9039220809937, + "p99": 2648.4159231185913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-939dd08e", + "identity": "b200|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b200_033ebcb6", + "comparisonKey": "789a62d4e1f1f0d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:25:49.177773+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 
0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 210.59200167655945, + "p90": 282.49600529670715, + "p95": 308.0640137195587, + "p99": 345.4720079898834 + }, + "combine": { + "p50": 103.26399654150009, + "p90": 111.64800077676773, + "p95": 114.52800035476685, + "p99": 125.82400441169739 + }, + "roundtrip": { + "p50": 298.0479896068573, + "p90": 327.10400223731995, + "p95": 336.60799264907837, + "p99": 352.09599137306213 + }, + "isolatedSum": { + "p50": 313.85599821805954, + "p90": 394.1440060734749, + "p95": 422.59201407432556, + "p99": 471.2960124015808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + 
"dispatch": { + "p50": 252.8960108757019, + "p90": 279.4559895992279, + "p95": 292.9919958114624, + "p99": 367.7760064601898 + }, + "combine": { + "p50": 138.047993183136, + "p90": 142.56000518798828, + "p95": 144.6399986743927, + "p99": 150.07999539375305 + }, + "roundtrip": { + "p50": 375.16799569129944, + "p90": 392.92800426483154, + "p95": 406.1119854450226, + "p99": 426.07998847961426 + }, + "isolatedSum": { + "p50": 390.9440040588379, + "p90": 422.0159947872162, + "p95": 437.6319944858551, + "p99": 517.8560018539429 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 330.4640054702759, + "p90": 340.89601039886475, + "p95": 343.6799943447113, + "p99": 352.57598757743835 + }, + "combine": { + "p50": 204.19199764728546, + "p90": 209.08799767494202, + "p95": 211.10400557518005, + "p99": 215.2000069618225 + }, + "roundtrip": { + "p50": 517.6960229873657, + "p90": 529.695987701416, + "p95": 538.3039712905884, + "p99": 562.8479719161987 + }, + "isolatedSum": { + "p50": 534.6560031175613, + "p90": 549.9840080738068, + "p95": 554.7839999198914, + "p99": 567.7759945392609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 508.9600086212158, + "p90": 522.4320292472839, + "p95": 539.8079752922058, + "p99": 554.8800230026245 + }, + "combine": { + "p50": 403.328001499176, + "p90": 410.4959964752197, + "p95": 413.63200545310974, + "p99": 419.871985912323 + }, + "roundtrip": { + "p50": 904.0319919586182, + "p90": 921.4400053024292, + "p95": 
927.1039962768555, + "p99": 948.8959908485413 + }, + "isolatedSum": { + "p50": 912.2880101203918, + "p90": 932.9280257225037, + "p95": 953.4399807453156, + "p99": 974.7520089149475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 868.7360286712646, + "p90": 878.3360123634338, + "p95": 883.0400109291077, + "p99": 902.2719860076904 + }, + "combine": { + "p50": 766.5280103683472, + "p90": 771.3279724121094, + "p95": 773.3759880065918, + "p99": 777.504026889801 + }, + "roundtrip": { + "p50": 1610.7840538024902, + "p90": 1636.5439891815186, + "p95": 1643.936038017273, + "p99": 1661.2800359725952 + }, + "isolatedSum": { + "p50": 1635.2640390396118, + "p90": 1649.6639847755432, + "p95": 1656.4159989356995, + "p99": 1679.7760128974915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1658.4320068359375, + "p90": 1670.4319715499878, + "p95": 1675.0080585479736, + "p99": 1703.8079500198364 + }, + "combine": { + "p50": 1452.9279470443726, + "p90": 1460.0000381469727, + "p95": 1462.656021118164, + "p99": 1470.8160161972046 + }, + "roundtrip": { + "p50": 3079.296112060547, + "p90": 3090.9440517425537, + "p95": 3094.8801040649414, + "p99": 3109.1198921203613 + }, + "isolatedSum": { + "p50": 3111.35995388031, + "p90": 3130.4320096969604, + "p95": 3137.6640796661377, + "p99": 3174.623966217041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + 
"recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e94f4f02", + "identity": "b200|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b200_033ebcb6", + "comparisonKey": "822c8e40ef1e4c59", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:28:18.527530+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 212.22400665283203, + "p90": 228.70400547981262, + "p95": 235.74399948120117, + "p99": 333.407998085022 + }, + "combine": { + "p50": 109.6000000834465, + "p90": 115.26399850845337, + "p95": 117.66400188207626, + "p99": 123.90399724245071 + }, + "roundtrip": { + "p50": 305.88799715042114, + "p90": 329.15198802948, + "p95": 342.0799970626831, + "p99": 362.14399337768555 + }, + "isolatedSum": { + "p50": 321.82400673627853, + "p90": 343.968003988266, + "p95": 353.40800136327744, + "p99": 457.3119953274727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 262.59198784828186, + "p90": 291.51999950408936, + "p95": 298.8159954547882, + "p99": 315.744012594223 + }, + "combine": { + "p50": 145.75999975204468, + "p90": 149.6960073709488, + "p95": 151.2639969587326, + "p99": 156.76799416542053 + }, + "roundtrip": { + "p50": 395.9360122680664, + "p90": 406.2719941139221, + "p95": 409.31200981140137, + "p99": 415.71199893951416 + }, + "isolatedSum": { + "p50": 408.35198760032654, + "p90": 441.21600687503815, + "p95": 450.0799924135208, + "p99": 472.51200675964355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 356.9920063018799, + "p90": 381.53600692749023, + "p95": 400.64001083374023, + 
"p99": 543.2000160217285 + }, + "combine": { + "p50": 219.2319929599762, + "p90": 226.1440008878708, + "p95": 229.18400168418884, + "p99": 235.23199558258057 + }, + "roundtrip": { + "p50": 582.9439759254456, + "p90": 592.5120115280151, + "p95": 595.7760214805603, + "p99": 603.8720011711121 + }, + "isolatedSum": { + "p50": 576.2239992618561, + "p90": 607.680007815361, + "p95": 629.8240125179291, + "p99": 778.4320116043091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 563.103973865509, + "p90": 571.7120170593262, + "p95": 573.7919807434082, + "p99": 578.9440274238586 + }, + "combine": { + "p50": 447.58400321006775, + "p90": 454.0799856185913, + "p95": 456.38400316238403, + "p99": 462.0159864425659 + }, + "roundtrip": { + "p50": 986.9120121002197, + "p90": 998.2079863548279, + "p95": 1003.9680004119873, + "p99": 1029.5679569244385 + }, + "isolatedSum": { + "p50": 1010.6879770755768, + "p90": 1025.7920026779175, + "p95": 1030.1759839057922, + "p99": 1040.9600138664246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 996.4159727096558, + "p90": 1004.0639638900757, + "p95": 1006.6879987716675, + "p99": 1013.0560398101807 + }, + "combine": { + "p50": 795.5840229988098, + "p90": 802.0480275154114, + "p95": 804.6720027923584, + "p99": 810.7519745826721 + }, + "roundtrip": { + "p50": 1770.2399492263794, + "p90": 1778.9759635925293, + "p95": 1782.047986984253, + "p99": 1789.6000146865845 + }, + "isolatedSum": { + "p50": 
1791.9999957084656, + "p90": 1806.111991405487, + "p95": 1811.3600015640259, + "p99": 1823.8080143928528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1911.0080003738403, + "p90": 1920.799970626831, + "p95": 1923.8719940185547, + "p99": 1929.2160272598267 + }, + "combine": { + "p50": 1491.9999837875366, + "p90": 1497.663974761963, + "p95": 1499.135971069336, + "p99": 1505.5999755859375 + }, + "roundtrip": { + "p50": 3373.823881149292, + "p90": 3384.3839168548584, + "p95": 3387.8719806671143, + "p99": 3416.7680740356445 + }, + "isolatedSum": { + "p50": 3403.007984161377, + "p90": 3418.463945388794, + "p95": 3423.0079650878906, + "p99": 3434.816002845764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-86b7b3a2", + "identity": "b200|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_033ebcb6", + "comparisonKey": "54952274b56451a0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:30:49.045631+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + 
"shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.75999665260315, + "p90": 261.50399446487427, + "p95": 291.7119860649109, + "p99": 393.40800046920776 + }, + "combine": { + "p50": 114.78400230407715, + "p90": 118.30399930477142, + "p95": 119.74400281906128, + "p99": 127.23200023174286 + }, + "roundtrip": { + "p50": 317.02399253845215, + "p90": 333.75999331474304, + "p95": 342.5599932670593, + "p99": 359.1040074825287 + }, + "isolatedSum": { + "p50": 332.5439989566803, + "p90": 379.8079937696457, + "p95": 411.45598888397217, + "p99": 520.6400007009506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 272.67199754714966, + "p90": 297.5359857082367, + "p95": 305.56800961494446, + "p99": 322.9759931564331 + }, + "combine": { + "p50": 154.84799444675446, + "p90": 159.55199301242828, + "p95": 162.81600296497345, + "p99": 168.64000260829926 + }, + "roundtrip": { + "p50": 422.94400930404663, + "p90": 442.1760141849518, + "p95": 447.519987821579, + "p99": 453.7599980831146 + }, + "isolatedSum": { + "p50": 427.5199919939041, + "p90": 457.087978720665, + "p95": 468.3840125799179, + "p99": 491.61599576473236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 387.03998923301697, + "p90": 416.31999611854553, + "p95": 426.2399971485138, + "p99": 454.49599623680115 + }, + "combine": { + "p50": 253.1839907169342, + "p90": 260.44800877571106, + "p95": 262.9759907722473, + "p99": 269.1200077533722 + }, + "roundtrip": { + "p50": 637.440025806427, + "p90": 657.1840047836304, + "p95": 663.6800169944763, + "p99": 678.2400012016296 + }, + "isolatedSum": { + "p50": 640.2239799499512, + "p90": 676.7680048942566, + "p95": 689.2159879207611, + "p99": 723.6160039901733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 618.2079911231995, + "p90": 673.8560199737549, + "p95": 712.3200297355652, + "p99": 754.1120052337646 + }, + "combine": { + "p50": 453.2800018787384, + "p90": 461.7280066013336, + 
"p95": 465.63199162483215, + "p99": 474.94399547576904 + }, + "roundtrip": { + "p50": 1053.3440113067627, + "p90": 1095.296025276184, + "p95": 1104.032039642334, + "p99": 1124.9279975891113 + }, + "isolatedSum": { + "p50": 1071.4879930019379, + "p90": 1135.5840265750885, + "p95": 1177.9520213603973, + "p99": 1229.0560007095337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1108.6399555206299, + "p90": 1116.5440082550049, + "p95": 1118.5280084609985, + "p99": 1125.9839534759521 + }, + "combine": { + "p50": 813.696026802063, + "p90": 820.3520178794861, + "p95": 822.272002696991, + "p99": 827.2640109062195 + }, + "roundtrip": { + "p50": 1897.8240489959717, + "p90": 1906.3040018081665, + "p95": 1909.119963645935, + "p99": 1935.1040124893188 + }, + "isolatedSum": { + "p50": 1922.3359823226929, + "p90": 1936.896026134491, + "p95": 1940.8000111579895, + "p99": 1953.2479643821716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2112.44797706604, + "p90": 2119.040012359619, + "p95": 2120.543956756592, + "p99": 2127.19988822937 + }, + "combine": { + "p50": 1514.016032218933, + "p90": 1523.5199928283691, + "p95": 1527.8719663619995, + "p99": 1554.04794216156 + }, + "roundtrip": { + "p50": 3607.487916946411, + "p90": 3616.7678833007812, + "p95": 3620.5759048461914, + "p99": 3647.2320556640625 + }, + "isolatedSum": { + "p50": 3626.464009284973, + "p90": 3642.5600051879883, + "p95": 3648.4159231185913, + "p99": 3681.24783039093 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-46862239", + "identity": "b200|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_d0ca9eea", + "comparisonKey": "12389756b658dcc9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:19:27.239603+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.4319976568222, + "p90": 113.88800293207169, + "p95": 119.9679970741272, + "p99": 123.99999797344208 + }, + "combine": { + "p50": 120.7680031657219, + "p90": 126.78399682044983, + "p95": 132.1280002593994, + "p99": 135.83999872207642 + }, + "roundtrip": { + "p50": 242.11199581623077, + "p90": 253.88801097869873, + "p95": 257.53599405288696, + "p99": 271.87201380729675 + }, + "isolatedSum": { + "p50": 223.2000008225441, + "p90": 240.67199975252151, + "p95": 252.0959973335266, + "p99": 259.8399966955185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.3839954137802, + "p90": 148.3840048313141, + "p95": 151.8079936504364, + "p99": 166.59200191497803 + }, + "combine": { + "p50": 167.84000396728516, + "p90": 173.88799786567688, + "p95": 177.59999632835388, + "p99": 185.92000007629395 + }, + "roundtrip": { + "p50": 352.03200578689575, + "p90": 360.6399893760681, + "p95": 366.04800820350647, + "p99": 375.4560053348541 + }, + "isolatedSum": { + "p50": 304.22399938106537, + "p90": 322.27200269699097, + "p95": 329.4079899787903, + "p99": 352.512001991272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 199.8720020055771, + "p90": 207.32800662517548, + "p95": 211.0079973936081, + "p99": 216.44799411296844 + }, + "combine": { + "p50": 281.6320061683655, + "p90": 287.9360020160675, + "p95": 290.336012840271, + "p99": 294.40000653266907 + }, + "roundtrip": { + "p50": 584.2559933662415, + "p90": 590.9439921379089, + "p95": 593.2160019874573, + "p99": 598.2080101966858 + }, + "isolatedSum": { + "p50": 481.50400817394257, + "p90": 495.264008641243, + "p95": 501.3440102338791, + "p99": 510.8480006456375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 318.84801387786865, + "p90": 329.4079899787903, + "p95": 348.60798716545105, + "p99": 366.2720024585724 + }, + "combine": { + "p50": 469.9519872665405, + "p90": 484.95998978614807, + "p95": 488.0320131778717, + "p99": 493.75998973846436 + }, + "roundtrip": { + "p50": 1006.1759948730469, + "p90": 1016.3520574569702, + "p95": 1020.7680463790894, + "p99": 1031.3600301742554 + }, + "isolatedSum": { + "p50": 788.8000011444092, + "p90": 814.3679797649384, + "p95": 836.6400003433228, + "p99": 860.0319921970367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 555.2639961242676, + "p90": 562.8479719161987, + "p95": 565.8239722251892, + "p99": 581.9519758224487 + }, + "combine": { + "p50": 832.9600095748901, + "p90": 839.8399949073792, + "p95": 841.69602394104, + "p99": 848.2239842414856 + }, + "roundtrip": { + 
"p50": 1853.600025177002, + "p90": 1865.5359745025635, + "p95": 1869.088053703308, + "p99": 1879.2320489883423 + }, + "isolatedSum": { + "p50": 1388.2240056991577, + "p90": 1402.6879668235779, + "p95": 1407.5199961662292, + "p99": 1430.1759600639343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1030.527949333191, + "p90": 1037.2480154037476, + "p95": 1040.1599407196045, + "p99": 1047.4879741668701 + }, + "combine": { + "p50": 1538.5600328445435, + "p90": 1545.9200143814087, + "p95": 1548.6079454421997, + "p99": 1554.1759729385376 + }, + "roundtrip": { + "p50": 3523.9360332489014, + "p90": 3535.2959632873535, + "p95": 3538.719892501831, + "p99": 3573.568105697632 + }, + "isolatedSum": { + "p50": 2569.0879821777344, + "p90": 2583.1680297851562, + "p95": 2588.767886161804, + "p99": 2601.6639471054077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4757e965", + "identity": "b200|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_033ebcb6", + "comparisonKey": "120b92b6c7ea453a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:22:00.599085+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 222.20799326896667, + "p90": 261.9200050830841, + "p95": 281.0879945755005, + "p99": 313.8880133628845 + }, + "combine": { + "p50": 122.079998254776, + "p90": 130.5599957704544, + "p95": 136.31999492645264, + "p99": 143.39199662208557 + }, + "roundtrip": { + "p50": 329.3760120868683, + "p90": 362.87999153137207, + "p95": 375.16799569129944, + "p99": 409.7599983215332 + }, + "isolatedSum": { + "p50": 344.2879915237427, + "p90": 392.4800008535385, + "p95": 417.4079895019531, + "p99": 457.2800099849701 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 277.8559923171997, + "p90": 307.3599934577942, + "p95": 312.608003616333, + "p99": 323.2319951057434 + }, + "combine": { + "p50": 166.97600483894348, + "p90": 172.57599532604218, + "p95": 176.35199427604675, + "p99": 184.9920004606247 + }, + "roundtrip": { + "p50": 439.7119879722595, + "p90": 460.03198623657227, + "p95": 465.6960070133209, + "p99": 476.9600033760071 + }, + "isolatedSum": { + "p50": 444.8319971561432, + "p90": 479.93598878383636, + "p95": 488.95999789237976, + "p99": 508.2239955663681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 410.8799993991852, + "p90": 433.1839978694916, + "p95": 442.68798828125, + "p99": 467.0720100402832 + }, + "combine": { + "p50": 281.5040051937103, + "p90": 289.792001247406, + "p95": 293.7600016593933, + "p99": 302.8160035610199 + }, + "roundtrip": { + "p50": 670.4959869384766, + "p90": 682.9119920730591, + "p95": 689.7600293159485, + "p99": 700.4160284996033 + }, + "isolatedSum": { + "p50": 692.3840045928955, + "p90": 722.9759991168976, + "p95": 736.4479899406433, + "p99": 769.8880136013031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 653.6639928817749, + "p90": 
668.5439944267273, + "p95": 678.1119704246521, + "p99": 696.0960030555725 + }, + "combine": { + "p50": 467.7119851112366, + "p90": 474.68799352645874, + "p95": 477.9199957847595, + "p99": 487.5839948654175 + }, + "roundtrip": { + "p50": 1100.4799604415894, + "p90": 1126.431941986084, + "p95": 1141.0239934921265, + "p99": 1188.9920234680176 + }, + "isolatedSum": { + "p50": 1121.3759779930115, + "p90": 1143.231987953186, + "p95": 1156.0319662094116, + "p99": 1183.67999792099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1208.351969718933, + "p90": 1215.2639627456665, + "p95": 1219.167947769165, + "p99": 1226.912021636963 + }, + "combine": { + "p50": 834.879994392395, + "p90": 842.2080278396606, + "p95": 845.7599878311157, + "p99": 853.7279963493347 + }, + "roundtrip": { + "p50": 2016.767978668213, + "p90": 2026.047945022583, + "p95": 2029.5360088348389, + "p99": 2047.327995300293 + }, + "isolatedSum": { + "p50": 2043.2319641113281, + "p90": 2057.471990585327, + "p95": 2064.9279356002808, + "p99": 2080.6400179862976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2301.7919063568115, + "p90": 2309.664011001587, + "p95": 2311.8081092834473, + "p99": 2323.040008544922 + }, + "combine": { + "p50": 1538.9759540557861, + "p90": 1547.2960472106934, + "p95": 1549.7280359268188, + "p99": 1554.975986480713 + }, + "roundtrip": { + "p50": 3826.0159492492676, + "p90": 3834.944009780884, + "p95": 3839.679956436157, + "p99": 
3845.6320762634277 + }, + "isolatedSum": { + "p50": 3840.7678604125977, + "p90": 3856.9600582122803, + "p95": 3861.536145210266, + "p99": 3878.0159950256348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-00b54cd6", + "identity": "b200|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b200_033ebcb6", + "comparisonKey": "b7d2c8befbdb45b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:23:19.943142+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.82399713993073, + "p90": 238.24000358581543, + "p95": 249.2160052061081, + "p99": 277.9200077056885 + }, + "combine": { + "p50": 122.20799922943115, + "p90": 126.43200159072876, + "p95": 128.28800082206726, + "p99": 133.95200669765472 + }, + "roundtrip": { + "p50": 328.031986951828, + "p90": 339.83999490737915, + "p95": 344.06399726867676, + "p99": 352.57598757743835 + }, + "isolatedSum": { + "p50": 340.0319963693619, + "p90": 364.6720051765442, + "p95": 377.50400602817535, + "p99": 411.8720144033432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 274.52799677848816, + "p90": 289.15199637413025, + "p95": 303.16799879074097, + "p99": 326.3680040836334 + }, + "combine": { + "p50": 168.06399822235107, + "p90": 172.63999581336975, + "p95": 174.46400225162506, + "p99": 178.46399545669556 + }, + "roundtrip": { + "p50": 440.2239918708801, + "p90": 456.9920003414154, + "p95": 464.9919867515564, + "p99": 481.6960096359253 + }, + "isolatedSum": { + "p50": 442.59199500083923, + "p90": 461.7919921875, + "p95": 477.632001042366, + "p99": 504.831999540329 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 
5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 400.57599544525146, + "p90": 423.5840141773224, + "p95": 433.1519901752472, + "p99": 449.0239918231964 + }, + "combine": { + "p50": 276.44801139831543, + "p90": 281.5040051937103, + "p95": 283.58399868011475, + "p99": 288.1599962711334 + }, + "roundtrip": { + "p50": 665.6960248947144, + "p90": 676.4159798622131, + "p95": 681.5040111541748, + "p99": 691.9360160827637 + }, + "isolatedSum": { + "p50": 677.0240068435669, + "p90": 705.0880193710327, + "p95": 716.7359888553619, + "p99": 737.1839880943298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 648.1279730796814, + "p90": 691.5199756622314, + "p95": 717.5359725952148, + "p99": 756.5439939498901 + }, + "combine": { + "p50": 469.37599778175354, + "p90": 475.5840003490448, + "p95": 478.14399003982544, + "p99": 482.7519953250885 + }, + "roundtrip": { + "p50": 1098.7199544906616, + "p90": 1109.2480421066284, + "p95": 1119.9040412902832, + "p99": 1141.535997390747 + }, + "isolatedSum": { + "p50": 1117.503970861435, + "p90": 1167.1039760112762, + "p95": 1195.6799626350403, + "p99": 1239.2959892749786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1195.680022239685, + "p90": 1201.9519805908203, + "p95": 1204.1599750518799, + "p99": 1210.752010345459 + }, + "combine": { + "p50": 
823.3919739723206, + "p90": 830.7200074195862, + "p95": 833.1199884414673, + "p99": 840.0959968566895 + }, + "roundtrip": { + "p50": 1997.82395362854, + "p90": 2006.7200660705566, + "p95": 2009.7920894622803, + "p99": 2087.5840187072754 + }, + "isolatedSum": { + "p50": 2019.0719962120056, + "p90": 2032.6719880104065, + "p95": 2037.2799634933472, + "p99": 2050.8480072021484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2294.816017150879, + "p90": 2302.97589302063, + "p95": 2306.080102920532, + "p99": 2319.7760581970215 + }, + "combine": { + "p50": 1535.423994064331, + "p90": 1542.5920486450195, + "p95": 1544.319987297058, + "p99": 1547.1359491348267 + }, + "roundtrip": { + "p50": 3812.6399517059326, + "p90": 3820.159912109375, + "p95": 3822.1120834350586, + "p99": 3828.6080360412598 + }, + "isolatedSum": { + "p50": 3830.24001121521, + "p90": 3845.5679416656494, + "p95": 3850.4000902175903, + "p99": 3866.912007331848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-833933a1", + "identity": "b200|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_b10b6c62", + "comparisonKey": "4b6af1e0ea5be4d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:20:41.795563+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + 
"suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.70399808883667, + "p90": 98.11200201511383, + "p95": 104.09600287675858, + "p99": 115.9679964184761 + }, + "combine": { + "p50": 121.72800302505493, + "p90": 129.4720023870468, + "p95": 133.40799510478973, + "p99": 137.82399892807007 + }, + "roundtrip": { + "p50": 231.36000335216522, + "p90": 236.06400191783905, + "p95": 238.11200261116028, + "p99": 247.26399779319763 + 
}, + "isolatedSum": { + "p50": 214.4320011138916, + "p90": 227.58400440216064, + "p95": 237.5039979815483, + "p99": 253.79199534654617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 126.08000636100769, + "p90": 134.24000144004822, + "p95": 138.08000087738037, + "p99": 143.48800480365753 + }, + "combine": { + "p50": 168.03200542926788, + "p90": 173.8239973783493, + "p95": 178.6240041255951, + "p99": 185.5040043592453 + }, + "roundtrip": { + "p50": 340.92798829078674, + "p90": 346.94400429725647, + "p95": 352.09599137306213, + "p99": 357.9840064048767 + }, + "isolatedSum": { + "p50": 294.1120117902756, + "p90": 308.0639988183975, + "p95": 316.70400500297546, + "p99": 328.99200916290283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 191.52000546455383, + "p90": 202.78400182724, + "p95": 208.8959962129593, + "p99": 222.3680019378662 + }, + "combine": { + "p50": 282.04798698425293, + "p90": 288.12798857688904, + "p95": 291.3919985294342, + "p99": 298.0479896068573 + }, + "roundtrip": { + "p50": 572.8639960289001, + "p90": 581.0559988021851, + "p95": 584.9599838256836, + "p99": 594.1439867019653 + }, + "isolatedSum": { + "p50": 473.56799244880676, + "p90": 490.911990404129, + "p95": 500.2879947423935, + "p99": 520.4159915447235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 310.88000535964966, + "p90": 319.42400336265564, + "p95": 323.64800572395325, + "p99": 332.63999223709106 + }, + "combine": { + "p50": 468.54400634765625, + "p90": 476.99201107025146, + "p95": 479.64799404144287, + "p99": 485.79201102256775 + }, + "roundtrip": { + "p50": 996.5760111808777, + "p90": 1005.344033241272, + "p95": 1009.6960067749023, + "p99": 1017.3120498657227 + }, + "isolatedSum": { + "p50": 779.4240117073059, + "p90": 796.4160144329071, + "p95": 803.2959997653961, + "p99": 818.4320032596588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 540.5759811401367, + "p90": 548.0960011482239, + "p95": 551.584005355835, + "p99": 558.4319829940796 + }, + "combine": { + "p50": 833.0559730529785, + "p90": 840.287983417511, + "p95": 843.999981880188, + "p99": 852.5440096855164 + }, + "roundtrip": { + "p50": 1838.8160467147827, + "p90": 1849.3119478225708, + "p95": 1852.4800539016724, + "p99": 1861.791968345642 + }, + "isolatedSum": { + "p50": 1373.6319541931152, + "p90": 1388.3839845657349, + "p95": 1395.583987236023, + "p99": 1410.975992679596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1001.3120174407959, + "p90": 1009.2480182647705, + "p95": 1011.8720531463623, + "p99": 1020.4800367355347 + }, + "combine": { + "p50": 1542.5599813461304, + "p90": 1551.2640476226807, + "p95": 1554.8479557037354, 
+ "p99": 1564.7679567337036 + }, + "roundtrip": { + "p50": 3494.2080974578857, + "p90": 3503.5200119018555, + "p95": 3507.6160430908203, + "p99": 3515.4240131378174 + }, + "isolatedSum": { + "p50": 2543.8719987869263, + "p90": 2560.512065887451, + "p95": 2566.7200088500977, + "p99": 2585.2479934692383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c65e5036", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_b5ece6b8", + "comparisonKey": "9e3061ee3690567b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:13:59.160741+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 
1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 169.40799355506897, + "p90": 196.51199877262115, + "p95": 200.95999538898468, + "p99": 213.24799954891205 + }, + "combine": { + "p50": 47.61600121855736, + "p90": 52.41600051522255, + "p95": 58.94400179386139, + "p99": 66.39999896287918 + }, + "roundtrip": { + "p50": 210.33599972724915, + "p90": 238.5600060224533, + "p95": 245.66400051116943, + "p99": 273.8879919052124 + }, + "isolatedSum": { + "p50": 217.02399477362633, + "p90": 248.9279992878437, + "p95": 259.90399718284607, + "p99": 279.64799851179123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 170.78399658203125, + "p90": 194.72000002861023, + "p95": 199.16799664497375, + "p99": 209.85600352287292 + }, + "combine": { + "p50": 46.08000069856644, + "p90": 50.27199909090996, + "p95": 56.51199817657471, + "p99": 61.824001371860504 + }, + "roundtrip": { + "p50": 208.70399475097656, + "p90": 229.72799837589264, + "p95": 235.45600473880768, + "p99": 244.4159984588623 + }, + "isolatedSum": { + "p50": 216.8639972805977, + "p90": 244.9919991195202, + "p95": 
255.67999482154846, + "p99": 271.68000489473343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 170.8800047636032, + "p90": 196.51199877262115, + "p95": 201.1840045452118, + "p99": 207.32800662517548 + }, + "combine": { + "p50": 49.984000623226166, + "p90": 55.135998874902725, + "p95": 60.575999319553375, + "p99": 66.75200164318085 + }, + "roundtrip": { + "p50": 212.41599321365356, + "p90": 233.40800404548645, + "p95": 239.16800320148468, + "p99": 246.68799340724945 + }, + "isolatedSum": { + "p50": 220.86400538682938, + "p90": 251.64799764752388, + "p95": 261.76000386476517, + "p99": 274.0800082683563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 177.279993891716, + "p90": 205.53599298000336, + "p95": 212.76800334453583, + "p99": 235.74399948120117 + }, + "combine": { + "p50": 50.75199902057648, + "p90": 55.96800148487091, + "p95": 60.67200005054474, + "p99": 67.03999638557434 + }, + "roundtrip": { + "p50": 217.631995677948, + "p90": 240.25599658489227, + "p95": 246.65600061416626, + "p99": 261.9520127773285 + }, + "isolatedSum": { + "p50": 228.03199291229248, + "p90": 261.50399446487427, + "p95": 273.44000339508057, + "p99": 302.7839958667755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
187.74400651454926, + "p90": 206.04799687862396, + "p95": 211.90400421619415, + "p99": 221.21599316596985 + }, + "combine": { + "p50": 54.687999188899994, + "p90": 58.75200033187866, + "p95": 62.94400244951248, + "p99": 66.72000139951706 + }, + "roundtrip": { + "p50": 232.57599771022797, + "p90": 255.5519938468933, + "p95": 261.6960108280182, + "p99": 291.3599908351898 + }, + "isolatedSum": { + "p50": 242.43200570344925, + "p90": 264.7999972105026, + "p95": 274.84800666570663, + "p99": 287.9359945654869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 210.55999398231506, + "p90": 233.75999927520752, + "p95": 244.00000274181366, + "p99": 265.79201221466064 + }, + "combine": { + "p50": 52.960000932216644, + "p90": 61.47199869155884, + "p95": 64.7680014371872, + "p99": 69.82400268316269 + }, + "roundtrip": { + "p50": 256.6719949245453, + "p90": 277.9519855976105, + "p95": 286.1439883708954, + "p99": 295.6799864768982 + }, + "isolatedSum": { + "p50": 263.5199949145317, + "p90": 295.23199796676636, + "p95": 308.76800417900085, + "p99": 335.61601489782333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 257.60000944137573, + "p90": 283.7440073490143, + "p95": 292.5119996070862, + "p99": 302.94400453567505 + }, + "combine": { + "p50": 56.57599866390228, + "p90": 63.80800157785416, + "p95": 70.46400010585785, + "p99": 74.72000271081924 + }, + "roundtrip": { + "p50": 305.82401156425476, + "p90": 334.23998951911926, + "p95": 339.4240140914917, + "p99": 
363.647997379303 + }, + "isolatedSum": { + "p50": 314.176008105278, + "p90": 347.55200892686844, + "p95": 362.97599971294403, + "p99": 377.6640072464943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 270.2080011367798, + "p90": 313.728004693985, + "p95": 327.0080089569092, + "p99": 349.15199875831604 + }, + "combine": { + "p50": 64.64000046253204, + "p90": 73.85600358247757, + "p95": 82.11199939250946, + "p99": 87.20000088214874 + }, + "roundtrip": { + "p50": 317.50398874282837, + "p90": 338.01600337028503, + "p95": 348.9280045032501, + "p99": 368.9279854297638 + }, + "isolatedSum": { + "p50": 334.84800159931183, + "p90": 387.58400827646255, + "p95": 409.12000834941864, + "p99": 436.3519996404648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7fcc0981", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b200_0d404980", + "comparisonKey": "73f3845a091ed759", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:15:02.945368+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · 
deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 168.47999393939972, + "p90": 185.82400679588318, + "p95": 191.96799397468567, + "p99": 203.23200523853302 + }, + "combine": { + "p50": 48.19199815392494, + "p90": 51.64799839258194, + "p95": 55.48800155520439, + "p99": 61.983998864889145 + }, + "roundtrip": { + "p50": 209.56799387931824, + "p90": 226.75199806690216, + "p95": 234.49599742889404, + "p99": 247.13599681854248 + }, + "isolatedSum": { + "p50": 216.67199209332466, + "p90": 237.47200518846512, + "p95": 247.45599552989006, + "p99": 265.21600410342216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 
917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 169.88800466060638, + "p90": 187.80800700187683, + "p95": 203.93599569797516, + "p99": 219.84000504016876 + }, + "combine": { + "p50": 48.16000163555145, + "p90": 50.36799982190132, + "p95": 52.06400156021118, + "p99": 57.53599852323532 + }, + "roundtrip": { + "p50": 209.6640020608902, + "p90": 225.3119945526123, + "p95": 239.96800184249878, + "p99": 260.0640058517456 + }, + "isolatedSum": { + "p50": 218.04800629615784, + "p90": 238.17600682377815, + "p95": 255.99999725818634, + "p99": 277.3760035634041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 171.10399901866913, + "p90": 199.8399943113327, + "p95": 207.7759951353073, + "p99": 239.1040027141571 + }, + "combine": { + "p50": 52.06400156021118, + "p90": 55.52000179886818, + "p95": 61.344001442193985, + "p99": 67.16799736022949 + }, + "roundtrip": { + "p50": 214.33599293231964, + "p90": 231.9680005311966, + "p95": 235.6480062007904, + "p99": 247.0719963312149 + }, + "isolatedSum": { + "p50": 223.1680005788803, + "p90": 255.35999611020088, + "p95": 269.1199965775013, + "p99": 306.2720000743866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 178.94400656223297, + "p90": 191.3599967956543, + "p95": 196.383997797966, + "p99": 206.56000077724457 + }, + "combine": { + "p50": 52.57600173354149, + "p90": 
55.00800162553787, + "p95": 57.28000029921532, + "p99": 63.29599767923355 + }, + "roundtrip": { + "p50": 222.33599424362183, + "p90": 237.40799725055695, + "p95": 246.46399915218353, + "p99": 261.7279887199402 + }, + "isolatedSum": { + "p50": 231.52000829577446, + "p90": 246.36799842119217, + "p95": 253.66399809718132, + "p99": 269.8559984564781 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 195.8719938993454, + "p90": 205.4080069065094, + "p95": 210.07999777793884, + "p99": 218.1439995765686 + }, + "combine": { + "p50": 53.408000618219376, + "p90": 58.111999183893204, + "p95": 59.13599953055382, + "p99": 66.3679987192154 + }, + "roundtrip": { + "p50": 240.12799561023712, + "p90": 253.76001000404358, + "p95": 262.2720003128052, + "p99": 276.92800760269165 + }, + "isolatedSum": { + "p50": 249.27999451756477, + "p90": 263.5200060904026, + "p95": 269.21599730849266, + "p99": 284.511998295784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 229.40799593925476, + "p90": 237.34399676322937, + "p95": 239.3600046634674, + "p99": 243.6159998178482 + }, + "combine": { + "p50": 54.91200089454651, + "p90": 57.40800127387047, + "p95": 60.63999980688095, + "p99": 66.46399945020676 + }, + "roundtrip": { + "p50": 275.35998821258545, + "p90": 284.5439910888672, + "p95": 288.5439991950989, + "p99": 293.98399591445923 + }, + "isolatedSum": { + "p50": 284.31999683380127, + "p90": 294.75199803709984, + "p95": 300.00000447034836, + "p99": 310.07999926805496 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 296.671986579895, + "p90": 310.4639947414398, + "p95": 317.50398874282837, + "p99": 332.5439989566803 + }, + "combine": { + "p50": 59.13599953055382, + "p90": 62.07999959588051, + "p95": 64.12799656391144, + "p99": 72.15999811887741 + }, + "roundtrip": { + "p50": 347.2000062465668, + "p90": 359.6479892730713, + "p95": 364.28800225257874, + "p99": 373.4079897403717 + }, + "isolatedSum": { + "p50": 355.80798611044884, + "p90": 372.5439943373203, + "p95": 381.6319853067398, + "p99": 404.7039970755577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 303.6159873008728, + "p90": 325.9199857711792, + "p95": 331.0079872608185, + "p99": 341.18399024009705 + }, + "combine": { + "p50": 71.52000069618225, + "p90": 76.06399804353714, + "p95": 79.39200103282928, + "p99": 86.68799698352814 + }, + "roundtrip": { + "p50": 364.28800225257874, + "p90": 376.9280016422272, + "p95": 384.0000033378601, + "p99": 405.63198924064636 + }, + "isolatedSum": { + "p50": 375.13598799705505, + "p90": 401.98398381471634, + "p95": 410.39998829364777, + "p99": 427.8719872236252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f8e8d6fa", + "identity": 
"b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b200_e73eae63", + "comparisonKey": "a9aacba9caf135ed", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:16:05.227124+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": 
"2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 163.26400637626648, + "p90": 178.30400168895721, + "p95": 180.57599663734436, + "p99": 188.7039989233017 + }, + "combine": { + "p50": 42.49599948525429, + "p90": 45.1200008392334, + "p95": 46.720001846551895, + "p99": 51.83999985456467 + }, + "roundtrip": { + "p50": 204.54399287700653, + "p90": 222.84799814224243, + "p95": 228.64000499248505, + "p99": 238.65599930286407 + }, + "isolatedSum": { + "p50": 205.76000586152077, + "p90": 223.4240025281906, + "p95": 227.29599848389626, + "p99": 240.54399877786636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 166.87999665737152, + "p90": 179.36000227928162, + "p95": 182.3360025882721, + "p99": 187.42400407791138 + }, + "combine": { + "p50": 45.791998505592346, + "p90": 48.448000103235245, + "p95": 51.58400163054466, + "p99": 57.50399827957153 + }, + "roundtrip": { + "p50": 206.65599405765533, + "p90": 241.18399620056152, + "p95": 249.40800666809082, + "p99": 262.81601190567017 + }, + "isolatedSum": { + "p50": 212.67199516296387, + "p90": 227.80800238251686, + "p95": 233.92000421881676, + "p99": 244.9280023574829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 181.63199722766876, + "p90": 302.91199684143066, + "p95": 311.2320005893707, + "p99": 361.5039885044098 + }, + "combine": { + "p50": 47.68000170588493, + "p90": 78.14399898052216, + "p95": 88.3840024471283, + 
"p99": 96.03200107812881 + }, + "roundtrip": { + "p50": 217.43999421596527, + "p90": 335.2319896221161, + "p95": 353.40800881385803, + "p99": 372.3840117454529 + }, + "isolatedSum": { + "p50": 229.3119989335537, + "p90": 381.0559958219528, + "p95": 399.616003036499, + "p99": 457.5359895825386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 184.09599363803864, + "p90": 194.17600333690643, + "p95": 198.33600521087646, + "p99": 203.87199521064758 + }, + "combine": { + "p50": 48.64000156521797, + "p90": 50.6879985332489, + "p95": 53.05600166320801, + "p99": 59.647999703884125 + }, + "roundtrip": { + "p50": 225.11999309062958, + "p90": 236.2239956855774, + "p95": 241.34400486946106, + "p99": 245.88799476623535 + }, + "isolatedSum": { + "p50": 232.7359952032566, + "p90": 244.86400187015533, + "p95": 251.39200687408447, + "p99": 263.5199949145317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a0d14253", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b200_dbc465ce", + "comparisonKey": "b0c4ed0fb4d455f9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:21:15.329430+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 168.70400309562683, + "p90": 196.06399536132812, + "p95": 202.4960070848465, + "p99": 215.488001704216 + }, + "combine": { + "p50": 43.55200007557869, + "p90": 49.75999891757965, + "p95": 56.223999708890915, + "p99": 61.85600161552429 + }, + "roundtrip": { + "p50": 208.15999805927277, + "p90": 238.8480007648468, + "p95": 249.28000569343567, + "p99": 275.9679853916168 + }, + "isolatedSum": { + "p50": 212.25600317120552, + "p90": 
245.82399427890778, + "p95": 258.7200067937374, + "p99": 277.3440033197403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 166.9439971446991, + "p90": 194.68800723552704, + "p95": 202.07999646663666, + "p99": 212.54399418830872 + }, + "combine": { + "p50": 45.152001082897186, + "p90": 51.00800096988678, + "p95": 57.312000542879105, + "p99": 62.78400123119354 + }, + "roundtrip": { + "p50": 205.37599921226501, + "p90": 231.23200237751007, + "p95": 239.1359955072403, + "p99": 252.00000405311584 + }, + "isolatedSum": { + "p50": 212.09599822759628, + "p90": 245.69600820541382, + "p95": 259.39199700951576, + "p99": 275.32799541950226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 166.75199568271637, + "p90": 196.51199877262115, + "p95": 200.6080001592636, + "p99": 213.44000101089478 + }, + "combine": { + "p50": 45.791998505592346, + "p90": 52.51200124621391, + "p95": 57.5999990105629, + "p99": 61.91999837756157 + }, + "roundtrip": { + "p50": 203.96800339221954, + "p90": 231.51999711990356, + "p95": 236.92800104618073, + "p99": 243.8720017671585 + }, + "isolatedSum": { + "p50": 212.54399418830872, + "p90": 249.02400001883507, + "p95": 258.2079991698265, + "p99": 275.35999938845634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
169.44000124931335, + "p90": 199.13600385189056, + "p95": 207.39200711250305, + "p99": 218.176007270813 + }, + "combine": { + "p50": 45.69600149989128, + "p90": 52.86400020122528, + "p95": 59.167999774217606, + "p99": 62.912002205848694 + }, + "roundtrip": { + "p50": 211.5200012922287, + "p90": 245.44000625610352, + "p95": 262.30400800704956, + "p99": 308.03200602531433 + }, + "isolatedSum": { + "p50": 215.13600274920464, + "p90": 252.00000405311584, + "p95": 266.56000688672066, + "p99": 281.0880094766617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 179.03999984264374, + "p90": 214.39999341964722, + "p95": 230.1120012998581, + "p99": 250.65600872039795 + }, + "combine": { + "p50": 45.72800174355507, + "p90": 52.25599929690361, + "p95": 58.559998869895935, + "p99": 64.86400216817856 + }, + "roundtrip": { + "p50": 210.65600216388702, + "p90": 239.71199989318848, + "p95": 250.5599856376648, + "p99": 272.41599559783936 + }, + "isolatedSum": { + "p50": 224.7680015861988, + "p90": 266.6559927165508, + "p95": 288.672000169754, + "p99": 315.5200108885765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 181.92000687122345, + "p90": 202.7519941329956, + "p95": 208.41600000858307, + "p99": 219.87199783325195 + }, + "combine": { + "p50": 46.81599885225296, + "p90": 51.00800096988678, + "p95": 56.832000613212585, + "p99": 62.30400130152702 + }, + "roundtrip": { + "p50": 217.69599616527557, + "p90": 241.2160038948059, + "p95": 248.44799935817719, + "p99": 262.688010931015 + 
}, + "isolatedSum": { + "p50": 228.7360057234764, + "p90": 253.75999510288239, + "p95": 265.24800062179565, + "p99": 282.175999134779 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.23199713230133, + "p90": 226.23999416828156, + "p95": 235.26400327682495, + "p99": 248.99199604988098 + }, + "combine": { + "p50": 48.51200059056282, + "p90": 54.336000233888626, + "p95": 58.75200033187866, + "p99": 64.54399973154068 + }, + "roundtrip": { + "p50": 241.88800156116486, + "p90": 264.44798707962036, + "p95": 272.38398790359497, + "p99": 286.624014377594 + }, + "isolatedSum": { + "p50": 247.74399772286415, + "p90": 280.5759944021702, + "p95": 294.0160036087036, + "p99": 313.53599578142166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 199.8080015182495, + "p90": 224.48000311851501, + "p95": 231.90400004386902, + "p99": 254.5279860496521 + }, + "combine": { + "p50": 52.57600173354149, + "p90": 58.78400057554245, + "p95": 63.74400109052658, + "p99": 69.47200000286102 + }, + "roundtrip": { + "p50": 243.3280050754547, + "p90": 264.47999477386475, + "p95": 269.9199914932251, + "p99": 276.44801139831543 + }, + "isolatedSum": { + "p50": 252.384003251791, + "p90": 283.26400369405746, + "p95": 295.6480011343956, + "p99": 323.9999860525131 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + 
}, + { + "id": "cx-4441cf82", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b200_1002a4ba", + "comparisonKey": "813080f548eee21d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:19:08.980621+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 167.61599481105804, + "p90": 179.45599555969238, + "p95": 183.03999304771423, + "p99": 186.68800592422485 + }, + "combine": { + "p50": 45.9199994802475, + "p90": 48.51200059056282, + "p95": 50.40000006556511, + "p99": 55.80800026655197 + }, + "roundtrip": { + "p50": 206.56000077724457, + "p90": 217.28000044822693, + "p95": 222.27199375629425, + "p99": 224.7679978609085 + }, + "isolatedSum": { + "p50": 213.53599429130554, + "p90": 227.9679961502552, + "p95": 233.43999311327934, + "p99": 242.49600619077682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 169.0240055322647, + "p90": 185.12000143527985, + "p95": 196.25599682331085, + "p99": 214.62400257587433 + }, + "combine": { + "p50": 46.94399982690811, + "p90": 50.783999264240265, + "p95": 52.86400020122528, + "p99": 57.472001761198044 + }, + "roundtrip": { + "p50": 207.16799795627594, + "p90": 219.4879949092865, + "p95": 223.32799434661865, + "p99": 228.70400547981262 + }, + "isolatedSum": { + "p50": 215.96800535917282, + "p90": 235.9040006995201, + "p95": 249.11999702453613, + "p99": 272.0960043370724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 172.67200350761414, + "p90": 202.43200659751892, + "p95": 210.07999777793884, + "p99": 223.07200729846954 + }, + 
"combine": { + "p50": 50.52800104022026, + "p90": 53.98400127887726, + "p95": 55.96800148487091, + "p99": 62.24000081419945 + }, + "roundtrip": { + "p50": 212.22400665283203, + "p90": 228.5120040178299, + "p95": 245.05600333213806, + "p99": 259.68000292778015 + }, + "isolatedSum": { + "p50": 223.2000045478344, + "p90": 256.4160078763962, + "p95": 266.04799926280975, + "p99": 285.312008112669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 174.55999553203583, + "p90": 186.68800592422485, + "p95": 190.0479942560196, + "p99": 198.68800044059753 + }, + "combine": { + "p50": 50.016000866889954, + "p90": 52.671998739242554, + "p95": 54.52800169587135, + "p99": 60.864001512527466 + }, + "roundtrip": { + "p50": 215.03999829292297, + "p90": 226.33600234985352, + "p95": 229.98400032520294, + "p99": 235.71200668811798 + }, + "isolatedSum": { + "p50": 224.57599639892578, + "p90": 239.3600046634674, + "p95": 244.57599595189095, + "p99": 259.552001953125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 187.3600035905838, + "p90": 197.60000705718994, + "p95": 202.04800367355347, + "p99": 206.88000321388245 + }, + "combine": { + "p50": 54.91200089454651, + "p90": 59.61599946022034, + "p95": 61.24800071120262, + "p99": 66.52799993753433 + }, + "roundtrip": { + "p50": 233.3119958639145, + "p90": 258.39999318122864, + "p95": 266.52801036834717, + "p99": 279.29601073265076 + }, + "isolatedSum": { + "p50": 242.2720044851303, + "p90": 257.2160065174103, + "p95": 
263.2960043847561, + "p99": 273.4080031514168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 211.71200275421143, + "p90": 221.50400280952454, + "p95": 226.3679951429367, + "p99": 229.91999983787537 + }, + "combine": { + "p50": 55.03999814391136, + "p90": 57.95200169086456, + "p95": 60.256000608205795, + "p99": 65.43999910354614 + }, + "roundtrip": { + "p50": 256.6080093383789, + "p90": 275.58401226997375, + "p95": 279.55201268196106, + "p99": 297.760009765625 + }, + "isolatedSum": { + "p50": 266.7520008981228, + "p90": 279.4560045003891, + "p95": 286.6239957511425, + "p99": 295.3599989414215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 259.6159875392914, + "p90": 275.6800055503845, + "p95": 281.98400139808655, + "p99": 293.66400837898254 + }, + "combine": { + "p50": 57.18399956822395, + "p90": 60.70400029420853, + "p95": 62.72000074386597, + "p99": 68.00000369548798 + }, + "roundtrip": { + "p50": 306.08001351356506, + "p90": 318.11198592185974, + "p95": 321.75999879837036, + "p99": 337.15200424194336 + }, + "isolatedSum": { + "p50": 316.79998710751534, + "p90": 336.38400584459305, + "p95": 344.7040021419525, + "p99": 361.6640120744705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + 
"p50": 257.9520046710968, + "p90": 269.1519856452942, + "p95": 273.18400144577026, + "p99": 310.8159899711609 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 70.91200351715088, + "p95": 73.79200309515, + "p99": 79.39200103282928 + }, + "roundtrip": { + "p50": 318.1760013103485, + "p90": 328.031986951828, + "p95": 331.6799998283386, + "p99": 338.3679986000061 + }, + "isolatedSum": { + "p50": 324.2880031466484, + "p90": 340.06398916244507, + "p95": 346.97600454092026, + "p99": 390.2079910039902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0d612a7f", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b200_c4f169f4", + "comparisonKey": "bfdcda048b19f50c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:28:36.459496+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 168.38400065898895, + "p90": 184.4159960746765, + "p95": 189.31199610233307, + "p99": 209.1519981622696 + }, + "combine": { + "p50": 46.08000069856644, + "p90": 51.80799961090088, + "p95": 54.655998945236206, + "p99": 59.36000123620033 + }, + "roundtrip": { + "p50": 206.56000077724457, + "p90": 220.5120027065277, + "p95": 227.1679937839508, + "p99": 237.21599578857422 + }, + "isolatedSum": { + "p50": 214.4640013575554, + "p90": 236.2239956855774, + "p95": 243.96799504756927, + "p99": 268.5119993984699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 168.60799491405487, + "p90": 182.6239973306656, + "p95": 186.68800592422485, + "p99": 198.04799556732178 + }, + "combine": { + "p50": 46.911999583244324, + "p90": 51.711998879909515, + "p95": 
54.1439987719059, + "p99": 59.007998555898666 + }, + "roundtrip": { + "p50": 208.8640034198761, + "p90": 226.9120067358017, + "p95": 236.03199422359467, + "p99": 249.53599274158478 + }, + "isolatedSum": { + "p50": 215.5199944972992, + "p90": 234.3359962105751, + "p95": 240.83200469613075, + "p99": 257.05599412322044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 170.30400037765503, + "p90": 190.49599766731262, + "p95": 200.00000298023224, + "p99": 212.79999613761902 + }, + "combine": { + "p50": 49.92000013589859, + "p90": 53.53600159287453, + "p95": 56.15999922156334, + "p99": 59.10399928689003 + }, + "roundtrip": { + "p50": 212.3199999332428, + "p90": 233.3119958639145, + "p95": 242.65600740909576, + "p99": 258.39999318122864 + }, + "isolatedSum": { + "p50": 220.22400051355362, + "p90": 244.03199926018715, + "p95": 256.1600022017956, + "p99": 271.90399542450905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 173.92000555992126, + "p90": 188.1600022315979, + "p95": 192.51200556755066, + "p99": 229.88800704479218 + }, + "combine": { + "p50": 50.08000135421753, + "p90": 53.53600159287453, + "p95": 55.71199953556061, + "p99": 61.24800071120262 + }, + "roundtrip": { + "p50": 214.65599536895752, + "p90": 228.19200158119202, + "p95": 233.66400599479675, + "p99": 253.05598974227905 + }, + "isolatedSum": { + "p50": 224.0000069141388, + "p90": 241.69600382447243, + "p95": 248.22400510311127, + "p99": 291.1360077559948 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 187.68000602722168, + "p90": 199.10399615764618, + "p95": 204.16000485420227, + "p99": 221.82400524616241 + }, + "combine": { + "p50": 53.75999957323074, + "p90": 65.66400080919266, + "p95": 69.37599927186966, + "p99": 84.06399935483932 + }, + "roundtrip": { + "p50": 232.06399381160736, + "p90": 287.6479923725128, + "p95": 301.34400725364685, + "p99": 327.8079926967621 + }, + "isolatedSum": { + "p50": 241.44000560045242, + "p90": 264.76799696683884, + "p95": 273.53600412607193, + "p99": 305.88800460100174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 211.07199788093567, + "p90": 223.61600399017334, + "p95": 227.90400683879852, + "p99": 255.87201118469238 + }, + "combine": { + "p50": 52.671998739242554, + "p90": 56.03199824690819, + "p95": 58.848001062870026, + "p99": 63.32799792289734 + }, + "roundtrip": { + "p50": 258.432000875473, + "p90": 277.0879864692688, + "p95": 284.86400842666626, + "p99": 301.5359938144684 + }, + "isolatedSum": { + "p50": 263.7439966201782, + "p90": 279.6480022370815, + "p95": 286.75200790166855, + "p99": 319.2000091075897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 259.74398851394653, + "p90": 279.35999631881714, + "p95": 291.6800081729889, + 
"p99": 310.2720081806183 + }, + "combine": { + "p50": 57.82400071620941, + "p90": 60.83200126886368, + "p95": 63.48799914121628, + "p99": 68.9919963479042 + }, + "roundtrip": { + "p50": 306.94401264190674, + "p90": 322.1440017223358, + "p95": 326.33599638938904, + "p99": 339.55198526382446 + }, + "isolatedSum": { + "p50": 317.56798923015594, + "p90": 340.1919975876808, + "p95": 355.16800731420517, + "p99": 379.2640045285225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 259.48798656463623, + "p90": 272.8640139102936, + "p95": 278.56001257896423, + "p99": 292.959988117218 + }, + "combine": { + "p50": 65.69600105285645, + "p90": 70.14399766921997, + "p95": 71.61600142717361, + "p99": 76.09599828720093 + }, + "roundtrip": { + "p50": 314.1759932041168, + "p90": 324.16000962257385, + "p95": 329.21600341796875, + "p99": 348.9600121974945 + }, + "isolatedSum": { + "p50": 325.1839876174927, + "p90": 343.00801157951355, + "p95": 350.17601400613785, + "p99": 369.05598640441895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4726000", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b200_34150796", + "comparisonKey": "61936e4e799097b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:20:12.896487+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 166.01599752902985, + "p90": 179.23200130462646, + "p95": 184.06400084495544, + "p99": 187.96800076961517 + }, + "combine": { + "p50": 45.024000108242035, + "p90": 48.448000103235245, + "p95": 50.4320003092289, + "p99": 57.37600103020668 + }, + "roundtrip": { + "p50": 202.72000133991241, + 
"p90": 215.55200219154358, + "p95": 219.200000166893, + "p99": 224.2240011692047 + }, + "isolatedSum": { + "p50": 211.03999763727188, + "p90": 227.6800014078617, + "p95": 234.49600115418434, + "p99": 245.34400179982185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 166.72000288963318, + "p90": 181.2800019979477, + "p95": 186.11200153827667, + "p99": 195.23200392723083 + }, + "combine": { + "p50": 44.83199864625931, + "p90": 47.32799902558327, + "p95": 49.72799867391586, + "p99": 54.55999821424484 + }, + "roundtrip": { + "p50": 204.16000485420227, + "p90": 217.47200191020966, + "p95": 222.1439927816391, + "p99": 242.91199445724487 + }, + "isolatedSum": { + "p50": 211.5520015358925, + "p90": 228.60800102353096, + "p95": 235.84000021219254, + "p99": 249.79200214147568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 170.33599317073822, + "p90": 184.38400328159332, + "p95": 191.26400351524353, + "p99": 202.33599841594696 + }, + "combine": { + "p50": 48.70399832725525, + "p90": 52.960000932216644, + "p95": 54.816000163555145, + "p99": 60.256000608205795 + }, + "roundtrip": { + "p50": 211.58400177955627, + "p90": 235.9360009431839, + "p95": 249.53599274158478, + "p99": 297.1520125865936 + }, + "isolatedSum": { + "p50": 219.03999149799347, + "p90": 237.34400421380997, + "p95": 246.08000367879868, + "p99": 262.59199902415276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, 
+ "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 173.47200214862823, + "p90": 199.3280053138733, + "p95": 206.9759964942932, + "p99": 245.4719990491867 + }, + "combine": { + "p50": 48.86399954557419, + "p90": 54.23999950289726, + "p95": 56.384000927209854, + "p99": 60.47999858856201 + }, + "roundtrip": { + "p50": 212.19199895858765, + "p90": 227.7120053768158, + "p95": 233.21600258350372, + "p99": 243.3599978685379 + }, + "isolatedSum": { + "p50": 222.33600169420242, + "p90": 253.56800481677055, + "p95": 263.35999742150307, + "p99": 305.9519976377487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 184.83200669288635, + "p90": 194.62400674819946, + "p95": 197.7279931306839, + "p99": 206.14400506019592 + }, + "combine": { + "p50": 50.75199902057648, + "p90": 53.92000079154968, + "p95": 56.15999922156334, + "p99": 61.15199998021126 + }, + "roundtrip": { + "p50": 229.8240065574646, + "p90": 249.66399371623993, + "p95": 255.74401021003723, + "p99": 271.13598585128784 + }, + "isolatedSum": { + "p50": 235.58400571346283, + "p90": 248.54400753974915, + "p95": 253.88799235224724, + "p99": 267.2960050404072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 208.54400098323822, + "p90": 219.10400688648224, + "p95": 224.09600019454956, + "p99": 231.23200237751007 + }, + "combine": { + "p50": 54.11199852824211, + "p90": 56.92800134420395, + "p95": 
61.3120011985302, + "p99": 68.06399673223495 + }, + "roundtrip": { + "p50": 254.11200523376465, + "p90": 273.6319899559021, + "p95": 280.7359993457794, + "p99": 295.199990272522 + }, + "isolatedSum": { + "p50": 262.65599951148033, + "p90": 276.0320082306862, + "p95": 285.40800139307976, + "p99": 299.295999109745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 255.93599677085876, + "p90": 266.84799790382385, + "p95": 271.7120051383972, + "p99": 282.1759879589081 + }, + "combine": { + "p50": 56.703999638557434, + "p90": 59.7120001912117, + "p95": 61.08799949288368, + "p99": 67.07199662923813 + }, + "roundtrip": { + "p50": 304.51199412345886, + "p90": 315.10400772094727, + "p95": 318.36798787117004, + "p99": 324.16000962257385 + }, + "isolatedSum": { + "p50": 312.6399964094162, + "p90": 326.55999809503555, + "p95": 332.8000046312809, + "p99": 349.2479845881462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 262.0159983634949, + "p90": 294.1119968891144, + "p95": 306.36799335479736, + "p99": 329.5679986476898 + }, + "combine": { + "p50": 64.83200192451477, + "p90": 68.2239979505539, + "p95": 71.1359977722168, + "p99": 76.1599987745285 + }, + "roundtrip": { + "p50": 316.19200110435486, + "p90": 334.56000685691833, + "p95": 341.2800133228302, + "p99": 353.08799147605896 + }, + "isolatedSum": { + "p50": 326.84800028800964, + "p90": 362.3359948396683, + "p95": 377.50399112701416, + "p99": 405.7279974222183 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8c77ced6", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_3dc664cb", + "comparisonKey": "350d23e763161c4d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:17:02.922470+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 170.33599317073822, + "p90": 202.39999890327454, + "p95": 212.41599321365356, + "p99": 237.69600689411163 + }, + "combine": { + "p50": 45.9199994802475, + "p90": 51.29599943757057, + "p95": 55.64799904823303, + "p99": 62.30400130152702 + }, + "roundtrip": { + "p50": 203.5199999809265, + "p90": 230.6559979915619, + "p95": 236.12800240516663, + "p99": 246.17600440979004 + }, + "isolatedSum": { + "p50": 216.25599265098572, + "p90": 253.6959983408451, + "p95": 268.0639922618866, + "p99": 300.00000819563866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 167.84000396728516, + "p90": 195.96800208091736, + "p95": 201.92000269889832, + "p99": 208.639994263649 + }, + "combine": { + "p50": 45.72800174355507, + "p90": 52.191998809576035, + "p95": 57.28000029921532, + "p99": 62.20800057053566 + }, + "roundtrip": { + "p50": 204.76800203323364, + "p90": 230.94399273395538, + "p95": 236.92800104618073, + "p99": 243.45600605010986 + }, + "isolatedSum": { + "p50": 213.56800571084023, + "p90": 248.1600008904934, + "p95": 259.20000299811363, + "p99": 270.84799483418465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 170.56000232696533, + "p90": 200.8959949016571, + "p95": 211.64800226688385, + "p99": 240.48000574111938 + }, + "combine": { + "p50": 47.807998955249786, + "p90": 53.79199981689453, + "p95": 59.808000922203064, + "p99": 63.231997191905975 + }, + "roundtrip": { + "p50": 210.55999398231506, + "p90": 246.33599817752838, + "p95": 268.640011548996, + "p99": 293.40800642967224 + }, + "isolatedSum": { + "p50": 218.36800128221512, + "p90": 254.68799471855164, + "p95": 271.4560031890869, + "p99": 303.71200293302536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 171.74400389194489, + "p90": 198.30399751663208, + "p95": 203.80799472332, + "p99": 210.9760046005249 + }, + "combine": { + "p50": 47.90399968624115, + "p90": 52.191998809576035, + "p95": 59.007998555898666, + "p99": 63.26399743556976 + }, + "roundtrip": { + "p50": 210.65600216388702, + "p90": 239.29600417613983, + "p95": 244.6720004081726, + "p99": 275.2000093460083 + }, + "isolatedSum": { + "p50": 219.64800357818604, + "p90": 250.49599632620811, + "p95": 262.8159932792187, + "p99": 274.24000203609467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 180.54400384426117, + "p90": 203.19999754428864, + "p95": 210.11200547218323, + "p99": 220.19200026988983 + }, + "combine": { + "p50": 50.56000128388405, + "p90": 56.352000683546066, + "p95": 61.3120011985302, + "p99": 65.50399959087372 + }, + "roundtrip": { + "p50": 221.3120013475418, + "p90": 249.53599274158478, + 
"p95": 256.48000836372375, + "p99": 280.70399165153503 + }, + "isolatedSum": { + "p50": 231.10400512814522, + "p90": 259.5519982278347, + "p95": 271.4240066707134, + "p99": 285.69599986076355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 196.44799828529358, + "p90": 216.48000180721283, + "p95": 224.2880016565323, + "p99": 233.40800404548645 + }, + "combine": { + "p50": 53.119998425245285, + "p90": 57.50399827957153, + "p95": 62.97600269317627, + "p99": 69.34399902820587 + }, + "roundtrip": { + "p50": 238.8480007648468, + "p90": 262.81601190567017, + "p95": 270.1759934425354, + "p99": 287.6160144805908 + }, + "isolatedSum": { + "p50": 249.56799671053886, + "p90": 273.98400008678436, + "p95": 287.26400434970856, + "p99": 302.7520030736923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 224.12799298763275, + "p90": 247.871994972229, + "p95": 253.08799743652344, + "p99": 260.5760097503662 + }, + "combine": { + "p50": 56.09599873423576, + "p90": 63.29599767923355, + "p95": 66.72000139951706, + "p99": 78.14399898052216 + }, + "roundtrip": { + "p50": 273.0239927768707, + "p90": 294.3679988384247, + "p95": 299.8720109462738, + "p99": 309.02400612831116 + }, + "isolatedSum": { + "p50": 280.2239917218685, + "p90": 311.16799265146255, + "p95": 319.8079988360405, + "p99": 338.72000873088837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 226.04799270629883, + "p90": 249.95200335979462, + "p95": 255.840003490448, + "p99": 275.64799785614014 + }, + "combine": { + "p50": 65.34399837255478, + "p90": 72.1919983625412, + "p95": 76.4160007238388, + "p99": 82.8159973025322 + }, + "roundtrip": { + "p50": 287.23201155662537, + "p90": 312.00000643730164, + "p95": 317.6319897174835, + "p99": 324.99200105667114 + }, + "isolatedSum": { + "p50": 291.3919910788536, + "p90": 322.1440017223358, + "p95": 332.2560042142868, + "p99": 358.46399515867233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-78dac685", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b200_c584db11", + "comparisonKey": "450fbcdddf254c92", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:18:06.067902+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", 
+ "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 170.0800061225891, + "p90": 203.99999618530273, + "p95": 220.73599696159363, + "p99": 255.10400533676147 + }, + "combine": { + "p50": 43.55200007557869, + "p90": 49.75999891757965, + "p95": 53.119998425245285, + "p99": 59.23200026154518 + }, + "roundtrip": { + "p50": 206.33600652217865, + "p90": 228.60799729824066, + "p95": 235.9679937362671, + "p99": 246.5279996395111 + }, + "isolatedSum": { + "p50": 213.6320061981678, + "p90": 253.75999510288239, + "p95": 273.8559953868389, + "p99": 314.33600559830666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 172.70399630069733, + "p90": 193.92000138759613, + "p95": 199.26400482654572, + "p99": 210.94399690628052 + 
}, + "combine": { + "p50": 46.592000871896744, + "p90": 52.15999856591225, + "p95": 55.03999814391136, + "p99": 61.792001128196716 + }, + "roundtrip": { + "p50": 209.79200303554535, + "p90": 232.67200589179993, + "p95": 240.1919960975647, + "p99": 248.416006565094 + }, + "isolatedSum": { + "p50": 219.29599717259407, + "p90": 246.07999995350838, + "p95": 254.30400297045708, + "p99": 272.73599803447723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 171.51999473571777, + "p90": 199.45600628852844, + "p95": 204.76800203323364, + "p99": 212.0639979839325 + }, + "combine": { + "p50": 47.13600128889084, + "p90": 51.80799961090088, + "p95": 57.11999908089638, + "p99": 63.040003180503845 + }, + "roundtrip": { + "p50": 210.7519954442978, + "p90": 239.71199989318848, + "p95": 244.47999894618988, + "p99": 275.61599016189575 + }, + "isolatedSum": { + "p50": 218.6559960246086, + "p90": 251.26400589942932, + "p95": 261.88800111413, + "p99": 275.10400116443634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 173.0239987373352, + "p90": 200.32000541687012, + "p95": 209.02399718761444, + "p99": 230.97600042819977 + }, + "combine": { + "p50": 47.648001462221146, + "p90": 53.15199866890907, + "p95": 58.6559996008873, + "p99": 68.1919977068901 + }, + "roundtrip": { + "p50": 229.98400032520294, + "p90": 276.6079902648926, + "p95": 290.0800108909607, + "p99": 320.16000151634216 + }, + "isolatedSum": { + "p50": 220.67200019955635, + "p90": 253.4720040857792, + "p95": 
267.67999678850174, + "p99": 299.1679981350899 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 175.07199943065643, + "p90": 203.77600193023682, + "p95": 211.96800470352173, + "p99": 234.52800512313843 + }, + "combine": { + "p50": 47.520000487565994, + "p90": 53.599998354911804, + "p95": 57.920001447200775, + "p99": 63.1679967045784 + }, + "roundtrip": { + "p50": 214.84799683094025, + "p90": 238.65599930286407, + "p95": 245.40799856185913, + "p99": 258.62398743629456 + }, + "isolatedSum": { + "p50": 222.59199991822243, + "p90": 257.3760002851486, + "p95": 269.8880061507225, + "p99": 297.6960018277168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 183.29599499702454, + "p90": 203.93599569797516, + "p95": 210.4319930076599, + "p99": 219.39200162887573 + }, + "combine": { + "p50": 49.34399947524071, + "p90": 54.976001381874084, + "p95": 58.559998869895935, + "p99": 62.912002205848694 + }, + "roundtrip": { + "p50": 222.30400145053864, + "p90": 245.9840029478073, + "p95": 252.99200415611267, + "p99": 277.536004781723 + }, + "isolatedSum": { + "p50": 232.63999447226524, + "p90": 258.91199707984924, + "p95": 268.99199187755585, + "p99": 282.3040038347244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + 
"p50": 199.77599382400513, + "p90": 222.24000096321106, + "p95": 230.71999847888947, + "p99": 254.72000241279602 + }, + "combine": { + "p50": 53.75999957323074, + "p90": 62.6240000128746, + "p95": 65.8240020275116, + "p99": 72.83200323581696 + }, + "roundtrip": { + "p50": 244.1920042037964, + "p90": 268.5439884662628, + "p95": 278.11199426651, + "p99": 297.1520125865936 + }, + "isolatedSum": { + "p50": 253.53599339723587, + "p90": 284.86400097608566, + "p95": 296.54400050640106, + "p99": 327.552005648613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 200.06400346755981, + "p90": 215.96799790859222, + "p95": 220.57600319385529, + "p99": 231.51999711990356 + }, + "combine": { + "p50": 63.4239986538887, + "p90": 68.76800209283829, + "p95": 73.15199822187424, + "p99": 77.2159993648529 + }, + "roundtrip": { + "p50": 253.60000133514404, + "p90": 271.58400416374207, + "p95": 279.776006937027, + "p99": 291.1039888858795 + }, + "isolatedSum": { + "p50": 263.4880021214485, + "p90": 284.7360000014305, + "p95": 293.7280014157295, + "p99": 308.73599648475647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-07f67953", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b200_4e034ddd", + "comparisonKey": "daa3f39c070f02ee", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:27:32.596696+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 169.37600076198578, + "p90": 189.88800048828125, + "p95": 195.71200013160706, + "p99": 207.8080028295517 + }, + "combine": { + "p50": 45.85599899291992, + "p90": 49.375999718904495, + "p95": 
52.06400156021118, + "p99": 55.96800148487091 + }, + "roundtrip": { + "p50": 208.03199708461761, + "p90": 228.06400060653687, + "p95": 235.35999655723572, + "p99": 250.2720057964325 + }, + "isolatedSum": { + "p50": 215.2319997549057, + "p90": 239.26400020718575, + "p95": 247.77600169181824, + "p99": 263.7760043144226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 166.1120057106018, + "p90": 179.3919950723648, + "p95": 183.32800269126892, + "p99": 190.62399864196777 + }, + "combine": { + "p50": 47.58400097489357, + "p90": 58.079998940229416, + "p95": 96.57599776983261, + "p99": 252.70399451255798 + }, + "roundtrip": { + "p50": 208.92800390720367, + "p90": 244.83199417591095, + "p95": 253.37600708007812, + "p99": 265.24800062179565 + }, + "isolatedSum": { + "p50": 213.69600668549538, + "p90": 237.47199401259422, + "p95": 279.90400046110153, + "p99": 443.32799315452576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 166.87999665737152, + "p90": 179.4240027666092, + "p95": 182.81599879264832, + "p99": 191.6159987449646 + }, + "combine": { + "p50": 50.20799860358238, + "p90": 54.30399999022484, + "p95": 56.60799890756607, + "p99": 62.68800050020218 + }, + "roundtrip": { + "p50": 208.15999805927277, + "p90": 231.04000091552734, + "p95": 241.15200340747833, + "p99": 259.42400097846985 + }, + "isolatedSum": { + "p50": 217.0879952609539, + "p90": 233.72800275683403, + "p95": 239.4239977002144, + "p99": 254.30399924516678 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 175.07199943065643, + "p90": 194.59199905395508, + "p95": 203.90400290489197, + "p99": 223.7440049648285 + }, + "combine": { + "p50": 49.79199916124344, + "p90": 52.12799832224846, + "p95": 54.07999828457832, + "p99": 59.647999703884125 + }, + "roundtrip": { + "p50": 213.0880057811737, + "p90": 228.7359982728958, + "p95": 242.01600253582, + "p99": 257.34400749206543 + }, + "isolatedSum": { + "p50": 224.86399859189987, + "p90": 246.71999737620354, + "p95": 257.9840011894703, + "p99": 283.3920046687126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 188.6720061302185, + "p90": 202.81599462032318, + "p95": 207.20000565052032, + "p99": 215.71199595928192 + }, + "combine": { + "p50": 53.02400141954422, + "p90": 55.424001067876816, + "p95": 57.34400078654289, + "p99": 62.04799935221672 + }, + "roundtrip": { + "p50": 231.29600286483765, + "p90": 240.38399755954742, + "p95": 243.83999407291412, + "p99": 253.02401185035706 + }, + "isolatedSum": { + "p50": 241.69600754976273, + "p90": 258.2399956882, + "p95": 264.5440064370632, + "p99": 277.75999531149864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 209.56799387931824, + "p90": 225.47200322151184, + "p95": 234.0800017118454, + "p99": 
248.22400510311127 + }, + "combine": { + "p50": 56.992001831531525, + "p90": 60.864001512527466, + "p95": 63.13599646091461, + "p99": 68.41599941253662 + }, + "roundtrip": { + "p50": 255.13601303100586, + "p90": 266.9439911842346, + "p95": 271.64798974990845, + "p99": 281.8880081176758 + }, + "isolatedSum": { + "p50": 266.55999571084976, + "p90": 286.3360047340393, + "p95": 297.21599817276, + "p99": 316.6400045156479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 255.0080120563507, + "p90": 268.99200677871704, + "p95": 273.79199862480164, + "p99": 285.69599986076355 + }, + "combine": { + "p50": 56.543998420238495, + "p90": 59.13599953055382, + "p95": 60.70400029420853, + "p99": 66.20799750089645 + }, + "roundtrip": { + "p50": 303.23201417922974, + "p90": 318.1439936161041, + "p95": 323.87199997901917, + "p99": 337.76000142097473 + }, + "isolatedSum": { + "p50": 311.5520104765892, + "p90": 328.12800630927086, + "p95": 334.49599891901016, + "p99": 351.90399736166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 258.36798548698425, + "p90": 273.47201108932495, + "p95": 280.09599447250366, + "p99": 297.1520125865936 + }, + "combine": { + "p50": 64.03200328350067, + "p90": 67.1359971165657, + "p95": 70.01599669456482, + "p99": 75.29599964618683 + }, + "roundtrip": { + "p50": 315.39198756217957, + "p90": 326.911985874176, + "p95": 330.1439881324768, + "p99": 340.1919901371002 + }, + "isolatedSum": { + "p50": 322.3999887704849, + "p90": 
340.60800820589066, + "p95": 350.1119911670685, + "p99": 372.44801223278046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-22a3630c", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b200_3bee5acc", + "comparisonKey": "fb29ac16cfc72d7d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:23:18.584904+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 172.31999337673187, + "p90": 201.21599733829498, + "p95": 205.1199972629547, + "p99": 212.8639966249466 + }, + "combine": { + "p50": 47.16800153255463, + "p90": 52.06400156021118, + "p95": 58.01599845290184, + "p99": 62.84800171852112 + }, + "roundtrip": { + "p50": 215.90399742126465, + "p90": 247.55200743675232, + "p95": 261.59998774528503, + "p99": 289.34401273727417 + }, + "isolatedSum": { + "p50": 219.4879949092865, + "p90": 253.27999889850616, + "p95": 263.13599571585655, + "p99": 275.7119983434677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 172.63999581336975, + "p90": 201.85600221157074, + "p95": 207.35999941825867, + "p99": 213.98399770259857 + }, + "combine": { + "p50": 47.00800031423569, + "p90": 51.32799968123436, + "p95": 59.29600074887276, + "p99": 64.28799778223038 + }, + "roundtrip": { + "p50": 214.52799439430237, + "p90": 248.7040013074875, + "p95": 259.93600487709045, + "p99": 288.1920039653778 + }, + "isolatedSum": { + "p50": 219.64799612760544, + "p90": 253.1840018928051, + "p95": 266.6560001671314, + "p99": 278.27199548482895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 173.98400604724884, + "p90": 200.6720006465912, + "p95": 206.91199600696564, + "p99": 211.99999749660492 + }, + "combine": { + "p50": 50.464000552892685, + "p90": 53.95200103521347, + "p95": 61.824001371860504, + "p99": 67.55200028419495 + }, + "roundtrip": { + "p50": 216.41600131988525, + "p90": 243.80800127983093, + "p95": 250.5280077457428, + "p99": 260.99199056625366 + }, + "isolatedSum": { + "p50": 224.44800660014153, + "p90": 254.62400168180466, + "p95": 268.73599737882614, + "p99": 279.55199778079987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 181.40800297260284, + "p90": 209.34399962425232, + "p95": 218.59200298786163, + "p99": 244.9280023574829 + }, + "combine": { + "p50": 51.29599943757057, + "p90": 56.09599873423576, + "p95": 61.43999844789505, + "p99": 68.15999746322632 + }, + "roundtrip": { + "p50": 226.33600234985352, + "p90": 266.59199595451355, + "p95": 284.06399488449097, + "p99": 316.73601269721985 + }, + "isolatedSum": { + "p50": 232.70400241017342, + "p90": 265.4399983584881, + "p95": 280.0320014357567, + "p99": 313.08799982070923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 187.68000602722168, + "p90": 211.19999885559082, + "p95": 218.81599724292755, + "p99": 236.86400055885315 + }, + "combine": { + "p50": 53.98400127887726, + "p90": 59.36000123620033, + "p95": 63.77600133419037, + "p99": 
71.55200093984604 + }, + "roundtrip": { + "p50": 234.8800003528595, + "p90": 264.8319900035858, + "p95": 278.1760096549988, + "p99": 304.1920065879822 + }, + "isolatedSum": { + "p50": 241.66400730609894, + "p90": 270.56000009179115, + "p95": 282.5919985771179, + "p99": 308.4160014986992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 211.16800606250763, + "p90": 237.0239943265915, + "p95": 246.8159943819046, + "p99": 261.79200410842896 + }, + "combine": { + "p50": 56.352000683546066, + "p90": 62.912002205848694, + "p95": 68.64000111818314, + "p99": 74.36800003051758 + }, + "roundtrip": { + "p50": 257.34400749206543, + "p90": 284.86400842666626, + "p95": 293.69598627090454, + "p99": 318.11198592185974 + }, + "isolatedSum": { + "p50": 267.5200067460537, + "p90": 299.9359965324402, + "p95": 315.45599550008774, + "p99": 336.16000413894653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 254.33599948883057, + "p90": 275.7759988307953, + "p95": 283.55199098587036, + "p99": 295.48799991607666 + }, + "combine": { + "p50": 59.808000922203064, + "p90": 64.2239972949028, + "p95": 69.69600170850754, + "p99": 76.73600316047668 + }, + "roundtrip": { + "p50": 302.2080063819885, + "p90": 329.15198802948, + "p95": 333.69600772857666, + "p99": 341.12000465393066 + }, + "isolatedSum": { + "p50": 314.14400041103363, + "p90": 339.9999961256981, + "p95": 353.2479926943779, + "p99": 372.22400307655334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 254.46400046348572, + "p90": 274.7200131416321, + "p95": 283.03998708724976, + "p99": 294.65600848197937 + }, + "combine": { + "p50": 70.11199742555618, + "p90": 76.86399668455124, + "p95": 84.09599959850311, + "p99": 88.95999938249588 + }, + "roundtrip": { + "p50": 311.42398715019226, + "p90": 327.5519907474518, + "p95": 334.49599146842957, + "p99": 352.80001163482666 + }, + "isolatedSum": { + "p50": 324.5759978890419, + "p90": 351.5840098261833, + "p95": 367.13598668575287, + "p99": 383.61600786447525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bbe85c54", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b200_e9c19ae2", + "comparisonKey": "0ff6c53ee1db3f17", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:23:48.341625+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": 
"zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 169.18399930000305, + "p90": 184.83200669288635, + "p95": 189.7599995136261, + "p99": 204.96000349521637 + }, + "combine": { + "p50": 47.93599992990494, + "p90": 52.000001072883606, + "p95": 56.44800141453743, + "p99": 63.040003180503845 + }, + "roundtrip": { + "p50": 206.2399983406067, + "p90": 224.38399493694305, + "p95": 230.3999960422516, + "p99": 244.7039932012558 + }, + "isolatedSum": { + "p50": 217.119999229908, + "p90": 236.83200776576996, + "p95": 246.20800092816353, + "p99": 268.0000066757202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 169.855996966362, + "p90": 199.8720020055771, + "p95": 211.61599457263947, + "p99": 236.60799860954285 + }, + "combine": { + "p50": 45.56800052523613, + "p90": 47.648001462221146, + "p95": 49.79199916124344, + "p99": 54.84800040721893 + }, + "roundtrip": { + "p50": 205.4080069065094, + "p90": 218.20800006389618, + "p95": 222.4320024251938, + "p99": 227.39200294017792 + }, + "isolatedSum": { + "p50": 215.42399749159813, + "p90": 247.52000346779823, + "p95": 261.4079937338829, + "p99": 291.4559990167618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 169.50400173664093, + "p90": 186.46399676799774, + "p95": 192.6400065422058, + "p99": 204.0639966726303 + }, + "combine": { + "p50": 49.984000623226166, + "p90": 53.82400006055832, + "p95": 58.33600088953972, + "p99": 66.49599969387054 + }, + "roundtrip": { + "p50": 207.61600136756897, + "p90": 219.7120040655136, + "p95": 224.99200701713562, + "p99": 232.03200101852417 + }, + "isolatedSum": { + "p50": 219.4880023598671, + "p90": 240.28799682855606, + "p95": 250.97600743174553, + "p99": 270.55999636650085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 175.20000040531158, + "p90": 193.7599927186966, + "p95": 205.37599921226501, + "p99": 248.1279969215393 + }, + "combine": { + "p50": 49.72799867391586, + "p90": 53.119998425245285, + "p95": 55.80800026655197, + "p99": 63.10400366783142 + }, + "roundtrip": { + "p50": 214.36800062656403, + "p90": 
226.97600722312927, + "p95": 231.00799322128296, + "p99": 241.43999814987183 + }, + "isolatedSum": { + "p50": 224.92799907922745, + "p90": 246.87999114394188, + "p95": 261.183999478817, + "p99": 311.2320005893707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 186.0799938440323, + "p90": 195.68000733852386, + "p95": 199.52000677585602, + "p99": 205.59999346733093 + }, + "combine": { + "p50": 54.336000233888626, + "p90": 58.079998940229416, + "p95": 61.184000223875046, + "p99": 65.08799642324448 + }, + "roundtrip": { + "p50": 228.92799973487854, + "p90": 236.54399812221527, + "p95": 239.19999599456787, + "p99": 243.83999407291412 + }, + "isolatedSum": { + "p50": 240.4159940779209, + "p90": 253.76000627875328, + "p95": 260.70400699973106, + "p99": 270.6879898905754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 212.09600567817688, + "p90": 233.43999683856964, + "p95": 239.77600038051605, + "p99": 258.11201333999634 + }, + "combine": { + "p50": 55.296000093221664, + "p90": 57.88800120353699, + "p95": 59.39200147986412, + "p99": 65.08799642324448 + }, + "roundtrip": { + "p50": 256.51198625564575, + "p90": 266.27200841903687, + "p95": 269.98400688171387, + "p99": 279.2640030384064 + }, + "isolatedSum": { + "p50": 267.39200577139854, + "p90": 291.3279980421066, + "p95": 299.1680018603802, + "p99": 323.2000097632408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + 
"recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 255.39198517799377, + "p90": 267.07199215888977, + "p95": 273.1199860572815, + "p99": 280.67201375961304 + }, + "combine": { + "p50": 57.28000029921532, + "p90": 59.87200140953064, + "p95": 61.85600161552429, + "p99": 67.9360032081604 + }, + "roundtrip": { + "p50": 305.4719865322113, + "p90": 316.895991563797, + "p95": 320.95998525619507, + "p99": 332.19200372695923 + }, + "isolatedSum": { + "p50": 312.6719854772091, + "p90": 326.9439935684204, + "p95": 334.9759876728058, + "p99": 348.60801696777344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 260.8959972858429, + "p90": 279.55201268196106, + "p95": 284.4800055027008, + "p99": 297.5679934024811 + }, + "combine": { + "p50": 64.7680014371872, + "p90": 68.51200014352798, + "p95": 71.26399874687195, + "p99": 77.72800326347351 + }, + "roundtrip": { + "p50": 313.1200075149536, + "p90": 321.50399684906006, + "p95": 323.87199997901917, + "p99": 329.75998520851135 + }, + "isolatedSum": { + "p50": 325.6639987230301, + "p90": 348.06401282548904, + "p95": 355.74400424957275, + "p99": 375.2959966659546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b67d7b58", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_2e7864eb", + "comparisonKey": 
"aba149935476836e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:25:25.385480+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 183.00800025463104, + "p90": 216.5440022945404, 
+ "p95": 227.23199427127838, + "p99": 245.4719990491867 + }, + "combine": { + "p50": 46.36799916625023, + "p90": 51.7439991235733, + "p95": 57.34400078654289, + "p99": 61.824001371860504 + }, + "roundtrip": { + "p50": 211.0079973936081, + "p90": 234.0800017118454, + "p95": 244.00000274181366, + "p99": 272.41599559783936 + }, + "isolatedSum": { + "p50": 229.37599942088127, + "p90": 268.2880014181137, + "p95": 284.5759950578213, + "p99": 307.2960004210472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 172.19200730323792, + "p90": 192.9599940776825, + "p95": 196.19199633598328, + "p99": 202.36800611019135 + }, + "combine": { + "p50": 46.78399860858917, + "p90": 51.4880008995533, + "p95": 56.992001831531525, + "p99": 62.24000081419945 + }, + "roundtrip": { + "p50": 211.93599700927734, + "p90": 230.335995554924, + "p95": 235.10399460792542, + "p99": 241.40800535678864 + }, + "isolatedSum": { + "p50": 218.9760059118271, + "p90": 244.4479949772358, + "p95": 253.1839981675148, + "p99": 264.6080069243908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 174.14399981498718, + "p90": 199.90399479866028, + "p95": 205.59999346733093, + "p99": 230.30400276184082 + }, + "combine": { + "p50": 48.41599985957146, + "p90": 50.6879985332489, + "p95": 59.67999994754791, + "p99": 64.28799778223038 + }, + "roundtrip": { + "p50": 217.24799275398254, + "p90": 246.94399535655975, + "p95": 256.6719949245453, + "p99": 277.2800028324127 + }, + "isolatedSum": { + "p50": 222.55999967455864, + 
"p90": 250.59199333190918, + "p95": 265.27999341487885, + "p99": 294.5920005440712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 183.23199450969696, + "p90": 214.36800062656403, + "p95": 228.44800353050232, + "p99": 246.3040053844452 + }, + "combine": { + "p50": 48.448000103235245, + "p90": 52.12799832224846, + "p95": 59.36000123620033, + "p99": 64.19199705123901 + }, + "roundtrip": { + "p50": 217.3759937286377, + "p90": 249.15200471878052, + "p95": 262.4320089817047, + "p99": 280.7680070400238 + }, + "isolatedSum": { + "p50": 231.6799946129322, + "p90": 266.4959989488125, + "p95": 287.80800476670265, + "p99": 310.4960024356842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 182.559996843338, + "p90": 201.02399587631226, + "p95": 207.58399367332458, + "p99": 212.47999370098114 + }, + "combine": { + "p50": 50.016000866889954, + "p90": 55.1999993622303, + "p95": 62.30400130152702, + "p99": 68.57600063085556 + }, + "roundtrip": { + "p50": 224.7679978609085, + "p90": 247.871994972229, + "p95": 256.7040026187897, + "p99": 282.8480005264282 + }, + "isolatedSum": { + "p50": 232.57599771022797, + "p90": 256.22399523854256, + "p95": 269.8879949748516, + "p99": 281.0559943318367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + 
"dispatch": { + "p50": 199.8080015182495, + "p90": 221.0559993982315, + "p95": 229.91999983787537, + "p99": 240.06399512290955 + }, + "combine": { + "p50": 54.687999188899994, + "p90": 58.30400064587593, + "p95": 62.463998794555664, + "p99": 66.39999896287918 + }, + "roundtrip": { + "p50": 242.71999299526215, + "p90": 254.7520101070404, + "p95": 261.02399826049805, + "p99": 268.7680125236511 + }, + "isolatedSum": { + "p50": 254.4960007071495, + "p90": 279.36000004410744, + "p95": 292.38399863243103, + "p99": 306.4639940857887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 241.2479966878891, + "p90": 328.6080062389374, + "p95": 341.66398644447327, + "p99": 372.79999256134033 + }, + "combine": { + "p50": 59.328000992536545, + "p90": 65.69600105285645, + "p95": 69.37599927186966, + "p99": 75.32799988985062 + }, + "roundtrip": { + "p50": 276.2880027294159, + "p90": 297.63200879096985, + "p95": 302.68800258636475, + "p99": 336.9919955730438 + }, + "isolatedSum": { + "p50": 300.57599768042564, + "p90": 394.3040072917938, + "p95": 411.0399857163429, + "p99": 448.12799245119095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 228.2560020685196, + "p90": 241.02400243282318, + "p95": 244.06400322914124, + "p99": 251.93598866462708 + }, + "combine": { + "p50": 65.08799642324448, + "p90": 71.3919997215271, + "p95": 77.88799703121185, + "p99": 82.49600231647491 + }, + "roundtrip": { + "p50": 289.2799973487854, + "p90": 306.5600097179413, + "p95": 
310.94399094581604, + "p99": 319.0079927444458 + }, + "isolatedSum": { + "p50": 293.34399849176407, + "p90": 312.4160021543503, + "p95": 321.9520002603531, + "p99": 334.431990981102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-049e0760", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_1239b153", + "comparisonKey": "ab28838d2a72dc6d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:25:55.413561+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 173.08799922466278, + "p90": 208.76799523830414, + "p95": 227.77600586414337, + "p99": 251.0719895362854 + }, + "combine": { + "p50": 47.58400097489357, + "p90": 53.408000618219376, + "p95": 59.10399928689003, + "p99": 65.08799642324448 + }, + "roundtrip": { + "p50": 218.9120054244995, + "p90": 249.05599653720856, + "p95": 261.56800985336304, + "p99": 285.2799892425537 + }, + "isolatedSum": { + "p50": 220.67200019955635, + "p90": 262.1759958565235, + "p95": 286.8800051510334, + "p99": 316.1599859595299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 173.12000691890717, + "p90": 204.44799959659576, + "p95": 214.36800062656403, + "p99": 239.9040013551712 + }, + "combine": { + "p50": 48.73599857091904, + "p90": 54.816000163555145, + "p95": 61.344001442193985, + "p99": 77.504001557827 + }, + "roundtrip": { + "p50": 208.73600244522095, + "p90": 237.0239943265915, + "p95": 241.88800156116486, + "p99": 250.2399981021881 + }, + "isolatedSum": { + "p50": 221.8560054898262, + "p90": 259.2639997601509, + "p95": 275.712002068758, + "p99": 317.4080029129982 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 172.8000044822693, + "p90": 200.54399967193604, + "p95": 207.8080028295517, + "p99": 216.3199931383133 + }, + "combine": { + "p50": 50.65599828958511, + "p90": 55.23199960589409, + "p95": 61.055999249219894, + "p99": 66.11199676990509 + }, + "roundtrip": { + "p50": 210.7519954442978, + "p90": 235.45600473880768, + "p95": 239.99999463558197, + "p99": 248.1279969215393 + }, + "isolatedSum": { + "p50": 223.4560027718544, + "p90": 255.77599927783012, + "p95": 268.8640020787716, + "p99": 282.4319899082184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 177.279993891716, + "p90": 208.54400098323822, + "p95": 213.76000344753265, + "p99": 235.61599850654602 + }, + "combine": { + "p50": 51.711998879909515, + "p90": 57.760000228881836, + "p95": 61.02399900555611, + "p99": 70.20799815654755 + }, + "roundtrip": { + "p50": 222.59199619293213, + "p90": 255.264014005661, + "p95": 268.0000066757202, + "p99": 291.8719947338104 + }, + "isolatedSum": { + "p50": 228.99199277162552, + "p90": 266.30400121212006, + "p95": 274.78400245308876, + "p99": 305.82399666309357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 190.0160014629364, + "p90": 211.67999505996704, + "p95": 
218.87999773025513, + "p99": 226.1119931936264 + }, + "combine": { + "p50": 54.46400120854378, + "p90": 58.46399813890457, + "p95": 65.37599861621857, + "p99": 70.65600156784058 + }, + "roundtrip": { + "p50": 233.3119958639145, + "p90": 257.6960027217865, + "p95": 268.3199942111969, + "p99": 288.06400299072266 + }, + "isolatedSum": { + "p50": 244.48000267148018, + "p90": 270.1439931988716, + "p95": 284.2559963464737, + "p99": 296.767994761467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 213.44000101089478, + "p90": 233.72800648212433, + "p95": 242.2720044851303, + "p99": 255.16799092292786 + }, + "combine": { + "p50": 55.80800026655197, + "p90": 59.58399921655655, + "p95": 63.840001821517944, + "p99": 69.08799707889557 + }, + "roundtrip": { + "p50": 258.4640085697174, + "p90": 280.0320088863373, + "p95": 287.3600125312805, + "p99": 295.83999514579773 + }, + "isolatedSum": { + "p50": 269.24800127744675, + "p90": 293.3120056986809, + "p95": 306.11200630664825, + "p99": 324.2559880018234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 257.4720084667206, + "p90": 277.1199941635132, + "p95": 285.98400950431824, + "p99": 297.34399914741516 + }, + "combine": { + "p50": 60.47999858856201, + "p90": 67.52000004053116, + "p95": 71.9359964132309, + "p99": 80.32000064849854 + }, + "roundtrip": { + "p50": 308.4160089492798, + "p90": 334.1119885444641, + "p95": 340.06398916244507, + "p99": 359.0719997882843 + }, + "isolatedSum": { + "p50": 
317.9520070552826, + "p90": 344.63999420404434, + "p95": 357.92000591754913, + "p99": 377.6639997959137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 264.5759880542755, + "p90": 286.17599606513977, + "p95": 292.03200340270996, + "p99": 301.31199955940247 + }, + "combine": { + "p50": 64.31999802589417, + "p90": 71.1359977722168, + "p95": 74.62400197982788, + "p99": 82.68799632787704 + }, + "roundtrip": { + "p50": 319.42400336265564, + "p90": 339.6799862384796, + "p95": 345.5680012702942, + "p99": 369.1520094871521 + }, + "isolatedSum": { + "p50": 328.8959860801697, + "p90": 357.31199383735657, + "p95": 366.65600538253784, + "p99": 383.9999958872795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-600800a0", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_b6e83fb3", + "comparisonKey": "3676369607fe7a20", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:22:15.549787+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": 
"DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 167.35999286174774, + "p90": 184.57600474357605, + "p95": 193.92000138759613, + "p99": 214.39999341964722 + }, + "combine": { + "p50": 45.85599899291992, + "p90": 49.02400076389313, + "p95": 51.29599943757057, + "p99": 58.88000130653381 + }, + "roundtrip": { + "p50": 204.16000485420227, + "p90": 218.20800006389618, + "p95": 223.58399629592896, + "p99": 268.67198944091797 + }, + "isolatedSum": { + "p50": 213.21599185466766, + "p90": 233.60000550746918, + "p95": 245.2160008251667, + "p99": 273.27999472618103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 
5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 168.96000504493713, + "p90": 186.27199530601501, + "p95": 191.77600741386414, + "p99": 208.70399475097656 + }, + "combine": { + "p50": 46.23999819159508, + "p90": 49.82399940490723, + "p95": 51.16799846291542, + "p99": 59.007998555898666 + }, + "roundtrip": { + "p50": 204.67199385166168, + "p90": 219.200000166893, + "p95": 225.3440022468567, + "p99": 237.0239943265915 + }, + "isolatedSum": { + "p50": 215.2000032365322, + "p90": 236.09599471092224, + "p95": 242.94400587677956, + "p99": 267.71199330687523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 168.47999393939972, + "p90": 183.61599743366241, + "p95": 188.9919936656952, + "p99": 243.42399835586548 + }, + "combine": { + "p50": 49.60000142455101, + "p90": 51.711998879909515, + "p95": 52.480001002550125, + "p99": 58.46399813890457 + }, + "roundtrip": { + "p50": 208.19200575351715, + "p90": 222.78399765491486, + "p95": 227.743998169899, + "p99": 240.83200097084045 + }, + "isolatedSum": { + "p50": 218.07999536395073, + "p90": 235.32799631357193, + "p95": 241.47199466824532, + "p99": 301.88799649477005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 173.66400361061096, + "p90": 198.55999946594238, + "p95": 209.3760073184967, + "p99": 232.83199965953827 + }, + "combine": { + "p50": 49.95200037956238, + "p90": 
52.928000688552856, + "p95": 54.91200089454651, + "p99": 61.792001128196716 + }, + "roundtrip": { + "p50": 213.4079933166504, + "p90": 226.17599368095398, + "p95": 230.52799701690674, + "p99": 237.2480034828186 + }, + "isolatedSum": { + "p50": 223.61600399017334, + "p90": 251.48800015449524, + "p95": 264.2880082130432, + "p99": 294.624000787735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 188.83199989795685, + "p90": 202.55999267101288, + "p95": 208.95999670028687, + "p99": 241.72799289226532 + }, + "combine": { + "p50": 54.27199974656105, + "p90": 56.96000158786774, + "p95": 60.19200012087822, + "p99": 67.4239993095398 + }, + "roundtrip": { + "p50": 228.70400547981262, + "p90": 240.51199853420258, + "p95": 247.48800694942474, + "p99": 272.352010011673 + }, + "isolatedSum": { + "p50": 243.1039996445179, + "p90": 259.5199942588806, + "p95": 269.1519968211651, + "p99": 309.1519922018051 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 210.65600216388702, + "p90": 228.35199534893036, + "p95": 233.50399732589722, + "p99": 251.96799635887146 + }, + "combine": { + "p50": 55.615998804569244, + "p90": 57.920001447200775, + "p95": 60.28800085186958, + "p99": 66.78400188684464 + }, + "roundtrip": { + "p50": 255.45600056648254, + "p90": 266.11199975013733, + "p95": 269.6959972381592, + "p99": 276.2559950351715 + }, + "isolatedSum": { + "p50": 266.27200096845627, + "p90": 286.27199679613113, + "p95": 293.7919981777668, + "p99": 318.7519982457161 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 255.39198517799377, + "p90": 269.50401067733765, + "p95": 273.98398518562317, + "p99": 285.8240008354187 + }, + "combine": { + "p50": 58.62399935722351, + "p90": 63.13599646091461, + "p95": 64.99200314283371, + "p99": 71.00799679756165 + }, + "roundtrip": { + "p50": 303.0720055103302, + "p90": 317.82400608062744, + "p95": 324.70399141311646, + "p99": 364.44801092147827 + }, + "isolatedSum": { + "p50": 314.0159845352173, + "p90": 332.64000713825226, + "p95": 338.9759883284569, + "p99": 356.83199763298035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 260.0319981575012, + "p90": 278.75199913978577, + "p95": 285.47200560569763, + "p99": 296.640008687973 + }, + "combine": { + "p50": 63.680000603199005, + "p90": 67.07199662923813, + "p95": 69.69600170850754, + "p99": 77.2479996085167 + }, + "roundtrip": { + "p50": 312.8960132598877, + "p90": 321.5999901294708, + "p95": 324.0639865398407, + "p99": 341.3439989089966 + }, + "isolatedSum": { + "p50": 323.7119987607002, + "p90": 345.8239957690239, + "p95": 355.16800731420517, + "p99": 373.8880082964897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-010e499b", + "identity": 
"b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_b5ece6b8", + "comparisonKey": "3bc793e026dca048", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:14:32.853009+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 259.16799902915955, + "p90": 275.35998821258545, + "p95": 281.15200996398926, + "p99": 290.75199365615845 + }, + "combine": { + "p50": 63.93600255250931, + "p90": 67.64800101518631, + "p95": 71.10399752855301, + "p99": 76.83199644088745 + }, + "roundtrip": { + "p50": 314.6879971027374, + "p90": 327.7440071105957, + "p95": 335.2319896221161, + "p99": 348.60798716545105 + }, + "isolatedSum": { + "p50": 323.10400158166885, + "p90": 343.00798922777176, + "p95": 352.25600749254227, + "p99": 367.5839900970459 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 265.4719948768616, + "p90": 281.2480032444, + "p95": 288.7040078639984, + "p99": 297.66398668289185 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 85.34400165081024, + "p95": 89.9839997291565, + "p99": 94.40000355243683 + }, + "roundtrip": { + "p50": 339.6480083465576, + "p90": 356.4479947090149, + "p95": 362.11198568344116, + "p99": 372.51201272010803 + }, + "isolatedSum": { + "p50": 346.52799367904663, + "p90": 366.59200489521027, + "p95": 378.6880075931549, + "p99": 392.0639902353287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 276.0320007801056, + "p90": 292.9919958114624, + "p95": 299.96800422668457, + "p99": 311.5839958190918 + }, + "combine": { + "p50": 113.6000007390976, + "p90": 116.99199676513672, + "p95": 118.30399930477142, 
+ "p99": 124.92799758911133 + }, + "roundtrip": { + "p50": 384.3199908733368, + "p90": 399.83999729156494, + "p95": 407.48798847198486, + "p99": 423.45601320266724 + }, + "isolatedSum": { + "p50": 389.6320015192032, + "p90": 409.9839925765991, + "p95": 418.272003531456, + "p99": 436.5119934082031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 336.2559974193573, + "p90": 355.29598593711853, + "p95": 363.072007894516, + "p99": 374.62401390075684 + }, + "combine": { + "p50": 195.74399292469025, + "p90": 199.77599382400513, + "p95": 201.47199928760529, + "p99": 206.14400506019592 + }, + "roundtrip": { + "p50": 526.8800258636475, + "p90": 539.8719906806946, + "p95": 546.5919971466064, + "p99": 560.1599812507629 + }, + "isolatedSum": { + "p50": 531.9999903440475, + "p90": 555.0719797611237, + "p95": 564.5440071821213, + "p99": 580.7680189609528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 506.6239833831787, + "p90": 518.0479884147644, + "p95": 524.9279737472534, + "p99": 536.3199710845947 + }, + "combine": { + "p50": 327.1679878234863, + "p90": 330.24001121520996, + "p95": 331.10401034355164, + "p99": 335.90400218963623 + }, + "roundtrip": { + "p50": 831.7440152168274, + "p90": 842.848002910614, + "p95": 855.4239869117737, + "p99": 866.4640188217163 + }, + "isolatedSum": { + "p50": 833.791971206665, + "p90": 848.2879996299744, + "p95": 856.031984090805, + "p99": 872.223973274231 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 776.095986366272, + "p90": 793.0880188941956, + "p95": 798.0800271034241, + "p99": 813.759982585907 + }, + "combine": { + "p50": 606.9440245628357, + "p90": 611.9359731674194, + "p95": 613.9839887619019, + "p99": 618.0160045623779 + }, + "roundtrip": { + "p50": 1387.9040479660034, + "p90": 1433.4720373153687, + "p95": 1457.3760032653809, + "p99": 1541.9199466705322 + }, + "isolatedSum": { + "p50": 1383.0400109291077, + "p90": 1405.023992061615, + "p95": 1412.064015865326, + "p99": 1431.775987148285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-827c87a6", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b200_0d404980", + "comparisonKey": "4e686cf290e46cc6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:15:37.462042+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + 
"routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 299.6160089969635, + "p90": 324.288010597229, + "p95": 329.02398705482483, + "p99": 347.5840091705322 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 78.46400141716003, + "p95": 84.48000252246857, + "p99": 91.80799871683121 + }, + "roundtrip": { + "p50": 366.11199378967285, + "p90": 388.7360095977783, + "p95": 393.0880129337311, + "p99": 401.18399262428284 + }, + "isolatedSum": { + "p50": 371.2320104241371, + "p90": 402.75201201438904, + "p95": 413.5039895772934, + "p99": 439.39200788736343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 305.82401156425476, + "p90": 333.3440124988556, + "p95": 339.83999490737915, + "p99": 348.35198521614075 + }, + "combine": { + "p50": 92.99200028181076, + "p90": 99.04000163078308, + "p95": 102.7199998497963, + "p99": 109.0560033917427 + }, + "roundtrip": { + "p50": 397.599995136261, + "p90": 420.73601484298706, + "p95": 427.8720021247864, + "p99": 448.60801100730896 + }, + "isolatedSum": { + "p50": 398.8160118460655, + "p90": 432.3840141296387, + "p95": 442.55999475717545, + "p99": 457.40798860788345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 315.96800684928894, + "p90": 329.47200536727905, + "p95": 337.2800052165985, + "p99": 345.91999650001526 + }, + "combine": { + "p50": 151.36000514030457, + "p90": 157.53600001335144, + "p95": 161.6320013999939, + "p99": 166.143998503685 + }, + "roundtrip": { + "p50": 467.0720100402832, + "p90": 487.67998814582825, + "p95": 494.3679869174957, + "p99": 513.2160186767578 + }, + "isolatedSum": { + "p50": 467.3280119895935, + "p90": 487.0080053806305, + "p95": 498.9120066165924, + "p99": 512.0639950037003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 411.3599956035614, + "p90": 428.6400079727173, + "p95": 435.58400869369507, + "p99": 444.3199932575226 + }, + "combine": { + "p50": 259.13599133491516, + "p90": 266.33599400520325, + "p95": 269.8880136013031, + "p99": 275.4240036010742 + }, + "roundtrip": { + "p50": 669.6640253067017, + 
"p90": 684.1920018196106, + "p95": 691.2000179290771, + "p99": 703.9999961853027 + }, + "isolatedSum": { + "p50": 670.4959869384766, + "p90": 694.9760019779205, + "p95": 705.4720222949982, + "p99": 719.7439968585968 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 671.999990940094, + "p90": 686.303973197937, + "p95": 693.9200162887573, + "p99": 707.3280215263367 + }, + "combine": { + "p50": 467.0400023460388, + "p90": 474.047988653183, + "p95": 477.6960015296936, + "p99": 482.14399814605713 + }, + "roundtrip": { + "p50": 1136.3840103149414, + "p90": 1154.3999910354614, + "p95": 1159.9680185317993, + "p99": 1170.1760292053223 + }, + "isolatedSum": { + "p50": 1139.0399932861328, + "p90": 1160.35196185112, + "p95": 1171.616017818451, + "p99": 1189.4720196723938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1111.2639904022217, + "p90": 1138.5600566864014, + "p95": 1160.0960493087769, + "p99": 1222.048044204712 + }, + "combine": { + "p50": 880.6080222129822, + "p90": 885.7920169830322, + "p95": 889.1199827194214, + "p99": 895.8399891853333 + }, + "roundtrip": { + "p50": 1990.3359413146973, + "p90": 2017.6639556884766, + "p95": 2029.088020324707, + "p99": 2058.079957962036 + }, + "isolatedSum": { + "p50": 1991.8720126152039, + "p90": 2024.3520736694336, + "p95": 2049.2160320281982, + "p99": 2117.888033390045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + 
"recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a54e70ac", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b200_e73eae63", + "comparisonKey": "188edbc7899f83eb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:16:33.152371+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 184.64000523090363, + "p90": 195.2960044145584, + "p95": 200.80000162124634, + "p99": 211.4879935979843 + }, + "combine": { + "p50": 48.576001077890396, + "p90": 50.944000482559204, + "p95": 52.352000027894974, + "p99": 58.6559996008873 + }, + "roundtrip": { + "p50": 225.21600127220154, + "p90": 235.71200668811798, + "p95": 239.6479994058609, + "p99": 244.76799368858337 + }, + "isolatedSum": { + "p50": 233.21600630879402, + "p90": 246.24000489711761, + "p95": 253.1520016491413, + "p99": 270.1439931988716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 186.3359957933426, + "p90": 196.22400403022766, + "p95": 199.64799284934998, + "p99": 206.496000289917 + }, + "combine": { + "p50": 59.167999774217606, + "p90": 65.11999666690826, + "p95": 70.68800181150436, + "p99": 76.57600194215775 + }, + "roundtrip": { + "p50": 238.0799949169159, + "p90": 249.05599653720856, + "p95": 252.54398584365845, + "p99": 260.96001267433167 + }, + "isolatedSum": { + "p50": 245.5039955675602, + "p90": 261.3440006971359, + "p95": 270.33599466085434, + "p99": 283.07200223207474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 245.02399563789368, + "p90": 
254.30399179458618, + "p95": 257.79199600219727, + "p99": 264.0959918498993 + }, + "combine": { + "p50": 106.33599758148193, + "p90": 109.76000130176544, + "p95": 110.52799969911575, + "p99": 116.12799763679504 + }, + "roundtrip": { + "p50": 346.6559946537018, + "p90": 359.71200466156006, + "p95": 369.31198835372925, + "p99": 382.9439878463745 + }, + "isolatedSum": { + "p50": 351.3599932193756, + "p90": 364.0639930963516, + "p95": 368.319995701313, + "p99": 380.22398948669434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c819d17d", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b200_dbc465ce", + "comparisonKey": "b5b67ac5eb805c3d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:21:45.888363+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": 
null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 199.39200580120087, + "p90": 218.36799383163452, + "p95": 227.4239957332611, + "p99": 241.02400243282318 + }, + "combine": { + "p50": 52.5440014898777, + "p90": 57.21599981188774, + "p95": 64.54399973154068, + "p99": 68.25599819421768 + }, + "roundtrip": { + "p50": 245.12000381946564, + "p90": 267.93599128723145, + "p95": 274.0800082683563, + "p99": 283.29598903656006 + }, + "isolatedSum": { + "p50": 251.93600729107857, + "p90": 275.58399364352226, + "p95": 291.9679954648018, + "p99": 309.28000062704086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 201.92000269889832, + "p90": 225.5679965019226, + "p95": 232.89600014686584, + "p99": 239.32799696922302 + }, + "combine": { + "p50": 60.06399914622307, + "p90": 67.391999065876, + "p95": 71.87200337648392, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 
258.6880028247833, + "p90": 287.1359884738922, + "p95": 295.1039969921112, + "p99": 323.199987411499 + }, + "isolatedSum": { + "p50": 261.9840018451214, + "p90": 292.9599955677986, + "p95": 304.76800352334976, + "p99": 319.13599371910095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.08000457286835, + "p90": 232.83199965953827, + "p95": 239.99999463558197, + "p99": 253.28001379966736 + }, + "combine": { + "p50": 73.79200309515, + "p90": 80.1599994301796, + "p95": 84.16000008583069, + "p99": 92.12800115346909 + }, + "roundtrip": { + "p50": 274.01599287986755, + "p90": 297.21599817276, + "p95": 305.2479922771454, + "p99": 319.2000091075897 + }, + "isolatedSum": { + "p50": 279.87200766801834, + "p90": 312.99199908971786, + "p95": 324.15999472141266, + "p99": 345.40801495313644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 238.0480021238327, + "p90": 263.35999369621277, + "p95": 269.8560059070587, + "p99": 300.4800081253052 + }, + "combine": { + "p50": 111.7440015077591, + "p90": 115.84000289440155, + "p95": 120.35199999809265, + "p99": 126.43200159072876 + }, + "roundtrip": { + "p50": 336.9919955730438, + "p90": 356.7039966583252, + "p95": 363.45601081848145, + "p99": 371.5519905090332 + }, + "isolatedSum": { + "p50": 349.7920036315918, + "p90": 379.1999965906143, + "p95": 390.20800590515137, + "p99": 426.91200971603394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + 
"recvTokensMax": 3072, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 335.29600501060486, + "p90": 355.3600013256073, + "p95": 362.2399866580963, + "p99": 372.0000088214874 + }, + "combine": { + "p50": 183.67999792099, + "p90": 189.34400379657745, + "p95": 191.96799397468567, + "p99": 197.28000462055206 + }, + "roundtrip": { + "p50": 517.5999999046326, + "p90": 531.8400263786316, + "p95": 537.3119711875916, + "p99": 551.8720149993896 + }, + "isolatedSum": { + "p50": 518.9760029315948, + "p90": 544.7040051221848, + "p95": 554.207980632782, + "p99": 569.2800134420395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 516.319990158081, + "p90": 527.7439951896667, + "p95": 533.6639881134033, + "p99": 543.936014175415 + }, + "combine": { + "p50": 320.95998525619507, + "p90": 327.32799649238586, + "p95": 330.6559920310974, + "p99": 334.9120020866394 + }, + "roundtrip": { + "p50": 834.9760174751282, + "p90": 857.2160005569458, + "p95": 864.0639781951904, + "p99": 887.2640132904053 + }, + "isolatedSum": { + "p50": 837.2799754142761, + "p90": 855.0719916820526, + "p95": 864.3199801445007, + "p99": 878.8480162620544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-72a629b6", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b200_1002a4ba", + "comparisonKey": "dde0b90b4903bfa2", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T14:19:43.206470+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 259.99999046325684, + "p90": 283.3920121192932, + "p95": 
296.1600124835968, + "p99": 322.7199912071228 + }, + "combine": { + "p50": 66.3679987192154, + "p90": 73.21599870920181, + "p95": 80.09599894285202, + "p99": 88.3840024471283 + }, + "roundtrip": { + "p50": 318.015992641449, + "p90": 337.3439908027649, + "p95": 344.7679877281189, + "p99": 357.15198516845703 + }, + "isolatedSum": { + "p50": 326.36798918247223, + "p90": 356.608010828495, + "p95": 376.2560114264488, + "p99": 411.1039936542511 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 263.35999369621277, + "p90": 282.5919985771179, + "p95": 288.06400299072266, + "p99": 298.40001463890076 + }, + "combine": { + "p50": 88.44800293445587, + "p90": 97.05600142478943, + "p95": 101.79200023412704, + "p99": 108.73600095510483 + }, + "roundtrip": { + "p50": 345.95200419425964, + "p90": 363.5199964046478, + "p95": 371.71199917793274, + "p99": 378.01599502563477 + }, + "isolatedSum": { + "p50": 351.80799663066864, + "p90": 379.64800000190735, + "p95": 389.8560032248497, + "p99": 407.1360155940056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 271.7440128326416, + "p90": 295.3599989414215, + "p95": 302.592009305954, + "p99": 333.0560028553009 + }, + "combine": { + "p50": 132.1280002593994, + "p90": 138.46400380134583, + "p95": 143.5839980840683, + "p99": 151.10400319099426 + }, + "roundtrip": { + "p50": 404.1920006275177, + "p90": 422.2399890422821, + "p95": 430.6879937648773, + "p99": 438.59198689460754 + }, + "isolatedSum": { + "p50": 
403.872013092041, + "p90": 433.82400274276733, + "p95": 446.1760073900223, + "p99": 484.16000604629517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 356.57599568367004, + "p90": 372.9279935359955, + "p95": 379.4560134410858, + "p99": 389.75998759269714 + }, + "combine": { + "p50": 228.92799973487854, + "p90": 237.69600689411163, + "p95": 239.74399268627167, + "p99": 242.78399348258972 + }, + "roundtrip": { + "p50": 591.6799902915955, + "p90": 610.0800037384033, + "p95": 617.4079775810242, + "p99": 636.7359757423401 + }, + "isolatedSum": { + "p50": 585.5039954185486, + "p90": 610.6240004301071, + "p95": 619.2000061273575, + "p99": 632.5439810752869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 566.8799877166748, + "p90": 578.7839889526367, + "p95": 585.9839916229248, + "p99": 609.3440055847168 + }, + "combine": { + "p50": 408.9280068874359, + "p90": 412.4799966812134, + "p95": 414.11200165748596, + "p99": 417.60000586509705 + }, + "roundtrip": { + "p50": 982.5599789619446, + "p90": 1012.768030166626, + "p95": 1060.3840351104736, + "p99": 1130.079984664917 + }, + "isolatedSum": { + "p50": 975.8079946041107, + "p90": 991.2639856338501, + "p95": 1000.0959932804108, + "p99": 1026.9440114498138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 987.16801404953, + "p90": 999.2640018463135, + "p95": 1006.335973739624, + "p99": 1016.4799690246582 + }, + "combine": { + "p50": 774.8159766197205, + "p90": 778.656005859375, + "p95": 779.9999713897705, + "p99": 783.1680178642273 + }, + "roundtrip": { + "p50": 1773.535966873169, + "p90": 1793.280005455017, + "p95": 1801.0879755020142, + "p99": 1814.527988433838 + }, + "isolatedSum": { + "p50": 1761.9839906692505, + "p90": 1777.9200077056885, + "p95": 1786.3359451293945, + "p99": 1799.6479868888855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-960fa90d", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b200_c4f169f4", + "comparisonKey": "92da38396f560407", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:29:09.902425+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", 
+ "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 261.50399446487427, + "p90": 274.399995803833, + "p95": 279.58399057388306, + "p99": 291.0720109939575 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 70.65600156784058, + "p95": 72.51200079917908, + "p99": 79.55200225114822 + }, + "roundtrip": { + "p50": 318.7519907951355, + "p90": 329.72800731658936, + "p95": 334.3679904937744, + "p99": 343.1999981403351 + }, + "isolatedSum": { + "p50": 327.64799147844315, + "p90": 345.0559973716736, + "p95": 352.09599137306213, + "p99": 370.62401324510574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 263.0079984664917, + "p90": 277.69601345062256, + "p95": 
284.38401222229004, + "p99": 296.35199904441833 + }, + "combine": { + "p50": 79.1039988398552, + "p90": 82.97599852085114, + "p95": 84.70399677753448, + "p99": 90.65599739551544 + }, + "roundtrip": { + "p50": 340.31999111175537, + "p90": 360.9600067138672, + "p95": 371.16798758506775, + "p99": 390.6880021095276 + }, + "isolatedSum": { + "p50": 342.1119973063469, + "p90": 360.6720119714737, + "p95": 369.0880089998245, + "p99": 387.0079964399338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 274.399995803833, + "p90": 283.7759852409363, + "p95": 287.3600125312805, + "p99": 296.31999135017395 + }, + "combine": { + "p50": 113.37599903345108, + "p90": 116.03199690580368, + "p95": 116.83200299739838, + "p99": 120.92799693346024 + }, + "roundtrip": { + "p50": 382.176011800766, + "p90": 390.6559944152832, + "p95": 394.27199959754944, + "p99": 398.78401160240173 + }, + "isolatedSum": { + "p50": 387.7759948372841, + "p90": 399.80798214673996, + "p95": 404.1920155286789, + "p99": 417.2479882836342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 336.2239897251129, + "p90": 343.77598762512207, + "p95": 346.20800614356995, + "p99": 350.17600655555725 + }, + "combine": { + "p50": 191.64800643920898, + "p90": 194.2719966173172, + "p95": 195.39199769496918, + "p99": 198.94400238990784 + }, + "roundtrip": { + "p50": 520.7679867744446, + "p90": 531.6799879074097, + "p95": 544.4160103797913, + "p99": 559.6160292625427 + }, + "isolatedSum": 
{ + "p50": 527.8719961643219, + "p90": 538.0479842424393, + "p95": 541.6000038385391, + "p99": 549.1200089454651 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 509.3119740486145, + "p90": 523.2639908790588, + "p95": 532.9279899597168, + "p99": 542.2400236129761 + }, + "combine": { + "p50": 326.9439935684204, + "p90": 330.30399680137634, + "p95": 331.167995929718, + "p99": 334.7199857234955 + }, + "roundtrip": { + "p50": 832.5440287590027, + "p90": 841.8560028076172, + "p95": 857.5040102005005, + "p99": 866.7200207710266 + }, + "isolatedSum": { + "p50": 836.2559676170349, + "p90": 853.5679876804352, + "p95": 864.0959858894348, + "p99": 876.9600093364716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 781.1840176582336, + "p90": 800.927996635437, + "p95": 807.1359992027283, + "p99": 819.3280100822449 + }, + "combine": { + "p50": 600.8960008621216, + "p90": 605.8239936828613, + "p95": 607.26398229599, + "p99": 613.2479906082153 + }, + "roundtrip": { + "p50": 1374.6240139007568, + "p90": 1383.4880590438843, + "p95": 1386.2080574035645, + "p99": 1394.3040370941162 + }, + "isolatedSum": { + "p50": 1382.0800185203552, + "p90": 1406.7519903182983, + "p95": 1414.3999814987183, + "p99": 1432.5760006904602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e10321ba", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b200_34150796", + "comparisonKey": "1872b6421784e657", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:20:46.413692+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", 
+ "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 254.97600436210632, + "p90": 265.4719948768616, + "p95": 270.1759934425354, + "p99": 277.18400955200195 + }, + "combine": { + "p50": 63.19999694824219, + "p90": 66.14399701356888, + "p95": 67.87200272083282, + "p99": 73.7600028514862 + }, + "roundtrip": { + "p50": 313.9840066432953, + "p90": 331.07200264930725, + "p95": 336.16000413894653, + "p99": 345.63198685646057 + }, + "isolatedSum": { + "p50": 318.1760013103485, + "p90": 331.61599189043045, + "p95": 338.0479961633682, + "p99": 350.94401240348816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 258.5279941558838, + "p90": 268.3520019054413, + "p95": 272.41599559783936, + "p99": 277.5999903678894 + }, + "combine": { + "p50": 78.14399898052216, + "p90": 81.50400221347809, + "p95": 83.3280012011528, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 334.5920145511627, + "p90": 350.46398639678955, + "p95": 356.6400110721588, + "p99": 369.8880076408386 + }, + "isolatedSum": { + "p50": 336.67199313640594, + "p90": 349.8560041189194, + "p95": 355.74399679899216, + "p99": 367.8079918026924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 270.3999876976013, + "p90": 281.3119888305664, + "p95": 285.8560085296631, + 
"p99": 291.1680042743683 + }, + "combine": { + "p50": 113.27999830245972, + "p90": 116.22399836778641, + "p95": 117.63200163841248, + "p99": 120.19199877977371 + }, + "roundtrip": { + "p50": 379.39199805259705, + "p90": 388.12801241874695, + "p95": 396.35199308395386, + "p99": 408.60798954963684 + }, + "isolatedSum": { + "p50": 383.67998600006104, + "p90": 397.5359871983528, + "p95": 403.48801016807556, + "p99": 411.360003054142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 333.15199613571167, + "p90": 349.92000460624695, + "p95": 357.2480082511902, + "p99": 371.39201164245605 + }, + "combine": { + "p50": 192.19200313091278, + "p90": 195.39199769496918, + "p95": 196.83200120925903, + "p99": 200.8640021085739 + }, + "roundtrip": { + "p50": 517.3439979553223, + "p90": 525.9199738502502, + "p95": 530.5920243263245, + "p99": 543.3279871940613 + }, + "isolatedSum": { + "p50": 525.3439992666245, + "p90": 545.3120023012161, + "p95": 554.0800094604492, + "p99": 572.25601375103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 505.5999755859375, + "p90": 513.9840245246887, + "p95": 519.1680192947388, + "p99": 533.7280035018921 + }, + "combine": { + "p50": 328.0960023403168, + "p90": 330.87998628616333, + "p95": 332.15999603271484, + "p99": 337.119996547699 + }, + "roundtrip": { + "p50": 829.8559784889221, + "p90": 837.5359773635864, + "p95": 843.392014503479, + "p99": 856.7360043525696 + }, + "isolatedSum": { + "p50": 
833.6959779262543, + "p90": 844.864010810852, + "p95": 851.3280153274536, + "p99": 870.8480000495911 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 776.7360210418701, + "p90": 784.4799757003784, + "p95": 788.1280183792114, + "p99": 800.0640273094177 + }, + "combine": { + "p50": 600.4480123519897, + "p90": 603.6800146102905, + "p95": 604.8319935798645, + "p99": 609.8560094833374 + }, + "roundtrip": { + "p50": 1374.4319677352905, + "p90": 1386.3359689712524, + "p95": 1392.0639753341675, + "p99": 1404.6399593353271 + }, + "isolatedSum": { + "p50": 1377.1840333938599, + "p90": 1388.159990310669, + "p95": 1392.960011959076, + "p99": 1409.9200367927551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cc6026c9", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_3dc664cb", + "comparisonKey": "4da6037787d9556c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:17:36.399994+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · 
zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 227.10399329662323, + "p90": 237.5040054321289, + "p95": 241.7600005865097, + "p99": 250.59199333190918 + }, + "combine": { + "p50": 65.43999910354614, + "p90": 68.67200136184692, + "p95": 70.23999840021133, + "p99": 76.54400169849396 + }, + "roundtrip": { + "p50": 286.72000765800476, + "p90": 299.8400032520294, + "p95": 307.0720136165619, + "p99": 328.8640081882477 + }, + "isolatedSum": { + "p50": 292.5439924001694, + "p90": 306.17600679397583, + "p95": 311.99999898672104, + "p99": 327.13599503040314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + 
"recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 233.11999440193176, + "p90": 246.49600684642792, + "p95": 263.13599944114685, + "p99": 293.8559949398041 + }, + "combine": { + "p50": 85.63199639320374, + "p90": 90.01599997282028, + "p95": 92.73599833250046, + "p99": 106.4319983124733 + }, + "roundtrip": { + "p50": 313.728004693985, + "p90": 330.04799485206604, + "p95": 338.1119966506958, + "p99": 383.83999466896057 + }, + "isolatedSum": { + "p50": 318.7519907951355, + "p90": 336.5120068192482, + "p95": 355.8719977736473, + "p99": 400.2879932522774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 258.14399123191833, + "p90": 271.7440128326416, + "p95": 276.89599990844727, + "p99": 289.8879945278168 + }, + "combine": { + "p50": 125.791996717453, + "p90": 128.4160017967224, + "p95": 129.2479932308197, + "p99": 131.9040060043335 + }, + "roundtrip": { + "p50": 387.6799941062927, + "p90": 400.86400508880615, + "p95": 409.9520146846771, + "p99": 420.415997505188 + }, + "isolatedSum": { + "p50": 383.93598794937134, + "p90": 400.160014629364, + "p95": 406.14399313926697, + "p99": 421.79200053215027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 363.23198676109314, + "p90": 377.6000142097473, + "p95": 383.2640051841736, + "p99": 403.903990983963 + }, + "combine": { + "p50": 221.76000475883484, + 
"p90": 225.11999309062958, + "p95": 226.27200186252594, + "p99": 230.5919975042343 + }, + "roundtrip": { + "p50": 587.2319936752319, + "p90": 595.7120060920715, + "p95": 602.1760106086731, + "p99": 619.871973991394 + }, + "isolatedSum": { + "p50": 584.991991519928, + "p90": 602.7200073003769, + "p95": 609.5360070466995, + "p99": 634.4959884881973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 560.0000023841858, + "p90": 573.5039710998535, + "p95": 581.279993057251, + "p99": 590.3679728507996 + }, + "combine": { + "p50": 395.35999298095703, + "p90": 398.2720077037811, + "p95": 399.3920087814331, + "p99": 404.09600734710693 + }, + "roundtrip": { + "p50": 956.063985824585, + "p90": 965.9839868545532, + "p95": 979.2320132255554, + "p99": 997.1839785575867 + }, + "isolatedSum": { + "p50": 955.3599953651428, + "p90": 971.7759788036346, + "p95": 980.6720018386841, + "p99": 994.4639801979065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 958.3680033683777, + "p90": 972.320020198822, + "p95": 978.335976600647, + "p99": 986.3680005073547 + }, + "combine": { + "p50": 748.3519911766052, + "p90": 752.3840069770813, + "p95": 753.3119916915894, + "p99": 756.3199996948242 + }, + "roundtrip": { + "p50": 1708.2560062408447, + "p90": 1736.5440130233765, + "p95": 1789.1520261764526, + "p99": 1876.8960237503052 + }, + "isolatedSum": { + "p50": 1706.719994544983, + "p90": 1724.7040271759033, + "p95": 1731.6479682922363, + "p99": 
1742.688000202179 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0ce12b27", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b200_c584db11", + "comparisonKey": "c49b0af3ff7d7cd2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:18:39.431490+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 196.99199497699738, + "p90": 223.4880030155182, + "p95": 231.90400004386902, + "p99": 256.48000836372375 + }, + "combine": { + "p50": 64.00000303983688, + "p90": 69.37599927186966, + "p95": 78.72000336647034, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 249.79199469089508, + "p90": 272.0319926738739, + "p95": 278.656005859375, + "p99": 289.34401273727417 + }, + "isolatedSum": { + "p50": 260.99199801683426, + "p90": 292.86400228738785, + "p95": 310.62400341033936, + "p99": 337.69600838422775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 205.4399996995926, + "p90": 226.4000028371811, + "p95": 231.455996632576, + "p99": 241.43999814987183 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 86.14400029182434, + "p95": 91.45600348711014, + "p99": 95.71199864149094 + }, + "roundtrip": { + "p50": 285.504013299942, + "p90": 310.1760149002075, + "p95": 325.6320059299469, + "p99": 401.5040099620819 + }, + "isolatedSum": { + "p50": 286.1440032720566, + "p90": 312.54400312900543, + "p95": 322.9120001196861, + "p99": 337.15199679136276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 257.60000944137573, + "p90": 283.7440073490143, + "p95": 293.5679852962494, + "p99": 314.07999992370605 + }, + "combine": { + "p50": 122.04799801111221, + "p90": 127.83999741077423, + "p95": 131.74399733543396, + "p99": 140.19200205802917 + }, + "roundtrip": { + "p50": 383.0080032348633, + "p90": 409.824013710022, + "p95": 423.0400025844574, + "p99": 446.368008852005 + }, + "isolatedSum": { + "p50": 379.64800745248795, + "p90": 411.5840047597885, + "p95": 425.31198263168335, + "p99": 454.27200198173523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 355.5839955806732, + "p90": 369.8880076408386, + "p95": 374.11201000213623, + "p99": 388.0000114440918 + }, + "combine": { + "p50": 207.16799795627594, + "p90": 211.07199788093567, + "p95": 212.0639979839325, + "p99": 216.3199931383133 + }, + "roundtrip": { + "p50": 574.720025062561, + "p90": 595.1359868049622, + "p95": 602.6560068130493, + "p99": 616.9599890708923 + }, + "isolatedSum": { + "p50": 562.7519935369492, + "p90": 580.9600055217743, + "p95": 586.1760079860687, + "p99": 604.3200045824051 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 553.2479882240295, + "p90": 562.4639987945557, + "p95": 565.5360221862793, + "p99": 568.3199763298035 + }, + "combine": { + "p50": 376.22401118278503, + "p90": 379.42400574684143, + "p95": 380.511999130249, + "p99": 
384.64000821113586 + }, + "roundtrip": { + "p50": 940.5760169029236, + "p90": 956.60799741745, + "p95": 963.9679789543152, + "p99": 993.664026260376 + }, + "isolatedSum": { + "p50": 929.4719994068146, + "p90": 941.8880045413971, + "p95": 946.0480213165283, + "p99": 952.9599845409393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 948.8959908485413, + "p90": 959.9040150642395, + "p95": 961.9200229644775, + "p99": 968.7039852142334 + }, + "combine": { + "p50": 713.8239741325378, + "p90": 717.9200053215027, + "p95": 718.8799977302551, + "p99": 722.815990447998 + }, + "roundtrip": { + "p50": 1677.791953086853, + "p90": 1699.552059173584, + "p95": 1706.2079906463623, + "p99": 1722.5279808044434 + }, + "isolatedSum": { + "p50": 1662.719964981079, + "p90": 1677.8240203857422, + "p95": 1680.8000206947327, + "p99": 1691.5199756622314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-86a25f2a", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b200_4e034ddd", + "comparisonKey": "72a062c19de05fdf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:28:06.427649+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 260.3839933872223, + "p90": 279.776006937027, + "p95": 286.8480086326599, + "p99": 298.911988735199 + }, + "combine": { + "p50": 64.96000289916992, + "p90": 69.40799951553345, + "p95": 73.72800260782242, + "p99": 76.83199644088745 + }, + "roundtrip": { + "p50": 316.25598669052124, + "p90": 329.69599962234497, + "p95": 337.3120129108429, + "p99": 344.38401460647583 + }, + 
"isolatedSum": { + "p50": 325.3439962863922, + "p90": 349.1840064525604, + "p95": 360.57601124048233, + "p99": 375.7439851760864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 264.8639976978302, + "p90": 285.5679988861084, + "p95": 294.20799016952515, + "p99": 308.4479868412018 + }, + "combine": { + "p50": 79.13599908351898, + "p90": 84.22400057315826, + "p95": 90.81599861383438, + "p99": 94.36800330877304 + }, + "roundtrip": { + "p50": 341.18399024009705, + "p90": 362.62398958206177, + "p95": 367.68001317977905, + "p99": 384.799987077713 + }, + "isolatedSum": { + "p50": 343.9999967813492, + "p90": 369.79199945926666, + "p95": 385.0239887833595, + "p99": 402.8159901499748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 273.6639976501465, + "p90": 296.51200771331787, + "p95": 303.8080036640167, + "p99": 312.9279911518097 + }, + "combine": { + "p50": 114.78400230407715, + "p90": 119.23199892044067, + "p95": 123.6800029873848, + "p99": 128.54400277137756 + }, + "roundtrip": { + "p50": 386.24000549316406, + "p90": 406.68800473213196, + "p95": 413.08799386024475, + "p99": 432.41599202156067 + }, + "isolatedSum": { + "p50": 388.44799995422363, + "p90": 415.74400663375854, + "p95": 427.4880066514015, + "p99": 441.47199392318726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 343.84000301361084, + "p90": 370.4639971256256, + "p95": 383.29601287841797, + "p99": 411.52000427246094 + }, + "combine": { + "p50": 194.33599710464478, + "p90": 200.03199577331543, + "p95": 202.7519941329956, + "p99": 207.58399367332458 + }, + "roundtrip": { + "p50": 528.6399722099304, + "p90": 545.2160239219666, + "p95": 549.95197057724, + "p99": 561.5040063858032 + }, + "isolatedSum": { + "p50": 538.1760001182556, + "p90": 570.495992898941, + "p95": 586.0480070114136, + "p99": 619.1039979457855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 517.8560018539429, + "p90": 533.1519842147827, + "p95": 540.0320291519165, + "p99": 553.2799959182739 + }, + "combine": { + "p50": 334.0800106525421, + "p90": 339.77600932121277, + "p95": 342.4000144004822, + "p99": 347.03999757766724 + }, + "roundtrip": { + "p50": 849.2159843444824, + "p90": 866.8799996376038, + "p95": 873.63201379776, + "p99": 884.2880129814148 + }, + "isolatedSum": { + "p50": 851.936012506485, + "p90": 872.9279935359955, + "p95": 882.4320435523987, + "p99": 900.3199934959412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 801.2160062789917, + "p90": 812.8960132598877, + "p95": 819.5199966430664, + "p99": 829.9199938774109 + }, + "combine": { + "p50": 618.3040142059326, + "p90": 622.6239800453186, + "p95": 625.6319880485535, + "p99": 
637.9839777946472 + }, + "roundtrip": { + "p50": 1419.7440147399902, + "p90": 1441.823959350586, + "p95": 1446.8799829483032, + "p99": 1466.048002243042 + }, + "isolatedSum": { + "p50": 1419.5200204849243, + "p90": 1435.5199933052063, + "p95": 1445.1519846916199, + "p99": 1467.903971672058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3e6b066d", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b200_3bee5acc", + "comparisonKey": "34208f30dd593dbc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:24:22.082970+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", 
+ "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 251.16801261901855, + "p90": 261.05600595474243, + "p95": 266.55998826026917, + "p99": 274.1760015487671 + }, + "combine": { + "p50": 66.65600091218948, + "p90": 70.23999840021133, + "p95": 72.38399982452393, + "p99": 78.46400141716003 + }, + "roundtrip": { + "p50": 309.2159926891327, + "p90": 323.35999608039856, + "p95": 327.455997467041, + "p99": 338.3679986000061 + }, + "isolatedSum": { + "p50": 317.82401353120804, + "p90": 331.29600435495377, + "p95": 338.9439880847931, + "p99": 352.6400029659271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 254.97600436210632, + "p90": 264.8000121116638, + "p95": 269.1839933395386, + "p99": 275.61599016189575 + }, + "combine": { + "p50": 86.71999722719193, + "p90": 91.10400080680847, + "p95": 92.6399976015091, + "p99": 98.68799895048141 + }, + "roundtrip": { + "p50": 338.46399188041687, + "p90": 362.08000779151917, + "p95": 367.1039938926697, + "p99": 377.1519958972931 + }, + "isolatedSum": { + "p50": 341.69600158929825, + "p90": 355.9040129184723, 
+ "p95": 361.82399094104767, + "p99": 374.30398911237717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 266.1440074443817, + "p90": 275.1680016517639, + "p95": 280.5759906768799, + "p99": 297.5040078163147 + }, + "combine": { + "p50": 127.32799351215363, + "p90": 131.26400113105774, + "p95": 133.91999900341034, + "p99": 145.6959992647171 + }, + "roundtrip": { + "p50": 390.5920088291168, + "p90": 403.03999185562134, + "p95": 407.00799226760864, + "p99": 415.77601432800293 + }, + "isolatedSum": { + "p50": 393.47200095653534, + "p90": 406.43200278282166, + "p95": 414.4959896802902, + "p99": 443.2000070810318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 360.8959913253784, + "p90": 371.36000394821167, + "p95": 375.61601400375366, + "p99": 380.511999130249 + }, + "combine": { + "p50": 226.75199806690216, + "p90": 229.40799593925476, + "p95": 230.71999847888947, + "p99": 234.592005610466 + }, + "roundtrip": { + "p50": 581.7599892616272, + "p90": 598.143994808197, + "p95": 606.1440110206604, + "p99": 616.2239909172058 + }, + "isolatedSum": { + "p50": 587.6479893922806, + "p90": 600.7679998874664, + "p95": 606.3360124826431, + "p99": 615.104004740715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 554.6879768371582, + "p90": 578.8159966468811, + "p95": 592.6079750061035, + "p99": 628.6720037460327 + }, + "combine": { + "p50": 395.58398723602295, + "p90": 398.6560106277466, + "p95": 399.8720049858093, + "p99": 404.06399965286255 + }, + "roundtrip": { + "p50": 948.3839869499207, + "p90": 957.5679898262024, + "p95": 964.2239809036255, + "p99": 983.2000136375427 + }, + "isolatedSum": { + "p50": 950.2719640731812, + "p90": 977.4720072746277, + "p95": 992.4799799919128, + "p99": 1032.7360033988953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 955.0079703330994, + "p90": 967.136025428772, + "p95": 977.9840111732483, + "p99": 995.7119822502136 + }, + "combine": { + "p50": 743.5839772224426, + "p90": 748.9280104637146, + "p95": 751.1360049247742, + "p99": 755.3920149803162 + }, + "roundtrip": { + "p50": 1702.2080421447754, + "p90": 1715.4560089111328, + "p95": 1722.9440212249756, + "p99": 1768.8640356063843 + }, + "isolatedSum": { + "p50": 1698.591947555542, + "p90": 1716.0640358924866, + "p95": 1729.1200160980225, + "p99": 1751.1039972305298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-035bb6eb", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b200_e9c19ae2", + "comparisonKey": "642f475120968bd3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:24:55.457983+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 261.9839906692505, + "p90": 273.8560140132904, + "p95": 277.24799513816833, + "p99": 286.3999903202057 + }, + "combine": { 
+ "p50": 65.88800251483917, + "p90": 69.023996591568, + "p95": 70.17599791288376, + "p99": 75.87199658155441 + }, + "roundtrip": { + "p50": 313.9519989490509, + "p90": 324.0320086479187, + "p95": 327.90398597717285, + "p99": 337.8559947013855 + }, + "isolatedSum": { + "p50": 327.87199318408966, + "p90": 342.8800106048584, + "p95": 347.4239930510521, + "p99": 362.2719869017601 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 266.59199595451355, + "p90": 285.21600365638733, + "p95": 294.49599981307983, + "p99": 326.7199993133545 + }, + "combine": { + "p50": 77.66400277614594, + "p90": 81.4720019698143, + "p95": 83.52000266313553, + "p99": 88.57599645853043 + }, + "roundtrip": { + "p50": 338.4320139884949, + "p90": 356.86400532722473, + "p95": 364.47998881340027, + "p99": 377.9520094394684 + }, + "isolatedSum": { + "p50": 344.2559987306595, + "p90": 366.68800562620163, + "p95": 378.01600247621536, + "p99": 415.2959957718849 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 277.69601345062256, + "p90": 292.4480140209198, + "p95": 297.95199632644653, + "p99": 323.2319951057434 + }, + "combine": { + "p50": 112.09599673748016, + "p90": 119.87199634313583, + "p95": 125.37600100040436, + "p99": 133.02400708198547 + }, + "roundtrip": { + "p50": 380.95998764038086, + "p90": 392.5119936466217, + "p95": 397.15200662612915, + "p99": 409.63199734687805 + }, + "isolatedSum": { + "p50": 389.7920101881027, + "p90": 412.32001036405563, + "p95": 
423.3279973268509, + "p99": 456.2560021877289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 337.18401193618774, + "p90": 353.1840145587921, + "p95": 359.6799969673157, + "p99": 372.51201272010803 + }, + "combine": { + "p50": 190.8479928970337, + "p90": 193.85600090026855, + "p95": 195.26399672031403, + "p99": 199.71199333667755 + }, + "roundtrip": { + "p50": 519.5519924163818, + "p90": 531.2640070915222, + "p95": 535.8080267906189, + "p99": 549.0239858627319 + }, + "isolatedSum": { + "p50": 528.0320048332214, + "p90": 547.0400154590607, + "p95": 554.9439936876297, + "p99": 572.2240060567856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 508.67199897766113, + "p90": 520.7359790802002, + "p95": 528.3520221710205, + "p99": 537.280023097992 + }, + "combine": { + "p50": 328.8959860801697, + "p90": 332.0640027523041, + "p95": 333.47201347351074, + "p99": 335.2000117301941 + }, + "roundtrip": { + "p50": 834.7839713096619, + "p90": 847.7439880371094, + "p95": 853.7279963493347, + "p99": 867.6159977912903 + }, + "isolatedSum": { + "p50": 837.5679850578308, + "p90": 852.7999818325043, + "p95": 861.8240356445312, + "p99": 872.480034828186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + 
"globalTokens": 32768, + "dispatch": { + "p50": 779.6159982681274, + "p90": 796.0960268974304, + "p95": 802.9119968414307, + "p99": 813.9839768409729 + }, + "combine": { + "p50": 605.2160263061523, + "p90": 609.7599864006042, + "p95": 611.2639904022217, + "p99": 615.0079965591431 + }, + "roundtrip": { + "p50": 1379.7119855880737, + "p90": 1392.2239542007446, + "p95": 1399.392008781433, + "p99": 1418.7519550323486 + }, + "isolatedSum": { + "p50": 1384.8320245742798, + "p90": 1405.8560132980347, + "p95": 1414.1759872436523, + "p99": 1428.991973400116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63f1c7bb", + "identity": "b200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_2e7864eb", + "comparisonKey": "59f2f012dd85872f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:26:29.248239+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 227.03999280929565, + "p90": 243.6479926109314, + "p95": 249.95200335979462, + "p99": 258.8160037994385 + }, + "combine": { + "p50": 65.76000154018402, + "p90": 68.92800331115723, + "p95": 73.2479989528656, + "p99": 81.727996468544 + }, + "roundtrip": { + "p50": 286.27198934555054, + "p90": 307.3920011520386, + "p95": 310.2079927921295, + "p99": 319.4560110569 + }, + "isolatedSum": { + "p50": 292.7999943494797, + "p90": 312.5759959220886, + "p95": 323.2000023126602, + "p99": 340.5440002679825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 239.00799453258514, + "p90": 265.4719948768616, + "p95": 278.81601452827454, + "p99": 304.639995098114 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 
96.96000069379807, + "p95": 103.45599800348282, + "p99": 114.62400108575821 + }, + "roundtrip": { + "p50": 314.4640028476715, + "p90": 339.9359881877899, + "p95": 352.57598757743835, + "p99": 392.09601283073425 + }, + "isolatedSum": { + "p50": 326.78399235010147, + "p90": 362.43199557065964, + "p95": 382.27201253175735, + "p99": 419.2639961838722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 260.3839933872223, + "p90": 283.6480140686035, + "p95": 290.17600417137146, + "p99": 301.0239899158478 + }, + "combine": { + "p50": 126.0479986667633, + "p90": 131.3920021057129, + "p95": 134.97599959373474, + "p99": 140.06400108337402 + }, + "roundtrip": { + "p50": 387.935996055603, + "p90": 403.03999185562134, + "p95": 410.0160002708435, + "p99": 417.5040125846863 + }, + "isolatedSum": { + "p50": 386.4319920539856, + "p90": 415.0400161743164, + "p95": 425.1520037651062, + "p99": 441.0879909992218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 372.48000502586365, + "p90": 396.67201042175293, + "p95": 406.8480134010315, + "p99": 430.01601099967957 + }, + "combine": { + "p50": 222.59199619293213, + "p90": 229.5999974012375, + "p95": 233.08800160884857, + "p99": 237.72799968719482 + }, + "roundtrip": { + "p50": 589.8560285568237, + "p90": 603.6800146102905, + "p95": 609.503984451294, + "p99": 622.4640011787415 + }, + "isolatedSum": { + "p50": 595.0720012187958, + "p90": 626.2720078229904, + "p95": 639.9360150098801, + "p99": 
667.7440106868744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 567.3279762268066, + "p90": 585.6959819793701, + "p95": 593.824028968811, + "p99": 611.4240288734436 + }, + "combine": { + "p50": 397.0560133457184, + "p90": 403.23200821876526, + "p95": 405.5359959602356, + "p99": 411.0400080680847 + }, + "roundtrip": { + "p50": 964.5119905471802, + "p90": 983.4880232810974, + "p95": 988.3840084075928, + "p99": 1000.7359981536865 + }, + "isolatedSum": { + "p50": 964.383989572525, + "p90": 988.9279901981354, + "p95": 999.3600249290466, + "p99": 1022.4640369415283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 973.9199876785278, + "p90": 992.9599761962891, + "p95": 996.3200092315674, + "p99": 1006.4640045166016 + }, + "combine": { + "p50": 750.4000067710876, + "p90": 755.5199861526489, + "p95": 757.7279806137085, + "p99": 762.112021446228 + }, + "roundtrip": { + "p50": 1715.6800031661987, + "p90": 1730.3999662399292, + "p95": 1738.368034362793, + "p99": 1751.7759799957275 + }, + "isolatedSum": { + "p50": 1724.3199944496155, + "p90": 1748.479962348938, + "p95": 1754.0479898452759, + "p99": 1768.5760259628296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-001bb877", + "identity": 
"b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_1239b153", + "comparisonKey": "edb85ff070e34104", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:27:02.919860+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 258.84801149368286, + "p90": 270.7200050354004, + "p95": 274.3679881095886, + "p99": 283.1040024757385 + }, + "combine": { + "p50": 64.19199705123901, + "p90": 67.9360032081604, + "p95": 70.72000205516815, + "p99": 76.89599692821503 + }, + "roundtrip": { + "p50": 316.32000207901, + "p90": 327.9680013656616, + "p95": 332.67199993133545, + "p99": 342.8800106048584 + }, + "isolatedSum": { + "p50": 323.0400085449219, + "p90": 338.6560082435608, + "p95": 345.0879901647568, + "p99": 359.99999940395355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 264.2880082130432, + "p90": 273.8560140132904, + "p95": 278.81601452827454, + "p99": 291.9360101222992 + }, + "combine": { + "p50": 77.7600035071373, + "p90": 81.56800270080566, + "p95": 83.42400193214417, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 337.5360071659088, + "p90": 346.68800234794617, + "p95": 349.8240113258362, + "p99": 354.3039858341217 + }, + "isolatedSum": { + "p50": 342.0480117201805, + "p90": 355.42401671409607, + "p95": 362.2400164604187, + "p99": 381.4080134034157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 272.8320062160492, + "p90": 290.0800108909607, + "p95": 297.0240116119385, + "p99": 
312.19199299812317 + }, + "combine": { + "p50": 111.07199639081955, + "p90": 113.82400244474411, + "p95": 115.32799899578094, + "p99": 121.47200107574463 + }, + "roundtrip": { + "p50": 380.5760145187378, + "p90": 393.0560052394867, + "p95": 399.3920087814331, + "p99": 411.19998693466187 + }, + "isolatedSum": { + "p50": 383.90400260686874, + "p90": 403.9040133357048, + "p95": 412.3520106077194, + "p99": 433.6639940738678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 331.167995929718, + "p90": 342.6879942417145, + "p95": 350.1119911670685, + "p99": 370.3039884567261 + }, + "combine": { + "p50": 190.23999571800232, + "p90": 193.1840032339096, + "p95": 194.2719966173172, + "p99": 198.46400618553162 + }, + "roundtrip": { + "p50": 516.2240266799927, + "p90": 534.5280170440674, + "p95": 543.39200258255, + "p99": 579.1040062904358 + }, + "isolatedSum": { + "p50": 521.4079916477203, + "p90": 535.8719974756241, + "p95": 544.3839877843857, + "p99": 568.7679946422577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 511.32798194885254, + "p90": 521.5680003166199, + "p95": 533.5680246353149, + "p99": 553.8240075111389 + }, + "combine": { + "p50": 325.5999982357025, + "p90": 328.41598987579346, + "p95": 329.79199290275574, + "p99": 334.78400111198425 + }, + "roundtrip": { + "p50": 832.7999711036682, + "p90": 841.6640162467957, + "p95": 849.4399785995483, + "p99": 867.7120208740234 + }, + "isolatedSum": { + "p50": 836.927980184555, + 
"p90": 849.9839901924133, + "p95": 863.3600175380707, + "p99": 888.6080086231232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 784.7040295600891, + "p90": 800.4800081253052, + "p95": 808.1279993057251, + "p99": 843.7759876251221 + }, + "combine": { + "p50": 602.6880145072937, + "p90": 607.0399880409241, + "p95": 608.735978603363, + "p99": 613.5039925575256 + }, + "roundtrip": { + "p50": 1383.8720321655273, + "p90": 1400.7999897003174, + "p95": 1407.6800346374512, + "p99": 1427.1039962768555 + }, + "isolatedSum": { + "p50": 1387.3920440673828, + "p90": 1407.5199961662292, + "p95": 1416.8639779090881, + "p99": 1457.2799801826477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-470dba37", + "identity": "b200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_b6e83fb3", + "comparisonKey": "11141bb7b41af0f5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:22:48.836091+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_03", + "sku": "b200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · deepep-hybrid · bf16 · zipf+eplb", + 
"model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 259.45600867271423, + "p90": 270.6559896469116, + "p95": 274.59201216697693, + "p99": 281.792014837265 + }, + "combine": { + "p50": 65.88800251483917, + "p90": 69.72800195217133, + "p95": 71.3919997215271, + "p99": 78.46400141716003 + }, + "roundtrip": { + "p50": 312.6719892024994, + "p90": 320.8959996700287, + "p95": 322.2079873085022, + "p99": 331.32800459861755 + }, + "isolatedSum": { + "p50": 325.3440111875534, + "p90": 340.38399159908295, + "p95": 345.984011888504, + "p99": 360.25601625442505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + 
"fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 261.6640031337738, + "p90": 272.09600806236267, + "p95": 275.29600262641907, + "p99": 283.80799293518066 + }, + "combine": { + "p50": 77.82399654388428, + "p90": 81.50400221347809, + "p95": 83.23200047016144, + "p99": 87.96799927949905 + }, + "roundtrip": { + "p50": 336.544007062912, + "p90": 345.8879888057709, + "p95": 349.5680093765259, + "p99": 356.25600814819336 + }, + "isolatedSum": { + "p50": 339.4879996776581, + "p90": 353.60001027584076, + "p95": 358.5280030965805, + "p99": 371.7759922146797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 269.0559923648834, + "p90": 279.4240117073059, + "p95": 283.488005399704, + "p99": 292.1279966831207 + }, + "combine": { + "p50": 110.49599945545197, + "p90": 113.72800171375275, + "p95": 115.03999680280685, + "p99": 121.76000326871872 + }, + "roundtrip": { + "p50": 378.01599502563477, + "p90": 392.9600119590759, + "p95": 397.98399806022644, + "p99": 406.0479998588562 + }, + "isolatedSum": { + "p50": 379.5519918203354, + "p90": 393.15201342105865, + "p95": 398.52800220251083, + "p99": 413.88799995183945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 331.5199911594391, + "p90": 348.4160006046295, + "p95": 355.26400804519653, + "p99": 366.5280044078827 + }, + "combine": { + 
"p50": 191.03999435901642, + "p90": 193.63200664520264, + "p95": 194.91200149059296, + "p99": 200.57600736618042 + }, + "roundtrip": { + "p50": 513.2160186767578, + "p90": 520.5119848251343, + "p95": 523.8080024719238, + "p99": 534.0800285339355 + }, + "isolatedSum": { + "p50": 522.5599855184555, + "p90": 542.0480072498322, + "p95": 550.1760095357895, + "p99": 567.1040117740631 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 509.72801446914673, + "p90": 518.6240077018738, + "p95": 523.1680274009705, + "p99": 536.4480018615723 + }, + "combine": { + "p50": 325.6640136241913, + "p90": 328.2240033149719, + "p95": 329.6639919281006, + "p99": 333.2799971103668 + }, + "roundtrip": { + "p50": 831.5200209617615, + "p90": 841.6320085525513, + "p95": 850.8480191230774, + "p99": 865.3759956359863 + }, + "isolatedSum": { + "p50": 835.392028093338, + "p90": 846.8480110168457, + "p95": 852.832019329071, + "p99": 869.7279989719391 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 784.0960025787354, + "p90": 792.6080226898193, + "p95": 799.6799945831299, + "p99": 811.13600730896 + }, + "combine": { + "p50": 602.2080183029175, + "p90": 606.5599918365479, + "p95": 608.35200548172, + "p99": 611.2319827079773 + }, + "roundtrip": { + "p50": 1383.8080167770386, + "p90": 1406.65602684021, + "p95": 1417.6959991455078, + "p99": 1454.0159702301025 + }, + "isolatedSum": { + "p50": 1386.3040208816528, + "p90": 1399.1680145263672, + "p95": 
1408.0320000648499, + "p99": 1422.3679900169373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d812e796", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_9f2abd18", + "comparisonKey": "5c323b09025fcf44", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:51:59.794777+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.11999732255936, + "p90": 77.37600058317184, + "p95": 83.67999643087387, + "p99": 90.33600240945816 + }, + "combine": { + "p50": 69.11999732255936, + "p90": 77.37600058317184, + "p95": 83.67999643087387, + "p99": 90.33600240945816 + }, + "roundtrip": { + "p50": 69.11999732255936, + "p90": 77.37600058317184, + "p95": 83.67999643087387, + "p99": 90.33600240945816 + }, + "isolatedSum": { + "p50": 138.2399946451187, + "p90": 154.7520011663437, + "p95": 167.35999286174774, + "p99": 180.67200481891632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.00799679756165, + "p90": 77.34400033950806, + "p95": 81.28000050783157, + "p99": 90.14400094747543 + }, + "combine": { + "p50": 71.00799679756165, + "p90": 77.34400033950806, + "p95": 81.28000050783157, + "p99": 90.14400094747543 + }, + "roundtrip": { + "p50": 71.00799679756165, + "p90": 77.34400033950806, + "p95": 81.28000050783157, + "p99": 90.14400094747543 + }, + "isolatedSum": { + "p50": 142.0159935951233, + "p90": 154.6880006790161, + "p95": 162.56000101566315, + "p99": 180.28800189495087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.61600142717361, + "p90": 78.62400263547897, + "p95": 81.18399977684021, + "p99": 90.17600119113922 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 78.62400263547897, + "p95": 81.18399977684021, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 71.61600142717361, + "p90": 78.62400263547897, + "p95": 81.18399977684021, + "p99": 90.17600119113922 + }, + "isolatedSum": { + "p50": 143.23200285434723, + "p90": 157.24800527095795, + "p95": 162.36799955368042, + "p99": 180.35200238227844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.03199714422226, + "p90": 80.44800162315369, + "p95": 83.64800363779068, + "p99": 98.4639972448349 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 80.44800162315369, + "p95": 83.64800363779068, + "p99": 98.4639972448349 + }, + "roundtrip": { + "p50": 72.03199714422226, + "p90": 80.44800162315369, + "p95": 83.64800363779068, + "p99": 98.4639972448349 + }, + "isolatedSum": { + "p50": 144.06399428844452, + "p90": 160.89600324630737, + "p95": 167.29600727558136, + "p99": 196.9279944896698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.0479975938797, + "p90": 80.09599894285202, + "p95": 84.86399799585342, + "p99": 93.53599697351456 + }, + "combine": { + "p50": 74.0479975938797, + "p90": 80.09599894285202, + "p95": 84.86399799585342, + "p99": 93.53599697351456 + }, + "roundtrip": { + "p50": 74.0479975938797, + "p90": 80.09599894285202, + 
"p95": 84.86399799585342, + "p99": 93.53599697351456 + }, + "isolatedSum": { + "p50": 148.0959951877594, + "p90": 160.19199788570404, + "p95": 169.72799599170685, + "p99": 187.0719939470291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.42400062084198, + "p90": 86.68799698352814, + "p95": 89.47200328111649, + "p99": 103.5199984908104 + }, + "combine": { + "p50": 75.42400062084198, + "p90": 86.68799698352814, + "p95": 89.47200328111649, + "p99": 103.5199984908104 + }, + "roundtrip": { + "p50": 75.42400062084198, + "p90": 86.68799698352814, + "p95": 89.47200328111649, + "p99": 103.5199984908104 + }, + "isolatedSum": { + "p50": 150.84800124168396, + "p90": 173.37599396705627, + "p95": 178.94400656223297, + "p99": 207.0399969816208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.22399926185608, + "p90": 83.45600217580795, + "p95": 88.99199962615967, + "p99": 94.97600048780441 + }, + "combine": { + "p50": 76.22399926185608, + "p90": 83.45600217580795, + "p95": 88.99199962615967, + "p99": 94.97600048780441 + }, + "roundtrip": { + "p50": 76.22399926185608, + "p90": 83.45600217580795, + "p95": 88.99199962615967, + "p99": 94.97600048780441 + }, + "isolatedSum": { + "p50": 152.44799852371216, + "p90": 166.9120043516159, + "p95": 177.98399925231934, + "p99": 189.95200097560883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 2, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 80.38400113582611, + "p90": 87.13600039482117, + "p95": 91.71199798583984, + "p99": 99.71199929714203 + }, + "combine": { + "p50": 80.38400113582611, + "p90": 87.13600039482117, + "p95": 91.71199798583984, + "p99": 99.71199929714203 + }, + "roundtrip": { + "p50": 80.38400113582611, + "p90": 87.13600039482117, + "p95": 91.71199798583984, + "p99": 99.71199929714203 + }, + "isolatedSum": { + "p50": 160.76800227165222, + "p90": 174.27200078964233, + "p95": 183.4239959716797, + "p99": 199.42399859428406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-18defbe1", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b200_83f65160", + "comparisonKey": "33deab4606b4c967", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:54:03.924455+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.88800317049026, + "p90": 77.18399912118912, + "p95": 83.5840031504631, + "p99": 90.08000046014786 + }, + "combine": { + "p50": 69.88800317049026, + "p90": 77.18399912118912, + "p95": 83.5840031504631, + "p99": 90.08000046014786 + }, + "roundtrip": { + "p50": 69.88800317049026, + "p90": 77.18399912118912, + "p95": 83.5840031504631, + "p99": 90.08000046014786 + }, + "isolatedSum": { + "p50": 139.77600634098053, + "p90": 154.36799824237823, + "p95": 167.1680063009262, + "p99": 180.16000092029572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.71200215816498, + "p90": 78.14399898052216, + "p95": 81.56800270080566, + "p99": 89.88799899816513 + }, + "combine": { + "p50": 71.71200215816498, + "p90": 
78.14399898052216, + "p95": 81.56800270080566, + "p99": 89.88799899816513 + }, + "roundtrip": { + "p50": 71.71200215816498, + "p90": 78.14399898052216, + "p95": 81.56800270080566, + "p99": 89.88799899816513 + }, + "isolatedSum": { + "p50": 143.42400431632996, + "p90": 156.2879979610443, + "p95": 163.13600540161133, + "p99": 179.77599799633026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.96799665689468, + "p90": 78.65600287914276, + "p95": 82.24000036716461, + "p99": 89.53599631786346 + }, + "combine": { + "p50": 71.96799665689468, + "p90": 78.65600287914276, + "p95": 82.24000036716461, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 71.96799665689468, + "p90": 78.65600287914276, + "p95": 82.24000036716461, + "p99": 89.53599631786346 + }, + "isolatedSum": { + "p50": 143.93599331378937, + "p90": 157.31200575828552, + "p95": 164.48000073432922, + "p99": 179.07199263572693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.11999863386154, + "p90": 87.8399983048439, + "p95": 94.40000355243683, + "p99": 105.02400249242783 + }, + "combine": { + "p50": 77.11999863386154, + "p90": 87.8399983048439, + "p95": 94.40000355243683, + "p99": 105.02400249242783 + }, + "roundtrip": { + "p50": 77.11999863386154, + "p90": 87.8399983048439, + "p95": 94.40000355243683, + "p99": 105.02400249242783 + }, + "isolatedSum": { + "p50": 154.23999726772308, + "p90": 175.6799966096878, + "p95": 188.80000710487366, + "p99": 210.04800498485565 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.64000242948532, + "p90": 84.44800227880478, + "p95": 90.84799885749817, + "p99": 102.11200267076492 + }, + "combine": { + "p50": 76.64000242948532, + "p90": 84.44800227880478, + "p95": 90.84799885749817, + "p99": 102.11200267076492 + }, + "roundtrip": { + "p50": 76.64000242948532, + "p90": 84.44800227880478, + "p95": 90.84799885749817, + "p99": 102.11200267076492 + }, + "isolatedSum": { + "p50": 153.28000485897064, + "p90": 168.89600455760956, + "p95": 181.69599771499634, + "p99": 204.22400534152985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.15199887752533, + "p90": 86.65599673986435, + "p95": 90.14400094747543, + "p99": 96.54399752616882 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 86.65599673986435, + "p95": 90.14400094747543, + "p99": 96.54399752616882 + }, + "roundtrip": { + "p50": 77.15199887752533, + "p90": 86.65599673986435, + "p95": 90.14400094747543, + "p99": 96.54399752616882 + }, + "isolatedSum": { + "p50": 154.30399775505066, + "p90": 173.3119934797287, + "p95": 180.28800189495087, + "p99": 193.08799505233765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.09599894285202, + "p90": 87.39200234413147, + "p95": 91.48799628019333, + "p99": 97.34400361776352 + }, + 
"combine": { + "p50": 80.09599894285202, + "p90": 87.39200234413147, + "p95": 91.48799628019333, + "p99": 97.34400361776352 + }, + "roundtrip": { + "p50": 80.09599894285202, + "p90": 87.39200234413147, + "p95": 91.48799628019333, + "p99": 97.34400361776352 + }, + "isolatedSum": { + "p50": 160.19199788570404, + "p90": 174.78400468826294, + "p95": 182.97599256038666, + "p99": 194.68800723552704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.26400202512741, + "p90": 100.09600222110748, + "p95": 103.4879982471466, + "p99": 117.98399686813354 + }, + "combine": { + "p50": 91.26400202512741, + "p90": 100.09600222110748, + "p95": 103.4879982471466, + "p99": 117.98399686813354 + }, + "roundtrip": { + "p50": 91.26400202512741, + "p90": 100.09600222110748, + "p95": 103.4879982471466, + "p99": 117.98399686813354 + }, + "isolatedSum": { + "p50": 182.52800405025482, + "p90": 200.19200444221497, + "p95": 206.9759964942932, + "p99": 235.9679937362671 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cb930fda", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b200_b47c3e43", + "comparisonKey": "57521956b9176dfc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:55:06.004620+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 61.72800064086914, + "p90": 81.66400343179703, + "p95": 88.41600269079208, + "p99": 109.24799740314484 + }, + "combine": { + "p50": 61.72800064086914, + "p90": 81.66400343179703, + "p95": 88.41600269079208, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 61.72800064086914, + "p90": 81.66400343179703, + "p95": 88.41600269079208, + "p99": 
109.24799740314484 + }, + "isolatedSum": { + "p50": 123.45600128173828, + "p90": 163.32800686359406, + "p95": 176.83200538158417, + "p99": 218.49599480628967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 63.77600133419037, + "p90": 76.09599828720093, + "p95": 86.75199747085571, + "p99": 95.96800059080124 + }, + "combine": { + "p50": 63.77600133419037, + "p90": 76.09599828720093, + "p95": 86.75199747085571, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 63.77600133419037, + "p90": 76.09599828720093, + "p95": 86.75199747085571, + "p99": 95.96800059080124 + }, + "isolatedSum": { + "p50": 127.55200266838074, + "p90": 152.19199657440186, + "p95": 173.50399494171143, + "p99": 191.93600118160248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 67.6800012588501, + "p90": 94.81599926948547, + "p95": 121.88799679279327, + "p99": 136.35200262069702 + }, + "combine": { + "p50": 67.6800012588501, + "p90": 94.81599926948547, + "p95": 121.88799679279327, + "p99": 136.35200262069702 + }, + "roundtrip": { + "p50": 67.6800012588501, + "p90": 94.81599926948547, + "p95": 121.88799679279327, + "p99": 136.35200262069702 + }, + "isolatedSum": { + "p50": 135.3600025177002, + "p90": 189.63199853897095, + "p95": 243.77599358558655, + "p99": 272.70400524139404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 68.09599697589874, + "p90": 83.64800363779068, + "p95": 94.40000355243683, + "p99": 103.42399775981903 + }, + "combine": { + "p50": 68.09599697589874, + "p90": 83.64800363779068, + "p95": 94.40000355243683, + "p99": 103.42399775981903 + }, + "roundtrip": { + "p50": 68.09599697589874, + "p90": 83.64800363779068, + "p95": 94.40000355243683, + "p99": 103.42399775981903 + }, + "isolatedSum": { + "p50": 136.19199395179749, + "p90": 167.29600727558136, + "p95": 188.80000710487366, + "p99": 206.84799551963806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f7604cb3", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b200_03452aae", + "comparisonKey": "67c6ab011bf30735", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:05:25.230675+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.30400130152702, + "p90": 69.15199756622314, + "p95": 75.03999769687653, + "p99": 85.85599809885025 + }, + "combine": { + "p50": 62.30400130152702, + "p90": 69.15199756622314, + "p95": 75.03999769687653, + "p99": 85.85599809885025 + }, + "roundtrip": { + "p50": 62.30400130152702, + "p90": 69.15199756622314, + "p95": 75.03999769687653, + "p99": 85.85599809885025 + }, + "isolatedSum": { + "p50": 124.60800260305405, + "p90": 138.3039951324463, + "p95": 150.07999539375305, + "p99": 171.7119961977005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 64.03200328350067, + "p90": 71.99999690055847, + "p95": 78.04799824953079, + "p99": 84.83199775218964 + }, + "combine": { + "p50": 64.03200328350067, + "p90": 71.99999690055847, + "p95": 
78.04799824953079, + "p99": 84.83199775218964 + }, + "roundtrip": { + "p50": 64.03200328350067, + "p90": 71.99999690055847, + "p95": 78.04799824953079, + "p99": 84.83199775218964 + }, + "isolatedSum": { + "p50": 128.06400656700134, + "p90": 143.99999380111694, + "p95": 156.09599649906158, + "p99": 169.66399550437927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 65.2799978852272, + "p90": 75.58400183916092, + "p95": 79.71200346946716, + "p99": 90.94399958848953 + }, + "combine": { + "p50": 65.2799978852272, + "p90": 75.58400183916092, + "p95": 79.71200346946716, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 65.2799978852272, + "p90": 75.58400183916092, + "p95": 79.71200346946716, + "p99": 90.94399958848953 + }, + "isolatedSum": { + "p50": 130.5599957704544, + "p90": 151.16800367832184, + "p95": 159.42400693893433, + "p99": 181.88799917697906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 68.09599697589874, + "p90": 81.11999928951263, + "p95": 85.1840004324913, + "p99": 108.44799876213074 + }, + "combine": { + "p50": 68.09599697589874, + "p90": 81.11999928951263, + "p95": 85.1840004324913, + "p99": 108.44799876213074 + }, + "roundtrip": { + "p50": 68.09599697589874, + "p90": 81.11999928951263, + "p95": 85.1840004324913, + "p99": 108.44799876213074 + }, + "isolatedSum": { + "p50": 136.19199395179749, + "p90": 162.23999857902527, + "p95": 170.3680008649826, + "p99": 216.89599752426147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + 
"combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.95999675989151, + "p90": 77.53600180149078, + "p95": 80.12799918651581, + "p99": 90.08000046014786 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 77.53600180149078, + "p95": 80.12799918651581, + "p99": 90.08000046014786 + }, + "roundtrip": { + "p50": 72.95999675989151, + "p90": 77.53600180149078, + "p95": 80.12799918651581, + "p99": 90.08000046014786 + }, + "isolatedSum": { + "p50": 145.91999351978302, + "p90": 155.07200360298157, + "p95": 160.25599837303162, + "p99": 180.16000092029572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.80800288915634, + "p90": 78.36800068616867, + "p95": 84.95999872684479, + "p99": 94.84799951314926 + }, + "combine": { + "p50": 71.80800288915634, + "p90": 78.36800068616867, + "p95": 84.95999872684479, + "p99": 94.84799951314926 + }, + "roundtrip": { + "p50": 71.80800288915634, + "p90": 78.36800068616867, + "p95": 84.95999872684479, + "p99": 94.84799951314926 + }, + "isolatedSum": { + "p50": 143.61600577831268, + "p90": 156.73600137233734, + "p95": 169.91999745368958, + "p99": 189.69599902629852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 73.40800017118454, + "p90": 80.70400357246399, + "p95": 86.07999980449677, + "p99": 91.42400324344635 + }, + "combine": { + "p50": 73.40800017118454, + 
"p90": 80.70400357246399, + "p95": 86.07999980449677, + "p99": 91.42400324344635 + }, + "roundtrip": { + "p50": 73.40800017118454, + "p90": 80.70400357246399, + "p95": 86.07999980449677, + "p99": 91.42400324344635 + }, + "isolatedSum": { + "p50": 146.81600034236908, + "p90": 161.40800714492798, + "p95": 172.15999960899353, + "p99": 182.8480064868927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.03999769687653, + "p90": 87.0399996638298, + "p95": 90.01599997282028, + "p99": 103.16800326108932 + }, + "combine": { + "p50": 75.03999769687653, + "p90": 87.0399996638298, + "p95": 90.01599997282028, + "p99": 103.16800326108932 + }, + "roundtrip": { + "p50": 75.03999769687653, + "p90": 87.0399996638298, + "p95": 90.01599997282028, + "p99": 103.16800326108932 + }, + "isolatedSum": { + "p50": 150.07999539375305, + "p90": 174.0799993276596, + "p95": 180.03199994564056, + "p99": 206.33600652217865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-22fe16e2", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b200_0f397a9a", + "comparisonKey": "125022078a3d6e13", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:03:18.674540+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", 
+ "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.36799937486649, + "p90": 75.80800354480743, + "p95": 82.94399827718735, + "p99": 90.4960036277771 + }, + "combine": { + "p50": 70.36799937486649, + "p90": 75.80800354480743, + "p95": 82.94399827718735, + "p99": 90.4960036277771 + }, + "roundtrip": { + "p50": 70.36799937486649, + "p90": 75.80800354480743, + "p95": 82.94399827718735, + "p99": 90.4960036277771 + }, + "isolatedSum": { + "p50": 140.73599874973297, + "p90": 
151.61600708961487, + "p95": 165.8879965543747, + "p99": 180.9920072555542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.28799909353256, + "p90": 77.85599678754807, + "p95": 82.20800012350082, + "p99": 88.51200342178345 + }, + "combine": { + "p50": 72.28799909353256, + "p90": 77.85599678754807, + "p95": 82.20800012350082, + "p99": 88.51200342178345 + }, + "roundtrip": { + "p50": 72.28799909353256, + "p90": 77.85599678754807, + "p95": 82.20800012350082, + "p99": 88.51200342178345 + }, + "isolatedSum": { + "p50": 144.57599818706512, + "p90": 155.71199357509613, + "p95": 164.41600024700165, + "p99": 177.0240068435669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.37599992752075, + "p90": 80.99199831485748, + "p95": 87.36000210046768, + "p99": 98.27200323343277 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 80.99199831485748, + "p95": 87.36000210046768, + "p99": 98.27200323343277 + }, + "roundtrip": { + "p50": 73.37599992752075, + "p90": 80.99199831485748, + "p95": 87.36000210046768, + "p99": 98.27200323343277 + }, + "isolatedSum": { + "p50": 146.7519998550415, + "p90": 161.98399662971497, + "p95": 174.72000420093536, + "p99": 196.54400646686554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
73.53600114583969, + "p90": 79.77599650621414, + "p95": 83.55200290679932, + "p99": 90.20800143480301 + }, + "combine": { + "p50": 73.53600114583969, + "p90": 79.77599650621414, + "p95": 83.55200290679932, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 73.53600114583969, + "p90": 79.77599650621414, + "p95": 83.55200290679932, + "p99": 90.20800143480301 + }, + "isolatedSum": { + "p50": 147.07200229167938, + "p90": 159.55199301242828, + "p95": 167.10400581359863, + "p99": 180.41600286960602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.80000364780426, + "p90": 87.42400258779526, + "p95": 90.87999910116196, + "p99": 98.88000041246414 + }, + "combine": { + "p50": 76.80000364780426, + "p90": 87.42400258779526, + "p95": 90.87999910116196, + "p99": 98.88000041246414 + }, + "roundtrip": { + "p50": 76.80000364780426, + "p90": 87.42400258779526, + "p95": 90.87999910116196, + "p99": 98.88000041246414 + }, + "isolatedSum": { + "p50": 153.60000729560852, + "p90": 174.84800517559052, + "p95": 181.7599982023239, + "p99": 197.76000082492828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.72800326347351, + "p90": 83.26400071382523, + "p95": 87.13600039482117, + "p99": 97.15200215578079 + }, + "combine": { + "p50": 77.72800326347351, + "p90": 83.26400071382523, + "p95": 87.13600039482117, + "p99": 97.15200215578079 + }, + "roundtrip": { + "p50": 77.72800326347351, + "p90": 83.26400071382523, + "p95": 87.13600039482117, + "p99": 97.15200215578079 + }, + 
"isolatedSum": { + "p50": 155.45600652694702, + "p90": 166.52800142765045, + "p95": 174.27200078964233, + "p99": 194.30400431156158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.09599894285202, + "p90": 86.87999844551086, + "p95": 91.67999774217606, + "p99": 100.67199915647507 + }, + "combine": { + "p50": 80.09599894285202, + "p90": 86.87999844551086, + "p95": 91.67999774217606, + "p99": 100.67199915647507 + }, + "roundtrip": { + "p50": 80.09599894285202, + "p90": 86.87999844551086, + "p95": 91.67999774217606, + "p99": 100.67199915647507 + }, + "isolatedSum": { + "p50": 160.19199788570404, + "p90": 173.75999689102173, + "p95": 183.3599954843521, + "p99": 201.34399831295013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.30400151014328, + "p90": 91.61599725484848, + "p95": 94.2080020904541, + "p99": 102.62399911880493 + }, + "combine": { + "p50": 86.30400151014328, + "p90": 91.61599725484848, + "p95": 94.2080020904541, + "p99": 102.62399911880493 + }, + "roundtrip": { + "p50": 86.30400151014328, + "p90": 91.61599725484848, + "p95": 94.2080020904541, + "p99": 102.62399911880493 + }, + "isolatedSum": { + "p50": 172.60800302028656, + "p90": 183.23199450969696, + "p95": 188.4160041809082, + "p99": 205.24799823760986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + } + ] + }, + { + "id": "cx-6f659bdf", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b200_52da9154", + "comparisonKey": "ba868a495a2a1e7a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:38:31.891942+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.68000191450119, + "p90": 80.32000064849854, + "p95": 87.36000210046768, + "p99": 99.20000284910202 + }, + "combine": { + "p50": 71.68000191450119, + "p90": 80.32000064849854, + "p95": 87.36000210046768, + "p99": 99.20000284910202 + }, + "roundtrip": { + "p50": 71.68000191450119, + "p90": 80.32000064849854, + "p95": 87.36000210046768, + "p99": 99.20000284910202 + }, + "isolatedSum": { + "p50": 143.36000382900238, + "p90": 160.64000129699707, + "p95": 174.72000420093536, + "p99": 198.40000569820404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.24799829721451, + "p90": 75.96799731254578, + "p95": 81.4720019698143, + "p99": 86.71999722719193 + }, + "combine": { + "p50": 69.24799829721451, + "p90": 75.96799731254578, + "p95": 81.4720019698143, + "p99": 86.71999722719193 + }, + "roundtrip": { + "p50": 69.24799829721451, + "p90": 75.96799731254578, + "p95": 81.4720019698143, + "p99": 86.71999722719193 + }, + "isolatedSum": { + "p50": 138.49599659442902, + "p90": 151.93599462509155, + "p95": 162.9440039396286, + "p99": 173.43999445438385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.32799988985062, + "p90": 94.43199634552002, + "p95": 114.43199962377548, + "p99": 226.0800004005432 + }, + "combine": { + "p50": 
75.32799988985062, + "p90": 94.43199634552002, + "p95": 114.43199962377548, + "p99": 226.0800004005432 + }, + "roundtrip": { + "p50": 75.32799988985062, + "p90": 94.43199634552002, + "p95": 114.43199962377548, + "p99": 226.0800004005432 + }, + "isolatedSum": { + "p50": 150.65599977970123, + "p90": 188.86399269104004, + "p95": 228.86399924755096, + "p99": 452.1600008010864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.42400062084198, + "p90": 96.22400254011154, + "p95": 103.7760004401207, + "p99": 157.3760062456131 + }, + "combine": { + "p50": 75.42400062084198, + "p90": 96.22400254011154, + "p95": 103.7760004401207, + "p99": 157.3760062456131 + }, + "roundtrip": { + "p50": 75.42400062084198, + "p90": 96.22400254011154, + "p95": 103.7760004401207, + "p99": 157.3760062456131 + }, + "isolatedSum": { + "p50": 150.84800124168396, + "p90": 192.44800508022308, + "p95": 207.5520008802414, + "p99": 314.7520124912262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.96799731254578, + "p90": 89.53599631786346, + "p95": 94.04800087213516, + "p99": 122.36800044775009 + }, + "combine": { + "p50": 75.96799731254578, + "p90": 89.53599631786346, + "p95": 94.04800087213516, + "p99": 122.36800044775009 + }, + "roundtrip": { + "p50": 75.96799731254578, + "p90": 89.53599631786346, + "p95": 94.04800087213516, + "p99": 122.36800044775009 + }, + "isolatedSum": { + "p50": 151.93599462509155, + "p90": 179.07199263572693, + "p95": 188.09600174427032, + "p99": 
244.73600089550018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.18399912118912, + "p90": 89.66399729251862, + "p95": 91.48799628019333, + "p99": 102.24000364542007 + }, + "combine": { + "p50": 77.18399912118912, + "p90": 89.66399729251862, + "p95": 91.48799628019333, + "p99": 102.24000364542007 + }, + "roundtrip": { + "p50": 77.18399912118912, + "p90": 89.66399729251862, + "p95": 91.48799628019333, + "p99": 102.24000364542007 + }, + "isolatedSum": { + "p50": 154.36799824237823, + "p90": 179.32799458503723, + "p95": 182.97599256038666, + "p99": 204.48000729084015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.12000060081482, + "p90": 102.68799960613251, + "p95": 109.3439981341362, + "p99": 120.28799951076508 + }, + "combine": { + "p50": 89.12000060081482, + "p90": 102.68799960613251, + "p95": 109.3439981341362, + "p99": 120.28799951076508 + }, + "roundtrip": { + "p50": 89.12000060081482, + "p90": 102.68799960613251, + "p95": 109.3439981341362, + "p99": 120.28799951076508 + }, + "isolatedSum": { + "p50": 178.24000120162964, + "p90": 205.37599921226501, + "p95": 218.6879962682724, + "p99": 240.57599902153015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.98399841785431, + 
"p90": 87.93599903583527, + "p95": 89.75999802350998, + "p99": 98.81599992513657 + }, + "combine": { + "p50": 81.98399841785431, + "p90": 87.93599903583527, + "p95": 89.75999802350998, + "p99": 98.81599992513657 + }, + "roundtrip": { + "p50": 81.98399841785431, + "p90": 87.93599903583527, + "p95": 89.75999802350998, + "p99": 98.81599992513657 + }, + "isolatedSum": { + "p50": 163.96799683570862, + "p90": 175.87199807167053, + "p95": 179.51999604701996, + "p99": 197.63199985027313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0216e0e0", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b200_4c717976", + "comparisonKey": "711b490570af8dec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:04:22.108679+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, 
+ "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 63.32799792289734, + "p90": 75.29599964618683, + "p95": 87.16800063848495, + "p99": 103.39199751615524 + }, + "combine": { + "p50": 63.32799792289734, + "p90": 75.29599964618683, + "p95": 87.16800063848495, + "p99": 103.39199751615524 + }, + "roundtrip": { + "p50": 63.32799792289734, + "p90": 75.29599964618683, + "p95": 87.16800063848495, + "p99": 103.39199751615524 + }, + "isolatedSum": { + "p50": 126.65599584579468, + "p90": 150.59199929237366, + "p95": 174.3360012769699, + "p99": 206.7839950323105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.82400268316269, + "p90": 105.43999820947647, + "p95": 124.12799894809723, + "p99": 134.11200046539307 + }, + "combine": { + "p50": 69.82400268316269, + "p90": 105.43999820947647, + "p95": 124.12799894809723, + "p99": 134.11200046539307 + }, + "roundtrip": { + "p50": 
69.82400268316269, + "p90": 105.43999820947647, + "p95": 124.12799894809723, + "p99": 134.11200046539307 + }, + "isolatedSum": { + "p50": 139.64800536632538, + "p90": 210.87999641895294, + "p95": 248.25599789619446, + "p99": 268.22400093078613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.7600028514862, + "p90": 125.08800625801086, + "p95": 131.3920021057129, + "p99": 148.28799664974213 + }, + "combine": { + "p50": 73.7600028514862, + "p90": 125.08800625801086, + "p95": 131.3920021057129, + "p99": 148.28799664974213 + }, + "roundtrip": { + "p50": 73.7600028514862, + "p90": 125.08800625801086, + "p95": 131.3920021057129, + "p99": 148.28799664974213 + }, + "isolatedSum": { + "p50": 147.5200057029724, + "p90": 250.17601251602173, + "p95": 262.7840042114258, + "p99": 296.57599329948425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.96799665689468, + "p90": 81.727996468544, + "p95": 88.639996945858, + "p99": 97.56799787282944 + }, + "combine": { + "p50": 71.96799665689468, + "p90": 81.727996468544, + "p95": 88.639996945858, + "p99": 97.56799787282944 + }, + "roundtrip": { + "p50": 71.96799665689468, + "p90": 81.727996468544, + "p95": 88.639996945858, + "p99": 97.56799787282944 + }, + "isolatedSum": { + "p50": 143.93599331378937, + "p90": 163.455992937088, + "p95": 177.279993891716, + "p99": 195.13599574565887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 8, 
+ "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.1599987745285, + "p90": 89.75999802350998, + "p95": 97.59999811649323, + "p99": 110.78400164842606 + }, + "combine": { + "p50": 76.1599987745285, + "p90": 89.75999802350998, + "p95": 97.59999811649323, + "p99": 110.78400164842606 + }, + "roundtrip": { + "p50": 76.1599987745285, + "p90": 89.75999802350998, + "p95": 97.59999811649323, + "p99": 110.78400164842606 + }, + "isolatedSum": { + "p50": 152.319997549057, + "p90": 179.51999604701996, + "p95": 195.19999623298645, + "p99": 221.5680032968521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.44000107049942, + "p90": 128.76799702644348, + "p95": 135.55200397968292, + "p99": 148.76799285411835 + }, + "combine": { + "p50": 77.44000107049942, + "p90": 128.76799702644348, + "p95": 135.55200397968292, + "p99": 148.76799285411835 + }, + "roundtrip": { + "p50": 77.44000107049942, + "p90": 128.76799702644348, + "p95": 135.55200397968292, + "p99": 148.76799285411835 + }, + "isolatedSum": { + "p50": 154.88000214099884, + "p90": 257.53599405288696, + "p95": 271.10400795936584, + "p99": 297.5359857082367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.64000242948532, + "p90": 84.25600081682205, + "p95": 100.12800246477127, + "p99": 110.6560006737709 + }, + "combine": { + "p50": 76.64000242948532, + "p90": 84.25600081682205, + "p95": 
100.12800246477127, + "p99": 110.6560006737709 + }, + "roundtrip": { + "p50": 76.64000242948532, + "p90": 84.25600081682205, + "p95": 100.12800246477127, + "p99": 110.6560006737709 + }, + "isolatedSum": { + "p50": 153.28000485897064, + "p90": 168.5120016336441, + "p95": 200.25600492954254, + "p99": 221.3120013475418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.46400207281113, + "p90": 133.12000036239624, + "p95": 138.94400000572205, + "p99": 153.08800339698792 + }, + "combine": { + "p50": 82.46400207281113, + "p90": 133.12000036239624, + "p95": 138.94400000572205, + "p99": 153.08800339698792 + }, + "roundtrip": { + "p50": 82.46400207281113, + "p90": 133.12000036239624, + "p95": 138.94400000572205, + "p99": 153.08800339698792 + }, + "isolatedSum": { + "p50": 164.92800414562225, + "p90": 266.2400007247925, + "p95": 277.8880000114441, + "p99": 306.17600679397583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-153be0b6", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_411d30ab", + "comparisonKey": "9e532275efc41f9e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:56:07.798736+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.51200014352798, + "p90": 75.9039968252182, + "p95": 81.34400099515915, + "p99": 89.15200084447861 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 75.9039968252182, + "p95": 81.34400099515915, + "p99": 89.15200084447861 + }, + "roundtrip": { + "p50": 68.51200014352798, + "p90": 75.9039968252182, + "p95": 81.34400099515915, + "p99": 89.15200084447861 + }, + "isolatedSum": { + "p50": 137.02400028705597, + "p90": 151.8079936504364, + "p95": 
162.6880019903183, + "p99": 178.30400168895721 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.0799971818924, + "p90": 77.40800082683563, + "p95": 83.64800363779068, + "p99": 95.13600170612335 + }, + "combine": { + "p50": 70.0799971818924, + "p90": 77.40800082683563, + "p95": 83.64800363779068, + "p99": 95.13600170612335 + }, + "roundtrip": { + "p50": 70.0799971818924, + "p90": 77.40800082683563, + "p95": 83.64800363779068, + "p99": 95.13600170612335 + }, + "isolatedSum": { + "p50": 140.1599943637848, + "p90": 154.81600165367126, + "p95": 167.29600727558136, + "p99": 190.2720034122467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.07199728488922, + "p90": 77.15199887752533, + "p95": 80.64000308513641, + "p99": 88.67199718952179 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 77.15199887752533, + "p95": 80.64000308513641, + "p99": 88.67199718952179 + }, + "roundtrip": { + "p50": 71.07199728488922, + "p90": 77.15199887752533, + "p95": 80.64000308513641, + "p99": 88.67199718952179 + }, + "isolatedSum": { + "p50": 142.14399456977844, + "p90": 154.30399775505066, + "p95": 161.28000617027283, + "p99": 177.34399437904358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.25599884986877, + "p90": 
85.85599809885025, + "p95": 91.20000153779984, + "p99": 131.58400356769562 + }, + "combine": { + "p50": 72.25599884986877, + "p90": 85.85599809885025, + "p95": 91.20000153779984, + "p99": 131.58400356769562 + }, + "roundtrip": { + "p50": 72.25599884986877, + "p90": 85.85599809885025, + "p95": 91.20000153779984, + "p99": 131.58400356769562 + }, + "isolatedSum": { + "p50": 144.51199769973755, + "p90": 171.7119961977005, + "p95": 182.40000307559967, + "p99": 263.16800713539124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.72000271081924, + "p90": 82.30400085449219, + "p95": 86.46400272846222, + "p99": 93.08800101280212 + }, + "combine": { + "p50": 74.72000271081924, + "p90": 82.30400085449219, + "p95": 86.46400272846222, + "p99": 93.08800101280212 + }, + "roundtrip": { + "p50": 74.72000271081924, + "p90": 82.30400085449219, + "p95": 86.46400272846222, + "p99": 93.08800101280212 + }, + "isolatedSum": { + "p50": 149.4400054216385, + "p90": 164.60800170898438, + "p95": 172.92800545692444, + "p99": 186.17600202560425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.58400183916092, + "p90": 88.03199976682663, + "p95": 90.7839983701706, + "p99": 106.08000308275223 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 88.03199976682663, + "p95": 90.7839983701706, + "p99": 106.08000308275223 + }, + "roundtrip": { + "p50": 75.58400183916092, + "p90": 88.03199976682663, + "p95": 90.7839983701706, + "p99": 106.08000308275223 + }, + "isolatedSum": { + "p50": 
151.16800367832184, + "p90": 176.06399953365326, + "p95": 181.5679967403412, + "p99": 212.16000616550446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.38400048017502, + "p90": 82.5280025601387, + "p95": 88.22400122880936, + "p99": 94.2080020904541 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 82.5280025601387, + "p95": 88.22400122880936, + "p99": 94.2080020904541 + }, + "roundtrip": { + "p50": 76.38400048017502, + "p90": 82.5280025601387, + "p95": 88.22400122880936, + "p99": 94.2080020904541 + }, + "isolatedSum": { + "p50": 152.76800096035004, + "p90": 165.0560051202774, + "p95": 176.4480024576187, + "p99": 188.4160041809082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.55200290679932, + "p90": 89.6959975361824, + "p95": 92.73599833250046, + "p99": 99.04000163078308 + }, + "combine": { + "p50": 83.55200290679932, + "p90": 89.6959975361824, + "p95": 92.73599833250046, + "p99": 99.04000163078308 + }, + "roundtrip": { + "p50": 83.55200290679932, + "p90": 89.6959975361824, + "p95": 92.73599833250046, + "p99": 99.04000163078308 + }, + "isolatedSum": { + "p50": 167.10400581359863, + "p90": 179.3919950723648, + "p95": 185.47199666500092, + "p99": 198.08000326156616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-ca8d52e5", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b200_3b2efc71", + "comparisonKey": "a5af905c6a2058b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:57:11.810218+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + 
"sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 66.14399701356888, + "p90": 82.78399705886841, + "p95": 94.33600306510925, + "p99": 109.27999764680862 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 82.78399705886841, + "p95": 94.33600306510925, + "p99": 109.27999764680862 + }, + "roundtrip": { + "p50": 66.14399701356888, + "p90": 82.78399705886841, + "p95": 94.33600306510925, + "p99": 109.27999764680862 + }, + "isolatedSum": { + "p50": 132.28799402713776, + "p90": 165.56799411773682, + "p95": 188.6720061302185, + "p99": 218.55999529361725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 66.81600213050842, + "p90": 77.69600301980972, + "p95": 89.50400352478027, + "p99": 96.6079980134964 + }, + "combine": { + "p50": 66.81600213050842, + "p90": 77.69600301980972, + "p95": 89.50400352478027, + "p99": 96.6079980134964 + }, + "roundtrip": { + "p50": 66.81600213050842, + "p90": 77.69600301980972, + "p95": 89.50400352478027, + "p99": 96.6079980134964 + }, + "isolatedSum": { + "p50": 133.63200426101685, + "p90": 155.39200603961945, + "p95": 179.00800704956055, + "p99": 193.2159960269928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.39999961853027, + "p90": 117.69600212574005, + "p95": 123.74400347471237, + "p99": 136.25599443912506 + }, + "combine": { + "p50": 70.39999961853027, + "p90": 117.69600212574005, + "p95": 123.74400347471237, + "p99": 136.25599443912506 + }, + 
"roundtrip": { + "p50": 70.39999961853027, + "p90": 117.69600212574005, + "p95": 123.74400347471237, + "p99": 136.25599443912506 + }, + "isolatedSum": { + "p50": 140.79999923706055, + "p90": 235.3920042514801, + "p95": 247.48800694942474, + "p99": 272.5119888782501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 67.4239993095398, + "p90": 78.33600044250488, + "p95": 88.41600269079208, + "p99": 98.43199700117111 + }, + "combine": { + "p50": 67.4239993095398, + "p90": 78.33600044250488, + "p95": 88.41600269079208, + "p99": 98.43199700117111 + }, + "roundtrip": { + "p50": 67.4239993095398, + "p90": 78.33600044250488, + "p95": 88.41600269079208, + "p99": 98.43199700117111 + }, + "isolatedSum": { + "p50": 134.8479986190796, + "p90": 156.67200088500977, + "p95": 176.83200538158417, + "p99": 196.86399400234222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.74400305747986, + "p90": 116.95999652147293, + "p95": 119.45600062608719, + "p99": 138.49599659442902 + }, + "combine": { + "p50": 75.74400305747986, + "p90": 116.95999652147293, + "p95": 119.45600062608719, + "p99": 138.49599659442902 + }, + "roundtrip": { + "p50": 75.74400305747986, + "p90": 116.95999652147293, + "p95": 119.45600062608719, + "p99": 138.49599659442902 + }, + "isolatedSum": { + "p50": 151.48800611495972, + "p90": 233.91999304294586, + "p95": 238.91200125217438, + "p99": 276.99199318885803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + 
"fanoutMean": 1.515625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.87999647855759, + "p90": 91.07200056314468, + "p95": 99.7759997844696, + "p99": 117.53600090742111 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 91.07200056314468, + "p95": 99.7759997844696, + "p99": 117.53600090742111 + }, + "roundtrip": { + "p50": 74.87999647855759, + "p90": 91.07200056314468, + "p95": 99.7759997844696, + "p99": 117.53600090742111 + }, + "isolatedSum": { + "p50": 149.75999295711517, + "p90": 182.14400112628937, + "p95": 199.5519995689392, + "p99": 235.07200181484222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 77.98399776220322, + "p90": 122.52800166606903, + "p95": 130.72000443935394, + "p99": 142.5279974937439 + }, + "combine": { + "p50": 77.98399776220322, + "p90": 122.52800166606903, + "p95": 130.72000443935394, + "p99": 142.5279974937439 + }, + "roundtrip": { + "p50": 77.98399776220322, + "p90": 122.52800166606903, + "p95": 130.72000443935394, + "p99": 142.5279974937439 + }, + "isolatedSum": { + "p50": 155.96799552440643, + "p90": 245.05600333213806, + "p95": 261.4400088787079, + "p99": 285.0559949874878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.90399813652039, + "p90": 125.82400441169739, + "p95": 134.14399325847626, + "p99": 142.46399700641632 + }, + "combine": { + "p50": 83.90399813652039, + 
"p90": 125.82400441169739, + "p95": 134.14399325847626, + "p99": 142.46399700641632 + }, + "roundtrip": { + "p50": 83.90399813652039, + "p90": 125.82400441169739, + "p95": 134.14399325847626, + "p99": 142.46399700641632 + }, + "isolatedSum": { + "p50": 167.80799627304077, + "p90": 251.64800882339478, + "p95": 268.2879865169525, + "p99": 284.92799401283264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-78977fb3", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b200_ff05ff3d", + "comparisonKey": "1a6f2f02a436f5f1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:37:28.286234+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": 
true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.11999797821045, + "p90": 82.36800134181976, + "p95": 88.3840024471283, + "p99": 95.90400010347366 + }, + "combine": { + "p50": 73.11999797821045, + "p90": 82.36800134181976, + "p95": 88.3840024471283, + "p99": 95.90400010347366 + }, + "roundtrip": { + "p50": 73.11999797821045, + "p90": 82.36800134181976, + "p95": 88.3840024471283, + "p99": 95.90400010347366 + }, + "isolatedSum": { + "p50": 146.2399959564209, + "p90": 164.73600268363953, + "p95": 176.7680048942566, + "p99": 191.80800020694733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.4480003118515, + "p90": 85.69599688053131, + "p95": 91.61599725484848, + "p99": 107.71200060844421 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 85.69599688053131, + "p95": 91.61599725484848, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 72.4480003118515, + "p90": 85.69599688053131, + "p95": 91.61599725484848, + "p99": 107.71200060844421 + }, + "isolatedSum": { + 
"p50": 144.896000623703, + "p90": 171.39199376106262, + "p95": 183.23199450969696, + "p99": 215.42400121688843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.09599763154984, + "p90": 83.00799876451492, + "p95": 91.39200299978256, + "p99": 102.88000106811523 + }, + "combine": { + "p50": 72.09599763154984, + "p90": 83.00799876451492, + "p95": 91.39200299978256, + "p99": 102.88000106811523 + }, + "roundtrip": { + "p50": 72.09599763154984, + "p90": 83.00799876451492, + "p95": 91.39200299978256, + "p99": 102.88000106811523 + }, + "isolatedSum": { + "p50": 144.19199526309967, + "p90": 166.01599752902985, + "p95": 182.78400599956512, + "p99": 205.76000213623047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.25599950551987, + "p90": 86.43200248479843, + "p95": 95.10400146245956, + "p99": 102.20800340175629 + }, + "combine": { + "p50": 76.25599950551987, + "p90": 86.43200248479843, + "p95": 95.10400146245956, + "p99": 102.20800340175629 + }, + "roundtrip": { + "p50": 76.25599950551987, + "p90": 86.43200248479843, + "p95": 95.10400146245956, + "p99": 102.20800340175629 + }, + "isolatedSum": { + "p50": 152.51199901103973, + "p90": 172.86400496959686, + "p95": 190.20800292491913, + "p99": 204.41600680351257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 78.75200361013412, + "p90": 92.83199906349182, + "p95": 102.30399668216705, + "p99": 107.68000036478043 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 92.83199906349182, + "p95": 102.30399668216705, + "p99": 107.68000036478043 + }, + "roundtrip": { + "p50": 78.75200361013412, + "p90": 92.83199906349182, + "p95": 102.30399668216705, + "p99": 107.68000036478043 + }, + "isolatedSum": { + "p50": 157.50400722026825, + "p90": 185.66399812698364, + "p95": 204.6079933643341, + "p99": 215.36000072956085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.09599894285202, + "p90": 95.39200365543365, + "p95": 102.88000106811523, + "p99": 113.27999830245972 + }, + "combine": { + "p50": 80.09599894285202, + "p90": 95.39200365543365, + "p95": 102.88000106811523, + "p99": 113.27999830245972 + }, + "roundtrip": { + "p50": 80.09599894285202, + "p90": 95.39200365543365, + "p95": 102.88000106811523, + "p99": 113.27999830245972 + }, + "isolatedSum": { + "p50": 160.19199788570404, + "p90": 190.7840073108673, + "p95": 205.76000213623047, + "p99": 226.55999660491943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 81.11999928951263, + "p90": 95.64799815416336, + "p95": 101.05600208044052, + "p99": 112.28799819946289 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 95.64799815416336, + "p95": 101.05600208044052, + "p99": 112.28799819946289 + }, + "roundtrip": { + "p50": 81.11999928951263, + "p90": 
95.64799815416336, + "p95": 101.05600208044052, + "p99": 112.28799819946289 + }, + "isolatedSum": { + "p50": 162.23999857902527, + "p90": 191.29599630832672, + "p95": 202.11200416088104, + "p99": 224.57599639892578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.86399799585342, + "p90": 94.59199756383896, + "p95": 102.78400033712387, + "p99": 111.1999973654747 + }, + "combine": { + "p50": 84.86399799585342, + "p90": 94.59199756383896, + "p95": 102.78400033712387, + "p99": 111.1999973654747 + }, + "roundtrip": { + "p50": 84.86399799585342, + "p90": 94.59199756383896, + "p95": 102.78400033712387, + "p99": 111.1999973654747 + }, + "isolatedSum": { + "p50": 169.72799599170685, + "p90": 189.18399512767792, + "p95": 205.56800067424774, + "p99": 222.3999947309494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2264e6ec", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b200_c8015aac", + "comparisonKey": "bc1b87774fa69708", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:07:30.419892+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 67.74400174617767, + "p90": 79.1039988398552, + "p95": 89.40800279378891, + "p99": 95.67999839782715 + }, + "combine": { + "p50": 67.74400174617767, + "p90": 79.1039988398552, + "p95": 89.40800279378891, + "p99": 95.67999839782715 + }, + "roundtrip": { + "p50": 67.74400174617767, + "p90": 79.1039988398552, + "p95": 89.40800279378891, + "p99": 95.67999839782715 + }, + "isolatedSum": { + "p50": 135.48800349235535, + "p90": 158.2079976797104, + "p95": 178.81600558757782, + "p99": 191.3599967956543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + 
"combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.58400118350983, + "p90": 88.48000317811966, + "p95": 99.16800260543823, + "p99": 112.12799698114395 + }, + "combine": { + "p50": 71.58400118350983, + "p90": 88.48000317811966, + "p95": 99.16800260543823, + "p99": 112.12799698114395 + }, + "roundtrip": { + "p50": 71.58400118350983, + "p90": 88.48000317811966, + "p95": 99.16800260543823, + "p99": 112.12799698114395 + }, + "isolatedSum": { + "p50": 143.16800236701965, + "p90": 176.96000635623932, + "p95": 198.33600521087646, + "p99": 224.2559939622879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.4480003118515, + "p90": 85.75999736785889, + "p95": 96.79999947547913, + "p99": 105.31199723482132 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 85.75999736785889, + "p95": 96.79999947547913, + "p99": 105.31199723482132 + }, + "roundtrip": { + "p50": 72.4480003118515, + "p90": 85.75999736785889, + "p95": 96.79999947547913, + "p99": 105.31199723482132 + }, + "isolatedSum": { + "p50": 144.896000623703, + "p90": 171.51999473571777, + "p95": 193.59999895095825, + "p99": 210.62399446964264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.60800153017044, + "p90": 85.08799970149994, + "p95": 95.61599791049957, + "p99": 105.43999820947647 + }, + "combine": { + "p50": 
72.60800153017044, + "p90": 85.08799970149994, + "p95": 95.61599791049957, + "p99": 105.43999820947647 + }, + "roundtrip": { + "p50": 72.60800153017044, + "p90": 85.08799970149994, + "p95": 95.61599791049957, + "p99": 105.43999820947647 + }, + "isolatedSum": { + "p50": 145.21600306034088, + "p90": 170.17599940299988, + "p95": 191.23199582099915, + "p99": 210.87999641895294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.52000135183334, + "p90": 87.52000331878662, + "p95": 97.98400104045868, + "p99": 106.1440035700798 + }, + "combine": { + "p50": 75.52000135183334, + "p90": 87.52000331878662, + "p95": 97.98400104045868, + "p99": 106.1440035700798 + }, + "roundtrip": { + "p50": 75.52000135183334, + "p90": 87.52000331878662, + "p95": 97.98400104045868, + "p99": 106.1440035700798 + }, + "isolatedSum": { + "p50": 151.0400027036667, + "p90": 175.04000663757324, + "p95": 195.96800208091736, + "p99": 212.2880071401596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.87199658155441, + "p90": 92.92799979448318, + "p95": 100.80000013113022, + "p99": 121.44000083208084 + }, + "combine": { + "p50": 75.87199658155441, + "p90": 92.92799979448318, + "p95": 100.80000013113022, + "p99": 121.44000083208084 + }, + "roundtrip": { + "p50": 75.87199658155441, + "p90": 92.92799979448318, + "p95": 100.80000013113022, + "p99": 121.44000083208084 + }, + "isolatedSum": { + "p50": 151.74399316310883, + "p90": 185.85599958896637, + "p95": 201.60000026226044, + "p99": 
242.88000166416168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 78.33600044250488, + "p90": 92.83199906349182, + "p95": 101.3759970664978, + "p99": 111.23199760913849 + }, + "combine": { + "p50": 78.33600044250488, + "p90": 92.83199906349182, + "p95": 101.3759970664978, + "p99": 111.23199760913849 + }, + "roundtrip": { + "p50": 78.33600044250488, + "p90": 92.83199906349182, + "p95": 101.3759970664978, + "p99": 111.23199760913849 + }, + "isolatedSum": { + "p50": 156.67200088500977, + "p90": 185.66399812698364, + "p95": 202.7519941329956, + "p99": 222.46399521827698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.87999844551086, + "p90": 102.33599692583084, + "p95": 110.6560006737709, + "p99": 122.17599898576736 + }, + "combine": { + "p50": 86.87999844551086, + "p90": 102.33599692583084, + "p95": 110.6560006737709, + "p99": 122.17599898576736 + }, + "roundtrip": { + "p50": 86.87999844551086, + "p90": 102.33599692583084, + "p95": 110.6560006737709, + "p99": 122.17599898576736 + }, + "isolatedSum": { + "p50": 173.75999689102173, + "p90": 204.67199385166168, + "p95": 221.3120013475418, + "p99": 244.35199797153473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bac5234", + "identity": 
"b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b200_e560dec2", + "comparisonKey": "ad5174a48e320903", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:08:00.796295+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": 
"2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 63.519999384880066, + "p90": 77.11999863386154, + "p95": 88.41600269079208, + "p99": 98.08000177145004 + }, + "combine": { + "p50": 63.519999384880066, + "p90": 77.11999863386154, + "p95": 88.41600269079208, + "p99": 98.08000177145004 + }, + "roundtrip": { + "p50": 63.519999384880066, + "p90": 77.11999863386154, + "p95": 88.41600269079208, + "p99": 98.08000177145004 + }, + "isolatedSum": { + "p50": 127.03999876976013, + "p90": 154.23999726772308, + "p95": 176.83200538158417, + "p99": 196.16000354290009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.08799707889557, + "p90": 87.00799942016602, + "p95": 96.47999703884125, + "p99": 106.01600259542465 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 87.00799942016602, + "p95": 96.47999703884125, + "p99": 106.01600259542465 + }, + "roundtrip": { + "p50": 69.08799707889557, + "p90": 87.00799942016602, + "p95": 96.47999703884125, + "p99": 106.01600259542465 + }, + "isolatedSum": { + "p50": 138.17599415779114, + "p90": 174.01599884033203, + "p95": 192.9599940776825, + "p99": 212.0320051908493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.10399752855301, + "p90": 89.63199704885483, + "p95": 100.832000374794, + "p99": 114.14399743080139 + }, + "combine": { + "p50": 71.10399752855301, + "p90": 89.63199704885483, + "p95": 100.832000374794, + 
"p99": 114.14399743080139 + }, + "roundtrip": { + "p50": 71.10399752855301, + "p90": 89.63199704885483, + "p95": 100.832000374794, + "p99": 114.14399743080139 + }, + "isolatedSum": { + "p50": 142.20799505710602, + "p90": 179.26399409770966, + "p95": 201.664000749588, + "p99": 228.28799486160278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.41600006818771, + "p90": 86.33600175380707, + "p95": 95.13600170612335, + "p99": 106.97600245475769 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 86.33600175380707, + "p95": 95.13600170612335, + "p99": 106.97600245475769 + }, + "roundtrip": { + "p50": 72.41600006818771, + "p90": 86.33600175380707, + "p95": 95.13600170612335, + "p99": 106.97600245475769 + }, + "isolatedSum": { + "p50": 144.83200013637543, + "p90": 172.67200350761414, + "p95": 190.2720034122467, + "p99": 213.95200490951538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.17599856853485, + "p90": 89.59999680519104, + "p95": 100.22400319576263, + "p99": 109.50399935245514 + }, + "combine": { + "p50": 74.17599856853485, + "p90": 89.59999680519104, + "p95": 100.22400319576263, + "p99": 109.50399935245514 + }, + "roundtrip": { + "p50": 74.17599856853485, + "p90": 89.59999680519104, + "p95": 100.22400319576263, + "p99": 109.50399935245514 + }, + "isolatedSum": { + "p50": 148.3519971370697, + "p90": 179.19999361038208, + "p95": 200.44800639152527, + "p99": 219.00799870491028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + 
"combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.23199915885925, + "p90": 95.83999961614609, + "p95": 103.00800204277039, + "p99": 117.40799993276596 + }, + "combine": { + "p50": 75.23199915885925, + "p90": 95.83999961614609, + "p95": 103.00800204277039, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 75.23199915885925, + "p90": 95.83999961614609, + "p95": 103.00800204277039, + "p99": 117.40799993276596 + }, + "isolatedSum": { + "p50": 150.4639983177185, + "p90": 191.67999923229218, + "p95": 206.01600408554077, + "p99": 234.81599986553192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.60800218582153, + "p90": 91.80799871683121, + "p95": 106.62399977445602, + "p99": 113.50400000810623 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 91.80799871683121, + "p95": 106.62399977445602, + "p99": 113.50400000810623 + }, + "roundtrip": { + "p50": 76.60800218582153, + "p90": 91.80799871683121, + "p95": 106.62399977445602, + "p99": 113.50400000810623 + }, + "isolatedSum": { + "p50": 153.21600437164307, + "p90": 183.61599743366241, + "p95": 213.24799954891205, + "p99": 227.00800001621246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.69600367546082, + "p90": 99.71199929714203, + "p95": 109.40799862146378, + "p99": 127.96799838542938 + }, + 
"combine": { + "p50": 81.69600367546082, + "p90": 99.71199929714203, + "p95": 109.40799862146378, + "p99": 127.96799838542938 + }, + "roundtrip": { + "p50": 81.69600367546082, + "p90": 99.71199929714203, + "p95": 109.40799862146378, + "p99": 127.96799838542938 + }, + "isolatedSum": { + "p50": 163.39200735092163, + "p90": 199.42399859428406, + "p95": 218.81599724292755, + "p99": 255.93599677085876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bcd487b8", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_c1dbcdcb", + "comparisonKey": "a4f4f8faecc70231", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:35:21.931620+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": 
"not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 67.90400296449661, + "p90": 81.05599880218506, + "p95": 93.40800344944, + "p99": 103.20000350475311 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 81.05599880218506, + "p95": 93.40800344944, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 67.90400296449661, + "p90": 81.05599880218506, + "p95": 93.40800344944, + "p99": 103.20000350475311 + }, + "isolatedSum": { + "p50": 135.80800592899323, + "p90": 162.11199760437012, + "p95": 186.81600689888, + "p99": 206.40000700950623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.18399780988693, + "p90": 82.68799632787704, + "p95": 98.1760025024414, + "p99": 102.78400033712387 + }, + "combine": { + "p50": 69.18399780988693, + "p90": 82.68799632787704, + "p95": 98.1760025024414, + "p99": 102.78400033712387 + }, + "roundtrip": { + "p50": 69.18399780988693, + "p90": 82.68799632787704, + "p95": 98.1760025024414, + "p99": 102.78400033712387 + }, + 
"isolatedSum": { + "p50": 138.36799561977386, + "p90": 165.3759926557541, + "p95": 196.3520050048828, + "p99": 205.56800067424774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.91200351715088, + "p90": 82.97599852085114, + "p95": 99.74399954080582, + "p99": 107.71200060844421 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 82.97599852085114, + "p95": 99.74399954080582, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 70.91200351715088, + "p90": 82.97599852085114, + "p95": 99.74399954080582, + "p99": 107.71200060844421 + }, + "isolatedSum": { + "p50": 141.82400703430176, + "p90": 165.95199704170227, + "p95": 199.48799908161163, + "p99": 215.42400121688843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.4480003118515, + "p90": 90.71999788284302, + "p95": 102.49599814414978, + "p99": 110.17599701881409 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 90.71999788284302, + "p95": 102.49599814414978, + "p99": 110.17599701881409 + }, + "roundtrip": { + "p50": 72.4480003118515, + "p90": 90.71999788284302, + "p95": 102.49599814414978, + "p99": 110.17599701881409 + }, + "isolatedSum": { + "p50": 144.896000623703, + "p90": 181.43999576568604, + "p95": 204.99199628829956, + "p99": 220.35199403762817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.40000027418137, + "p90": 90.81599861383438, + "p95": 104.032002389431, + "p99": 110.55999994277954 + }, + "combine": { + "p50": 74.40000027418137, + "p90": 90.81599861383438, + "p95": 104.032002389431, + "p99": 110.55999994277954 + }, + "roundtrip": { + "p50": 74.40000027418137, + "p90": 90.81599861383438, + "p95": 104.032002389431, + "p99": 110.55999994277954 + }, + "isolatedSum": { + "p50": 148.80000054836273, + "p90": 181.63199722766876, + "p95": 208.064004778862, + "p99": 221.11999988555908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.13599842786789, + "p90": 91.61599725484848, + "p95": 105.05600273609161, + "p99": 113.56800049543381 + }, + "combine": { + "p50": 75.13599842786789, + "p90": 91.61599725484848, + "p95": 105.05600273609161, + "p99": 113.56800049543381 + }, + "roundtrip": { + "p50": 75.13599842786789, + "p90": 91.61599725484848, + "p95": 105.05600273609161, + "p99": 113.56800049543381 + }, + "isolatedSum": { + "p50": 150.27199685573578, + "p90": 183.23199450969696, + "p95": 210.11200547218323, + "p99": 227.13600099086761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.1599987745285, + "p90": 93.53599697351456, + "p95": 105.6319996714592, + "p99": 115.167997777462 + }, + "combine": { + "p50": 76.1599987745285, + "p90": 93.53599697351456, + "p95": 105.6319996714592, + "p99": 115.167997777462 + }, + "roundtrip": { + "p50": 76.1599987745285, + "p90": 
93.53599697351456, + "p95": 105.6319996714592, + "p99": 115.167997777462 + }, + "isolatedSum": { + "p50": 152.319997549057, + "p90": 187.0719939470291, + "p95": 211.2639993429184, + "p99": 230.335995554924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.23200047016144, + "p90": 101.79200023412704, + "p95": 109.63200032711029, + "p99": 131.3920021057129 + }, + "combine": { + "p50": 83.23200047016144, + "p90": 101.79200023412704, + "p95": 109.63200032711029, + "p99": 131.3920021057129 + }, + "roundtrip": { + "p50": 83.23200047016144, + "p90": 101.79200023412704, + "p95": 109.63200032711029, + "p99": 131.3920021057129 + }, + "isolatedSum": { + "p50": 166.46400094032288, + "p90": 203.5840004682541, + "p95": 219.26400065422058, + "p99": 262.7840042114258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ed235b40", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_c2563ab3", + "comparisonKey": "3f7cde64a86477c9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:35:52.303015+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.99200314283371, + "p90": 75.3600001335144, + "p95": 79.19999957084656, + "p99": 90.20800143480301 + }, + "combine": { + "p50": 64.99200314283371, + "p90": 75.3600001335144, + "p95": 79.19999957084656, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 64.99200314283371, + "p90": 75.3600001335144, + "p95": 79.19999957084656, + "p99": 90.20800143480301 + }, + "isolatedSum": { + "p50": 129.98400628566742, + "p90": 150.7200002670288, + "p95": 158.39999914169312, + "p99": 180.41600286960602 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 67.80800223350525, + "p90": 80.99199831485748, + "p95": 82.91199803352356, + "p99": 99.20000284910202 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 80.99199831485748, + "p95": 82.91199803352356, + "p99": 99.20000284910202 + }, + "roundtrip": { + "p50": 67.80800223350525, + "p90": 80.99199831485748, + "p95": 82.91199803352356, + "p99": 99.20000284910202 + }, + "isolatedSum": { + "p50": 135.6160044670105, + "p90": 161.98399662971497, + "p95": 165.82399606704712, + "p99": 198.40000569820404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.00799679756165, + "p90": 77.34400033950806, + "p95": 82.56000280380249, + "p99": 90.52799642086029 + }, + "combine": { + "p50": 71.00799679756165, + "p90": 77.34400033950806, + "p95": 82.56000280380249, + "p99": 90.52799642086029 + }, + "roundtrip": { + "p50": 71.00799679756165, + "p90": 77.34400033950806, + "p95": 82.56000280380249, + "p99": 90.52799642086029 + }, + "isolatedSum": { + "p50": 142.0159935951233, + "p90": 154.6880006790161, + "p95": 165.12000560760498, + "p99": 181.05599284172058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.77600264549255, + "p90": 77.53600180149078, + "p95": 79.71200346946716, + "p99": 
86.7839977145195 + }, + "combine": { + "p50": 71.77600264549255, + "p90": 77.53600180149078, + "p95": 79.71200346946716, + "p99": 86.7839977145195 + }, + "roundtrip": { + "p50": 71.77600264549255, + "p90": 77.53600180149078, + "p95": 79.71200346946716, + "p99": 86.7839977145195 + }, + "isolatedSum": { + "p50": 143.5520052909851, + "p90": 155.07200360298157, + "p95": 159.42400693893433, + "p99": 173.567995429039 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.07999783754349, + "p90": 82.59200304746628, + "p95": 85.88799834251404, + "p99": 96.09600156545639 + }, + "combine": { + "p50": 74.07999783754349, + "p90": 82.59200304746628, + "p95": 85.88799834251404, + "p99": 96.09600156545639 + }, + "roundtrip": { + "p50": 74.07999783754349, + "p90": 82.59200304746628, + "p95": 85.88799834251404, + "p99": 96.09600156545639 + }, + "isolatedSum": { + "p50": 148.15999567508698, + "p90": 165.18400609493256, + "p95": 171.77599668502808, + "p99": 192.19200313091278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.86399668455124, + "p90": 84.28800106048584, + "p95": 89.4400030374527, + "p99": 104.38399761915207 + }, + "combine": { + "p50": 76.86399668455124, + "p90": 84.28800106048584, + "p95": 89.4400030374527, + "p99": 104.38399761915207 + }, + "roundtrip": { + "p50": 76.86399668455124, + "p90": 84.28800106048584, + "p95": 89.4400030374527, + "p99": 104.38399761915207 + }, + "isolatedSum": { + "p50": 153.72799336910248, + "p90": 168.57600212097168, + "p95": 
178.8800060749054, + "p99": 208.76799523830414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.19199901819229, + "p90": 82.8159973025322, + "p95": 87.23200112581253, + "p99": 94.78399902582169 + }, + "combine": { + "p50": 76.19199901819229, + "p90": 82.8159973025322, + "p95": 87.23200112581253, + "p99": 94.78399902582169 + }, + "roundtrip": { + "p50": 76.19199901819229, + "p90": 82.8159973025322, + "p95": 87.23200112581253, + "p99": 94.78399902582169 + }, + "isolatedSum": { + "p50": 152.38399803638458, + "p90": 165.6319946050644, + "p95": 174.46400225162506, + "p99": 189.56799805164337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.8159973025322, + "p90": 92.57599711418152, + "p95": 97.28000313043594, + "p99": 107.744000852108 + }, + "combine": { + "p50": 82.8159973025322, + "p90": 92.57599711418152, + "p95": 97.28000313043594, + "p99": 107.744000852108 + }, + "roundtrip": { + "p50": 82.8159973025322, + "p90": 92.57599711418152, + "p95": 97.28000313043594, + "p99": 107.744000852108 + }, + "isolatedSum": { + "p50": 165.6319946050644, + "p90": 185.15199422836304, + "p95": 194.5600062608719, + "p99": 215.488001704216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6141ce00", + "identity": 
"b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_88857293", + "comparisonKey": "ab64306097c1ab68", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:06:27.694464+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.41599875688553, + "p90": 71.6480016708374, + "p95": 76.86399668455124, + "p99": 83.03999900817871 + }, + "combine": { + "p50": 64.41599875688553, + "p90": 71.6480016708374, + "p95": 76.86399668455124, + "p99": 83.03999900817871 + }, + "roundtrip": { + "p50": 64.41599875688553, + "p90": 71.6480016708374, + "p95": 76.86399668455124, + "p99": 83.03999900817871 + }, + "isolatedSum": { + "p50": 128.83199751377106, + "p90": 143.2960033416748, + "p95": 153.72799336910248, + "p99": 166.07999801635742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.80000299215317, + "p90": 85.66399663686752, + "p95": 91.77599847316742, + "p99": 105.34399747848511 + }, + "combine": { + "p50": 72.80000299215317, + "p90": 85.66399663686752, + "p95": 91.77599847316742, + "p99": 105.34399747848511 + }, + "roundtrip": { + "p50": 72.80000299215317, + "p90": 85.66399663686752, + "p95": 91.77599847316742, + "p99": 105.34399747848511 + }, + "isolatedSum": { + "p50": 145.60000598430634, + "p90": 171.32799327373505, + "p95": 183.55199694633484, + "p99": 210.68799495697021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.35199958086014, + "p90": 83.90399813652039, + "p95": 87.13600039482117, + "p99": 101.72799974679947 + }, + "combine": { + "p50": 72.35199958086014, + "p90": 83.90399813652039, + "p95": 87.13600039482117, + "p99": 101.72799974679947 + }, + 
"roundtrip": { + "p50": 72.35199958086014, + "p90": 83.90399813652039, + "p95": 87.13600039482117, + "p99": 101.72799974679947 + }, + "isolatedSum": { + "p50": 144.70399916172028, + "p90": 167.80799627304077, + "p95": 174.27200078964233, + "p99": 203.45599949359894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.5040009021759, + "p90": 82.07999914884567, + "p95": 87.52000331878662, + "p99": 99.93600100278854 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 82.07999914884567, + "p95": 87.52000331878662, + "p99": 99.93600100278854 + }, + "roundtrip": { + "p50": 73.5040009021759, + "p90": 82.07999914884567, + "p95": 87.52000331878662, + "p99": 99.93600100278854 + }, + "isolatedSum": { + "p50": 147.0080018043518, + "p90": 164.15999829769135, + "p95": 175.04000663757324, + "p99": 199.8720020055771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.3600001335144, + "p90": 83.10399949550629, + "p95": 87.55200356245041, + "p99": 91.58399701118469 + }, + "combine": { + "p50": 75.3600001335144, + "p90": 83.10399949550629, + "p95": 87.55200356245041, + "p99": 91.58399701118469 + }, + "roundtrip": { + "p50": 75.3600001335144, + "p90": 83.10399949550629, + "p95": 87.55200356245041, + "p99": 91.58399701118469 + }, + "isolatedSum": { + "p50": 150.7200002670288, + "p90": 166.20799899101257, + "p95": 175.10400712490082, + "p99": 183.16799402236938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + 
"fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.92799717187881, + "p90": 90.62399715185165, + "p95": 103.67999970912933, + "p99": 120.67200243473053 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 90.62399715185165, + "p95": 103.67999970912933, + "p99": 120.67200243473053 + }, + "roundtrip": { + "p50": 76.92799717187881, + "p90": 90.62399715185165, + "p95": 103.67999970912933, + "p99": 120.67200243473053 + }, + "isolatedSum": { + "p50": 153.85599434375763, + "p90": 181.2479943037033, + "p95": 207.35999941825867, + "p99": 241.34400486946106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.99199765920639, + "p90": 82.91199803352356, + "p95": 87.20000088214874, + "p99": 93.91999989748001 + }, + "combine": { + "p50": 76.99199765920639, + "p90": 82.91199803352356, + "p95": 87.20000088214874, + "p99": 93.91999989748001 + }, + "roundtrip": { + "p50": 76.99199765920639, + "p90": 82.91199803352356, + "p95": 87.20000088214874, + "p99": 93.91999989748001 + }, + "isolatedSum": { + "p50": 153.98399531841278, + "p90": 165.82399606704712, + "p95": 174.40000176429749, + "p99": 187.83999979496002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.94399827718735, + "p90": 103.4879982471466, + "p95": 119.99999731779099, + "p99": 133.85599851608276 + }, + "combine": { + "p50": 
82.94399827718735, + "p90": 103.4879982471466, + "p95": 119.99999731779099, + "p99": 133.85599851608276 + }, + "roundtrip": { + "p50": 82.94399827718735, + "p90": 103.4879982471466, + "p95": 119.99999731779099, + "p99": 133.85599851608276 + }, + "isolatedSum": { + "p50": 165.8879965543747, + "p90": 206.9759964942932, + "p95": 239.99999463558197, + "p99": 267.7119970321655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ac9c1e05", + "identity": "b200|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_db5e035b", + "comparisonKey": "dca623f1a22da85f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:53:02.071821+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 77.15199887752533, + "p90": 91.45600348711014, + "p95": 94.62399780750275, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 91.45600348711014, + "p95": 94.62399780750275, + "p99": 110.81600189208984 + }, + "roundtrip": { + "p50": 77.15199887752533, + "p90": 91.45600348711014, + "p95": 94.62399780750275, + "p99": 110.81600189208984 + }, + "isolatedSum": { + "p50": 154.30399775505066, + "p90": 182.91200697422028, + "p95": 189.2479956150055, + "p99": 221.6320037841797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.92799782752991, + "p90": 87.42400258779526, + "p95": 90.30400216579437, + "p99": 95.10400146245956 + }, + "combine": { + "p50": 80.92799782752991, + "p90": 87.42400258779526, + "p95": 90.30400216579437, + "p99": 95.10400146245956 + }, + "roundtrip": { + "p50": 80.92799782752991, + "p90": 87.42400258779526, + "p95": 90.30400216579437, + "p99": 95.10400146245956 + }, + "isolatedSum": { + "p50": 161.85599565505981, + "p90": 174.84800517559052, + 
"p95": 180.60800433158875, + "p99": 190.20800292491913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.95199817419052, + "p90": 94.81599926948547, + "p95": 98.08000177145004, + "p99": 111.93600296974182 + }, + "combine": { + "p50": 81.95199817419052, + "p90": 94.81599926948547, + "p95": 98.08000177145004, + "p99": 111.93600296974182 + }, + "roundtrip": { + "p50": 81.95199817419052, + "p90": 94.81599926948547, + "p95": 98.08000177145004, + "p99": 111.93600296974182 + }, + "isolatedSum": { + "p50": 163.90399634838104, + "p90": 189.63199853897095, + "p95": 196.16000354290009, + "p99": 223.87200593948364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.91999793052673, + "p90": 89.56799656152725, + "p95": 96.19200229644775, + "p99": 106.04800283908844 + }, + "combine": { + "p50": 81.91999793052673, + "p90": 89.56799656152725, + "p95": 96.19200229644775, + "p99": 106.04800283908844 + }, + "roundtrip": { + "p50": 81.91999793052673, + "p90": 89.56799656152725, + "p95": 96.19200229644775, + "p99": 106.04800283908844 + }, + "isolatedSum": { + "p50": 163.83999586105347, + "p90": 179.1359931230545, + "p95": 192.3840045928955, + "p99": 212.09600567817688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
85.02399921417236, + "p90": 92.19200164079666, + "p95": 98.55999797582626, + "p99": 108.64000022411346 + }, + "combine": { + "p50": 85.02399921417236, + "p90": 92.19200164079666, + "p95": 98.55999797582626, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 85.02399921417236, + "p90": 92.19200164079666, + "p95": 98.55999797582626, + "p99": 108.64000022411346 + }, + "isolatedSum": { + "p50": 170.04799842834473, + "p90": 184.38400328159332, + "p95": 197.11999595165253, + "p99": 217.28000044822693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.82399785518646, + "p90": 92.19200164079666, + "p95": 97.18400239944458, + "p99": 103.96800190210342 + }, + "combine": { + "p50": 85.82399785518646, + "p90": 92.19200164079666, + "p95": 97.18400239944458, + "p99": 103.96800190210342 + }, + "roundtrip": { + "p50": 85.82399785518646, + "p90": 92.19200164079666, + "p95": 97.18400239944458, + "p99": 103.96800190210342 + }, + "isolatedSum": { + "p50": 171.64799571037292, + "p90": 184.38400328159332, + "p95": 194.36800479888916, + "p99": 207.93600380420685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 86.65599673986435, + "p90": 93.08800101280212, + "p95": 98.01600128412247, + "p99": 105.6319996714592 + }, + "combine": { + "p50": 86.65599673986435, + "p90": 93.08800101280212, + "p95": 98.01600128412247, + "p99": 105.6319996714592 + }, + "roundtrip": { + "p50": 86.65599673986435, + "p90": 93.08800101280212, + "p95": 98.01600128412247, + "p99": 105.6319996714592 
+ }, + "isolatedSum": { + "p50": 173.3119934797287, + "p90": 186.17600202560425, + "p95": 196.03200256824493, + "p99": 211.2639993429184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.20800143480301, + "p90": 96.99200093746185, + "p95": 102.84800082445145, + "p99": 112.28799819946289 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 96.99200093746185, + "p95": 102.84800082445145, + "p99": 112.28799819946289 + }, + "roundtrip": { + "p50": 90.20800143480301, + "p90": 96.99200093746185, + "p95": 102.84800082445145, + "p99": 112.28799819946289 + }, + "isolatedSum": { + "p50": 180.41600286960602, + "p90": 193.9840018749237, + "p95": 205.6960016489029, + "p99": 224.57599639892578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6bb3ddfb", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_9f2abd18", + "comparisonKey": "1dedff034d7bffa3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:52:31.835509+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16", + 
"model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.20800012350082, + "p90": 87.71199733018875, + "p95": 90.08000046014786, + "p99": 99.10400211811066 + }, + "combine": { + "p50": 82.20800012350082, + "p90": 87.71199733018875, + "p95": 90.08000046014786, + "p99": 99.10400211811066 + }, + "roundtrip": { + "p50": 82.20800012350082, + "p90": 87.71199733018875, + "p95": 90.08000046014786, + "p99": 99.10400211811066 + }, + "isolatedSum": { + "p50": 164.41600024700165, + "p90": 175.4239946603775, + "p95": 180.16000092029572, + "p99": 198.2080042362213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 
8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.4879982471466, + "p90": 107.13600367307663, + "p95": 111.13599687814713, + "p99": 118.94399672746658 + }, + "combine": { + "p50": 103.4879982471466, + "p90": 107.13600367307663, + "p95": 111.13599687814713, + "p99": 118.94399672746658 + }, + "roundtrip": { + "p50": 103.4879982471466, + "p90": 107.13600367307663, + "p95": 111.13599687814713, + "p99": 118.94399672746658 + }, + "isolatedSum": { + "p50": 206.9759964942932, + "p90": 214.27200734615326, + "p95": 222.27199375629425, + "p99": 237.88799345493317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 161.53599321842194, + "p90": 164.86400365829468, + "p95": 165.98400473594666, + "p99": 171.2000072002411 + }, + "combine": { + "p50": 161.53599321842194, + "p90": 164.86400365829468, + "p95": 165.98400473594666, + "p99": 171.2000072002411 + }, + "roundtrip": { + "p50": 161.53599321842194, + "p90": 164.86400365829468, + "p95": 165.98400473594666, + "p99": 171.2000072002411 + }, + "isolatedSum": { + "p50": 323.0719864368439, + "p90": 329.72800731658936, + "p95": 331.9680094718933, + "p99": 342.4000144004822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 278.6239981651306, + "p90": 282.368004322052, + "p95": 284.2240035533905, + "p99": 291.1680042743683 + }, + "combine": { + "p50": 278.6239981651306, + "p90": 
282.368004322052, + "p95": 284.2240035533905, + "p99": 291.1680042743683 + }, + "roundtrip": { + "p50": 278.6239981651306, + "p90": 282.368004322052, + "p95": 284.2240035533905, + "p99": 291.1680042743683 + }, + "isolatedSum": { + "p50": 557.2479963302612, + "p90": 564.736008644104, + "p95": 568.448007106781, + "p99": 582.3360085487366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 506.816029548645, + "p90": 510.8799934387207, + "p95": 513.0559802055359, + "p99": 519.0079808235168 + }, + "combine": { + "p50": 506.816029548645, + "p90": 510.8799934387207, + "p95": 513.0559802055359, + "p99": 519.0079808235168 + }, + "roundtrip": { + "p50": 506.816029548645, + "p90": 510.8799934387207, + "p95": 513.0559802055359, + "p99": 519.0079808235168 + }, + "isolatedSum": { + "p50": 1013.63205909729, + "p90": 1021.7599868774414, + "p95": 1026.1119604110718, + "p99": 1038.0159616470337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 953.823983669281, + "p90": 957.6640129089355, + "p95": 959.168016910553, + "p99": 962.6880288124084 + }, + "combine": { + "p50": 953.823983669281, + "p90": 957.6640129089355, + "p95": 959.168016910553, + "p99": 962.6880288124084 + }, + "roundtrip": { + "p50": 953.823983669281, + "p90": 957.6640129089355, + "p95": 959.168016910553, + "p99": 962.6880288124084 + }, + "isolatedSum": { + "p50": 1907.647967338562, + "p90": 1915.328025817871, + "p95": 1918.336033821106, + "p99": 1925.376057624817 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fa63d706", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b200_83f65160", + "comparisonKey": "d5545824d1bf6087", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:54:36.228129+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.81599861383438, + "p90": 94.78399902582169, + "p95": 98.94400089979172, + "p99": 103.4879982471466 + }, + "combine": { + "p50": 90.81599861383438, + "p90": 94.78399902582169, + "p95": 98.94400089979172, + "p99": 103.4879982471466 + }, + "roundtrip": { + "p50": 90.81599861383438, + "p90": 94.78399902582169, + "p95": 98.94400089979172, + "p99": 103.4879982471466 + }, + "isolatedSum": { + "p50": 181.63199722766876, + "p90": 189.56799805164337, + "p95": 197.88800179958344, + "p99": 206.9759964942932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 128.89599800109863, + "p90": 131.00799918174744, + "p95": 132.03200697898865, + "p99": 138.46400380134583 + }, + "combine": { + "p50": 128.89599800109863, + "p90": 131.00799918174744, + "p95": 132.03200697898865, + "p99": 138.46400380134583 + }, + "roundtrip": { + "p50": 128.89599800109863, + "p90": 131.00799918174744, + "p95": 132.03200697898865, + "p99": 138.46400380134583 + }, + "isolatedSum": { + "p50": 257.79199600219727, + "p90": 262.0159983634949, + "p95": 264.0640139579773, + "p99": 276.92800760269165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 217.50399470329285, + "p90": 220.35199403762817, + "p95": 221.88800573349, + "p99": 228.32000255584717 + }, + "combine": { + "p50": 217.50399470329285, + "p90": 220.35199403762817, + "p95": 221.88800573349, + "p99": 228.32000255584717 + }, + "roundtrip": { + "p50": 217.50399470329285, + "p90": 220.35199403762817, + "p95": 221.88800573349, + "p99": 228.32000255584717 + }, + "isolatedSum": { + "p50": 435.0079894065857, + "p90": 440.70398807525635, + "p95": 443.77601146698, + "p99": 456.64000511169434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 391.3919925689697, + "p90": 395.35999298095703, + "p95": 397.69598841667175, + "p99": 403.77599000930786 + }, + "combine": { + "p50": 391.3919925689697, + "p90": 395.35999298095703, + "p95": 397.69598841667175, + "p99": 403.77599000930786 + }, + "roundtrip": { + "p50": 391.3919925689697, + "p90": 395.35999298095703, + "p95": 397.69598841667175, + "p99": 403.77599000930786 + }, + "isolatedSum": { + "p50": 782.7839851379395, + "p90": 790.7199859619141, + "p95": 795.3919768333435, + "p99": 807.5519800186157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 730.783998966217, + "p90": 734.5920205116272, + "p95": 735.8080148696899, + "p99": 741.536021232605 + }, + "combine": { + "p50": 730.783998966217, + "p90": 734.5920205116272, + "p95": 735.8080148696899, + "p99": 741.536021232605 + }, + "roundtrip": { + "p50": 730.783998966217, + "p90": 
734.5920205116272, + "p95": 735.8080148696899, + "p99": 741.536021232605 + }, + "isolatedSum": { + "p50": 1461.567997932434, + "p90": 1469.1840410232544, + "p95": 1471.6160297393799, + "p99": 1483.07204246521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1402.9439687728882, + "p90": 1407.1680307388306, + "p95": 1409.0880155563354, + "p99": 1413.6320352554321 + }, + "combine": { + "p50": 1402.9439687728882, + "p90": 1407.1680307388306, + "p95": 1409.0880155563354, + "p99": 1413.6320352554321 + }, + "roundtrip": { + "p50": 1402.9439687728882, + "p90": 1407.1680307388306, + "p95": 1409.0880155563354, + "p99": 1413.6320352554321 + }, + "isolatedSum": { + "p50": 2805.8879375457764, + "p90": 2814.336061477661, + "p95": 2818.176031112671, + "p99": 2827.2640705108643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-00c5518c", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b200_b47c3e43", + "comparisonKey": "a53d12cb96747fc4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:55:36.368901+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 67.96800345182419, + "p90": 80.38400113582611, + "p95": 91.42400324344635, + "p99": 100.35199671983719 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 80.38400113582611, + "p95": 91.42400324344635, + "p99": 100.35199671983719 + }, + "roundtrip": { + "p50": 67.96800345182419, + "p90": 80.38400113582611, + "p95": 91.42400324344635, + "p99": 100.35199671983719 + }, + "isolatedSum": { + "p50": 135.93600690364838, + "p90": 160.76800227165222, + "p95": 182.8480064868927, + "p99": 200.70399343967438 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 73.31199944019318, + "p90": 82.68799632787704, + "p95": 94.11200135946274, + "p99": 106.39999806880951 + }, + "combine": { + "p50": 73.31199944019318, + "p90": 82.68799632787704, + "p95": 94.11200135946274, + "p99": 106.39999806880951 + }, + "roundtrip": { + "p50": 73.31199944019318, + "p90": 82.68799632787704, + "p95": 94.11200135946274, + "p99": 106.39999806880951 + }, + "isolatedSum": { + "p50": 146.62399888038635, + "p90": 165.3759926557541, + "p95": 188.22400271892548, + "p99": 212.79999613761902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 135.1040005683899, + "p90": 140.09599387645721, + "p95": 144.28800344467163, + "p99": 147.87200093269348 + }, + "combine": { + "p50": 135.1040005683899, + "p90": 140.09599387645721, + "p95": 144.28800344467163, + "p99": 147.87200093269348 + }, + "roundtrip": { + "p50": 135.1040005683899, + "p90": 140.09599387645721, + "p95": 144.28800344467163, + "p99": 147.87200093269348 + }, + "isolatedSum": { + "p50": 270.2080011367798, + "p90": 280.19198775291443, + "p95": 288.57600688934326, + "p99": 295.74400186538696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2f2aeddd", + "identity": 
"b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b200_03452aae", + "comparisonKey": "feb15491c267c123", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:05:56.911518+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 76.54400169849396, + "p90": 81.69600367546082, + "p95": 85.82399785518646, + "p99": 91.90399944782257 + }, + "combine": { + "p50": 76.54400169849396, + "p90": 81.69600367546082, + "p95": 85.82399785518646, + "p99": 91.90399944782257 + }, + "roundtrip": { + "p50": 76.54400169849396, + "p90": 81.69600367546082, + "p95": 85.82399785518646, + "p99": 91.90399944782257 + }, + "isolatedSum": { + "p50": 153.08800339698792, + "p90": 163.39200735092163, + "p95": 171.64799571037292, + "p99": 183.80799889564514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 82.75199681520462, + "p90": 87.93599903583527, + "p95": 91.48799628019333, + "p99": 98.81599992513657 + }, + "combine": { + "p50": 82.75199681520462, + "p90": 87.93599903583527, + "p95": 91.48799628019333, + "p99": 98.81599992513657 + }, + "roundtrip": { + "p50": 82.75199681520462, + "p90": 87.93599903583527, + "p95": 91.48799628019333, + "p99": 98.81599992513657 + }, + "isolatedSum": { + "p50": 165.50399363040924, + "p90": 175.87199807167053, + "p95": 182.97599256038666, + "p99": 197.63199985027313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 104.09600287675858, + "p90": 110.6560006737709, + "p95": 112.83200234174728, + "p99": 126.39999389648438 + }, + "combine": { + "p50": 104.09600287675858, + "p90": 110.6560006737709, + "p95": 112.83200234174728, + "p99": 126.39999389648438 
+ }, + "roundtrip": { + "p50": 104.09600287675858, + "p90": 110.6560006737709, + "p95": 112.83200234174728, + "p99": 126.39999389648438 + }, + "isolatedSum": { + "p50": 208.19200575351715, + "p90": 221.3120013475418, + "p95": 225.66400468349457, + "p99": 252.79998779296875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 164.8319959640503, + "p90": 166.9120043516159, + "p95": 167.52000153064728, + "p99": 172.2559928894043 + }, + "combine": { + "p50": 164.8319959640503, + "p90": 166.9120043516159, + "p95": 167.52000153064728, + "p99": 172.2559928894043 + }, + "roundtrip": { + "p50": 164.8319959640503, + "p90": 166.9120043516159, + "p95": 167.52000153064728, + "p99": 172.2559928894043 + }, + "isolatedSum": { + "p50": 329.6639919281006, + "p90": 333.8240087032318, + "p95": 335.04000306129456, + "p99": 344.5119857788086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 290.6239926815033, + "p90": 293.2479977607727, + "p95": 294.46399211883545, + "p99": 299.1679906845093 + }, + "combine": { + "p50": 290.6239926815033, + "p90": 293.2479977607727, + "p95": 294.46399211883545, + "p99": 299.1679906845093 + }, + "roundtrip": { + "p50": 290.6239926815033, + "p90": 293.2479977607727, + "p95": 294.46399211883545, + "p99": 299.1679906845093 + }, + "isolatedSum": { + "p50": 581.2479853630066, + "p90": 586.4959955215454, + "p95": 588.9279842376709, + "p99": 598.3359813690186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 
469762048, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 543.071985244751, + "p90": 545.8880066871643, + "p95": 547.0399856567383, + "p99": 550.1760244369507 + }, + "combine": { + "p50": 543.071985244751, + "p90": 545.8880066871643, + "p95": 547.0399856567383, + "p99": 550.1760244369507 + }, + "roundtrip": { + "p50": 543.071985244751, + "p90": 545.8880066871643, + "p95": 547.0399856567383, + "p99": 550.1760244369507 + }, + "isolatedSum": { + "p50": 1086.143970489502, + "p90": 1091.7760133743286, + "p95": 1094.0799713134766, + "p99": 1100.3520488739014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-059bfc96", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b200_0f397a9a", + "comparisonKey": "e68b4957c0db22e3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:03:51.780105+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.43200248479843, + "p90": 94.24000233411789, + "p95": 98.65599870681763, + "p99": 105.95200210809708 + }, + "combine": { + "p50": 86.43200248479843, + "p90": 94.24000233411789, + "p95": 98.65599870681763, + "p99": 105.95200210809708 + }, + "roundtrip": { + "p50": 86.43200248479843, + "p90": 94.24000233411789, + "p95": 98.65599870681763, + "p99": 105.95200210809708 + }, + "isolatedSum": { + "p50": 172.86400496959686, + "p90": 188.48000466823578, + "p95": 197.31199741363525, + "p99": 211.90400421619415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 119.77600306272507, + "p90": 121.5360015630722, 
+ "p95": 122.65600264072418, + "p99": 129.85600531101227 + }, + "combine": { + "p50": 119.77600306272507, + "p90": 121.5360015630722, + "p95": 122.65600264072418, + "p99": 129.85600531101227 + }, + "roundtrip": { + "p50": 119.77600306272507, + "p90": 121.5360015630722, + "p95": 122.65600264072418, + "p99": 129.85600531101227 + }, + "isolatedSum": { + "p50": 239.55200612545013, + "p90": 243.0720031261444, + "p95": 245.31200528144836, + "p99": 259.71201062202454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 201.664000749588, + "p90": 204.22400534152985, + "p95": 209.1519981622696, + "p99": 212.351992726326 + }, + "combine": { + "p50": 201.664000749588, + "p90": 204.22400534152985, + "p95": 209.1519981622696, + "p99": 212.351992726326 + }, + "roundtrip": { + "p50": 201.664000749588, + "p90": 204.22400534152985, + "p95": 209.1519981622696, + "p99": 212.351992726326 + }, + "isolatedSum": { + "p50": 403.328001499176, + "p90": 408.4480106830597, + "p95": 418.3039963245392, + "p99": 424.703985452652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 369.4719970226288, + "p90": 372.8959858417511, + "p95": 373.7280070781708, + "p99": 377.3120045661926 + }, + "combine": { + "p50": 369.4719970226288, + "p90": 372.8959858417511, + "p95": 373.7280070781708, + "p99": 377.3120045661926 + }, + "roundtrip": { + "p50": 369.4719970226288, + "p90": 372.8959858417511, + "p95": 373.7280070781708, + "p99": 377.3120045661926 + }, + "isolatedSum": { + "p50": 
738.9439940452576, + "p90": 745.7919716835022, + "p95": 747.4560141563416, + "p99": 754.6240091323853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 739.6159768104553, + "p90": 745.0559735298157, + "p95": 746.9120025634766, + "p99": 751.5519857406616 + }, + "combine": { + "p50": 739.6159768104553, + "p90": 745.0559735298157, + "p95": 746.9120025634766, + "p99": 751.5519857406616 + }, + "roundtrip": { + "p50": 739.6159768104553, + "p90": 745.0559735298157, + "p95": 746.9120025634766, + "p99": 751.5519857406616 + }, + "isolatedSum": { + "p50": 1479.2319536209106, + "p90": 1490.1119470596313, + "p95": 1493.8240051269531, + "p99": 1503.1039714813232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1489.9519681930542, + "p90": 1494.9760437011719, + "p95": 1496.6720342636108, + "p99": 1499.7440576553345 + }, + "combine": { + "p50": 1489.9519681930542, + "p90": 1494.9760437011719, + "p95": 1496.6720342636108, + "p99": 1499.7440576553345 + }, + "roundtrip": { + "p50": 1489.9519681930542, + "p90": 1494.9760437011719, + "p95": 1496.6720342636108, + "p99": 1499.7440576553345 + }, + "isolatedSum": { + "p50": 2979.9039363861084, + "p90": 2989.9520874023438, + "p95": 2993.3440685272217, + "p99": 2999.488115310669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-246dbded", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b200_52da9154", + "comparisonKey": "5dff7a6b02b16db2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:39:04.542592+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.91999793052673, + "p90": 91.90399944782257, + "p95": 100.99200159311295, + "p99": 116.22399836778641 + }, + "combine": { + "p50": 81.91999793052673, + "p90": 91.90399944782257, + "p95": 100.99200159311295, + "p99": 116.22399836778641 + }, + "roundtrip": { + "p50": 81.91999793052673, + "p90": 91.90399944782257, + "p95": 100.99200159311295, + "p99": 116.22399836778641 + }, + "isolatedSum": { + "p50": 163.83999586105347, + "p90": 183.80799889564514, + "p95": 201.9840031862259, + "p99": 232.44799673557281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.42399775981903, + "p90": 114.14399743080139, + "p95": 123.48800152540207, + "p99": 131.3599944114685 + }, + "combine": { + "p50": 103.42399775981903, + "p90": 114.14399743080139, + "p95": 123.48800152540207, + "p99": 131.3599944114685 + }, + "roundtrip": { + "p50": 103.42399775981903, + "p90": 114.14399743080139, + "p95": 123.48800152540207, + "p99": 131.3599944114685 + }, + "isolatedSum": { + "p50": 206.84799551963806, + "p90": 228.28799486160278, + "p95": 246.97600305080414, + "p99": 262.719988822937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 159.45599973201752, + "p90": 164.76799547672272, + "p95": 
168.38400065898895, + "p99": 172.2559928894043 + }, + "combine": { + "p50": 159.45599973201752, + "p90": 164.76799547672272, + "p95": 168.38400065898895, + "p99": 172.2559928894043 + }, + "roundtrip": { + "p50": 159.45599973201752, + "p90": 164.76799547672272, + "p95": 168.38400065898895, + "p99": 172.2559928894043 + }, + "isolatedSum": { + "p50": 318.91199946403503, + "p90": 329.53599095344543, + "p95": 336.7680013179779, + "p99": 344.5119857788086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 278.27200293540955, + "p90": 284.92799401283264, + "p95": 288.32000494003296, + "p99": 292.1279966831207 + }, + "combine": { + "p50": 278.27200293540955, + "p90": 284.92799401283264, + "p95": 288.32000494003296, + "p99": 292.1279966831207 + }, + "roundtrip": { + "p50": 278.27200293540955, + "p90": 284.92799401283264, + "p95": 288.32000494003296, + "p99": 292.1279966831207 + }, + "isolatedSum": { + "p50": 556.5440058708191, + "p90": 569.8559880256653, + "p95": 576.6400098800659, + "p99": 584.2559933662415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 502.623975276947, + "p90": 508.28802585601807, + "p95": 510.528028011322, + "p99": 513.9200091362 + }, + "combine": { + "p50": 502.623975276947, + "p90": 508.28802585601807, + "p95": 510.528028011322, + "p99": 513.9200091362 + }, + "roundtrip": { + "p50": 502.623975276947, + "p90": 508.28802585601807, + "p95": 510.528028011322, + "p99": 513.9200091362 + }, + "isolatedSum": { + "p50": 
1005.247950553894, + "p90": 1016.5760517120361, + "p95": 1021.056056022644, + "p99": 1027.8400182724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 949.4400024414062, + "p90": 955.6480050086975, + "p95": 960.9919786453247, + "p99": 979.5200228691101 + }, + "combine": { + "p50": 949.4400024414062, + "p90": 955.6480050086975, + "p95": 960.9919786453247, + "p99": 979.5200228691101 + }, + "roundtrip": { + "p50": 949.4400024414062, + "p90": 955.6480050086975, + "p95": 960.9919786453247, + "p99": 979.5200228691101 + }, + "isolatedSum": { + "p50": 1898.8800048828125, + "p90": 1911.296010017395, + "p95": 1921.9839572906494, + "p99": 1959.0400457382202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2a948e1a", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b200_4c717976", + "comparisonKey": "967bc96e603b74b0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:04:54.692360+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · uniform+eplb", + 
"model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 80.19199967384338, + "p90": 87.3280018568039, + "p95": 93.31200271844864, + "p99": 100.12800246477127 + }, + "combine": { + "p50": 80.19199967384338, + "p90": 87.3280018568039, + "p95": 93.31200271844864, + "p99": 100.12800246477127 + }, + "roundtrip": { + "p50": 80.19199967384338, + "p90": 87.3280018568039, + "p95": 93.31200271844864, + "p99": 100.12800246477127 + }, + "isolatedSum": { + "p50": 160.38399934768677, + "p90": 174.6560037136078, + "p95": 186.62400543689728, + "p99": 200.25600492954254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + 
"fanoutMean": 5.248046875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 102.01600193977356, + "p90": 105.05600273609161, + "p95": 107.61599987745285, + "p99": 113.79200220108032 + }, + "combine": { + "p50": 102.01600193977356, + "p90": 105.05600273609161, + "p95": 107.61599987745285, + "p99": 113.79200220108032 + }, + "roundtrip": { + "p50": 102.01600193977356, + "p90": 105.05600273609161, + "p95": 107.61599987745285, + "p99": 113.79200220108032 + }, + "isolatedSum": { + "p50": 204.03200387954712, + "p90": 210.11200547218323, + "p95": 215.2319997549057, + "p99": 227.58400440216064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 160.12799739837646, + "p90": 163.64799439907074, + "p95": 165.3759926557541, + "p99": 171.2000072002411 + }, + "combine": { + "p50": 160.12799739837646, + "p90": 163.64799439907074, + "p95": 165.3759926557541, + "p99": 171.2000072002411 + }, + "roundtrip": { + "p50": 160.12799739837646, + "p90": 163.64799439907074, + "p95": 165.3759926557541, + "p99": 171.2000072002411 + }, + "isolatedSum": { + "p50": 320.25599479675293, + "p90": 327.2959887981415, + "p95": 330.7519853115082, + "p99": 342.4000144004822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 277.6640057563782, + "p90": 282.20799565315247, + "p95": 284.89598631858826, + "p99": 291.9999957084656 + }, + "combine": { 
+ "p50": 277.6640057563782, + "p90": 282.20799565315247, + "p95": 284.89598631858826, + "p99": 291.9999957084656 + }, + "roundtrip": { + "p50": 277.6640057563782, + "p90": 282.20799565315247, + "p95": 284.89598631858826, + "p99": 291.9999957084656 + }, + "isolatedSum": { + "p50": 555.3280115127563, + "p90": 564.4159913063049, + "p95": 569.7919726371765, + "p99": 583.9999914169312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 504.83202934265137, + "p90": 510.1119875907898, + "p95": 512.9280090332031, + "p99": 516.6400074958801 + }, + "combine": { + "p50": 504.83202934265137, + "p90": 510.1119875907898, + "p95": 512.9280090332031, + "p99": 516.6400074958801 + }, + "roundtrip": { + "p50": 504.83202934265137, + "p90": 510.1119875907898, + "p95": 512.9280090332031, + "p99": 516.6400074958801 + }, + "isolatedSum": { + "p50": 1009.6640586853027, + "p90": 1020.2239751815796, + "p95": 1025.8560180664062, + "p99": 1033.2800149917603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 952.5120258331299, + "p90": 956.6400051116943, + "p95": 958.1760168075562, + "p99": 963.7439846992493 + }, + "combine": { + "p50": 952.5120258331299, + "p90": 956.6400051116943, + "p95": 958.1760168075562, + "p99": 963.7439846992493 + }, + "roundtrip": { + "p50": 952.5120258331299, + "p90": 956.6400051116943, + "p95": 958.1760168075562, + "p99": 963.7439846992493 + }, + "isolatedSum": { + "p50": 1905.0240516662598, + "p90": 1913.2800102233887, + "p95": 
1916.3520336151123, + "p99": 1927.4879693984985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-62fa42a9", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_411d30ab", + "comparisonKey": "544547efdc526d2d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:56:41.254484+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.73599702119827, + "p90": 92.38400310277939, + "p95": 96.79999947547913, + "p99": 103.20000350475311 + }, + "combine": { + "p50": 84.73599702119827, + "p90": 92.38400310277939, + "p95": 96.79999947547913, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 84.73599702119827, + "p90": 92.38400310277939, + "p95": 96.79999947547913, + "p99": 103.20000350475311 + }, + "isolatedSum": { + "p50": 169.47199404239655, + "p90": 184.76800620555878, + "p95": 193.59999895095825, + "p99": 206.40000700950623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 116.95999652147293, + "p90": 119.32799965143204, + "p95": 120.67200243473053, + "p99": 128.31999361515045 + }, + "combine": { + "p50": 116.95999652147293, + "p90": 119.32799965143204, + "p95": 120.67200243473053, + "p99": 128.31999361515045 + }, + "roundtrip": { + "p50": 116.95999652147293, + "p90": 119.32799965143204, + "p95": 120.67200243473053, + "p99": 128.31999361515045 + }, + "isolatedSum": { + "p50": 233.91999304294586, + "p90": 238.65599930286407, + "p95": 241.34400486946106, + "p99": 256.6399872303009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 196.22400403022766, + "p90": 199.3280053138733, + "p95": 201.4400064945221, + "p99": 205.6639939546585 + }, + "combine": { + "p50": 196.22400403022766, + "p90": 199.3280053138733, + "p95": 201.4400064945221, + "p99": 205.6639939546585 + }, + "roundtrip": { + "p50": 196.22400403022766, + "p90": 199.3280053138733, + "p95": 201.4400064945221, + "p99": 205.6639939546585 + }, + "isolatedSum": { + "p50": 392.4480080604553, + "p90": 398.6560106277466, + "p95": 402.8800129890442, + "p99": 411.327987909317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 389.69600200653076, + "p90": 393.5360014438629, + "p95": 395.04000544548035, + "p99": 399.29598569869995 + }, + "combine": { + "p50": 389.69600200653076, + "p90": 393.5360014438629, + "p95": 395.04000544548035, + "p99": 399.29598569869995 + }, + "roundtrip": { + "p50": 389.69600200653076, + "p90": 393.5360014438629, + "p95": 395.04000544548035, + "p99": 399.29598569869995 + }, + "isolatedSum": { + "p50": 779.3920040130615, + "p90": 787.0720028877258, + "p95": 790.0800108909607, + "p99": 798.5919713973999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 778.1440019607544, + "p90": 781.3119888305664, + "p95": 782.4959754943848, + "p99": 786.9120240211487 + }, + "combine": { + "p50": 778.1440019607544, + "p90": 781.3119888305664, + "p95": 782.4959754943848, + "p99": 786.9120240211487 + }, + 
"roundtrip": { + "p50": 778.1440019607544, + "p90": 781.3119888305664, + "p95": 782.4959754943848, + "p99": 786.9120240211487 + }, + "isolatedSum": { + "p50": 1556.2880039215088, + "p90": 1562.6239776611328, + "p95": 1564.9919509887695, + "p99": 1573.8240480422974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1503.999948501587, + "p90": 1507.9360008239746, + "p95": 1509.5360279083252, + "p99": 1512.2560262680054 + }, + "combine": { + "p50": 1503.999948501587, + "p90": 1507.9360008239746, + "p95": 1509.5360279083252, + "p99": 1512.2560262680054 + }, + "roundtrip": { + "p50": 1503.999948501587, + "p90": 1507.9360008239746, + "p95": 1509.5360279083252, + "p99": 1512.2560262680054 + }, + "isolatedSum": { + "p50": 3007.999897003174, + "p90": 3015.872001647949, + "p95": 3019.0720558166504, + "p99": 3024.5120525360107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e175c87", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b200_3b2efc71", + "comparisonKey": "f98d2e7cb85ac2f1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:57:44.620154+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.65600353479385, + "p90": 94.40000355243683, + "p95": 101.02400183677673, + "p99": 107.07200318574905 + }, + "combine": { + "p50": 82.65600353479385, + "p90": 94.40000355243683, + "p95": 101.02400183677673, + "p99": 107.07200318574905 + }, + "roundtrip": { + "p50": 82.65600353479385, + "p90": 94.40000355243683, + "p95": 101.02400183677673, + "p99": 107.07200318574905 + }, + "isolatedSum": { + "p50": 165.3120070695877, + "p90": 188.80000710487366, + "p95": 
202.04800367355347, + "p99": 214.1440063714981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 115.55200070142746, + "p90": 118.6240017414093, + "p95": 121.08799815177917, + "p99": 132.60799646377563 + }, + "combine": { + "p50": 115.55200070142746, + "p90": 118.6240017414093, + "p95": 121.08799815177917, + "p99": 132.60799646377563 + }, + "roundtrip": { + "p50": 115.55200070142746, + "p90": 118.6240017414093, + "p95": 121.08799815177917, + "p99": 132.60799646377563 + }, + "isolatedSum": { + "p50": 231.10400140285492, + "p90": 237.2480034828186, + "p95": 242.17599630355835, + "p99": 265.21599292755127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 196.25599682331085, + "p90": 198.59200716018677, + "p95": 199.68000054359436, + "p99": 204.3199986219406 + }, + "combine": { + "p50": 196.25599682331085, + "p90": 198.59200716018677, + "p95": 199.68000054359436, + "p99": 204.3199986219406 + }, + "roundtrip": { + "p50": 196.25599682331085, + "p90": 198.59200716018677, + "p95": 199.68000054359436, + "p99": 204.3199986219406 + }, + "isolatedSum": { + "p50": 392.5119936466217, + "p90": 397.18401432037354, + "p95": 399.3600010871887, + "p99": 408.6399972438812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + 
"dispatch": { + "p50": 366.239994764328, + "p90": 369.05598640441895, + "p95": 370.1759874820709, + "p99": 374.33600425720215 + }, + "combine": { + "p50": 366.239994764328, + "p90": 369.05598640441895, + "p95": 370.1759874820709, + "p99": 374.33600425720215 + }, + "roundtrip": { + "p50": 366.239994764328, + "p90": 369.05598640441895, + "p95": 370.1759874820709, + "p99": 374.33600425720215 + }, + "isolatedSum": { + "p50": 732.479989528656, + "p90": 738.1119728088379, + "p95": 740.3519749641418, + "p99": 748.6720085144043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 689.4400119781494, + "p90": 692.4800276756287, + "p95": 694.7199702262878, + "p99": 696.9919800758362 + }, + "combine": { + "p50": 689.4400119781494, + "p90": 692.4800276756287, + "p95": 694.7199702262878, + "p99": 696.9919800758362 + }, + "roundtrip": { + "p50": 689.4400119781494, + "p90": 692.4800276756287, + "p95": 694.7199702262878, + "p99": 696.9919800758362 + }, + "isolatedSum": { + "p50": 1378.8800239562988, + "p90": 1384.9600553512573, + "p95": 1389.4399404525757, + "p99": 1393.9839601516724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1334.2080116271973, + "p90": 1338.2079601287842, + "p95": 1339.359998703003, + "p99": 1342.1759605407715 + }, + "combine": { + "p50": 1334.2080116271973, + "p90": 1338.2079601287842, + "p95": 1339.359998703003, + "p99": 1342.1759605407715 + }, + "roundtrip": { + "p50": 1334.2080116271973, + "p90": 1338.2079601287842, 
+ "p95": 1339.359998703003, + "p99": 1342.1759605407715 + }, + "isolatedSum": { + "p50": 2668.4160232543945, + "p90": 2676.4159202575684, + "p95": 2678.719997406006, + "p99": 2684.351921081543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f1b2030a", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b200_ff05ff3d", + "comparisonKey": "91d0e9ceebcbf9ec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:38:00.788065+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": 
true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.10399949550629, + "p90": 94.94400024414062, + "p95": 100.47999769449234, + "p99": 111.90400272607803 + }, + "combine": { + "p50": 83.10399949550629, + "p90": 94.94400024414062, + "p95": 100.47999769449234, + "p99": 111.90400272607803 + }, + "roundtrip": { + "p50": 83.10399949550629, + "p90": 94.94400024414062, + "p95": 100.47999769449234, + "p99": 111.90400272607803 + }, + "isolatedSum": { + "p50": 166.20799899101257, + "p90": 189.88800048828125, + "p95": 200.95999538898468, + "p99": 223.80800545215607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 105.12000322341919, + "p90": 111.7440015077591, + "p95": 116.06399714946747, + "p99": 125.40799379348755 + }, + "combine": { + "p50": 105.12000322341919, + "p90": 111.7440015077591, + "p95": 116.06399714946747, + "p99": 125.40799379348755 + }, + "roundtrip": { + "p50": 105.12000322341919, + "p90": 111.7440015077591, + "p95": 116.06399714946747, + "p99": 125.40799379348755 + }, + "isolatedSum": { + "p50": 210.24000644683838, + "p90": 223.4880030155182, + "p95": 232.12799429893494, + "p99": 250.8159875869751 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 163.5199934244156, + "p90": 166.84800386428833, + "p95": 169.21600699424744, + "p99": 173.6000031232834 + }, + "combine": { + "p50": 163.5199934244156, + "p90": 166.84800386428833, + "p95": 169.21600699424744, + "p99": 173.6000031232834 + }, + "roundtrip": { + "p50": 163.5199934244156, + "p90": 166.84800386428833, + "p95": 169.21600699424744, + "p99": 173.6000031232834 + }, + "isolatedSum": { + "p50": 327.0399868488312, + "p90": 333.69600772857666, + "p95": 338.4320139884949, + "p99": 347.2000062465668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 284.5759987831116, + "p90": 288.12798857688904, + "p95": 289.40799832344055, + "p99": 292.83198714256287 + }, + "combine": { + "p50": 284.5759987831116, + "p90": 288.12798857688904, + "p95": 289.40799832344055, + "p99": 292.83198714256287 + }, + "roundtrip": { + "p50": 284.5759987831116, + "p90": 288.12798857688904, + "p95": 289.40799832344055, + "p99": 292.83198714256287 + }, + "isolatedSum": { + "p50": 569.1519975662231, + "p90": 576.2559771537781, + "p95": 578.8159966468811, + "p99": 585.6639742851257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 
523.4240293502808, + "p90": 528.0960202217102, + "p95": 530.8799743652344, + "p99": 538.1119847297668 + }, + "combine": { + "p50": 523.4240293502808, + "p90": 528.0960202217102, + "p95": 530.8799743652344, + "p99": 538.1119847297668 + }, + "roundtrip": { + "p50": 523.4240293502808, + "p90": 528.0960202217102, + "p95": 530.8799743652344, + "p99": 538.1119847297668 + }, + "isolatedSum": { + "p50": 1046.8480587005615, + "p90": 1056.1920404434204, + "p95": 1061.7599487304688, + "p99": 1076.2239694595337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 992.9280281066895, + "p90": 997.5039958953857, + "p95": 999.3600249290466, + "p99": 1004.9599409103394 + }, + "combine": { + "p50": 992.9280281066895, + "p90": 997.5039958953857, + "p95": 999.3600249290466, + "p99": 1004.9599409103394 + }, + "roundtrip": { + "p50": 992.9280281066895, + "p90": 997.5039958953857, + "p95": 999.3600249290466, + "p99": 1004.9599409103394 + }, + "isolatedSum": { + "p50": 1985.856056213379, + "p90": 1995.0079917907715, + "p95": 1998.7200498580933, + "p99": 2009.9198818206787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-77a423cd", + "identity": "b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b200_c8015aac", + "comparisonKey": "d086e3ac1b74f703", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:08:33.974222+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.57600325345993, + "p90": 97.88800030946732, + "p95": 99.32799637317657, + "p99": 106.39999806880951 + }, + "combine": { + "p50": 84.57600325345993, + "p90": 97.88800030946732, + "p95": 99.32799637317657, + "p99": 106.39999806880951 + }, + 
"roundtrip": { + "p50": 84.57600325345993, + "p90": 97.88800030946732, + "p95": 99.32799637317657, + "p99": 106.39999806880951 + }, + "isolatedSum": { + "p50": 169.15200650691986, + "p90": 195.77600061893463, + "p95": 198.65599274635315, + "p99": 212.79999613761902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 117.21599847078323, + "p90": 119.52000111341476, + "p95": 120.38400024175644, + "p99": 126.75200402736664 + }, + "combine": { + "p50": 117.21599847078323, + "p90": 119.52000111341476, + "p95": 120.38400024175644, + "p99": 126.75200402736664 + }, + "roundtrip": { + "p50": 117.21599847078323, + "p90": 119.52000111341476, + "p95": 120.38400024175644, + "p99": 126.75200402736664 + }, + "isolatedSum": { + "p50": 234.43199694156647, + "p90": 239.04000222682953, + "p95": 240.76800048351288, + "p99": 253.50400805473328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 195.3279972076416, + "p90": 201.1519968509674, + "p95": 204.76800203323364, + "p99": 206.94400370121002 + }, + "combine": { + "p50": 195.3279972076416, + "p90": 201.1519968509674, + "p95": 204.76800203323364, + "p99": 206.94400370121002 + }, + "roundtrip": { + "p50": 195.3279972076416, + "p90": 201.1519968509674, + "p95": 204.76800203323364, + "p99": 206.94400370121002 + }, + "isolatedSum": { + "p50": 390.6559944152832, + "p90": 402.3039937019348, + "p95": 409.5360040664673, + "p99": 413.88800740242004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, 
+ "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 354.7840118408203, + "p90": 356.9920063018799, + "p95": 358.36800932884216, + "p99": 360.48001050949097 + }, + "combine": { + "p50": 354.7840118408203, + "p90": 356.9920063018799, + "p95": 358.36800932884216, + "p99": 360.48001050949097 + }, + "roundtrip": { + "p50": 354.7840118408203, + "p90": 356.9920063018799, + "p95": 358.36800932884216, + "p99": 360.48001050949097 + }, + "isolatedSum": { + "p50": 709.5680236816406, + "p90": 713.9840126037598, + "p95": 716.7360186576843, + "p99": 720.9600210189819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 716.9280052185059, + "p90": 722.3359942436218, + "p95": 723.5199809074402, + "p99": 726.3360023498535 + }, + "combine": { + "p50": 716.9280052185059, + "p90": 722.3359942436218, + "p95": 723.5199809074402, + "p99": 726.3360023498535 + }, + "roundtrip": { + "p50": 716.9280052185059, + "p90": 722.3359942436218, + "p95": 723.5199809074402, + "p99": 726.3360023498535 + }, + "isolatedSum": { + "p50": 1433.8560104370117, + "p90": 1444.6719884872437, + "p95": 1447.0399618148804, + "p99": 1452.672004699707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1438.7840032577515, + "p90": 1443.6479806900024, + "p95": 1445.7600116729736, + 
"p99": 1449.0879774093628 + }, + "combine": { + "p50": 1438.7840032577515, + "p90": 1443.6479806900024, + "p95": 1445.7600116729736, + "p99": 1449.0879774093628 + }, + "roundtrip": { + "p50": 1438.7840032577515, + "p90": 1443.6479806900024, + "p95": 1445.7600116729736, + "p99": 1449.0879774093628 + }, + "isolatedSum": { + "p50": 2877.568006515503, + "p90": 2887.295961380005, + "p95": 2891.5200233459473, + "p99": 2898.1759548187256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17bcd0cb", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b200_e560dec2", + "comparisonKey": "17249a5203310bbb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:09:06.649243+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + 
"resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 80.73599636554718, + "p90": 85.85599809885025, + "p95": 88.03199976682663, + "p99": 93.24800223112106 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 85.85599809885025, + "p95": 88.03199976682663, + "p99": 93.24800223112106 + }, + "roundtrip": { + "p50": 80.73599636554718, + "p90": 85.85599809885025, + "p95": 88.03199976682663, + "p99": 93.24800223112106 + }, + "isolatedSum": { + "p50": 161.47199273109436, + "p90": 171.7119961977005, + "p95": 176.06399953365326, + "p99": 186.49600446224213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 102.94400155544281, + "p90": 106.65600001811981, + "p95": 111.93600296974182, + "p99": 122.91199713945389 + }, + "combine": { + "p50": 102.94400155544281, + "p90": 106.65600001811981, + "p95": 111.93600296974182, + "p99": 122.91199713945389 + }, + "roundtrip": { + "p50": 
102.94400155544281, + "p90": 106.65600001811981, + "p95": 111.93600296974182, + "p99": 122.91199713945389 + }, + "isolatedSum": { + "p50": 205.88800311088562, + "p90": 213.31200003623962, + "p95": 223.87200593948364, + "p99": 245.82399427890778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 160.3199988603592, + "p90": 163.13600540161133, + "p95": 164.51199352741241, + "p99": 171.00800573825836 + }, + "combine": { + "p50": 160.3199988603592, + "p90": 163.13600540161133, + "p95": 164.51199352741241, + "p99": 171.00800573825836 + }, + "roundtrip": { + "p50": 160.3199988603592, + "p90": 163.13600540161133, + "p95": 164.51199352741241, + "p99": 171.00800573825836 + }, + "isolatedSum": { + "p50": 320.6399977207184, + "p90": 326.27201080322266, + "p95": 329.02398705482483, + "p99": 342.0160114765167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 277.72799134254456, + "p90": 281.47199749946594, + "p95": 283.29598903656006, + "p99": 289.0560030937195 + }, + "combine": { + "p50": 277.72799134254456, + "p90": 281.47199749946594, + "p95": 283.29598903656006, + "p99": 289.0560030937195 + }, + "roundtrip": { + "p50": 277.72799134254456, + "p90": 281.47199749946594, + "p95": 283.29598903656006, + "p99": 289.0560030937195 + }, + "isolatedSum": { + "p50": 555.4559826850891, + "p90": 562.9439949989319, + "p95": 566.5919780731201, + "p99": 578.112006187439 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + 
"combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 506.0799717903137, + "p90": 509.72801446914673, + "p95": 511.55197620391846, + "p99": 516.1600112915039 + }, + "combine": { + "p50": 506.0799717903137, + "p90": 509.72801446914673, + "p95": 511.55197620391846, + "p99": 516.1600112915039 + }, + "roundtrip": { + "p50": 506.0799717903137, + "p90": 509.72801446914673, + "p95": 511.55197620391846, + "p99": 516.1600112915039 + }, + "isolatedSum": { + "p50": 1012.1599435806274, + "p90": 1019.4560289382935, + "p95": 1023.1039524078369, + "p99": 1032.3200225830078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 954.2080163955688, + "p90": 958.8159918785095, + "p95": 960.1600170135498, + "p99": 966.8800234794617 + }, + "combine": { + "p50": 954.2080163955688, + "p90": 958.8159918785095, + "p95": 960.1600170135498, + "p99": 966.8800234794617 + }, + "roundtrip": { + "p50": 954.2080163955688, + "p90": 958.8159918785095, + "p95": 960.1600170135498, + "p99": 966.8800234794617 + }, + "isolatedSum": { + "p50": 1908.4160327911377, + "p90": 1917.631983757019, + "p95": 1920.3200340270996, + "p99": 1933.7600469589233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-583c649b", + "identity": 
"b200|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_c1dbcdcb", + "comparisonKey": "501e84257f8c5dae", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:24.890014+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.59200304746628, + "p90": 93.63199770450592, + "p95": 96.73599898815155, + "p99": 112.0000034570694 + }, + "combine": { + "p50": 82.59200304746628, + "p90": 93.63199770450592, + "p95": 96.73599898815155, + "p99": 112.0000034570694 + }, + "roundtrip": { + "p50": 82.59200304746628, + "p90": 93.63199770450592, + "p95": 96.73599898815155, + "p99": 112.0000034570694 + }, + "isolatedSum": { + "p50": 165.18400609493256, + "p90": 187.26399540901184, + "p95": 193.4719979763031, + "p99": 224.0000069141388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 116.28799885511398, + "p90": 118.17599833011627, + "p95": 118.81600320339203, + "p99": 127.29600071907043 + }, + "combine": { + "p50": 116.28799885511398, + "p90": 118.17599833011627, + "p95": 118.81600320339203, + "p99": 127.29600071907043 + }, + "roundtrip": { + "p50": 116.28799885511398, + "p90": 118.17599833011627, + "p95": 118.81600320339203, + "p99": 127.29600071907043 + }, + "isolatedSum": { + "p50": 232.57599771022797, + "p90": 236.35199666023254, + "p95": 237.63200640678406, + "p99": 254.59200143814087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 195.10400295257568, + "p90": 197.37599790096283, + "p95": 198.14400374889374, + "p99": 201.82399451732635 + }, + "combine": { + "p50": 195.10400295257568, + "p90": 197.37599790096283, + "p95": 
198.14400374889374, + "p99": 201.82399451732635 + }, + "roundtrip": { + "p50": 195.10400295257568, + "p90": 197.37599790096283, + "p95": 198.14400374889374, + "p99": 201.82399451732635 + }, + "isolatedSum": { + "p50": 390.20800590515137, + "p90": 394.75199580192566, + "p95": 396.2880074977875, + "p99": 403.6479890346527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 389.15199041366577, + "p90": 394.3679928779602, + "p95": 395.9999978542328, + "p99": 398.8479971885681 + }, + "combine": { + "p50": 389.15199041366577, + "p90": 394.3679928779602, + "p95": 395.9999978542328, + "p99": 398.8479971885681 + }, + "roundtrip": { + "p50": 389.15199041366577, + "p90": 394.3679928779602, + "p95": 395.9999978542328, + "p99": 398.8479971885681 + }, + "isolatedSum": { + "p50": 778.3039808273315, + "p90": 788.7359857559204, + "p95": 791.9999957084656, + "p99": 797.6959943771362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 778.2080173492432, + "p90": 782.1440100669861, + "p95": 783.8079929351807, + "p99": 787.5840067863464 + }, + "combine": { + "p50": 778.2080173492432, + "p90": 782.1440100669861, + "p95": 783.8079929351807, + "p99": 787.5840067863464 + }, + "roundtrip": { + "p50": 778.2080173492432, + "p90": 782.1440100669861, + "p95": 783.8079929351807, + "p99": 787.5840067863464 + }, + "isolatedSum": { + "p50": 1556.4160346984863, + "p90": 1564.2880201339722, + "p95": 1567.6159858703613, + "p99": 1575.1680135726929 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1505.7599544525146, + "p90": 1509.5679759979248, + "p95": 1511.1039876937866, + "p99": 1515.4880285263062 + }, + "combine": { + "p50": 1505.7599544525146, + "p90": 1509.5679759979248, + "p95": 1511.1039876937866, + "p99": 1515.4880285263062 + }, + "roundtrip": { + "p50": 1505.7599544525146, + "p90": 1509.5679759979248, + "p95": 1511.1039876937866, + "p99": 1515.4880285263062 + }, + "isolatedSum": { + "p50": 3011.5199089050293, + "p90": 3019.1359519958496, + "p95": 3022.2079753875732, + "p99": 3030.9760570526123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4ab975d7", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_c2563ab3", + "comparisonKey": "c371f343aaedfda2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:57.867327+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_07", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.95199817419052, + "p90": 88.0960002541542, + "p95": 91.20000153779984, + "p99": 102.65599936246872 + }, + "combine": { + "p50": 81.95199817419052, + "p90": 88.0960002541542, + "p95": 91.20000153779984, + "p99": 102.65599936246872 + }, + "roundtrip": { + "p50": 81.95199817419052, + "p90": 88.0960002541542, + "p95": 91.20000153779984, + "p99": 102.65599936246872 + }, + "isolatedSum": { + "p50": 163.90399634838104, + "p90": 176.1920005083084, + "p95": 182.40000307559967, + "p99": 205.31199872493744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 8, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.10400277376175, + "p90": 105.98400235176086, + "p95": 108.47999900579453, + "p99": 115.84000289440155 + }, + "combine": { + "p50": 103.10400277376175, + "p90": 105.98400235176086, + "p95": 108.47999900579453, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 103.10400277376175, + "p90": 105.98400235176086, + "p95": 108.47999900579453, + "p99": 115.84000289440155 + }, + "isolatedSum": { + "p50": 206.2080055475235, + "p90": 211.96800470352173, + "p95": 216.95999801158905, + "p99": 231.6800057888031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 160.70400178432465, + "p90": 163.71199488639832, + "p95": 165.3759926557541, + "p99": 170.97599804401398 + }, + "combine": { + "p50": 160.70400178432465, + "p90": 163.71199488639832, + "p95": 165.3759926557541, + "p99": 170.97599804401398 + }, + "roundtrip": { + "p50": 160.70400178432465, + "p90": 163.71199488639832, + "p95": 165.3759926557541, + "p99": 170.97599804401398 + }, + "isolatedSum": { + "p50": 321.4080035686493, + "p90": 327.42398977279663, + "p95": 330.7519853115082, + "p99": 341.95199608802795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 276.95998549461365, + "p90": 280.70399165153503, + "p95": 282.0799946784973, + "p99": 286.655992269516 + }, + "combine": { + "p50": 276.95998549461365, + "p90": 280.70399165153503, + "p95": 
282.0799946784973, + "p99": 286.655992269516 + }, + "roundtrip": { + "p50": 276.95998549461365, + "p90": 280.70399165153503, + "p95": 282.0799946784973, + "p99": 286.655992269516 + }, + "isolatedSum": { + "p50": 553.9199709892273, + "p90": 561.4079833030701, + "p95": 564.1599893569946, + "p99": 573.311984539032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 506.5600275993347, + "p90": 509.8559856414795, + "p95": 511.29597425460815, + "p99": 517.2799825668335 + }, + "combine": { + "p50": 506.5600275993347, + "p90": 509.8559856414795, + "p95": 511.29597425460815, + "p99": 517.2799825668335 + }, + "roundtrip": { + "p50": 506.5600275993347, + "p90": 509.8559856414795, + "p95": 511.29597425460815, + "p99": 517.2799825668335 + }, + "isolatedSum": { + "p50": 1013.1200551986694, + "p90": 1019.711971282959, + "p95": 1022.5919485092163, + "p99": 1034.559965133667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 958.0479860305786, + "p90": 962.9120230674744, + "p95": 964.8000001907349, + "p99": 972.0640182495117 + }, + "combine": { + "p50": 958.0479860305786, + "p90": 962.9120230674744, + "p95": 964.8000001907349, + "p99": 972.0640182495117 + }, + "roundtrip": { + "p50": 958.0479860305786, + "p90": 962.9120230674744, + "p95": 964.8000001907349, + "p99": 972.0640182495117 + }, + "isolatedSum": { + "p50": 1916.0959720611572, + "p90": 1925.8240461349487, + "p95": 1929.6000003814697, + "p99": 1944.1280364990234 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5160617", + "identity": "b200|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_88857293", + "comparisonKey": "7d7d2618fc6f8d5a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:07:00.049680+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.05599880218506, + "p90": 86.71999722719193, + "p95": 92.0960009098053, + "p99": 98.27200323343277 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 86.71999722719193, + "p95": 92.0960009098053, + "p99": 98.27200323343277 + }, + "roundtrip": { + "p50": 81.05599880218506, + "p90": 86.71999722719193, + "p95": 92.0960009098053, + "p99": 98.27200323343277 + }, + "isolatedSum": { + "p50": 162.11199760437012, + "p90": 173.43999445438385, + "p95": 184.1920018196106, + "p99": 196.54400646686554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.74400019645691, + "p90": 107.744000852108, + "p95": 110.72000116109848, + "p99": 116.28799885511398 + }, + "combine": { + "p50": 103.74400019645691, + "p90": 107.744000852108, + "p95": 110.72000116109848, + "p99": 116.28799885511398 + }, + "roundtrip": { + "p50": 103.74400019645691, + "p90": 107.744000852108, + "p95": 110.72000116109848, + "p99": 116.28799885511398 + }, + "isolatedSum": { + "p50": 207.48800039291382, + "p90": 215.488001704216, + "p95": 221.44000232219696, + "p99": 232.57599771022797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 159.8079949617386, + "p90": 162.49600052833557, + "p95": 164.19200599193573, + "p99": 170.52799463272095 + }, + "combine": { + "p50": 159.8079949617386, + "p90": 162.49600052833557, + "p95": 164.19200599193573, + "p99": 170.52799463272095 + }, + "roundtrip": { + "p50": 159.8079949617386, + "p90": 162.49600052833557, + "p95": 164.19200599193573, + "p99": 170.52799463272095 + }, + "isolatedSum": { + "p50": 319.6159899234772, + "p90": 324.99200105667114, + "p95": 328.38401198387146, + "p99": 341.0559892654419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 277.21598744392395, + "p90": 280.3199887275696, + "p95": 281.792014837265, + "p99": 287.200003862381 + }, + "combine": { + "p50": 277.21598744392395, + "p90": 280.3199887275696, + "p95": 281.792014837265, + "p99": 287.200003862381 + }, + "roundtrip": { + "p50": 277.21598744392395, + "p90": 280.3199887275696, + "p95": 281.792014837265, + "p99": 287.200003862381 + }, + "isolatedSum": { + "p50": 554.4319748878479, + "p90": 560.6399774551392, + "p95": 563.58402967453, + "p99": 574.400007724762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 506.4319968223572, + "p90": 509.69600677490234, + "p95": 511.23201847076416, + "p99": 514.4640207290649 + }, + "combine": { + "p50": 506.4319968223572, + "p90": 509.69600677490234, + "p95": 511.23201847076416, + "p99": 514.4640207290649 + }, + "roundtrip": { + 
"p50": 506.4319968223572, + "p90": 509.69600677490234, + "p95": 511.23201847076416, + "p99": 514.4640207290649 + }, + "isolatedSum": { + "p50": 1012.8639936447144, + "p90": 1019.3920135498047, + "p95": 1022.4640369415283, + "p99": 1028.9280414581299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 960.4160189628601, + "p90": 964.9919867515564, + "p95": 966.9119715690613, + "p99": 972.2880125045776 + }, + "combine": { + "p50": 960.4160189628601, + "p90": 964.9919867515564, + "p95": 966.9119715690613, + "p99": 972.2880125045776 + }, + "roundtrip": { + "p50": 960.4160189628601, + "p90": 964.9919867515564, + "p95": 966.9119715690613, + "p99": 972.2880125045776 + }, + "isolatedSum": { + "p50": 1920.8320379257202, + "p90": 1929.9839735031128, + "p95": 1933.8239431381226, + "p99": 1944.5760250091553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8f3cd72", + "identity": "b200|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_db5e035b", + "comparisonKey": "24131bd294e34619", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:53:33.900920+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_09", + "sku": "b200", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.62399715185165, + "p90": 107.84000158309937, + "p95": 114.78400230407715, + "p99": 126.20800733566284 + }, + "combine": { + "p50": 90.62399715185165, + "p90": 107.84000158309937, + "p95": 114.78400230407715, + "p99": 126.20800733566284 + }, + "roundtrip": { + "p50": 90.62399715185165, + "p90": 107.84000158309937, + "p95": 114.78400230407715, + "p99": 126.20800733566284 + }, + "isolatedSum": { + "p50": 181.2479943037033, + "p90": 215.68000316619873, + "p95": 229.5680046081543, + "p99": 252.41601467132568 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 103.58399897813797, + "p90": 122.5920021533966, + "p95": 132.1599930524826, + "p99": 145.47200500965118 + }, + "combine": { + "p50": 103.58399897813797, + "p90": 122.5920021533966, + "p95": 132.1599930524826, + "p99": 145.47200500965118 + }, + "roundtrip": { + "p50": 103.58399897813797, + "p90": 122.5920021533966, + "p95": 132.1599930524826, + "p99": 145.47200500965118 + }, + "isolatedSum": { + "p50": 207.16799795627594, + "p90": 245.1840043067932, + "p95": 264.3199861049652, + "p99": 290.94401001930237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 140.35199582576752, + "p90": 154.1759967803955, + "p95": 163.5199934244156, + "p99": 170.68800330162048 + }, + "combine": { + "p50": 140.35199582576752, + "p90": 154.1759967803955, + "p95": 163.5199934244156, + "p99": 170.68800330162048 + }, + "roundtrip": { + "p50": 140.35199582576752, + "p90": 154.1759967803955, + "p95": 163.5199934244156, + "p99": 170.68800330162048 + }, + "isolatedSum": { + "p50": 280.70399165153503, + "p90": 308.351993560791, + "p95": 327.0399868488312, + "p99": 341.37600660324097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 237.95199394226074, + "p90": 
244.28799748420715, + "p95": 247.96800315380096, + "p99": 264.3199861049652 + }, + "combine": { + "p50": 237.95199394226074, + "p90": 244.28799748420715, + "p95": 247.96800315380096, + "p99": 264.3199861049652 + }, + "roundtrip": { + "p50": 237.95199394226074, + "p90": 244.28799748420715, + "p95": 247.96800315380096, + "p99": 264.3199861049652 + }, + "isolatedSum": { + "p50": 475.9039878845215, + "p90": 488.5759949684143, + "p95": 495.93600630760193, + "p99": 528.6399722099304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 413.6640131473541, + "p90": 419.6479916572571, + "p95": 422.4959909915924, + "p99": 427.42401361465454 + }, + "combine": { + "p50": 413.6640131473541, + "p90": 419.6479916572571, + "p95": 422.4959909915924, + "p99": 427.42401361465454 + }, + "roundtrip": { + "p50": 413.6640131473541, + "p90": 419.6479916572571, + "p95": 422.4959909915924, + "p99": 427.42401361465454 + }, + "isolatedSum": { + "p50": 827.3280262947083, + "p90": 839.2959833145142, + "p95": 844.9919819831848, + "p99": 854.8480272293091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 765.4719948768616, + "p90": 769.9519991874695, + "p95": 772.383987903595, + "p99": 780.4800271987915 + }, + "combine": { + "p50": 765.4719948768616, + "p90": 769.9519991874695, + "p95": 772.383987903595, + "p99": 780.4800271987915 + }, + "roundtrip": { + "p50": 765.4719948768616, + "p90": 769.9519991874695, + "p95": 772.383987903595, + "p99": 780.4800271987915 + 
}, + "isolatedSum": { + "p50": 1530.9439897537231, + "p90": 1539.903998374939, + "p95": 1544.76797580719, + "p99": 1560.960054397583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f57630c0", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_ef177a3a", + "comparisonKey": "7794d967b9175293", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:08:28.364127+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 295.80798745155334, + "p90": 338.7199938297272, + "p95": 437.8879964351654, + "p99": 4699.999809265137 + }, + "combine": { + "p50": 111.455999314785, + "p90": 167.61599481105804, + "p95": 219.10400688648224, + "p99": 3621.0238933563232 + }, + "roundtrip": { + "p50": 444.64001059532166, + "p90": 633.9520215988159, + "p95": 1469.0239429473877, + "p99": 5311.808109283447 + }, + "isolatedSum": { + "p50": 407.26398676633835, + "p90": 506.3359886407852, + "p95": 656.9920033216476, + "p99": 8321.02370262146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 297.91998863220215, + "p90": 343.29599142074585, + "p95": 2245.3761100769043, + "p99": 4966.080188751221 + }, + "combine": { + "p50": 110.20799726247787, + "p90": 128.48000228405, + "p95": 132.79999792575836, + "p99": 3528.4481048583984 + }, + "roundtrip": { + "p50": 432.2879910469055, + "p90": 542.464017868042, + "p95": 913.919985294342, + "p99": 4491.7120933532715 + }, + "isolatedSum": { + "p50": 408.12798589468, + "p90": 471.77599370479584, + "p95": 2378.1761080026627, + "p99": 8494.52829360962 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 1, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 310.1760149002075, + "p90": 370.2720105648041, + "p95": 503.80802154541016, + "p99": 3911.360025405884 + }, + "combine": { + "p50": 111.93600296974182, + "p90": 145.1839953660965, + "p95": 165.24800658226013, + "p99": 3762.432098388672 + }, + "roundtrip": { + "p50": 444.3199932575226, + "p90": 560.4159832000732, + "p95": 822.4959969520569, + "p99": 5224.2560386657715 + }, + "isolatedSum": { + "p50": 422.11201786994934, + "p90": 515.4560059309006, + "p95": 669.0560281276703, + "p99": 7673.792123794556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 311.64801120758057, + "p90": 385.8239948749542, + "p95": 504.6079754829407, + "p99": 4642.047882080078 + }, + "combine": { + "p50": 110.97600311040878, + "p90": 137.92000710964203, + "p95": 159.42400693893433, + "p99": 3380.9919357299805 + }, + "roundtrip": { + "p50": 440.2880072593689, + "p90": 489.1200065612793, + "p95": 651.2320041656494, + "p99": 5194.591999053955 + }, + "isolatedSum": { + "p50": 422.62401431798935, + "p90": 523.7440019845963, + "p95": 664.031982421875, + "p99": 8023.039817810059 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 307.3279857635498, + "p90": 334.9440097808838, + "p95": 414.94399309158325, + "p99": 4407.487869262695 + }, + "combine": { + "p50": 109.98400300741196, + "p90": 132.06399977207184, + "p95": 173.98400604724884, + "p99": 
3630.880117416382 + }, + "roundtrip": { + "p50": 440.41600823402405, + "p90": 562.4639987945557, + "p95": 706.3999772071838, + "p99": 4956.255912780762 + }, + "isolatedSum": { + "p50": 417.31198877096176, + "p90": 467.0080095529556, + "p95": 588.9279991388321, + "p99": 8038.367986679077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 325.76000690460205, + "p90": 387.00801134109497, + "p95": 492.6080107688904, + "p99": 4438.240051269531 + }, + "combine": { + "p50": 113.50400000810623, + "p90": 142.20799505710602, + "p95": 161.6320013999939, + "p99": 3321.536064147949 + }, + "roundtrip": { + "p50": 457.40801095962524, + "p90": 576.0959982872009, + "p95": 1029.6319723129272, + "p99": 4850.848197937012 + }, + "isolatedSum": { + "p50": 439.2640069127083, + "p90": 529.216006398201, + "p95": 654.2400121688843, + "p99": 7759.7761154174805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 327.87200808525085, + "p90": 385.919988155365, + "p95": 481.1519980430603, + "p99": 3677.7920722961426 + }, + "combine": { + "p50": 133.91999900341034, + "p90": 144.31999623775482, + "p95": 155.39200603961945, + "p99": 3211.2960815429688 + }, + "roundtrip": { + "p50": 457.0559859275818, + "p90": 524.2239832878113, + "p95": 655.7760238647461, + "p99": 4771.423816680908 + }, + "isolatedSum": { + "p50": 461.7920070886612, + "p90": 530.2399843931198, + "p95": 636.5440040826797, + "p99": 6889.088153839111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + 
"combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 344.38401460647583, + "p90": 399.58399534225464, + "p95": 492.70400404930115, + "p99": 3674.880027770996 + }, + "combine": { + "p50": 205.53599298000336, + "p90": 215.03999829292297, + "p95": 232.9919934272766, + "p99": 2983.2639694213867 + }, + "roundtrip": { + "p50": 544.9600219726562, + "p90": 613.3760213851929, + "p95": 706.5280079841614, + "p99": 4044.703960418701 + }, + "isolatedSum": { + "p50": 549.9200075864792, + "p90": 614.6239936351776, + "p95": 725.6959974765778, + "p99": 6658.143997192383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-37b8d7e3", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b200_c48c1e92", + "comparisonKey": "eb6e4644e2b59ab6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:09:45.901871+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": 
false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 309.6959888935089, + "p90": 373.9199936389923, + "p95": 483.5200011730194, + "p99": 4650.559902191162 + }, + "combine": { + "p50": 110.23999750614166, + "p90": 149.1840034723282, + "p95": 174.9120056629181, + "p99": 4120.031833648682 + }, + "roundtrip": { + "p50": 441.15200638771057, + "p90": 552.191972732544, + "p95": 745.4079985618591, + "p99": 4932.000160217285 + }, + "isolatedSum": { + "p50": 419.9359863996506, + "p90": 523.1039971113205, + "p95": 658.4320068359375, + "p99": 8770.591735839844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 302.91199684143066, + "p90": 357.08799958229065, + "p95": 
465.56800603866577, + "p99": 4301.663875579834 + }, + "combine": { + "p50": 112.86400258541107, + "p90": 160.0320041179657, + "p95": 190.43199717998505, + "p99": 4137.951850891113 + }, + "roundtrip": { + "p50": 439.4240081310272, + "p90": 523.5520005226135, + "p95": 809.8880052566528, + "p99": 4843.391895294189 + }, + "isolatedSum": { + "p50": 415.77599942684174, + "p90": 517.1200037002563, + "p95": 656.0000032186508, + "p99": 8439.615726470947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 314.59200382232666, + "p90": 394.9440121650696, + "p95": 454.0160000324249, + "p99": 4172.44815826416 + }, + "combine": { + "p50": 112.35199868679047, + "p90": 153.82400155067444, + "p95": 180.60800433158875, + "p99": 4261.055946350098 + }, + "roundtrip": { + "p50": 459.9039852619171, + "p90": 538.752019405365, + "p95": 3260.7040405273438, + "p99": 4920.5121994018555 + }, + "isolatedSum": { + "p50": 426.9440025091171, + "p90": 548.768013715744, + "p95": 634.6240043640137, + "p99": 8433.504104614258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 331.13598823547363, + "p90": 408.57601165771484, + "p95": 497.21598625183105, + "p99": 4464.767932891846 + }, + "combine": { + "p50": 110.30399799346924, + "p90": 144.6080058813095, + "p95": 163.7440025806427, + "p99": 4093.0237770080566 + }, + "roundtrip": { + "p50": 460.54399013519287, + "p90": 599.6159911155701, + "p95": 3412.447929382324, + "p99": 4890.3679847717285 + }, + "isolatedSum": { + "p50": 441.43998622894287, + "p90": 
553.1840175390244, + "p95": 660.9599888324738, + "p99": 8557.791709899902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 308.351993560791, + "p90": 366.4639890193939, + "p95": 477.05599665641785, + "p99": 4366.879940032959 + }, + "combine": { + "p50": 107.90400207042694, + "p90": 136.19199395179749, + "p95": 151.48800611495972, + "p99": 3205.631971359253 + }, + "roundtrip": { + "p50": 470.91200947761536, + "p90": 626.5599727630615, + "p95": 1438.912034034729, + "p99": 4670.87984085083 + }, + "isolatedSum": { + "p50": 416.25599563121796, + "p90": 502.6559829711914, + "p95": 628.5440027713776, + "p99": 7572.511911392212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 339.83999490737915, + "p90": 415.6480133533478, + "p95": 504.09597158432007, + "p99": 4107.200145721436 + }, + "combine": { + "p50": 111.51999980211258, + "p90": 150.39999783039093, + "p95": 169.5999950170517, + "p99": 3335.871934890747 + }, + "roundtrip": { + "p50": 466.3679897785187, + "p90": 576.2559771537781, + "p95": 756.0960054397583, + "p99": 4789.792060852051 + }, + "isolatedSum": { + "p50": 451.35999470949173, + "p90": 566.0480111837387, + "p95": 673.6959666013718, + "p99": 7443.072080612183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + 
"p50": 329.8240005970001, + "p90": 393.6319947242737, + "p95": 515.8079862594604, + "p99": 4222.176074981689 + }, + "combine": { + "p50": 133.34399461746216, + "p90": 159.5200002193451, + "p95": 185.59999763965607, + "p99": 3857.1200370788574 + }, + "roundtrip": { + "p50": 467.1359956264496, + "p90": 568.1599974632263, + "p95": 735.0080013275146, + "p99": 4680.255889892578 + }, + "isolatedSum": { + "p50": 463.1679952144623, + "p90": 553.1519949436188, + "p95": 701.4079838991165, + "p99": 8079.296112060547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 351.8719971179962, + "p90": 425.56801438331604, + "p95": 495.35998702049255, + "p99": 4317.376136779785 + }, + "combine": { + "p50": 200.80000162124634, + "p90": 225.5679965019226, + "p95": 282.943993806839, + "p99": 3406.6879749298096 + }, + "roundtrip": { + "p50": 533.1199765205383, + "p90": 616.4479851722717, + "p95": 2106.4960956573486, + "p99": 4157.087802886963 + }, + "isolatedSum": { + "p50": 552.6719987392426, + "p90": 651.1360108852386, + "p95": 778.3039808273315, + "p99": 7724.064111709595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da660b14", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b200_47ac442d", + "comparisonKey": "e632332bca6a4bd1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:10:59.110602+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": 
"b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 321.4719891548157, + "p90": 370.2720105648041, + "p95": 3145.7600593566895, + "p99": 4468.224048614502 + }, + "combine": { + "p50": 118.40000003576279, + "p90": 143.0719941854477, + "p95": 153.3759981393814, + "p99": 3086.496114730835 + }, + "roundtrip": { + 
"p50": 461.2480103969574, + "p90": 796.9920039176941, + "p95": 3859.16805267334, + "p99": 5565.567970275879 + }, + "isolatedSum": { + "p50": 439.87198919057846, + "p90": 513.3440047502518, + "p95": 3299.136057496071, + "p99": 7554.720163345337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 317.8560137748718, + "p90": 366.65600538253784, + "p95": 3099.6479988098145, + "p99": 4448.128223419189 + }, + "combine": { + "p50": 122.56000190973282, + "p90": 166.81599617004395, + "p95": 189.28000330924988, + "p99": 4242.080211639404 + }, + "roundtrip": { + "p50": 462.0479941368103, + "p90": 549.344003200531, + "p95": 3835.6480598449707, + "p99": 5128.191947937012 + }, + "isolatedSum": { + "p50": 440.41601568460464, + "p90": 533.4720015525818, + "p95": 3288.9280021190643, + "p99": 8690.208435058594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 351.39200091362, + "p90": 414.40001130104065, + "p95": 478.7200093269348, + "p99": 3635.6799602508545 + }, + "combine": { + "p50": 127.00800597667694, + "p90": 155.68000078201294, + "p95": 168.96000504493713, + "p99": 272.15999364852905 + }, + "roundtrip": { + "p50": 479.10401225090027, + "p90": 640.1919722557068, + "p95": 3500.3199577331543, + "p99": 4627.391815185547 + }, + "isolatedSum": { + "p50": 478.40000689029694, + "p90": 570.0800120830536, + "p95": 647.680014371872, + "p99": 3907.8399538993835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + 
"recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 352.80001163482666, + "p90": 404.992014169693, + "p95": 489.3440008163452, + "p99": 3952.7039527893066 + }, + "combine": { + "p50": 201.53599977493286, + "p90": 221.21599316596985, + "p95": 228.0000001192093, + "p99": 2854.5279502868652 + }, + "roundtrip": { + "p50": 542.3359870910645, + "p90": 609.6320152282715, + "p95": 3046.880006790161, + "p99": 4197.887897491455 + }, + "isolatedSum": { + "p50": 554.3360114097595, + "p90": 626.2080073356628, + "p95": 717.3440009355545, + "p99": 6807.231903076172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-41b3c395", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b200_fb40089c", + "comparisonKey": "25d7524dc9a65573", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:17:50.483949+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + 
"kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 315.8720135688782, + "p90": 352.7680039405823, + "p95": 437.75999546051025, + "p99": 4552.9279708862305 + }, + "combine": { + "p50": 120.7680031657219, + "p90": 162.81600296497345, + "p95": 311.6160035133362, + "p99": 4463.967800140381 + }, + "roundtrip": { + "p50": 460.7999920845032, + "p90": 591.6799902915955, + "p95": 4008.1920623779297, + "p99": 5092.959880828857 + }, + "isolatedSum": { + "p50": 436.64001673460007, + "p90": 515.5840069055557, + "p95": 749.3759989738464, + "p99": 9016.895771026611 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 9, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 324.99200105667114, + "p90": 412.83199191093445, + "p95": 3174.5920181274414, + "p99": 4768.352031707764 
+ }, + "combine": { + "p50": 114.04799669981003, + "p90": 155.8080017566681, + "p95": 1763.8720273971558, + "p99": 4440.224170684814 + }, + "roundtrip": { + "p50": 464.1279876232147, + "p90": 580.5760025978088, + "p95": 3517.8558826446533, + "p99": 4934.847831726074 + }, + "isolatedSum": { + "p50": 439.03999775648117, + "p90": 568.6399936676025, + "p95": 4938.464045524597, + "p99": 9208.576202392578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 18, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 329.15198802948, + "p90": 394.52800154685974, + "p95": 3220.319986343384, + "p99": 4509.439945220947 + }, + "combine": { + "p50": 120.4800009727478, + "p90": 167.52000153064728, + "p95": 195.51999866962433, + "p99": 3341.2160873413086 + }, + "roundtrip": { + "p50": 467.16800332069397, + "p90": 594.1759943962097, + "p95": 3317.215919494629, + "p99": 4732.3198318481445 + }, + "isolatedSum": { + "p50": 449.6319890022278, + "p90": 562.048003077507, + "p95": 3415.839985013008, + "p99": 7850.656032562256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 36, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 332.8000009059906, + "p90": 414.3039882183075, + "p95": 527.1040201187134, + "p99": 4374.271869659424 + }, + "combine": { + "p50": 113.92000317573547, + "p90": 155.64799308776855, + "p95": 185.2799952030182, + "p99": 3785.439968109131 + }, + "roundtrip": { + "p50": 476.03198885917664, + "p90": 651.1039733886719, + "p95": 1003.6159753799438, + "p99": 4934.720039367676 + }, + "isolatedSum": { + "p50": 446.7200040817261, + "p90": 569.951981306076, + "p95": 712.3840153217316, + "p99": 
8159.711837768555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 72, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 325.3760039806366, + "p90": 380.67200779914856, + "p95": 497.24799394607544, + "p99": 4215.04020690918 + }, + "combine": { + "p50": 113.18399757146835, + "p90": 154.9759954214096, + "p95": 176.64000391960144, + "p99": 4144.15979385376 + }, + "roundtrip": { + "p50": 478.4640073776245, + "p90": 685.151994228363, + "p95": 2531.712055206299, + "p99": 4715.1360511779785 + }, + "isolatedSum": { + "p50": 438.56000155210495, + "p90": 535.6480032205582, + "p95": 673.8879978656769, + "p99": 8359.20000076294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 144, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 354.0799915790558, + "p90": 470.17601132392883, + "p95": 619.2640066146851, + "p99": 4007.2641372680664 + }, + "combine": { + "p50": 113.11999708414078, + "p90": 159.29600596427917, + "p95": 208.54400098323822, + "p99": 4164.127826690674 + }, + "roundtrip": { + "p50": 481.0880124568939, + "p90": 540.831983089447, + "p95": 836.4160060882568, + "p99": 4855.904102325439 + }, + "isolatedSum": { + "p50": 467.19998866319656, + "p90": 629.472017288208, + "p95": 827.8080075979233, + "p99": 8171.39196395874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 288, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 350.43200850486755, + "p90": 453.5680115222931, + "p95": 
499.35999512672424, + "p99": 4036.3521575927734 + }, + "combine": { + "p50": 136.09600067138672, + "p90": 170.84799706935883, + "p95": 193.6960071325302, + "p99": 3247.3599910736084 + }, + "roundtrip": { + "p50": 490.7200038433075, + "p90": 607.2319746017456, + "p95": 1060.6399774551392, + "p99": 4693.376064300537 + }, + "isolatedSum": { + "p50": 486.5280091762543, + "p90": 624.4160085916519, + "p95": 693.0560022592545, + "p99": 7283.712148666382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 576, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 360.3839874267578, + "p90": 433.50398540496826, + "p95": 497.44001030921936, + "p99": 4155.7440757751465 + }, + "combine": { + "p50": 208.6080014705658, + "p90": 232.03200101852417, + "p95": 329.5679986476898, + "p99": 3607.1999073028564 + }, + "roundtrip": { + "p50": 556.1599731445312, + "p90": 635.5519890785217, + "p95": 2274.4319438934326, + "p99": 4012.2880935668945 + }, + "isolatedSum": { + "p50": 568.9919888973236, + "p90": 665.5359864234924, + "p95": 827.0080089569092, + "p99": 7762.943983078003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e76df06c", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b200_e3a8d4c0", + "comparisonKey": "6118efa1df1d9ffd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:15:13.447247+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": 
"normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 291.80800914764404, + "p90": 328.7999927997589, + "p95": 3327.903985977173, + "p99": 4771.168231964111 + }, + "combine": { + "p50": 108.60799998044968, + "p90": 129.2800009250641, + "p95": 145.6640064716339, + "p99": 4078.400135040283 + }, + "roundtrip": { + "p50": 428.6400079727173, + "p90": 504.5440196990967, + "p95": 
3537.8239154815674, + "p99": 5094.880104064941 + }, + "isolatedSum": { + "p50": 400.4160091280937, + "p90": 458.079993724823, + "p95": 3473.5679924488068, + "p99": 8849.568367004395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 296.4479923248291, + "p90": 363.23198676109314, + "p95": 1764.0639543533325, + "p99": 4729.824066162109 + }, + "combine": { + "p50": 108.73600095510483, + "p90": 128.57599556446075, + "p95": 156.38400614261627, + "p99": 4576.896190643311 + }, + "roundtrip": { + "p50": 431.2959909439087, + "p90": 568.9280033111572, + "p95": 816.6400194168091, + "p99": 5281.5680503845215 + }, + "isolatedSum": { + "p50": 405.18399327993393, + "p90": 491.8079823255539, + "p95": 1920.4479604959488, + "p99": 9306.72025680542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 305.184006690979, + "p90": 344.4159924983978, + "p95": 483.5520088672638, + "p99": 4740.2238845825195 + }, + "combine": { + "p50": 108.19199681282043, + "p90": 134.11200046539307, + "p95": 147.39200472831726, + "p99": 4416.384220123291 + }, + "roundtrip": { + "p50": 440.89600443840027, + "p90": 576.1600136756897, + "p95": 834.4640135765076, + "p99": 4924.191951751709 + }, + "isolatedSum": { + "p50": 413.37600350379944, + "p90": 478.5279929637909, + "p95": 630.944013595581, + "p99": 9156.60810470581 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 50, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 305.59998750686646, + "p90": 390.46400785446167, + "p95": 494.7200119495392, + "p99": 4485.375881195068 + }, + "combine": { + "p50": 109.8560020327568, + "p90": 148.76799285411835, + "p95": 173.69599640369415, + "p99": 3903.5840034484863 + }, + "roundtrip": { + "p50": 440.8000111579895, + "p90": 576.6400098800659, + "p95": 2270.240068435669, + "p99": 4956.287860870361 + }, + "isolatedSum": { + "p50": 415.45598953962326, + "p90": 539.23200070858, + "p95": 668.4160083532333, + "p99": 8388.959884643555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 304.4480085372925, + "p90": 370.40001153945923, + "p95": 494.27199363708496, + "p99": 4460.7038497924805 + }, + "combine": { + "p50": 107.26399719715118, + "p90": 126.71999633312225, + "p95": 134.71999764442444, + "p99": 3794.7518825531006 + }, + "roundtrip": { + "p50": 438.52800130844116, + "p90": 592.0320153236389, + "p95": 3525.023937225342, + "p99": 4956.543922424316 + }, + "isolatedSum": { + "p50": 411.71200573444366, + "p90": 497.1200078725815, + "p95": 628.9919912815094, + "p99": 8255.455732345581 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 224, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 324.95999336242676, + "p90": 393.75999569892883, + "p95": 497.98399209976196, + "p99": 4240.479946136475 + }, + "combine": { + "p50": 112.76800185441971, + "p90": 131.26400113105774, + "p95": 180.1919937133789, + "p99": 4264.639854431152 + }, + 
"roundtrip": { + "p50": 453.11999320983887, + "p90": 548.6720204353333, + "p95": 834.3679904937744, + "p99": 4967.167854309082 + }, + "isolatedSum": { + "p50": 437.72799521684647, + "p90": 525.0239968299866, + "p95": 678.1759858131409, + "p99": 8505.119800567627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 327.10400223731995, + "p90": 370.36800384521484, + "p95": 499.9679923057556, + "p99": 4166.592121124268 + }, + "combine": { + "p50": 141.95199310779572, + "p90": 154.40000593662262, + "p95": 192.00000166893005, + "p99": 4298.495769500732 + }, + "roundtrip": { + "p50": 464.4159972667694, + "p90": 566.1759972572327, + "p95": 773.9199995994568, + "p99": 4596.288204193115 + }, + "isolatedSum": { + "p50": 469.05599534511566, + "p90": 524.7680097818375, + "p95": 691.9679939746857, + "p99": 8465.087890625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 925, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 350.5280017852783, + "p90": 382.1440041065216, + "p95": 502.6559829711914, + "p99": 4360.352039337158 + }, + "combine": { + "p50": 207.71199464797974, + "p90": 219.10400688648224, + "p95": 263.10399174690247, + "p99": 3651.968002319336 + }, + "roundtrip": { + "p50": 559.4559907913208, + "p90": 628.4480094909668, + "p95": 2630.5599212646484, + "p99": 4135.551929473877 + }, + "isolatedSum": { + "p50": 558.239996433258, + "p90": 601.2480109930038, + "p95": 765.7599747180939, + "p99": 8012.320041656494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 
78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f78f6825", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b200_780f9e8e", + "comparisonKey": "300519b11df6575c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:27:12.683925+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 302.7839958667755, + "p90": 361.91999912261963, + "p95": 3259.488105773926, + "p99": 4650.239944458008 + }, + "combine": { + "p50": 109.21599715948105, + "p90": 132.89600610733032, + "p95": 148.3200043439865, + "p99": 215.87200462818146 + }, + "roundtrip": { + "p50": 434.33600664138794, + "p90": 538.4320020675659, + "p95": 3501.2800693511963, + "p99": 5091.104030609131 + }, + "isolatedSum": { + "p50": 411.99999302625656, + "p90": 494.81600522994995, + "p95": 3407.8081101179123, + "p99": 4866.111949086189 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 301.2160062789917, + "p90": 377.24798917770386, + "p95": 1690.3680562973022, + "p99": 4910.367965698242 + }, + "combine": { + "p50": 114.52800035476685, + "p90": 150.9760022163391, + "p95": 181.7599982023239, + "p99": 3808.7360858917236 + }, + "roundtrip": { + "p50": 434.6559941768646, + "p90": 493.4079945087433, + "p95": 2789.599895477295, + "p99": 5244.800090789795 + }, + "isolatedSum": { + "p50": 415.74400663375854, + "p90": 528.223991394043, + "p95": 1872.1280544996262, + "p99": 8719.104051589966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 22, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 310.43198704719543, + "p90": 346.5920090675354, + "p95": 505.3120255470276, + "p99": 4324.192047119141 + }, + "combine": { + "p50": 110.17599701881409, + "p90": 131.6480040550232, + "p95": 141.85599982738495, + "p99": 3536.736011505127 + }, + "roundtrip": { + "p50": 447.90399074554443, + "p90": 564.9600028991699, + "p95": 743.008017539978, + "p99": 4763.296127319336 + }, + "isolatedSum": { + "p50": 420.6079840660095, + "p90": 478.2400131225586, + "p95": 647.1680253744125, + "p99": 7860.928058624268 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 314.5599961280823, + "p90": 392.4799859523773, + "p95": 503.87197732925415, + "p99": 4361.055850982666 + }, + "combine": { + "p50": 109.95200276374817, + "p90": 131.04000687599182, + "p95": 138.62399756908417, + "p99": 3445.5039501190186 + }, + "roundtrip": { + "p50": 447.1040070056915, + "p90": 512.3839974403381, + "p95": 673.7599968910217, + "p99": 4378.143787384033 + }, + "isolatedSum": { + "p50": 424.51199889183044, + "p90": 523.5199928283691, + "p95": 642.4959748983383, + "p99": 7806.559801101685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 79, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 311.2320005893707, + "p90": 399.3600010871887, + "p95": 499.80801343917847, + "p99": 4334.8798751831055 + }, + "combine": { + "p50": 113.8560026884079, + "p90": 156.47999942302704, + "p95": 238.14399540424347, + "p99": 4224.800109863281 + }, + "roundtrip": { + "p50": 446.1440145969391, + "p90": 548.0319857597351, + "p95": 
768.3519721031189, + "p99": 4508.351802825928 + }, + "isolatedSum": { + "p50": 425.0880032777786, + "p90": 555.8400005102158, + "p95": 737.9520088434219, + "p99": 8559.679985046387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 134, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 332.44800567626953, + "p90": 407.3599874973297, + "p95": 518.01598072052, + "p99": 4213.2158279418945 + }, + "combine": { + "p50": 113.56800049543381, + "p90": 135.6479972600937, + "p95": 186.94399297237396, + "p99": 3657.6321125030518 + }, + "roundtrip": { + "p50": 473.02401065826416, + "p90": 617.6000237464905, + "p95": 3201.3440132141113, + "p99": 4767.903804779053 + }, + "isolatedSum": { + "p50": 446.01600617170334, + "p90": 543.0079847574234, + "p95": 704.959973692894, + "p99": 7870.847940444946 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 268, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 333.47201347351074, + "p90": 409.92000699043274, + "p95": 493.4079945087433, + "p99": 3443.5200691223145 + }, + "combine": { + "p50": 132.6719969511032, + "p90": 141.2159949541092, + "p95": 148.28799664974213, + "p99": 3205.440044403076 + }, + "roundtrip": { + "p50": 458.75200629234314, + "p90": 535.7760190963745, + "p95": 683.3279728889465, + "p99": 4411.168098449707 + }, + "isolatedSum": { + "p50": 466.14401042461395, + "p90": 551.1360019445419, + "p95": 641.6959911584854, + "p99": 6648.960113525391 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 533, + "stragglerRank": 
2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 349.0239977836609, + "p90": 389.5680010318756, + "p95": 493.44000220298767, + "p99": 3553.7919998168945 + }, + "combine": { + "p50": 203.99999618530273, + "p90": 214.75200355052948, + "p95": 231.455996632576, + "p99": 3385.823965072632 + }, + "roundtrip": { + "p50": 533.3120226860046, + "p90": 605.5039763450623, + "p95": 709.9199891090393, + "p99": 4010.39981842041 + }, + "isolatedSum": { + "p50": 553.0239939689636, + "p90": 604.3200045824051, + "p95": 724.8959988355637, + "p99": 6939.615964889526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 1027, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-27b148a2", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b200_bf868a74", + "comparisonKey": "bd6127cf1b9dede4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:16:32.362344+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 301.0239899158478, + "p90": 357.91999101638794, + "p95": 435.93600392341614, + "p99": 4567.5201416015625 + }, + "combine": { + "p50": 116.73600226640701, + "p90": 141.59999787807465, + "p95": 153.21600437164307, + "p99": 3536.6721153259277 + }, + "roundtrip": { + "p50": 440.8639967441559, + "p90": 557.856023311615, + "p95": 731.2639951705933, + "p99": 5107.456207275391 + }, + "isolatedSum": { + "p50": 417.7599921822548, + "p90": 499.5199888944626, + "p95": 589.1520082950592, + "p99": 8104.19225692749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 15, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 302.3039996623993, + "p90": 346.3999927043915, + "p95": 618.336021900177, + "p99": 4469.95210647583 + }, + 
"combine": { + "p50": 113.11999708414078, + "p90": 133.66399705410004, + "p95": 146.27200365066528, + "p99": 3483.2000732421875 + }, + "roundtrip": { + "p50": 445.5679953098297, + "p90": 541.375994682312, + "p95": 873.088002204895, + "p99": 5192.1281814575195 + }, + "isolatedSum": { + "p50": 415.42399674654007, + "p90": 480.0639897584915, + "p95": 764.6080255508423, + "p99": 7953.152179718018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 24, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 312.9279911518097, + "p90": 382.84799456596375, + "p95": 491.32800102233887, + "p99": 4347.263813018799 + }, + "combine": { + "p50": 115.00799655914307, + "p90": 141.15199446678162, + "p95": 156.89599514007568, + "p99": 3354.111909866333 + }, + "roundtrip": { + "p50": 456.60799741744995, + "p90": 549.2479801177979, + "p95": 3536.384105682373, + "p99": 5093.440055847168 + }, + "isolatedSum": { + "p50": 427.93598771095276, + "p90": 523.9999890327454, + "p95": 648.2239961624146, + "p99": 7701.375722885132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 43, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 310.65601110458374, + "p90": 350.14399886131287, + "p95": 485.8880043029785, + "p99": 4571.167945861816 + }, + "combine": { + "p50": 111.48799955844879, + "p90": 133.18400084972382, + "p95": 147.64800667762756, + "p99": 3483.1039905548096 + }, + "roundtrip": { + "p50": 452.7359902858734, + "p90": 563.2320046424866, + "p95": 771.1679935455322, + "p99": 5220.704078674316 + }, + "isolatedSum": { + "p50": 422.14401066303253, + "p90": 483.3279997110367, + "p95": 633.5360109806061, + 
"p99": 8054.271936416626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 73, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 311.13600730895996, + "p90": 380.3519904613495, + "p95": 502.24000215530396, + "p99": 4469.696044921875 + }, + "combine": { + "p50": 111.35999858379364, + "p90": 130.36799430847168, + "p95": 136.1600011587143, + "p99": 4264.1921043396 + }, + "roundtrip": { + "p50": 450.81600546836853, + "p90": 532.4479937553406, + "p95": 855.4880023002625, + "p99": 4959.199905395508 + }, + "isolatedSum": { + "p50": 422.4960058927536, + "p90": 510.71998476982117, + "p95": 638.4000033140182, + "p99": 8733.888149261475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 142, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 330.7200074195862, + "p90": 383.0080032348633, + "p95": 478.5600006580353, + "p99": 4321.44021987915 + }, + "combine": { + "p50": 113.6000007390976, + "p90": 140.3840035200119, + "p95": 162.7199947834015, + "p99": 4405.183792114258 + }, + "roundtrip": { + "p50": 462.6559913158417, + "p90": 561.8559718132019, + "p95": 714.6559953689575, + "p99": 5060.2240562438965 + }, + "isolatedSum": { + "p50": 444.3200081586838, + "p90": 523.3920067548752, + "p95": 641.2799954414368, + "p99": 8726.624011993408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 274, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 334.49599146842957, + "p90": 
382.2399973869324, + "p95": 485.1840138435364, + "p99": 4034.0161323547363 + }, + "combine": { + "p50": 133.34399461746216, + "p90": 145.28000354766846, + "p95": 153.82400155067444, + "p99": 237.7600073814392 + }, + "roundtrip": { + "p50": 464.83200788497925, + "p90": 568.2240128517151, + "p95": 730.5600047111511, + "p99": 4718.016147613525 + }, + "isolatedSum": { + "p50": 467.8399860858917, + "p90": 527.5200009346008, + "p95": 639.0080153942108, + "p99": 4271.7761397361755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 349.0239977836609, + "p90": 383.1680119037628, + "p95": 493.0880069732666, + "p99": 4397.439956665039 + }, + "combine": { + "p50": 206.59199357032776, + "p90": 290.3999984264374, + "p95": 321.4400112628937, + "p99": 3616.28794670105 + }, + "roundtrip": { + "p50": 537.280023097992, + "p90": 621.0240125656128, + "p95": 2953.824043273926, + "p99": 4214.46418762207 + }, + "isolatedSum": { + "p50": 555.6159913539886, + "p90": 673.5680103302002, + "p95": 814.5280182361603, + "p99": 8013.727903366089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 1042, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bf7f0a54", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_ed73f5d1", + "comparisonKey": "bda3fdb1200ba38c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:12:04.277719+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + 
"phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 306.0159981250763, + "p90": 380.12799620628357, + "p95": 2955.3918838500977, + "p99": 4644.415855407715 + }, + "combine": { + "p50": 125.40799379348755, + "p90": 195.96800208091736, + "p95": 2276.73602104187, + "p99": 4374.207973480225 + }, + "roundtrip": { + "p50": 441.2800073623657, + "p90": 584.1599702835083, + "p95": 
805.0559759140015, + "p99": 4877.408027648926 + }, + "isolatedSum": { + "p50": 431.42399191856384, + "p90": 576.0959982872009, + "p95": 5232.127904891968, + "p99": 9018.62382888794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 303.23201417922974, + "p90": 364.8320138454437, + "p95": 469.6959853172302, + "p99": 4543.488025665283 + }, + "combine": { + "p50": 114.94400352239609, + "p90": 137.5039964914322, + "p95": 599.1359949111938, + "p99": 4336.512088775635 + }, + "roundtrip": { + "p50": 447.3919868469238, + "p90": 731.007993221283, + "p95": 2132.0641040802, + "p99": 4831.424236297607 + }, + "isolatedSum": { + "p50": 418.1760177016258, + "p90": 502.3360103368759, + "p95": 1068.831980228424, + "p99": 8880.000114440918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 321.21598720550537, + "p90": 458.8800072669983, + "p95": 1063.9679431915283, + "p99": 4411.712169647217 + }, + "combine": { + "p50": 122.11199849843979, + "p90": 187.58399784564972, + "p95": 245.2480047941208, + "p99": 3738.464117050171 + }, + "roundtrip": { + "p50": 446.52798771858215, + "p90": 624.1599917411804, + "p95": 984.8319888114929, + "p99": 5131.648063659668 + }, + "isolatedSum": { + "p50": 443.32798570394516, + "p90": 646.464005112648, + "p95": 1309.215947985649, + "p99": 8150.176286697388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 315.5199885368347, + "p90": 380.19201159477234, + "p95": 556.7359924316406, + "p99": 4517.536163330078 + }, + "combine": { + "p50": 115.7120019197464, + "p90": 145.53600549697876, + "p95": 172.06400632858276, + "p99": 3685.0879192352295 + }, + "roundtrip": { + "p50": 449.7919976711273, + "p90": 613.7920022010803, + "p95": 3536.1599922180176, + "p99": 4954.944133758545 + }, + "isolatedSum": { + "p50": 431.2319904565811, + "p90": 525.7280170917511, + "p95": 728.7999987602234, + "p99": 8202.624082565308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 316.22400879859924, + "p90": 379.71198558807373, + "p95": 502.1439790725708, + "p99": 4288.544178009033 + }, + "combine": { + "p50": 112.19199746847153, + "p90": 140.79999923706055, + "p95": 159.36000645160675, + "p99": 3317.568063735962 + }, + "roundtrip": { + "p50": 453.37599515914917, + "p90": 569.9520111083984, + "p95": 2089.024066925049, + "p99": 4753.503799438477 + }, + "isolatedSum": { + "p50": 428.41600626707077, + "p90": 520.5119848251343, + "p95": 661.5039855241776, + "p99": 7606.112241744995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 344.1919982433319, + "p90": 410.5280041694641, + "p95": 523.6799716949463, + "p99": 3992.89608001709 + }, + "combine": { + "p50": 122.49600142240524, + "p90": 156.63999319076538, + "p95": 169.95200514793396, + "p99": 3182.5919151306152 + }, + 
"roundtrip": { + "p50": 468.51199865341187, + "p90": 594.43199634552, + "p95": 3357.343912124634, + "p99": 4656.3520431518555 + }, + "isolatedSum": { + "p50": 466.68799966573715, + "p90": 567.1679973602295, + "p95": 693.6319768428802, + "p99": 7175.487995147705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 379.93600964546204, + "p90": 441.8880045413971, + "p95": 590.1119709014893, + "p99": 3590.5280113220215 + }, + "combine": { + "p50": 165.50399363040924, + "p90": 196.3520050048828, + "p95": 263.5839879512787, + "p99": 3597.248077392578 + }, + "roundtrip": { + "p50": 543.4240102767944, + "p90": 609.503984451294, + "p95": 721.7599749565125, + "p99": 4089.3120765686035 + }, + "isolatedSum": { + "p50": 545.4400032758713, + "p90": 638.2400095462799, + "p95": 853.695958852768, + "p99": 7187.7760887146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 426.2079894542694, + "p90": 485.0879907608032, + "p95": 568.1599974632263, + "p99": 3872.1280097961426 + }, + "combine": { + "p50": 257.60000944137573, + "p90": 273.72801303863525, + "p95": 2123.9678859710693, + "p99": 3111.743927001953 + }, + "roundtrip": { + "p50": 690.6560063362122, + "p90": 766.975998878479, + "p95": 2719.9039459228516, + "p99": 3773.3120918273926 + }, + "isolatedSum": { + "p50": 683.8079988956451, + "p90": 758.8160037994385, + "p95": 2692.1278834342957, + "p99": 6983.871936798096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + 
"combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a115625f", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b200_9afdbb6b", + "comparisonKey": "fd389437f119e5bf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:13:35.543206+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 318.9760148525238, + "p90": 402.52798795700073, + "p95": 3416.3520336151123, + "p99": 4671.42391204834 + }, + "combine": { + "p50": 121.34400010108948, + "p90": 149.53599870204926, + "p95": 180.4800033569336, + "p99": 3426.975965499878 + }, + "roundtrip": { + "p50": 456.28800988197327, + "p90": 558.4959983825684, + "p95": 2760.1919174194336, + "p99": 4945.7597732543945 + }, + "isolatedSum": { + "p50": 440.3200149536133, + "p90": 552.06398665905, + "p95": 3596.832036972046, + "p99": 8098.399877548218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 59, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 311.0400140285492, + "p90": 387.90398836135864, + "p95": 3300.5759716033936, + "p99": 4634.751796722412 + }, + "combine": { + "p50": 118.78400295972824, + "p90": 138.94400000572205, + "p95": 181.2479943037033, + "p99": 4184.319972991943 + }, + "roundtrip": { + "p50": 449.75998997688293, + "p90": 509.8239779472351, + "p95": 3482.6560020446777, + "p99": 4931.1041831970215 + }, + "isolatedSum": { + "p50": 429.82401698827744, + "p90": 526.8479883670807, + "p95": 3481.823965907097, + "p99": 8819.071769714355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 121, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 327.87200808525085, + "p90": 612.8000020980835, + 
"p95": 2407.6480865478516, + "p99": 4449.24783706665 + }, + "combine": { + "p50": 146.59200608730316, + "p90": 263.2319927215576, + "p95": 2099.8079776763916, + "p99": 3904.320001602173 + }, + "roundtrip": { + "p50": 476.063996553421, + "p90": 823.7119913101196, + "p95": 2706.432104110718, + "p99": 4540.703773498535 + }, + "isolatedSum": { + "p50": 474.464014172554, + "p90": 876.0319948196411, + "p95": 4507.456064224243, + "p99": 8353.567838668823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 244, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 319.0400004386902, + "p90": 350.17600655555725, + "p95": 403.9680063724518, + "p99": 4420.735836029053 + }, + "combine": { + "p50": 118.75200271606445, + "p90": 147.96799421310425, + "p95": 171.00800573825836, + "p99": 4132.895946502686 + }, + "roundtrip": { + "p50": 464.383989572525, + "p90": 618.7520027160645, + "p95": 1933.4080219268799, + "p99": 4980.703830718994 + }, + "isolatedSum": { + "p50": 437.79200315475464, + "p90": 498.1440007686615, + "p95": 574.9760121107101, + "p99": 8553.631782531738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 478, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 327.39201188087463, + "p90": 377.6319921016693, + "p95": 3293.9839363098145, + "p99": 4454.080104827881 + }, + "combine": { + "p50": 117.50400066375732, + "p90": 135.68000495433807, + "p95": 142.0159935951233, + "p99": 4013.1521224975586 + }, + "roundtrip": { + "p50": 462.8480076789856, + "p90": 648.8320231437683, + "p95": 3595.3280925750732, + "p99": 4796.224117279053 + }, + "isolatedSum": { + "p50": 444.89601254463196, + 
"p90": 513.3119970560074, + "p95": 3435.9999299049377, + "p99": 8467.23222732544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 953, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 368.5759902000427, + "p90": 427.5200068950653, + "p95": 2793.056011199951, + "p99": 4330.304145812988 + }, + "combine": { + "p50": 126.39999389648438, + "p90": 138.62399756908417, + "p95": 144.44799721240997, + "p99": 3472.5120067596436 + }, + "roundtrip": { + "p50": 486.8159890174866, + "p90": 600.2879738807678, + "p95": 861.2800240516663, + "p99": 4981.311798095703 + }, + "isolatedSum": { + "p50": 494.9759840965271, + "p90": 566.1440044641495, + "p95": 2937.504008412361, + "p99": 7802.816152572632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 1908, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 417.279988527298, + "p90": 454.912006855011, + "p95": 2037.503957748413, + "p99": 3691.8718814849854 + }, + "combine": { + "p50": 189.28000330924988, + "p90": 202.59200036525726, + "p95": 2351.9039154052734, + "p99": 3404.383897781372 + }, + "roundtrip": { + "p50": 593.3120250701904, + "p90": 649.5360136032104, + "p95": 1749.8879432678223, + "p99": 3993.3440685272217 + }, + "isolatedSum": { + "p50": 606.5599918365479, + "p90": 657.5040072202682, + "p95": 4389.4078731536865, + "p99": 7096.255779266357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 3804, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 498.23999404907227, + "p90": 580.7039737701416, + "p95": 2504.703998565674, + "p99": 3822.4639892578125 + }, + "combine": { + "p50": 310.43198704719543, + "p90": 322.59199023246765, + "p95": 377.47201323509216, + "p99": 2539.0079021453857 + }, + "roundtrip": { + "p50": 790.1440262794495, + "p90": 893.1199908256531, + "p95": 2719.775915145874, + "p99": 3374.9759197235107 + }, + "isolatedSum": { + "p50": 808.6719810962677, + "p90": 903.2959640026093, + "p95": 2882.176011800766, + "p99": 6361.471891403198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e97149ed", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b200_244814d3", + "comparisonKey": "b7c08b045ce46752", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:25:55.141771+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 309.6640110015869, + "p90": 398.46399426460266, + "p95": 3225.503921508789, + "p99": 4824.192047119141 + }, + "combine": { + "p50": 118.49600076675415, + "p90": 138.0160003900528, + "p95": 171.48800194263458, + "p99": 4284.063816070557 + }, + "roundtrip": { + "p50": 447.3919868469238, + "p90": 515.5199766159058, + "p95": 806.2080144882202, + "p99": 5065.4401779174805 + }, + "isolatedSum": { + "p50": 428.16001176834106, + "p90": 536.4799946546555, + "p95": 3396.9919234514236, + "p99": 9108.255863189697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 310.2720081806183, + "p90": 372.51201272010803, + "p95": 3140.0959491729736, + "p99": 4533.120155334473 + }, + "combine": { + "p50": 116.28799885511398, + "p90": 134.8479986190796, + "p95": 
139.1039937734604, + "p99": 4343.808174133301 + }, + "roundtrip": { + "p50": 451.87199115753174, + "p90": 588.15997838974, + "p95": 3367.6159381866455, + "p99": 4967.872142791748 + }, + "isolatedSum": { + "p50": 426.56000703573227, + "p90": 507.3600113391876, + "p95": 3279.199942946434, + "p99": 8876.928329467773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 317.9520070552826, + "p90": 365.664005279541, + "p95": 450.3679871559143, + "p99": 4413.18416595459 + }, + "combine": { + "p50": 117.76000261306763, + "p90": 139.90400731563568, + "p95": 251.74400210380554, + "p99": 4253.056049346924 + }, + "roundtrip": { + "p50": 464.4159972667694, + "p90": 579.7759890556335, + "p95": 719.1680073738098, + "p99": 4697.6318359375 + }, + "isolatedSum": { + "p50": 435.7120096683502, + "p90": 505.5680125951767, + "p95": 702.1119892597198, + "p99": 8666.240215301514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 40, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 322.1440017223358, + "p90": 386.6559863090515, + "p95": 499.455988407135, + "p99": 4387.008190155029 + }, + "combine": { + "p50": 115.90400338172913, + "p90": 140.32000303268433, + "p95": 181.5679967403412, + "p99": 3637.439966201782 + }, + "roundtrip": { + "p50": 501.3440251350403, + "p90": 637.0880007743835, + "p95": 766.8799757957458, + "p99": 4528.448104858398 + }, + "isolatedSum": { + "p50": 438.04800510406494, + "p90": 526.9759893417358, + "p95": 681.0239851474762, + "p99": 8024.4481563568115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, 
+ "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 71, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 316.0640001296997, + "p90": 354.43198680877686, + "p95": 386.9760036468506, + "p99": 3276.992082595825 + }, + "combine": { + "p50": 114.9120032787323, + "p90": 136.1600011587143, + "p95": 229.0560007095337, + "p99": 4423.03991317749 + }, + "roundtrip": { + "p50": 456.54401183128357, + "p90": 554.4959902763367, + "p95": 717.0559763908386, + "p99": 4822.495937347412 + }, + "isolatedSum": { + "p50": 430.976003408432, + "p90": 490.59198796749115, + "p95": 616.0320043563843, + "p99": 7700.031995773315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 143, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 357.8239977359772, + "p90": 441.0879909992218, + "p95": 680.6719899177551, + "p99": 4224.736213684082 + }, + "combine": { + "p50": 119.45600062608719, + "p90": 164.41600024700165, + "p95": 212.79999613761902, + "p99": 4085.696220397949 + }, + "roundtrip": { + "p50": 471.71199321746826, + "p90": 613.6639714241028, + "p95": 3109.7280979156494, + "p99": 4803.423881530762 + }, + "isolatedSum": { + "p50": 477.27999836206436, + "p90": 605.5039912462234, + "p95": 893.4719860553741, + "p99": 8310.432434082031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 266, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 337.47199177742004, + "p90": 403.74401211738586, + "p95": 491.39198660850525, + "p99": 4110.432147979736 + }, + "combine": 
{ + "p50": 133.4719955921173, + "p90": 143.96800100803375, + "p95": 169.53599452972412, + "p99": 3083.7440490722656 + }, + "roundtrip": { + "p50": 470.7840085029602, + "p90": 562.3360276222229, + "p95": 696.4799761772156, + "p99": 4671.584129333496 + }, + "isolatedSum": { + "p50": 470.94398736953735, + "p90": 547.7120131254196, + "p95": 660.9279811382294, + "p99": 7194.176197052002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 534, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 352.7680039405823, + "p90": 402.8800129890442, + "p95": 499.10399317741394, + "p99": 4239.071846008301 + }, + "combine": { + "p50": 203.67999374866486, + "p90": 222.04799950122833, + "p95": 2191.999912261963, + "p99": 3634.8800659179688 + }, + "roundtrip": { + "p50": 542.4320101737976, + "p90": 641.4719820022583, + "p95": 2529.5040607452393, + "p99": 4059.296131134033 + }, + "isolatedSum": { + "p50": 556.4479976892471, + "p90": 624.9280124902725, + "p95": 2691.103905439377, + "p99": 7873.9519119262695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1044, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-03041936", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b200_51d8b3be", + "comparisonKey": "82899fdff196f4d5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:20:26.146184+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 303.0720055103302, + "p90": 367.45598912239075, + "p95": 3380.064010620117, + "p99": 4425.600051879883 + }, + "combine": { + "p50": 111.07199639081955, + "p90": 130.94399869441986, + "p95": 150.39999783039093, + "p99": 4073.887825012207 + }, + "roundtrip": { + "p50": 432.73600935935974, + "p90": 514.9120092391968, + "p95": 3611.5200519561768, + "p99": 4904.672145843506 + }, + "isolatedSum": { + 
"p50": 414.14400190114975, + "p90": 498.3999878168106, + "p95": 3530.464008450508, + "p99": 8499.48787689209 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 304.0960133075714, + "p90": 372.51201272010803, + "p95": 3411.263942718506, + "p99": 4519.64807510376 + }, + "combine": { + "p50": 113.08799684047699, + "p90": 133.82400572299957, + "p95": 176.5120029449463, + "p99": 4026.144027709961 + }, + "roundtrip": { + "p50": 448.5760033130646, + "p90": 740.1599884033203, + "p95": 3551.7759323120117, + "p99": 4798.8481521606445 + }, + "isolatedSum": { + "p50": 417.1840101480484, + "p90": 506.3360184431076, + "p95": 3587.775945663452, + "p99": 8545.79210281372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 40, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 315.16799330711365, + "p90": 369.28001046180725, + "p95": 457.95199275016785, + "p99": 4068.575859069824 + }, + "combine": { + "p50": 114.43199962377548, + "p90": 146.464005112648, + "p95": 163.7440025806427, + "p99": 3951.008081436157 + }, + "roundtrip": { + "p50": 451.00799202919006, + "p90": 558.1439733505249, + "p95": 3384.255886077881, + "p99": 4686.079978942871 + }, + "isolatedSum": { + "p50": 429.59999293088913, + "p90": 515.7440155744553, + "p95": 621.6959953308105, + "p99": 8019.583940505981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 81, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 316.76799058914185, + "p90": 400.191992521286, + "p95": 484.03200507164, + "p99": 4349.503993988037 + }, + "combine": { + "p50": 112.86400258541107, + "p90": 131.58400356769562, + "p95": 137.60000467300415, + "p99": 3920.8641052246094 + }, + "roundtrip": { + "p50": 451.6479969024658, + "p90": 516.319990158081, + "p95": 742.9119944572449, + "p99": 4878.367900848389 + }, + "isolatedSum": { + "p50": 429.6319931745529, + "p90": 531.7759960889816, + "p95": 621.6320097446442, + "p99": 8270.368099212646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 314.7200047969818, + "p90": 382.07998871803284, + "p95": 502.3040175437927, + "p99": 4099.455833435059 + }, + "combine": { + "p50": 111.80800199508667, + "p90": 128.12800705432892, + "p95": 132.25600123405457, + "p99": 192.60799884796143 + }, + "roundtrip": { + "p50": 453.0239999294281, + "p90": 612.1280193328857, + "p95": 3283.4560871124268, + "p99": 4855.199813842773 + }, + "isolatedSum": { + "p50": 426.5280067920685, + "p90": 510.20799577236176, + "p95": 634.5600187778473, + "p99": 4292.06383228302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 339, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 334.6239924430847, + "p90": 393.92000436782837, + "p95": 505.0560235977173, + "p99": 4028.223991394043 + }, + "combine": { + "p50": 115.167997777462, + "p90": 137.82399892807007, + "p95": 161.47199273109436, + "p99": 3881.727933883667 + }, + "roundtrip": { + "p50": 464.80000019073486, + "p90": 569.9520111083984, + "p95": 
3327.2318840026855, + "p99": 4635.359764099121 + }, + "isolatedSum": { + "p50": 449.7919902205467, + "p90": 531.7440032958984, + "p95": 666.5280163288116, + "p99": 7909.95192527771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 676, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 344.1280126571655, + "p90": 412.8960072994232, + "p95": 499.2640018463135, + "p99": 3974.976062774658 + }, + "combine": { + "p50": 146.01600170135498, + "p90": 159.90400314331055, + "p95": 165.50399363040924, + "p99": 3639.3918991088867 + }, + "roundtrip": { + "p50": 488.16001415252686, + "p90": 617.1200275421143, + "p95": 3260.7359886169434, + "p99": 4360.928058624268 + }, + "isolatedSum": { + "p50": 490.1440143585205, + "p90": 572.8000104427338, + "p95": 664.7679954767227, + "p99": 7614.367961883545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 1328, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 381.5680146217346, + "p90": 446.8480050563812, + "p95": 2637.5999450683594, + "p99": 3912.1599197387695 + }, + "combine": { + "p50": 217.66400337219238, + "p90": 227.87199914455414, + "p95": 239.23200368881226, + "p99": 3263.4880542755127 + }, + "roundtrip": { + "p50": 603.4240126609802, + "p90": 686.5280270576477, + "p95": 3299.2959022521973, + "p99": 3789.5359992980957 + }, + "isolatedSum": { + "p50": 599.232017993927, + "p90": 674.7200042009354, + "p95": 2876.8319487571716, + "p99": 7175.647974014282 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fdc396a2", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b200_cf927f30", + "comparisonKey": "92271e1c8a73d753", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:20:58.740086+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 308.1600069999695, + "p90": 362.2080087661743, + "p95": 402.27198600769043, + "p99": 4307.199954986572 + }, + "combine": { + "p50": 123.29600006341934, + "p90": 149.47199821472168, + "p95": 174.23999309539795, + "p99": 3497.1840381622314 + }, + "roundtrip": { + "p50": 449.40799474716187, + "p90": 509.44000482559204, + "p95": 757.7919960021973, + "p99": 4741.24813079834 + }, + "isolatedSum": { + "p50": 431.4560070633888, + "p90": 511.680006980896, + "p95": 576.5119791030884, + "p99": 7804.383993148804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 307.3599934577942, + "p90": 340.86400270462036, + "p95": 2100.4159450531006, + "p99": 4418.464183807373 + }, + "combine": { + "p50": 123.83999675512314, + "p90": 170.3999936580658, + "p95": 181.88799917697906, + "p99": 4004.000186920166 + }, + "roundtrip": { + "p50": 451.61598920822144, + "p90": 547.5839972496033, + "p95": 2967.8399562835693, + "p99": 4876.287937164307 + }, + "isolatedSum": { + "p50": 431.1999902129173, + "p90": 511.26399636268616, + "p95": 2282.3039442300797, + "p99": 8422.464370727539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 22, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 323.5839903354645, + "p90": 391.36001467704773, + "p95": 521.2159752845764, + "p99": 
4219.488143920898 + }, + "combine": { + "p50": 117.34399944543839, + "p90": 134.88000631332397, + "p95": 161.79199516773224, + "p99": 3926.624059677124 + }, + "roundtrip": { + "p50": 460.640013217926, + "p90": 542.9760217666626, + "p95": 3473.8240242004395, + "p99": 4670.720100402832 + }, + "isolatedSum": { + "p50": 440.92798978090286, + "p90": 526.2400209903717, + "p95": 683.0079704523087, + "p99": 8146.1122035980225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 329.5679986476898, + "p90": 400.60800313949585, + "p95": 499.10399317741394, + "p99": 4000.4801750183105 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 154.59200739860535, + "p95": 165.69599509239197, + "p99": 3075.455904006958 + }, + "roundtrip": { + "p50": 461.7919921875, + "p90": 563.5200142860413, + "p95": 3372.5759983062744, + "p99": 4603.871822357178 + }, + "isolatedSum": { + "p50": 451.83999836444855, + "p90": 555.2000105381012, + "p95": 664.7999882698059, + "p99": 7075.936079025269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 73, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 328.73600721359253, + "p90": 455.3599953651428, + "p95": 519.9679732322693, + "p99": 4132.383823394775 + }, + "combine": { + "p50": 115.90400338172913, + "p90": 132.6719969511032, + "p95": 136.83199882507324, + "p99": 3084.768056869507 + }, + "roundtrip": { + "p50": 462.0159864425659, + "p90": 547.327995300293, + "p95": 803.0719757080078, + "p99": 4567.4238204956055 + }, + "isolatedSum": { + "p50": 444.64001059532166, + "p90": 588.031992316246, + 
"p95": 656.7999720573425, + "p99": 7217.151880264282 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 138, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 338.17601203918457, + "p90": 426.7520010471344, + "p95": 563.8719797134399, + "p99": 4013.6637687683105 + }, + "combine": { + "p50": 116.09599739313126, + "p90": 134.65599715709686, + "p95": 144.83200013637543, + "p99": 3239.327907562256 + }, + "roundtrip": { + "p50": 480.54400086402893, + "p90": 615.3920292854309, + "p95": 1963.6160135269165, + "p99": 4592.415809631348 + }, + "isolatedSum": { + "p50": 454.2720094323158, + "p90": 561.4079982042313, + "p95": 708.7039798498154, + "p99": 7252.991676330566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 273, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 338.0480110645294, + "p90": 367.68001317977905, + "p95": 483.0400049686432, + "p99": 3250.976085662842 + }, + "combine": { + "p50": 134.11200046539307, + "p90": 145.82400023937225, + "p95": 154.01600301265717, + "p99": 3250.1120567321777 + }, + "roundtrip": { + "p50": 476.0960042476654, + "p90": 596.3519811630249, + "p95": 2825.536012649536, + "p99": 4348.320007324219 + }, + "isolatedSum": { + "p50": 472.1600115299225, + "p90": 513.5040134191513, + "p95": 637.0560079813004, + "p99": 6501.0881423950195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 532, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { 
+ "p50": 356.1919927597046, + "p90": 393.3440148830414, + "p95": 537.9199981689453, + "p99": 3964.927911758423 + }, + "combine": { + "p50": 205.9199959039688, + "p90": 217.50399470329285, + "p95": 226.72000527381897, + "p99": 2822.335958480835 + }, + "roundtrip": { + "p50": 549.2159724235535, + "p90": 620.8639740943909, + "p95": 3007.2638988494873, + "p99": 3930.783987045288 + }, + "isolatedSum": { + "p50": 562.1119886636734, + "p90": 610.8480095863342, + "p95": 764.6400034427643, + "p99": 6787.263870239258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 1041, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fac11c1a", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_18fd5edd", + "comparisonKey": "86198f77c4b79e85", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:23:07.013634+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + 
"configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 305.759996175766, + "p90": 335.32801270484924, + "p95": 3193.7921047210693, + "p99": 5186.304092407227 + }, + "combine": { + "p50": 118.14399808645248, + "p90": 134.33599472045898, + "p95": 140.60799777507782, + "p99": 3146.0800170898438 + }, + "roundtrip": { + "p50": 444.2879855632782, + "p90": 563.3919835090637, + "p95": 3023.616075515747, + "p99": 4943.808078765869 + }, + "isolatedSum": { + "p50": 423.9039942622185, + "p90": 469.6640074253082, + "p95": 3334.400102496147, + "p99": 8332.38410949707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 307.3599934577942, + "p90": 345.63198685646057, + "p95": 3150.399923324585, + "p99": 4573.1520652771 + }, + "combine": { + "p50": 119.35999989509583, + "p90": 147.48799800872803, + "p95": 2753.7600994110107, + "p99": 4510.43176651001 + }, + "roundtrip": { + "p50": 
447.3919868469238, + "p90": 514.4000053405762, + "p95": 745.8239793777466, + "p99": 4952.352046966553 + }, + "isolatedSum": { + "p50": 426.71999335289, + "p90": 493.1199848651886, + "p95": 5904.160022735596, + "p99": 9083.58383178711 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 320.41600346565247, + "p90": 401.98400616645813, + "p95": 472.83199429512024, + "p99": 4301.184177398682 + }, + "combine": { + "p50": 118.27199906110764, + "p90": 143.96800100803375, + "p95": 1894.752025604248, + "p99": 4158.688068389893 + }, + "roundtrip": { + "p50": 462.17599511146545, + "p90": 571.1359977722168, + "p95": 2781.7599773406982, + "p99": 4941.184043884277 + }, + "isolatedSum": { + "p50": 438.6880025267601, + "p90": 545.9520071744919, + "p95": 2367.5840198993683, + "p99": 8459.872245788574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 320.0640082359314, + "p90": 376.5760064125061, + "p95": 445.47200202941895, + "p99": 4383.840084075928 + }, + "combine": { + "p50": 117.0239970088005, + "p90": 144.06399428844452, + "p95": 165.66400229930878, + "p99": 4068.2239532470703 + }, + "roundtrip": { + "p50": 465.66399931907654, + "p90": 589.4719958305359, + "p95": 772.704005241394, + "p99": 4966.1760330200195 + }, + "isolatedSum": { + "p50": 437.0880052447319, + "p90": 520.6400007009506, + "p95": 611.1360043287277, + "p99": 8452.064037322998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + 
"recvTokensMax": 310, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 321.4400112628937, + "p90": 394.6880102157593, + "p95": 448.67199659347534, + "p99": 4302.944183349609 + }, + "combine": { + "p50": 116.73600226640701, + "p90": 137.9839926958084, + "p95": 168.5439944267273, + "p99": 4138.04817199707 + }, + "roundtrip": { + "p50": 459.3279957771301, + "p90": 515.936017036438, + "p95": 649.3440270423889, + "p99": 4681.600093841553 + }, + "isolatedSum": { + "p50": 438.1760135293007, + "p90": 532.6720029115677, + "p95": 617.2159910202026, + "p99": 8440.99235534668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 350.6560027599335, + "p90": 421.02399468421936, + "p95": 578.3680081367493, + "p99": 4216.70389175415 + }, + "combine": { + "p50": 122.04799801111221, + "p90": 142.07999408245087, + "p95": 260.5440020561218, + "p99": 4042.5281524658203 + }, + "roundtrip": { + "p50": 473.1520116329193, + "p90": 579.6800255775452, + "p95": 649.2480039596558, + "p99": 4731.711864471436 + }, + "isolatedSum": { + "p50": 472.7040007710457, + "p90": 563.1039887666702, + "p95": 838.9120101928711, + "p99": 8259.23204421997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 375.90399384498596, + "p90": 414.8159921169281, + "p95": 502.24000215530396, + "p99": 3811.392068862915 + }, + "combine": { + "p50": 163.93600404262543, + "p90": 181.21600151062012, + 
"p95": 2103.008031845093, + "p99": 3745.728015899658 + }, + "roundtrip": { + "p50": 532.480001449585, + "p90": 595.0400233268738, + "p95": 653.7920236587524, + "p99": 4109.087944030762 + }, + "isolatedSum": { + "p50": 539.8399978876114, + "p90": 596.0319936275482, + "p95": 2605.2480340003967, + "p99": 7557.120084762573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 430.04798889160156, + "p90": 517.9200172424316, + "p95": 2490.463972091675, + "p99": 3774.303913116455 + }, + "combine": { + "p50": 256.7040026187897, + "p90": 266.36800169944763, + "p95": 281.5999984741211, + "p99": 3093.503952026367 + }, + "roundtrip": { + "p50": 680.0640225410461, + "p90": 759.8080039024353, + "p95": 2395.456075668335, + "p99": 3635.2639198303223 + }, + "isolatedSum": { + "p50": 686.7519915103912, + "p90": 784.2880189418793, + "p95": 2772.063970565796, + "p99": 6867.807865142822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-31e1d682", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_c2dcdc61", + "comparisonKey": "5d83abfcfd959d96", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:23:39.083493+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 295.83999514579773, + "p90": 348.4160006046295, + "p95": 449.535995721817, + "p99": 4869.855880737305 + }, + "combine": { + "p50": 121.0239976644516, + "p90": 165.8879965543747, + "p95": 268.99200677871704, + "p99": 4159.296035766602 + }, + "roundtrip": { + "p50": 434.6559941768646, + "p90": 540.9280061721802, + "p95": 880.6080222129822, + "p99": 4795.392036437988 + }, + "isolatedSum": { + "p50": 
416.86399281024933, + "p90": 514.3039971590042, + "p95": 718.5280025005341, + "p99": 9029.151916503906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 296.00000381469727, + "p90": 344.0000116825104, + "p95": 2129.6639442443848, + "p99": 4702.784061431885 + }, + "combine": { + "p50": 109.47199910879135, + "p90": 130.048006772995, + "p95": 142.62400567531586, + "p99": 3592.576026916504 + }, + "roundtrip": { + "p50": 439.2639994621277, + "p90": 619.0400123596191, + "p95": 3042.207956314087, + "p99": 5213.280200958252 + }, + "isolatedSum": { + "p50": 405.4720029234886, + "p90": 474.04801845550537, + "p95": 2272.2879499197006, + "p99": 8295.360088348389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 304.83201146125793, + "p90": 372.54399061203003, + "p95": 471.0400104522705, + "p99": 4221.536159515381 + }, + "combine": { + "p50": 111.23199760913849, + "p90": 136.1600011587143, + "p95": 158.6879938840866, + "p99": 3593.7280654907227 + }, + "roundtrip": { + "p50": 448.41599464416504, + "p90": 595.52001953125, + "p95": 3169.4400310516357, + "p99": 4865.344047546387 + }, + "isolatedSum": { + "p50": 416.0640090703964, + "p90": 508.7039917707443, + "p95": 629.7280043363571, + "p99": 7815.2642250061035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 311.2959861755371, + "p90": 398.71999621391296, + "p95": 697.5039839744568, + "p99": 4322.432041168213 + }, + "combine": { + "p50": 109.8880022764206, + "p90": 133.7279975414276, + "p95": 151.99999511241913, + "p99": 4046.783924102783 + }, + "roundtrip": { + "p50": 452.4799883365631, + "p90": 613.215982913971, + "p95": 2907.167911529541, + "p99": 4861.504077911377 + }, + "isolatedSum": { + "p50": 421.1839884519577, + "p90": 532.4479937553406, + "p95": 849.5039790868759, + "p99": 8369.215965270996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 307.6480031013489, + "p90": 379.7439932823181, + "p95": 459.1360092163086, + "p99": 4343.103885650635 + }, + "combine": { + "p50": 109.8880022764206, + "p90": 128.92800569534302, + "p95": 136.00000739097595, + "p99": 3025.5041122436523 + }, + "roundtrip": { + "p50": 449.44000244140625, + "p90": 533.9199900627136, + "p95": 1351.7760038375854, + "p99": 4951.295852661133 + }, + "isolatedSum": { + "p50": 417.53600537776947, + "p90": 508.67199897766113, + "p95": 595.1360166072845, + "p99": 7368.607997894287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 323.39200377464294, + "p90": 360.25598645210266, + "p95": 470.8479940891266, + "p99": 4200.352191925049 + }, + "combine": { + "p50": 113.6000007390976, + "p90": 157.151997089386, + "p95": 191.67999923229218, + "p99": 4148.159980773926 + }, + "roundtrip": { + "p50": 458.49600434303284, + "p90": 670.3360080718994, + 
"p95": 2575.615882873535, + "p99": 4763.519763946533 + }, + "isolatedSum": { + "p50": 436.99200451374054, + "p90": 517.4079835414886, + "p95": 662.5279933214188, + "p99": 8348.512172698975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 325.53601264953613, + "p90": 378.11198830604553, + "p95": 482.5280010700226, + "p99": 4145.023822784424 + }, + "combine": { + "p50": 132.47999548912048, + "p90": 140.19200205802917, + "p95": 147.32800424098969, + "p99": 3241.0240173339844 + }, + "roundtrip": { + "p50": 460.54399013519287, + "p90": 568.7680244445801, + "p95": 684.3199729919434, + "p99": 4596.704006195068 + }, + "isolatedSum": { + "p50": 458.0160081386566, + "p90": 518.3039903640747, + "p95": 629.8560053110123, + "p99": 7386.047840118408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 344.89598870277405, + "p90": 424.4160056114197, + "p95": 515.6800150871277, + "p99": 4126.431941986084 + }, + "combine": { + "p50": 204.3839991092682, + "p90": 216.44799411296844, + "p95": 227.29599475860596, + "p99": 3518.6240673065186 + }, + "roundtrip": { + "p50": 527.8720259666443, + "p90": 605.8239936828613, + "p95": 2732.959985733032, + "p99": 4038.3358001708984 + }, + "isolatedSum": { + "p50": 549.2799878120422, + "p90": 640.8639997243881, + "p95": 742.9760098457336, + "p99": 7645.0560092926025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ccc7f10e", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_20d23c1d", + "comparisonKey": "0ab0a4c18889d099", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:19:08.406624+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 300.1919984817505, + "p90": 364.3519878387451, + "p95": 3498.4641075134277, + "p99": 4640.960216522217 + }, + "combine": { + "p50": 113.15199732780457, + "p90": 133.91999900341034, + "p95": 152.0639955997467, + "p99": 4300.352096557617 + }, + "roundtrip": { + "p50": 438.01599740982056, + "p90": 497.408002614975, + "p95": 3476.799964904785, + "p99": 5098.656177520752 + }, + "isolatedSum": { + "p50": 413.34399580955505, + "p90": 498.27198684215546, + "p95": 3650.5281031131744, + "p99": 8941.312313079834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 304.0960133075714, + "p90": 357.2480082511902, + "p95": 3127.487897872925, + "p99": 4414.624214172363 + }, + "combine": { + "p50": 114.33599889278412, + "p90": 135.19999384880066, + "p95": 154.23999726772308, + "p99": 4397.056102752686 + }, + "roundtrip": { + "p50": 438.0800127983093, + "p90": 538.2720232009888, + "p95": 799.9039888381958, + "p99": 4931.007862091064 + }, + "isolatedSum": { + "p50": 418.43201220035553, + "p90": 492.44800209999084, + "p95": 3281.727895140648, + "p99": 8811.680316925049 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 311.2640082836151, + "p90": 381.82398676872253, + "p95": 463.6799991130829, + "p99": 4402.112007141113 + }, + "combine": { + 
"p50": 112.06399649381638, + "p90": 131.58400356769562, + "p95": 153.28000485897064, + "p99": 4283.872127532959 + }, + "roundtrip": { + "p50": 447.9359984397888, + "p90": 488.73600363731384, + "p95": 707.8080177307129, + "p99": 4875.840187072754 + }, + "isolatedSum": { + "p50": 423.3280047774315, + "p90": 513.4079903364182, + "p95": 616.9600039720535, + "p99": 8685.984134674072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 314.91199135780334, + "p90": 421.05600237846375, + "p95": 3132.5440406799316, + "p99": 4375.1678466796875 + }, + "combine": { + "p50": 113.40799927711487, + "p90": 137.43999600410461, + "p95": 164.32000696659088, + "p99": 4046.527862548828 + }, + "roundtrip": { + "p50": 450.75199007987976, + "p90": 583.6160182952881, + "p95": 3331.007957458496, + "p99": 4651.328086853027 + }, + "isolatedSum": { + "p50": 428.3199906349182, + "p90": 558.4959983825684, + "p95": 3296.8640476465225, + "p99": 8421.695709228516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 325.72799921035767, + "p90": 414.20799493789673, + "p95": 555.7119846343994, + "p99": 4186.68794631958 + }, + "combine": { + "p50": 113.47199976444244, + "p90": 136.28800213336945, + "p95": 151.8079936504364, + "p99": 3121.5360164642334 + }, + "roundtrip": { + "p50": 452.5119960308075, + "p90": 587.8400206565857, + "p95": 3614.6559715270996, + "p99": 4820.608139038086 + }, + "isolatedSum": { + "p50": 439.1999989748001, + "p90": 550.4959970712662, + "p95": 707.5199782848358, + "p99": 
7308.2239627838135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 328.031986951828, + "p90": 361.34400963783264, + "p95": 477.31199860572815, + "p99": 4125.631809234619 + }, + "combine": { + "p50": 112.60800063610077, + "p90": 129.2800009250641, + "p95": 156.44800662994385, + "p99": 4181.503772735596 + }, + "roundtrip": { + "p50": 458.49600434303284, + "p90": 542.3359870910645, + "p95": 771.1359858512878, + "p99": 4829.7600746154785 + }, + "isolatedSum": { + "p50": 440.6399875879288, + "p90": 490.62401056289673, + "p95": 633.760005235672, + "p99": 8307.135581970215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 333.3120048046112, + "p90": 507.58397579193115, + "p95": 595.7440137863159, + "p99": 3987.3600006103516 + }, + "combine": { + "p50": 132.64000415802002, + "p90": 154.65599298477173, + "p95": 182.11199343204498, + "p99": 310.11199951171875 + }, + "roundtrip": { + "p50": 462.3039960861206, + "p90": 571.2640285491943, + "p95": 3367.392063140869, + "p99": 4626.239776611328 + }, + "isolatedSum": { + "p50": 465.9520089626312, + "p90": 662.2399687767029, + "p95": 777.8560072183609, + "p99": 4297.47200012207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 343.9359962940216, + 
"p90": 392.5760090351105, + "p95": 547.8399991989136, + "p99": 4298.7518310546875 + }, + "combine": { + "p50": 203.10400426387787, + "p90": 214.62400257587433, + "p95": 227.743998169899, + "p99": 3585.344076156616 + }, + "roundtrip": { + "p50": 532.6399803161621, + "p90": 600.3519892692566, + "p95": 2984.5759868621826, + "p99": 4194.88000869751 + }, + "isolatedSum": { + "p50": 547.0400005578995, + "p90": 607.2000116109848, + "p95": 775.5839973688126, + "p99": 7884.095907211304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bcd103d", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_ef177a3a", + "comparisonKey": "a55ebc5267b466a6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:09:13.791921+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": 
"backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 353.69598865509033, + "p90": 400.35200119018555, + "p95": 451.80800557136536, + "p99": 2846.303939819336 + }, + "combine": { + "p50": 208.51199328899384, + "p90": 228.2239943742752, + "p95": 244.51200664043427, + "p99": 3582.304000854492 + }, + "roundtrip": { + "p50": 554.751992225647, + "p90": 628.7360191345215, + "p95": 3065.279960632324, + "p99": 4231.935977935791 + }, + "isolatedSum": { + "p50": 562.2079819440842, + "p90": 628.5759955644608, + "p95": 696.3200122117996, + "p99": 6428.607940673828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 380.16000390052795, + "p90": 418.65599155426025, + "p95": 498.4320104122162, + "p99": 3390.9120559692383 + }, + "combine": { + "p50": 339.9040102958679, + "p90": 359.0399920940399, + "p95": 2219.327926635742, + "p99": 2571.8719959259033 + }, + "roundtrip": { + "p50": 706.0480117797852, + "p90": 784.063994884491, + 
"p95": 2685.2800846099854, + "p99": 3523.5838890075684 + }, + "isolatedSum": { + "p50": 720.0640141963959, + "p90": 777.6959836483002, + "p95": 2717.7599370479584, + "p99": 5962.784051895142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 449.8240053653717, + "p90": 489.6639883518219, + "p95": 2178.175926208496, + "p99": 3428.256034851074 + }, + "combine": { + "p50": 650.3999829292297, + "p90": 1398.4960317611694, + "p95": 1857.856035232544, + "p99": 2592.1599864959717 + }, + "roundtrip": { + "p50": 1086.176037788391, + "p90": 1233.6959838867188, + "p95": 2647.552013397217, + "p99": 3170.880079269409 + }, + "isolatedSum": { + "p50": 1100.2239882946014, + "p90": 1888.1600201129913, + "p95": 4036.03196144104, + "p99": 6020.416021347046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 4198, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 600.7360219955444, + "p90": 640.3520107269287, + "p95": 1594.688057899475, + "p99": 2853.856086730957 + }, + "combine": { + "p50": 1215.1679992675781, + "p90": 1355.6159734725952, + "p95": 1637.6639604568481, + "p99": 1993.6959743499756 + }, + "roundtrip": { + "p50": 1815.8080577850342, + "p90": 2044.2559719085693, + "p95": 2687.5839233398438, + "p99": 3150.752067565918 + }, + "isolatedSum": { + "p50": 1815.9040212631226, + "p90": 1995.967984199524, + "p95": 3232.3520183563232, + "p99": 4847.552061080933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + 
"recvTokensMax": 8294, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 813.215970993042, + "p90": 868.5439825057983, + "p95": 2034.208059310913, + "p99": 2632.0641040802 + }, + "combine": { + "p50": 2328.1280994415283, + "p90": 2340.991973876953, + "p95": 2347.4879264831543, + "p99": 2411.776065826416 + }, + "roundtrip": { + "p50": 3112.704038619995, + "p90": 3178.112030029297, + "p95": 3536.9279384613037, + "p99": 3832.5119018554688 + }, + "isolatedSum": { + "p50": 3141.3440704345703, + "p90": 3209.5359563827515, + "p95": 4381.695985794067, + "p99": 5043.840169906616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1262.9120349884033, + "p90": 1544.0319776535034, + "p95": 1935.2960586547852, + "p99": 2416.6719913482666 + }, + "combine": { + "p50": 4552.9279708862305, + "p90": 4567.584037780762, + "p95": 4573.855876922607, + "p99": 4585.343837738037 + }, + "roundtrip": { + "p50": 5779.903888702393, + "p90": 5817.344188690186, + "p95": 5834.9761962890625, + "p99": 6143.519878387451 + }, + "isolatedSum": { + "p50": 5815.840005874634, + "p90": 6111.616015434265, + "p95": 6509.151935577393, + "p99": 7002.015829086304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 32967, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-77e3e114", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b200_c48c1e92", + 
"comparisonKey": "67947309644fcdb6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:10:30.439833+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 348.54400157928467, + "p90": 395.7439959049225, + "p95": 
459.4559967517853, + "p99": 2030.1120281219482 + }, + "combine": { + "p50": 204.76800203323364, + "p90": 235.9679937362671, + "p95": 356.7360043525696, + "p99": 3689.0881061553955 + }, + "roundtrip": { + "p50": 544.2240238189697, + "p90": 626.8799901008606, + "p95": 2905.7600498199463, + "p99": 4207.551956176758 + }, + "isolatedSum": { + "p50": 553.3120036125183, + "p90": 631.7119896411896, + "p95": 816.1920011043549, + "p99": 5719.200134277344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 385.76000928878784, + "p90": 428.6400079727173, + "p95": 579.2319774627686, + "p99": 3132.960081100464 + }, + "combine": { + "p50": 337.40800619125366, + "p90": 354.71999645233154, + "p95": 2278.496026992798, + "p99": 2920.991897583008 + }, + "roundtrip": { + "p50": 724.5439887046814, + "p90": 818.0480003356934, + "p95": 2481.4400672912598, + "p99": 3631.711959838867 + }, + "isolatedSum": { + "p50": 723.1680154800415, + "p90": 783.3600044250488, + "p95": 2857.7280044555664, + "p99": 6053.951978683472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 447.488009929657, + "p90": 491.456001996994, + "p95": 546.6880202293396, + "p99": 3315.743923187256 + }, + "combine": { + "p50": 645.7279920578003, + "p90": 1215.8080339431763, + "p95": 1628.5439729690552, + "p99": 2367.6159381866455 + }, + "roundtrip": { + "p50": 1081.984043121338, + "p90": 1131.9040060043335, + "p95": 2493.5359954833984, + "p99": 2981.760025024414 + }, + "isolatedSum": { + "p50": 
1093.2160019874573, + "p90": 1707.2640359401703, + "p95": 2175.231993198395, + "p99": 5683.359861373901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 603.1039953231812, + "p90": 670.527994632721, + "p95": 1975.648045539856, + "p99": 2908.639907836914 + }, + "combine": { + "p50": 1209.9519968032837, + "p90": 1262.336015701294, + "p95": 1666.1440134048462, + "p99": 2077.0559310913086 + }, + "roundtrip": { + "p50": 1826.4319896697998, + "p90": 1917.9840087890625, + "p95": 2608.959913253784, + "p99": 3176.4800548553467 + }, + "isolatedSum": { + "p50": 1813.0559921264648, + "p90": 1932.864010334015, + "p95": 3641.792058944702, + "p99": 4985.695838928223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 804.4480085372925, + "p90": 854.2400002479553, + "p95": 1938.5279417037964, + "p99": 2799.1039752960205 + }, + "combine": { + "p50": 2323.4879970550537, + "p90": 2335.263967514038, + "p95": 2340.4159545898438, + "p99": 2383.9681148529053 + }, + "roundtrip": { + "p50": 3107.5520515441895, + "p90": 3188.5759830474854, + "p95": 3448.2240676879883, + "p99": 3910.7840061187744 + }, + "isolatedSum": { + "p50": 3127.936005592346, + "p90": 3189.5039677619934, + "p95": 4278.94389629364, + "p99": 5183.072090148926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1254.2719841003418, + "p90": 1340.60800075531, + "p95": 1987.1679544448853, + "p99": 2552.6719093322754 + }, + "combine": { + "p50": 4547.391891479492, + "p90": 4562.367916107178, + "p95": 4568.160057067871, + "p99": 4590.303897857666 + }, + "roundtrip": { + "p50": 5759.80806350708, + "p90": 5790.112018585205, + "p95": 5806.816101074219, + "p99": 5980.288028717041 + }, + "isolatedSum": { + "p50": 5801.663875579834, + "p90": 5902.975916862488, + "p95": 6555.328011512756, + "p99": 7142.975807189941 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-770a1582", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b200_47ac442d", + "comparisonKey": "87e1fc63ac20c8a2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:11:31.559576+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 340.60800075531006, + "p90": 401.66398882865906, + "p95": 486.2399995326996, + "p99": 3179.6159744262695 + }, + "combine": { + "p50": 200.95999538898468, + "p90": 213.69600296020508, + "p95": 247.1040040254593, + "p99": 3030.4319858551025 + }, + "roundtrip": { + "p50": 522.2399830818176, + "p90": 599.5519757270813, + "p95": 2504.703998565674, + "p99": 4165.984153747559 + }, + "isolatedSum": { + "p50": 541.5679961442947, + "p90": 615.3599917888641, + "p95": 733.3440035581589, + "p99": 6210.047960281372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 439.4240081310272, + "p90": 499.9360144138336, + "p95": 1270.624041557312, + "p99": 3478.879928588867 + }, + "combine": { + "p50": 642.3680186271667, + "p90": 1233.888030052185, 
+ "p95": 1869.088053703308, + "p99": 2341.887950897217 + }, + "roundtrip": { + "p50": 1066.8480396270752, + "p90": 1141.4079666137695, + "p95": 2479.1359901428223, + "p99": 3222.912073135376 + }, + "isolatedSum": { + "p50": 1081.792026758194, + "p90": 1733.8240444660187, + "p95": 3139.71209526062, + "p99": 5820.767879486084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 809.1840147972107, + "p90": 880.0640106201172, + "p95": 2273.632049560547, + "p99": 2885.3440284729004 + }, + "combine": { + "p50": 2321.984052658081, + "p90": 2337.7280235290527, + "p95": 2354.016065597534, + "p99": 2487.2000217437744 + }, + "roundtrip": { + "p50": 3095.616102218628, + "p90": 3153.3119678497314, + "p95": 3553.2801151275635, + "p99": 3933.3760738372803 + }, + "isolatedSum": { + "p50": 3131.1680674552917, + "p90": 3217.79203414917, + "p95": 4627.648115158081, + "p99": 5372.544050216675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17aedd0f", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b200_fb40089c", + "comparisonKey": "0b482a042dec8a2c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:18:35.869578+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 363.072007894516, + "p90": 424.67200756073, + "p95": 476.73600912094116, + "p99": 2649.8239040374756 + }, + "combine": { + "p50": 207.13600516319275, + "p90": 230.30400276184082, + "p95": 297.37600684165955, + "p99": 3449.023962020874 + }, + "roundtrip": { + "p50": 560.1599812507629, + "p90": 647.6479768753052, + "p95": 1214.2720222473145, + "p99": 3871.648073196411 + }, + "isolatedSum": { + "p50": 570.2080130577087, + "p90": 
654.9760103225708, + "p95": 774.1120159626007, + "p99": 6098.84786605835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 395.00799775123596, + "p90": 453.95201444625854, + "p95": 494.33600902557373, + "p99": 2877.120018005371 + }, + "combine": { + "p50": 338.9439880847931, + "p90": 364.25599455833435, + "p95": 1961.8560075759888, + "p99": 2663.327932357788 + }, + "roundtrip": { + "p50": 735.4239821434021, + "p90": 836.9600176811218, + "p95": 2669.408082962036, + "p99": 3424.1600036621094 + }, + "isolatedSum": { + "p50": 733.951985836029, + "p90": 818.2080090045929, + "p95": 2456.1920166015625, + "p99": 5540.447950363159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 2304, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 458.5919976234436, + "p90": 512.1279954910278, + "p95": 566.3679838180542, + "p99": 3206.655979156494 + }, + "combine": { + "p50": 650.3679752349854, + "p90": 671.8720197677612, + "p95": 1458.1760168075562, + "p99": 2039.072036743164 + }, + "roundtrip": { + "p50": 1094.9759483337402, + "p90": 1184.5760345458984, + "p95": 2503.80802154541, + "p99": 3050.528049468994 + }, + "isolatedSum": { + "p50": 1108.959972858429, + "p90": 1184.000015258789, + "p95": 2024.5440006256104, + "p99": 5245.728015899658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 4608, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + 
"dispatch": { + "p50": 624.0959763526917, + "p90": 708.191990852356, + "p95": 2148.639917373657, + "p99": 2897.37606048584 + }, + "combine": { + "p50": 1228.16002368927, + "p90": 1260.5119943618774, + "p95": 1556.928038597107, + "p99": 1929.1839599609375 + }, + "roundtrip": { + "p50": 1849.34401512146, + "p90": 2235.5520725250244, + "p95": 2715.872049331665, + "p99": 3107.2959899902344 + }, + "isolatedSum": { + "p50": 1852.2560000419617, + "p90": 1968.7039852142334, + "p95": 3705.567955970764, + "p99": 4826.560020446777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 9216, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 853.2480001449585, + "p90": 936.8640184402466, + "p95": 2019.2959308624268, + "p99": 2716.6080474853516 + }, + "combine": { + "p50": 2357.664108276367, + "p90": 2371.8080520629883, + "p95": 2376.6720294952393, + "p99": 2395.008087158203 + }, + "roundtrip": { + "p50": 3194.0479278564453, + "p90": 3305.9520721435547, + "p95": 3582.0798873901367, + "p99": 3853.408098220825 + }, + "isolatedSum": { + "p50": 3210.9121084213257, + "p90": 3308.672070503235, + "p95": 4395.967960357666, + "p99": 5111.616134643555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 18432, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1354.0799617767334, + "p90": 1492.8640127182007, + "p95": 1841.599941253662, + "p99": 2380.44810295105 + }, + "combine": { + "p50": 4619.455814361572, + "p90": 4635.007858276367, + "p95": 4640.895843505859, + "p99": 4656.1598777771 + }, + "roundtrip": { + "p50": 5940.032005310059, + "p90": 5977.344036102295, + "p95": 
5998.528003692627, + "p99": 6168.064117431641 + }, + "isolatedSum": { + "p50": 5973.535776138306, + "p90": 6127.871870994568, + "p95": 6482.4957847595215, + "p99": 7036.607980728149 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 36864, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fe35df00", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b200_e3a8d4c0", + "comparisonKey": "462100c9280545a2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:16:00.228735+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", 
+ "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 356.9279909133911, + "p90": 382.4320137500763, + "p95": 499.0079998970032, + "p99": 2722.464084625244 + }, + "combine": { + "p50": 209.4399929046631, + "p90": 226.1440008878708, + "p95": 2509.216070175171, + "p99": 3581.439971923828 + }, + "roundtrip": { + "p50": 568.2880282402039, + "p90": 647.487998008728, + "p95": 2849.760055541992, + "p99": 4034.0800285339355 + }, + "isolatedSum": { + "p50": 566.3679838180542, + "p90": 608.5760146379471, + "p95": 3008.224070072174, + "p99": 6303.904056549072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 405.56800365448, + "p90": 440.19201397895813, + "p95": 527.2319912910461, + "p99": 3114.3040657043457 + }, + "combine": { + "p50": 355.0400137901306, + "p90": 373.4399974346161, + "p95": 2095.871925354004, + "p99": 3011.45601272583 + }, + "roundtrip": { + "p50": 766.0800218582153, + "p90": 818.015992641449, + "p95": 2704.864025115967, + "p99": 3500.8320808410645 + }, + "isolatedSum": { + "p50": 760.6080174446106, + "p90": 813.6320114135742, + "p95": 2623.10391664505, + "p99": 6125.760078430176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 
156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 3755, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 493.79199743270874, + "p90": 525.5680084228516, + "p95": 2047.8720664978027, + "p99": 3343.3279991149902 + }, + "combine": { + "p50": 685.6319904327393, + "p90": 1260.1280212402344, + "p95": 1642.016053199768, + "p99": 2403.712034225464 + }, + "roundtrip": { + "p50": 1182.911992073059, + "p90": 1842.9759740829468, + "p95": 2641.535997390747, + "p99": 3101.696014404297 + }, + "isolatedSum": { + "p50": 1179.423987865448, + "p90": 1785.696029663086, + "p95": 3689.888119697571, + "p99": 5747.040033340454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 7556, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 711.9680047035217, + "p90": 843.2639837265015, + "p95": 2079.967975616455, + "p99": 2776.416063308716 + }, + "combine": { + "p50": 1305.408000946045, + "p90": 1319.1360235214233, + "p95": 1508.0640316009521, + "p99": 1956.671953201294 + }, + "roundtrip": { + "p50": 2018.3680057525635, + "p90": 2187.455892562866, + "p95": 2858.1440448760986, + "p99": 3269.984006881714 + }, + "isolatedSum": { + "p50": 2017.3760056495667, + "p90": 2162.400007247925, + "p95": 3588.032007217407, + "p99": 4733.08801651001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 15163, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1040.4479503631592, + "p90": 1387.712001800537, + "p95": 2055.999994277954, + "p99": 2474.2400646209717 + }, + 
"combine": { + "p50": 2527.776002883911, + "p90": 2540.287971496582, + "p95": 2544.543981552124, + "p99": 2551.9039630889893 + }, + "roundtrip": { + "p50": 3555.7758808135986, + "p90": 3619.7121143341064, + "p95": 3820.41597366333, + "p99": 4144.000053405762 + }, + "isolatedSum": { + "p50": 3568.2239532470703, + "p90": 3927.999973297119, + "p95": 4600.543975830078, + "p99": 5026.144027709961 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 30215, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1732.6079607009888, + "p90": 1863.8399839401245, + "p95": 2087.9359245300293, + "p99": 2442.4960613250732 + }, + "combine": { + "p50": 4984.064102172852, + "p90": 4996.7041015625, + "p95": 5001.632213592529, + "p99": 5006.271839141846 + }, + "roundtrip": { + "p50": 6609.312057495117, + "p90": 6644.192218780518, + "p95": 6664.31999206543, + "p99": 6798.079967498779 + }, + "isolatedSum": { + "p50": 6716.67206287384, + "p90": 6860.5440855026245, + "p95": 7089.568138122559, + "p99": 7448.767900466919 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 60512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b3dbc867", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b200_780f9e8e", + "comparisonKey": "a5adbfc73db30e6e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:27:57.964744+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 348.4799861907959, + "p90": 395.52000164985657, + "p95": 430.30399084091187, + "p99": 2900.928020477295 + }, + "combine": { + "p50": 206.81600272655487, + "p90": 222.84799814224243, + "p95": 227.9359996318817, + "p99": 641.759991645813 + }, + "roundtrip": { + "p50": 547.872006893158, + "p90": 
598.0479717254639, + "p95": 729.8240065574646, + "p99": 4221.60005569458 + }, + "isolatedSum": { + "p50": 555.2959889173508, + "p90": 618.367999792099, + "p95": 658.2399904727936, + "p99": 3542.688012123108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 1080, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 381.82398676872253, + "p90": 432.2879910469055, + "p95": 485.6640100479126, + "p99": 3080.512046813965 + }, + "combine": { + "p50": 339.26400542259216, + "p90": 357.15198516845703, + "p95": 1949.3119716644287, + "p99": 2908.128023147583 + }, + "roundtrip": { + "p50": 708.8959813117981, + "p90": 754.4000148773193, + "p95": 2617.759943008423, + "p99": 3679.1999340057373 + }, + "isolatedSum": { + "p50": 721.0879921913147, + "p90": 789.4399762153625, + "p95": 2434.9759817123413, + "p99": 5988.640069961548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 2102, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 449.98401403427124, + "p90": 503.4239888191223, + "p95": 578.9120197296143, + "p99": 3371.1040019989014 + }, + "combine": { + "p50": 648.2239961624146, + "p90": 696.9919800758362, + "p95": 1717.1519994735718, + "p99": 2549.247980117798 + }, + "roundtrip": { + "p50": 1086.4319801330566, + "p90": 1170.9760427474976, + "p95": 2510.080099105835, + "p99": 3219.2959785461426 + }, + "isolatedSum": { + "p50": 1098.2080101966858, + "p90": 1200.4159688949585, + "p95": 2296.064019203186, + "p99": 5920.351982116699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 
5.299072265625, + "recvTokensMax": 4207, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 605.8239936828613, + "p90": 648.8959789276123, + "p95": 703.9039731025696, + "p99": 3008.5439682006836 + }, + "combine": { + "p50": 1215.008020401001, + "p90": 1237.7599477767944, + "p95": 1516.4799690246582, + "p99": 1888.7360095977783 + }, + "roundtrip": { + "p50": 1812.0959997177124, + "p90": 1895.583987236023, + "p95": 2616.863965988159, + "p99": 3209.120035171509 + }, + "isolatedSum": { + "p50": 1820.8320140838623, + "p90": 1886.6559267044067, + "p95": 2220.383942127228, + "p99": 4897.279977798462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8365, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 819.0079927444458, + "p90": 878.0480027198792, + "p95": 1892.7680253982544, + "p99": 2765.4080390930176 + }, + "combine": { + "p50": 2328.831911087036, + "p90": 2343.456029891968, + "p95": 2351.871967315674, + "p99": 2433.9840412139893 + }, + "roundtrip": { + "p50": 3112.9279136657715, + "p90": 3163.8081073760986, + "p95": 3511.712074279785, + "p99": 3865.0879859924316 + }, + "isolatedSum": { + "p50": 3147.839903831482, + "p90": 3221.504032611847, + "p95": 4244.639992713928, + "p99": 5199.392080307007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 16483, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1273.0239629745483, + "p90": 1470.7520008087158, + "p95": 1958.9120149612427, + "p99": 2296.9279289245605 + }, + 
"combine": { + "p50": 4552.351951599121, + "p90": 4567.679882049561, + "p95": 4571.392059326172, + "p99": 4590.464115142822 + }, + "roundtrip": { + "p50": 5781.280040740967, + "p90": 5827.936172485352, + "p95": 5850.880146026611, + "p99": 6025.216102600098 + }, + "isolatedSum": { + "p50": 5825.375914573669, + "p90": 6038.431882858276, + "p95": 6530.304074287415, + "p99": 6887.392044067383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 32777, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6db2ebc0", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b200_bf868a74", + "comparisonKey": "b758f533ff68bf2d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:17:18.065817+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": 
"best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 342.78398752212524, + "p90": 387.4559998512268, + "p95": 409.05600786209106, + "p99": 2332.832098007202 + }, + "combine": { + "p50": 205.6960016489029, + "p90": 221.21599316596985, + "p95": 235.3920042514801, + "p99": 3869.760036468506 + }, + "roundtrip": { + "p50": 539.7440195083618, + "p90": 591.6799902915955, + "p95": 2822.751998901367, + "p99": 4149.5041847229 + }, + "isolatedSum": { + "p50": 548.4799891710281, + "p90": 608.6719930171967, + "p95": 644.4480121135712, + "p99": 6202.592134475708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 1064, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 374.4960129261017, + "p90": 420.415997505188, + "p95": 483.2639992237091, + "p99": 3094.9759483337402 + }, + "combine": { + "p50": 338.81598711013794, + "p90": 355.26400804519653, + "p95": 1142.624020576477, + "p99": 2970.8800315856934 + }, + "roundtrip": { + "p50": 707.0080041885376, + "p90": 775.4560112953186, + "p95": 
2577.3119926452637, + "p99": 3569.9520111083984 + }, + "isolatedSum": { + "p50": 713.3120000362396, + "p90": 775.6800055503845, + "p95": 1625.8880198001862, + "p99": 6065.855979919434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 2081, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 448.38398694992065, + "p90": 492.6399886608124, + "p95": 557.4399828910828, + "p99": 3432.8958988189697 + }, + "combine": { + "p50": 644.1599726676941, + "p90": 1211.5520238876343, + "p95": 1634.1760158538818, + "p99": 2475.872039794922 + }, + "roundtrip": { + "p50": 1074.3999481201172, + "p90": 1173.2800006866455, + "p95": 2602.368116378784, + "p99": 3344.640016555786 + }, + "isolatedSum": { + "p50": 1092.5439596176147, + "p90": 1704.1920125484467, + "p95": 2191.6159987449646, + "p99": 5908.767938613892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 4153, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 596.7040061950684, + "p90": 673.695981502533, + "p95": 2123.4560012817383, + "p99": 2902.143955230713 + }, + "combine": { + "p50": 1213.9519453048706, + "p90": 1395.583987236023, + "p95": 1709.9519968032837, + "p99": 2229.311943054199 + }, + "roundtrip": { + "p50": 1804.4480085372925, + "p90": 1974.4640588760376, + "p95": 2527.679920196533, + "p99": 3084.127902984619 + }, + "isolatedSum": { + "p50": 1810.655951499939, + "p90": 2069.279968738556, + "p95": 3833.407998085022, + "p99": 5131.455898284912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + 
"recvTokensMax": 8313, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 807.0080280303955, + "p90": 861.8559837341309, + "p95": 1941.7920112609863, + "p99": 2731.584072113037 + }, + "combine": { + "p50": 2328.8960456848145, + "p90": 2341.18390083313, + "p95": 2348.992109298706, + "p99": 2394.65594291687 + }, + "roundtrip": { + "p50": 3105.9839725494385, + "p90": 3195.807933807373, + "p95": 3532.7680110931396, + "p99": 3984.5120906829834 + }, + "isolatedSum": { + "p50": 3135.90407371521, + "p90": 3203.0398845672607, + "p95": 4290.784120559692, + "p99": 5126.240015029907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 16581, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1253.4079551696777, + "p90": 1408.8000059127808, + "p95": 1945.5360174179077, + "p99": 2399.168014526367 + }, + "combine": { + "p50": 4547.296047210693, + "p90": 4559.775829315186, + "p95": 4564.383983612061, + "p99": 4574.143886566162 + }, + "roundtrip": { + "p50": 5766.016006469727, + "p90": 5809.855937957764, + "p95": 5839.583873748779, + "p99": 6069.183826446533 + }, + "isolatedSum": { + "p50": 5800.704002380371, + "p90": 5968.575835227966, + "p95": 6509.920001029968, + "p99": 6973.311901092529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 32887, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1c1e555f", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_ed73f5d1", + 
"comparisonKey": "c80979fe408156db", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:13:02.670120+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 432.19199776649475, + "p90": 482.33601450920105, + "p95": 606.7519783973694, 
+ "p99": 3302.2079467773438 + }, + "combine": { + "p50": 257.31199979782104, + "p90": 273.44000339508057, + "p95": 2030.1439762115479, + "p99": 2939.6159648895264 + }, + "roundtrip": { + "p50": 698.4000205993652, + "p90": 787.7439856529236, + "p95": 2759.135961532593, + "p99": 3709.9199295043945 + }, + "isolatedSum": { + "p50": 689.5039975643158, + "p90": 755.7760179042816, + "p95": 2636.8959546089172, + "p99": 6241.82391166687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 522.0159888267517, + "p90": 569.599986076355, + "p95": 715.7120108604431, + "p99": 2907.8400135040283 + }, + "combine": { + "p50": 466.7840003967285, + "p90": 485.24799942970276, + "p95": 1317.855954170227, + "p99": 2300.895929336548 + }, + "roundtrip": { + "p50": 979.968011379242, + "p90": 1071.6160535812378, + "p95": 2483.6480617523193, + "p99": 3120.352029800415 + }, + "isolatedSum": { + "p50": 988.7999892234802, + "p90": 1054.8479855060577, + "p95": 2033.5679650306702, + "p99": 5208.735942840576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 722.3359942436218, + "p90": 963.2959961891174, + "p95": 2096.0640907287598, + "p99": 2706.559896469116 + }, + "combine": { + "p50": 906.495988368988, + "p90": 1042.240023612976, + "p95": 1277.1519422531128, + "p99": 1641.983985900879 + }, + "roundtrip": { + "p50": 1613.2800579071045, + "p90": 2065.4079914093018, + "p95": 2620.703935623169, + "p99": 3056.864023208618 + }, + "isolatedSum": { + "p50": 
1628.8319826126099, + "p90": 2005.5360198020935, + "p95": 3373.2160329818726, + "p99": 4348.543882369995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1214.4960165023804, + "p90": 1464.959979057312, + "p95": 1738.1759881973267, + "p99": 2146.9759941101074 + }, + "combine": { + "p50": 1767.0400142669678, + "p90": 1778.1440019607544, + "p95": 1780.992031097412, + "p99": 1792.639970779419 + }, + "roundtrip": { + "p50": 2955.712080001831, + "p90": 3007.200002670288, + "p95": 3073.7600326538086, + "p99": 3431.1039447784424 + }, + "isolatedSum": { + "p50": 2981.536030769348, + "p90": 3243.1039810180664, + "p95": 3519.1680192947388, + "p99": 3939.6159648895264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 40211, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1979.4880151748657, + "p90": 2032.1600437164307, + "p95": 2187.6161098480225, + "p99": 2359.6160411834717 + }, + "combine": { + "p50": 3548.2559204101562, + "p90": 3565.4079914093018, + "p95": 3571.2320804595947, + "p99": 3584.991931915283 + }, + "roundtrip": { + "p50": 5474.0800857543945, + "p90": 5502.975940704346, + "p95": 5513.984203338623, + "p99": 5553.023815155029 + }, + "isolatedSum": { + "p50": 5527.743935585022, + "p90": 5597.568035125732, + "p95": 5758.848190307617, + "p99": 5944.607973098755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3713.5679721832275, + "p90": 3751.4240741729736, + "p95": 3765.5038833618164, + "p99": 3813.663959503174 + }, + "combine": { + "p50": 7442.272186279297, + "p90": 7457.536220550537, + "p95": 7462.719917297363, + "p99": 7476.895809173584 + }, + "roundtrip": { + "p50": 11031.744003295898, + "p90": 11066.304206848145, + "p95": 11077.695846557617, + "p99": 11095.232009887695 + }, + "isolatedSum": { + "p50": 11155.840158462524, + "p90": 11208.96029472351, + "p95": 11228.22380065918, + "p99": 11290.559768676758 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cd0862e1", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b200_9afdbb6b", + "comparisonKey": "9ad950cc82b43557", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:14:41.366084+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 489.6639883518219, + "p90": 544.2240238189697, + "p95": 1123.7119436264038, + "p99": 2578.655958175659 + }, + "combine": { + "p50": 309.7600042819977, + "p90": 322.11199402809143, + "p95": 346.68800234794617, + "p99": 2308.4158897399902 + }, + "roundtrip": { + "p50": 776.9920229911804, + "p90": 820.3200101852417, + "p95": 2105.6320667266846, + "p99": 3397.4080085754395 + }, + "isolatedSum": { + "p50": 799.4239926338196, + "p90": 866.3360178470612, + "p95": 1470.39994597435, + "p99": 4887.071847915649 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 638.3680105209351, + "p90": 689.2480254173279, + "p95": 1601.0559797286987, + "p99": 2434.2401027679443 + }, + "combine": { + "p50": 
563.1999969482422, + "p90": 651.8399715423584, + "p95": 1130.1440000534058, + "p99": 1718.4959650039673 + }, + "roundtrip": { + "p50": 1179.58402633667, + "p90": 1255.071997642517, + "p95": 2240.5760288238525, + "p99": 2744.352102279663 + }, + "isolatedSum": { + "p50": 1201.5680074691772, + "p90": 1341.0879969596863, + "p95": 2731.1999797821045, + "p99": 4152.736067771912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 15151, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 971.9039797782898, + "p90": 1365.7920360565186, + "p95": 1879.9680471420288, + "p99": 2396.6081142425537 + }, + "combine": { + "p50": 1091.040015220642, + "p90": 1104.1920185089111, + "p95": 1132.1280002593994, + "p99": 1358.3680391311646 + }, + "roundtrip": { + "p50": 2027.3280143737793, + "p90": 2085.088014602661, + "p95": 2381.7598819732666, + "p99": 2891.9999599456787 + }, + "isolatedSum": { + "p50": 2062.943994998932, + "p90": 2469.9840545654297, + "p95": 3012.096047401428, + "p99": 3754.9761533737183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 30290, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1666.4639711380005, + "p90": 1732.2239875793457, + "p95": 1825.600028038025, + "p99": 1981.6960096359253 + }, + "combine": { + "p50": 2147.6480960845947, + "p90": 2159.0399742126465, + "p95": 2162.048101425171, + "p99": 2171.135902404785 + }, + "roundtrip": { + "p50": 3779.4559001922607, + "p90": 3816.4799213409424, + "p95": 3838.8800621032715, + "p99": 3903.712034225464 + }, + "isolatedSum": { + "p50": 3814.112067222595, + "p90": 3891.263961791992, + "p95": 
3987.648129463196, + "p99": 4152.83191204071 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 60548, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 2696.320056915283, + "p90": 2737.920045852661, + "p95": 2757.1520805358887, + "p99": 2901.887893676758 + }, + "combine": { + "p50": 4324.895858764648, + "p90": 4342.527866363525, + "p95": 4348.991870880127, + "p99": 4360.447883605957 + }, + "roundtrip": { + "p50": 6960.927963256836, + "p90": 6994.336128234863, + "p95": 7011.96813583374, + "p99": 7059.679985046387 + }, + "isolatedSum": { + "p50": 7021.215915679932, + "p90": 7080.4479122161865, + "p95": 7106.143951416016, + "p99": 7262.335777282715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 121046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 5123.072147369385, + "p90": 5146.336078643799, + "p95": 5156.928062438965, + "p99": 5179.58402633667 + }, + "combine": { + "p50": 8585.472106933594, + "p90": 8607.135772705078, + "p95": 8614.336013793945, + "p99": 8632.224082946777 + }, + "roundtrip": { + "p50": 13662.303924560547, + "p90": 13691.712379455566, + "p95": 13701.663970947266, + "p99": 13743.040084838867 + }, + "isolatedSum": { + "p50": 13708.544254302979, + "p90": 13753.471851348877, + "p95": 13771.26407623291, + "p99": 13811.808109283447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 242154, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bd5224a4", 
+ "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b200_244814d3", + "comparisonKey": "d6a55306db9b56ec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:26:40.223372+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": 
"2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 348.83201122283936, + "p90": 387.1360123157501, + "p95": 498.33598732948303, + "p99": 3259.2320442199707 + }, + "combine": { + "p50": 203.23200523853302, + "p90": 215.83999693393707, + "p95": 241.2160038948059, + "p99": 3605.087995529175 + }, + "roundtrip": { + "p50": 532.3200225830078, + "p90": 558.3680272102356, + "p95": 641.8560147285461, + "p99": 4176.544189453125 + }, + "isolatedSum": { + "p50": 552.0640164613724, + "p90": 602.9760092496872, + "p95": 739.5519912242889, + "p99": 6864.3200397491455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 1049, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 380.0959885120392, + "p90": 427.0400106906891, + "p95": 495.5199956893921, + "p99": 3057.8880310058594 + }, + "combine": { + "p50": 338.7199938297272, + "p90": 353.4719944000244, + "p95": 1827.3279666900635, + "p99": 3373.2481002807617 + }, + "roundtrip": { + "p50": 696.6720223426819, + "p90": 746.5599775314331, + "p95": 2308.799982070923, + "p99": 3618.3040142059326 + }, + "isolatedSum": { + "p50": 718.8159823417664, + "p90": 780.5120050907135, + "p95": 2322.8479623794556, + "p99": 6431.136131286621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 2084, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 453.5999894142151, + "p90": 514.240026473999, + "p95": 575.007975101471, + "p99": 3632.1280002593994 + }, + "combine": { + "p50": 644.4799900054932, + "p90": 
695.8079934120178, + "p95": 1799.2639541625977, + "p99": 2424.7360229492188 + }, + "roundtrip": { + "p50": 1073.1840133666992, + "p90": 1139.296054840088, + "p95": 2459.0399265289307, + "p99": 3259.5839500427246 + }, + "isolatedSum": { + "p50": 1098.0799794197083, + "p90": 1210.0480198860168, + "p95": 2374.2719292640686, + "p99": 6056.864023208618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 4126, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 592.7680134773254, + "p90": 658.1119894981384, + "p95": 1866.9120073318481, + "p99": 3017.728090286255 + }, + "combine": { + "p50": 1212.0959758758545, + "p90": 1340.3199911117554, + "p95": 1600.160002708435, + "p99": 1974.9759435653687 + }, + "roundtrip": { + "p50": 1791.7759418487549, + "p90": 2422.8479862213135, + "p95": 2783.776044845581, + "p99": 3150.9759426116943 + }, + "isolatedSum": { + "p50": 1804.86398935318, + "p90": 1998.4319806098938, + "p95": 3467.072010040283, + "p99": 4992.7040338516235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8234, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 806.5919876098633, + "p90": 865.1520013809204, + "p95": 1963.1359577178955, + "p99": 3170.3999042510986 + }, + "combine": { + "p50": 2325.3440856933594, + "p90": 2339.4880294799805, + "p95": 2346.5280532836914, + "p99": 2406.8479537963867 + }, + "roundtrip": { + "p50": 3093.7600135803223, + "p90": 3187.9360675811768, + "p95": 3625.7920265197754, + "p99": 3976.223945617676 + }, + "isolatedSum": { + "p50": 3131.9360733032227, + "p90": 3204.640030860901, + "p95": 4309.664011001587, + 
"p99": 5577.247858047485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 16480, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1247.1359968185425, + "p90": 1313.2799863815308, + "p95": 1817.6000118255615, + "p99": 2456.0959339141846 + }, + "combine": { + "p50": 4545.792102813721, + "p90": 4556.128025054932, + "p95": 4558.3038330078125, + "p99": 4569.024085998535 + }, + "roundtrip": { + "p50": 5755.839824676514, + "p90": 5787.936210632324, + "p95": 5819.808006286621, + "p99": 6106.272220611572 + }, + "isolatedSum": { + "p50": 5792.928099632263, + "p90": 5869.408011436462, + "p95": 6375.903844833374, + "p99": 7025.12001991272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 32889, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4389e483", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b200_51d8b3be", + "comparisonKey": "4c738347c1494598", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:21:48.679975+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 380.0320029258728, + "p90": 427.0400106906891, + "p95": 521.9519734382629, + "p99": 2488.2240295410156 + }, + "combine": { + "p50": 219.67999637126923, + "p90": 232.4800044298172, + "p95": 372.44799733161926, + "p99": 3314.271926879883 + }, + "roundtrip": { + "p50": 614.080011844635, + "p90": 737.824022769928, + "p95": 2992.7680492401123, + "p99": 3854.8479080200195 + }, + "isolatedSum": { + "p50": 599.711999297142, + "p90": 659.5200151205063, + "p95": 894.3999707698822, + "p99": 5802.495956420898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 433.0559968948364, + "p90": 486.1760139465332, + "p95": 699.2319822311401, + "p99": 3415.7440662384033 + }, + "combine": { + "p50": 380.16000390052795, + "p90": 398.49600195884705, + "p95": 1819.0720081329346, + "p99": 2437.920093536377 + }, + "roundtrip": { + "p50": 812.0319843292236, + "p90": 879.423975944519, + "p95": 2433.3438873291016, + "p99": 3461.855888366699 + }, + "isolatedSum": { + "p50": 813.2160007953644, + "p90": 884.6720159053802, + "p95": 2518.3039903640747, + "p99": 5853.66415977478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 5302, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 555.5520057678223, + "p90": 621.3120222091675, + "p95": 1963.0719423294067, + "p99": 2998.784065246582 + }, + "combine": { + "p50": 743.9680099487305, + "p90": 778.4000039100647, + "p95": 1321.120023727417, + "p99": 2135.1680755615234 + }, + "roundtrip": { + "p50": 1293.4080362319946, + "p90": 1557.2799444198608, + "p95": 2515.455961227417, + "p99": 2991.7759895324707 + }, + "isolatedSum": { + "p50": 1299.5200157165527, + "p90": 1399.7120261192322, + "p95": 3284.1919660568237, + "p99": 5133.9521408081055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 10587, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 815.8079981803894, + "p90": 1048.3520030975342, + "p95": 1883.8720321655273, + "p99": 2542.880058288574 + }, + "combine": { + "p50": 1398.2720375061035, + "p90": 1418.0799722671509, + "p95": 1471.2320566177368, + "p99": 1646.5920209884644 
+ }, + "roundtrip": { + "p50": 2201.6000747680664, + "p90": 2279.3281078338623, + "p95": 2661.8878841400146, + "p99": 3109.407901763916 + }, + "isolatedSum": { + "p50": 2214.080035686493, + "p90": 2466.431975364685, + "p95": 3355.104088783264, + "p99": 4189.472079277039 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 21014, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1247.488021850586, + "p90": 1462.015986442566, + "p95": 1848.5440015792847, + "p99": 2247.328042984009 + }, + "combine": { + "p50": 2723.26397895813, + "p90": 2734.8480224609375, + "p95": 2738.111972808838, + "p99": 2743.8719272613525 + }, + "roundtrip": { + "p50": 3920.192003250122, + "p90": 3964.224100112915, + "p95": 3993.504047393799, + "p99": 4241.471767425537 + }, + "isolatedSum": { + "p50": 3970.752000808716, + "p90": 4196.864008903503, + "p95": 4586.655974388123, + "p99": 4991.199970245361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 41814, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2338.9439582824707, + "p90": 2413.2161140441895, + "p95": 2441.7920112609863, + "p99": 2533.888101577759 + }, + "combine": { + "p50": 5649.3120193481445, + "p90": 5667.007923126221, + "p95": 5671.360015869141, + "p99": 5682.271957397461 + }, + "roundtrip": { + "p50": 7771.935939788818, + "p90": 7813.727855682373, + "p95": 7826.496124267578, + "p99": 7882.175922393799 + }, + "isolatedSum": { + "p50": 7988.255977630615, + "p90": 8080.22403717041, + "p95": 8113.152027130127, + "p99": 8216.16005897522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 83417, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bb526e21", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b200_cf927f30", + "comparisonKey": "947813d914d72f4e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:22:34.184374+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 349.7599959373474, + "p90": 439.0079975128174, + "p95": 489.6000027656555, + "p99": 2643.264055252075 + }, + "combine": { + "p50": 204.83200252056122, + "p90": 221.24800086021423, + "p95": 233.15200209617615, + "p99": 3592.7999019622803 + }, + "roundtrip": { + "p50": 543.615996837616, + "p90": 611.9679808616638, + "p95": 756.991982460022, + "p99": 4206.111907958984 + }, + "isolatedSum": { + "p50": 554.5919984579086, + "p90": 660.2559983730316, + "p95": 722.7520048618317, + "p99": 6236.0639572143555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 1067, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 376.73598527908325, + "p90": 418.4960126876831, + "p95": 492.76798963546753, + "p99": 3357.856035232544 + }, + "combine": { + "p50": 338.3679986000061, + "p90": 355.0719916820526, + "p95": 1877.4080276489258, + "p99": 2595.2000617980957 + }, + "roundtrip": { + "p50": 704.6719789505005, + "p90": 750.2719759941101, + "p95": 2501.024007797241, + "p99": 3565.984010696411 + }, + "isolatedSum": { + "p50": 715.1039838790894, + "p90": 773.5680043697357, + "p95": 2370.1760172843933, + "p99": 5953.05609703064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 2097, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 443.64801049232483, + "p90": 501.8879771232605, + "p95": 548.8960146903992, + "p99": 3259.455919265747 + }, + "combine": { + "p50": 642.8800225257874, + "p90": 700.7359862327576, + "p95": 1655.6479930877686, + "p99": 2243.839979171753 + }, + "roundtrip": { + "p50": 1071.7439651489258, + "p90": 1159.6800088882446, + "p95": 2496.864080429077, + "p99": 3179.136037826538 + }, + "isolatedSum": { + "p50": 1086.5280330181122, + "p90": 1202.623963356018, + "p95": 2204.5440077781677, + "p99": 5503.2958984375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 4163, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 606.1760187149048, + "p90": 697.0239877700806, + "p95": 1828.4800052642822, + "p99": 3092.223882675171 + }, + "combine": { + "p50": 1211.743950843811, + "p90": 1276.8959999084473, + "p95": 1583.9680433273315, + "p99": 1814.7200345993042 + }, + "roundtrip": { + "p50": 1811.903953552246, + "p90": 2156.320095062256, + "p95": 2754.2080879211426, + "p99": 3099.168062210083 + }, + "isolatedSum": { + "p50": 1817.9199695587158, + "p90": 1973.9199876785278, + "p95": 3412.4480485916138, + "p99": 4906.943917274475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8305, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 810.7839822769165, + "p90": 875.328004360199, + "p95": 1822.5599527359009, + "p99": 2625.823974609375 + }, + "combine": { + "p50": 2327.967882156372, + "p90": 2341.0239219665527, + "p95": 2350.752115249634, + "p99": 2461.855888366699 + }, + "roundtrip": { + 
"p50": 3105.7920455932617, + "p90": 3165.95196723938, + "p95": 3450.9758949279785, + "p99": 3912.3520851135254 + }, + "isolatedSum": { + "p50": 3138.7518644332886, + "p90": 3216.3519263267517, + "p95": 4173.312067985535, + "p99": 5087.679862976074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 16529, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1262.7520561218262, + "p90": 1664.9919748306274, + "p95": 1971.1040258407593, + "p99": 2340.575933456421 + }, + "combine": { + "p50": 4549.439907073975, + "p90": 4563.839912414551, + "p95": 4570.367813110352, + "p99": 4597.8240966796875 + }, + "roundtrip": { + "p50": 5777.184009552002, + "p90": 5816.60795211792, + "p95": 5829.631805419922, + "p99": 6043.327808380127 + }, + "isolatedSum": { + "p50": 5812.191963195801, + "p90": 6228.831887245178, + "p95": 6541.471838951111, + "p99": 6938.400030136108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 32880, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-275645a5", + "identity": "b200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_18fd5edd", + "comparisonKey": "a9139b6c1c63c2d7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:24:37.271817+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 425.7600009441376, + "p90": 471.45599126815796, + "p95": 539.5519733428955, + "p99": 2789.2799377441406 + }, + "combine": { + "p50": 257.9840123653412, + "p90": 272.41599559783936, + "p95": 1164.031982421875, + "p99": 3165.440082550049 + }, + "roundtrip": { + "p50": 685.0559711456299, + "p90": 735.4239821434021, + "p95": 2596.224069595337, + "p99": 3655.456066131592 + }, + "isolatedSum": { + "p50": 683.7440133094788, + "p90": 743.8719868659973, + "p95": 1703.5839557647705, + "p99": 
5954.720020294189 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 524.8960256576538, + "p90": 621.5680241584778, + "p95": 1313.5679960250854, + "p99": 2812.0639324188232 + }, + "combine": { + "p50": 468.25599670410156, + "p90": 492.0639991760254, + "p95": 1568.8320398330688, + "p99": 2256.00004196167 + }, + "roundtrip": { + "p50": 980.7680249214172, + "p90": 1057.9839944839478, + "p95": 2551.7759323120117, + "p99": 3060.5759620666504 + }, + "isolatedSum": { + "p50": 993.1520223617554, + "p90": 1113.6320233345032, + "p95": 2882.4000358581543, + "p99": 5068.063974380493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 720.4480171203613, + "p90": 860.4480028152466, + "p95": 1873.4079599380493, + "p99": 2505.887985229492 + }, + "combine": { + "p50": 907.1040153503418, + "p90": 966.4959907531738, + "p95": 1173.02405834198, + "p99": 1590.3680324554443 + }, + "roundtrip": { + "p50": 1610.144019126892, + "p90": 1986.1119985580444, + "p95": 2537.2159481048584, + "p99": 2951.008081436157 + }, + "isolatedSum": { + "p50": 1627.5520324707031, + "p90": 1826.9439935684204, + "p95": 3046.4320182800293, + "p99": 4096.2560176849365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + 
"p50": 1215.1999473571777, + "p90": 1440.6399726867676, + "p95": 1815.8080577850342, + "p99": 2148.639917373657 + }, + "combine": { + "p50": 1769.152045249939, + "p90": 1781.7599773406982, + "p95": 1786.6239547729492, + "p99": 1796.5760231018066 + }, + "roundtrip": { + "p50": 2949.3439197540283, + "p90": 3003.8399696350098, + "p95": 3050.463914871216, + "p99": 3302.527904510498 + }, + "isolatedSum": { + "p50": 2984.3519926071167, + "p90": 3222.399950027466, + "p95": 3602.4320125579834, + "p99": 3945.215940475464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 40211, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1981.5360307693481, + "p90": 2039.7119522094727, + "p95": 2147.871971130371, + "p99": 2325.727939605713 + }, + "combine": { + "p50": 3552.4160861968994, + "p90": 3568.864107131958, + "p95": 3574.3041038513184, + "p99": 3586.0800743103027 + }, + "roundtrip": { + "p50": 5462.560176849365, + "p90": 5492.159843444824, + "p95": 5505.087852478027, + "p99": 5563.295841217041 + }, + "isolatedSum": { + "p50": 5533.952116966248, + "p90": 5608.576059341431, + "p95": 5722.176074981689, + "p99": 5911.808013916016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3680.511951446533, + "p90": 3720.0000286102295, + "p95": 3738.0480766296387, + "p99": 3768.0320739746094 + }, + "combine": { + "p50": 7447.743892669678, + "p90": 7465.280055999756, + "p95": 7472.032070159912, + "p99": 7483.456134796143 + }, + "roundtrip": { + "p50": 11042.271614074707, + "p90": 11081.91967010498, + 
"p95": 11097.663879394531, + "p99": 11163.104057312012 + }, + "isolatedSum": { + "p50": 11128.255844116211, + "p90": 11185.280084609985, + "p95": 11210.08014678955, + "p99": 11251.488208770752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f8c1be55", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_c2dcdc61", + "comparisonKey": "e42f57737e3c55b6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:25:22.469944+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 353.34399342536926, + "p90": 410.65600514411926, + "p95": 1500.607967376709, + "p99": 2955.967903137207 + }, + "combine": { + "p50": 204.6079933643341, + "p90": 224.63999688625336, + "p95": 2554.0480613708496, + "p99": 3468.287944793701 + }, + "roundtrip": { + "p50": 535.7760190963745, + "p90": 589.8560285568237, + "p95": 659.0719819068909, + "p99": 4080.7042121887207 + }, + "isolatedSum": { + "p50": 557.9519867897034, + "p90": 635.2960020303726, + "p95": 4054.6560287475586, + "p99": 6424.255847930908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 382.78400897979736, + "p90": 425.28000473976135, + "p95": 504.0000081062317, + "p99": 2848.9279747009277 + }, + "combine": { + "p50": 339.80798721313477, + "p90": 357.37600922584534, + "p95": 2183.232069015503, + "p99": 3036.479949951172 + }, + "roundtrip": { + "p50": 708.2880139350891, + "p90": 795.9359884262085, + "p95": 2623.4560012817383, + "p99": 3531.935930252075 + }, + "isolatedSum": { + "p50": 722.5919961929321, + "p90": 782.6560139656067, + "p95": 2687.2320771217346, + "p99": 
5885.4079246521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 448.06399941444397, + "p90": 493.50398778915405, + "p95": 2275.7439613342285, + "p99": 3475.1999378204346 + }, + "combine": { + "p50": 644.7359919548035, + "p90": 686.3359808921814, + "p95": 1563.2959604263306, + "p99": 2047.0080375671387 + }, + "roundtrip": { + "p50": 1076.640009880066, + "p90": 1156.1599969863892, + "p95": 2419.9678897857666, + "p99": 3156.224012374878 + }, + "isolatedSum": { + "p50": 1092.7999913692474, + "p90": 1179.8399686813354, + "p95": 3839.039921760559, + "p99": 5522.207975387573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 614.6559715270996, + "p90": 741.7600154876709, + "p95": 2038.0480289459229, + "p99": 2905.087947845459 + }, + "combine": { + "p50": 1214.8159742355347, + "p90": 1279.647946357727, + "p95": 1535.0079536437988, + "p99": 2162.208080291748 + }, + "roundtrip": { + "p50": 1809.3760013580322, + "p90": 2077.728033065796, + "p95": 2730.0798892974854, + "p99": 3159.264087677002 + }, + "isolatedSum": { + "p50": 1829.4719457626343, + "p90": 2021.407961845398, + "p95": 3573.0559825897217, + "p99": 5067.296028137207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { 
+ "p50": 810.4640245437622, + "p90": 874.5279908180237, + "p95": 2218.048095703125, + "p99": 2650.559902191162 + }, + "combine": { + "p50": 2327.8400897979736, + "p90": 2341.4719104766846, + "p95": 2346.4319705963135, + "p99": 2375.839948654175 + }, + "roundtrip": { + "p50": 3110.464096069336, + "p90": 3255.359888076782, + "p95": 3584.671974182129, + "p99": 3924.896001815796 + }, + "isolatedSum": { + "p50": 3138.304114341736, + "p90": 3215.9999012947083, + "p95": 4564.4800662994385, + "p99": 5026.399850845337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1257.3120594024658, + "p90": 1363.8720512390137, + "p95": 1965.2800559997559, + "p99": 2400.383949279785 + }, + "combine": { + "p50": 4547.8081703186035, + "p90": 4563.392162322998, + "p95": 4570.879936218262, + "p99": 4593.2159423828125 + }, + "roundtrip": { + "p50": 5774.0797996521, + "p90": 5813.663959503174, + "p95": 5833.248138427734, + "p99": 6029.695987701416 + }, + "isolatedSum": { + "p50": 5805.120229721069, + "p90": 5927.264213562012, + "p95": 6536.159992218018, + "p99": 6993.599891662598 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b2c86471", + "identity": "b200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_20d23c1d", + "comparisonKey": "b585ee8585614df4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T16:19:53.440400+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "b200-dgxc_06", + "sku": "b200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 345.5680012702942, + "p90": 396.2880074977875, + "p95": 457.0240080356598, + "p99": 3001.7600059509277 + }, + "combine": { + "p50": 205.34400641918182, + "p90": 224.12799298763275, + "p95": 232.54400491714478, + 
"p99": 2941.7920112609863 + }, + "roundtrip": { + "p50": 542.2400236129761, + "p90": 605.247974395752, + "p95": 2900.4480838775635, + "p99": 4088.992118835449 + }, + "isolatedSum": { + "p50": 550.912007689476, + "p90": 620.4160004854202, + "p95": 689.5680129528046, + "p99": 5943.552017211914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 380.92800974845886, + "p90": 419.840008020401, + "p95": 489.24800753593445, + "p99": 3638.5281085968018 + }, + "combine": { + "p50": 337.66400814056396, + "p90": 353.66401076316833, + "p95": 1794.368028640747, + "p99": 2763.5838985443115 + }, + "roundtrip": { + "p50": 710.4640007019043, + "p90": 766.7199969291687, + "p95": 2558.784008026123, + "p99": 3760.2241039276123 + }, + "isolatedSum": { + "p50": 718.5920178890228, + "p90": 773.5040187835693, + "p95": 2283.6160361766815, + "p99": 6402.112007141113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 450.52799582481384, + "p90": 495.2000081539154, + "p95": 539.9680137634277, + "p99": 3368.191957473755 + }, + "combine": { + "p50": 645.5039978027344, + "p90": 1288.256049156189, + "p95": 1737.280011177063, + "p99": 2571.3601112365723 + }, + "roundtrip": { + "p50": 1078.879952430725, + "p90": 1464.959979057312, + "p95": 2548.192024230957, + "p99": 3161.792039871216 + }, + "isolatedSum": { + "p50": 1096.0319936275482, + "p90": 1783.4560573101044, + "p95": 2277.2480249404907, + "p99": 5939.552068710327 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 600.0000238418579, + "p90": 665.3760075569153, + "p95": 1970.6560373306274, + "p99": 3049.760103225708 + }, + "combine": { + "p50": 1211.3280296325684, + "p90": 1244.096040725708, + "p95": 1490.7840490341187, + "p99": 1905.087947845459 + }, + "roundtrip": { + "p50": 1811.4240169525146, + "p90": 1916.6719913482666, + "p95": 2628.096103668213, + "p99": 3161.1199378967285 + }, + "isolatedSum": { + "p50": 1811.3280534744263, + "p90": 1909.4720482826233, + "p95": 3461.440086364746, + "p99": 4954.848051071167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 826.3360261917114, + "p90": 1063.2959604263306, + "p95": 1885.6960535049438, + "p99": 2721.695899963379 + }, + "combine": { + "p50": 2327.5198936462402, + "p90": 2341.1519527435303, + "p95": 2351.2320518493652, + "p99": 2384.671926498413 + }, + "roundtrip": { + "p50": 3112.191915512085, + "p90": 3156.1920642852783, + "p95": 3240.799903869629, + "p99": 3863.8720512390137 + }, + "isolatedSum": { + "p50": 3153.8559198379517, + "p90": 3404.447913169861, + "p95": 4236.928105354309, + "p99": 5106.367826461792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1258.7840557098389, + "p90": 
1534.432053565979, + "p95": 1920.8639860153198, + "p99": 2385.2479457855225 + }, + "combine": { + "p50": 4549.1838455200195, + "p90": 4565.055847167969, + "p95": 4573.984146118164, + "p99": 4605.792045593262 + }, + "roundtrip": { + "p50": 5776.127815246582, + "p90": 5816.864013671875, + "p95": 5849.023818969727, + "p99": 6097.983837127686 + }, + "isolatedSum": { + "p50": 5807.967901229858, + "p90": 6099.487900733948, + "p95": 6494.848132133484, + "p99": 6991.039991378784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6a055d66", + "identity": "b200|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b200_ca64a747", + "comparisonKey": "fae4a07988e5d077", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:46:45.523465+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 111.26399785280228, + "p90": 126.94400548934937, + "p95": 137.40800321102142, + "p99": 150.87999403476715 + }, + "combine": { + "p50": 77.72800326347351, + "p90": 81.66400343179703, + "p95": 85.75999736785889, + "p99": 98.52799773216248 + }, + "roundtrip": { + "p50": 173.50399494171143, + "p90": 191.74399971961975, + "p95": 201.9519954919815, + "p99": 217.24799275398254 + }, + "isolatedSum": { + "p50": 188.9920011162758, + "p90": 208.6080089211464, + "p95": 223.1680005788803, + "p99": 249.40799176692963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 111.87200248241425, + "p90": 131.96800649166107, + "p95": 137.82399892807007, + "p99": 147.5200057029724 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 84.927998483181, + "p95": 89.82399851083755, + "p99": 101.1200025677681 + }, + "roundtrip": { + "p50": 176.35199427604675, + "p90": 200.76799392700195, + "p95": 209.31200683116913, + 
"p99": 214.4320011138916 + }, + "isolatedSum": { + "p50": 190.5600056052208, + "p90": 216.89600497484207, + "p95": 227.64799743890762, + "p99": 248.6400082707405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 123.07199835777283, + "p90": 145.53600549697876, + "p95": 152.41600573062897, + "p99": 177.47199535369873 + }, + "combine": { + "p50": 79.80799674987793, + "p90": 92.92799979448318, + "p95": 98.33600372076035, + "p99": 112.47999966144562 + }, + "roundtrip": { + "p50": 183.32800269126892, + "p90": 218.49599480628967, + "p95": 227.2000014781952, + "p99": 243.55199933052063 + }, + "isolatedSum": { + "p50": 202.87999510765076, + "p90": 238.46400529146194, + "p95": 250.7520094513893, + "p99": 289.95199501514435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 124.57600235939026, + "p90": 136.31999492645264, + "p95": 144.28800344467163, + "p99": 152.5759994983673 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 90.27200192213058, + "p95": 95.39200365543365, + "p99": 109.40799862146378 + }, + "roundtrip": { + "p50": 185.08799374103546, + "p90": 196.48000597953796, + "p95": 205.9199959039688, + "p99": 218.4319943189621 + }, + "isolatedSum": { + "p50": 206.11200481653214, + "p90": 226.59199684858322, + "p95": 239.68000710010529, + "p99": 261.9839981198311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 125.5359947681427, + "p90": 143.42400431632996, + "p95": 149.72800016403198, + "p99": 160.09600460529327 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 90.01599997282028, + "p95": 93.31200271844864, + "p99": 100.8640006184578 + }, + "roundtrip": { + "p50": 184.83200669288635, + "p90": 213.6320024728775, + "p95": 257.0880055427551, + "p99": 283.9359939098358 + }, + "isolatedSum": { + "p50": 206.59199357032776, + "p90": 233.44000428915024, + "p95": 243.04000288248062, + "p99": 260.96000522375107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.76799637079239, + "p90": 136.9599997997284, + "p95": 144.16000247001648, + "p99": 154.04799580574036 + }, + "combine": { + "p50": 88.16000074148178, + "p90": 96.92800045013428, + "p95": 105.40799796581268, + "p99": 126.78399682044983 + }, + "roundtrip": { + "p50": 185.34399569034576, + "p90": 215.96799790859222, + "p95": 223.00800681114197, + "p99": 235.58400571346283 + }, + "isolatedSum": { + "p50": 212.92799711227417, + "p90": 233.88800024986267, + "p95": 249.56800043582916, + "p99": 280.8319926261902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.5599970817566, + "p90": 210.78400313854218, + "p95": 220.60799598693848, + "p99": 227.03999280929565 + }, + "combine": { + "p50": 102.49599814414978, + "p90": 110.6560006737709, + "p95": 122.43200093507767, + "p99": 
139.96799290180206 + }, + "roundtrip": { + "p50": 200.9280025959015, + "p90": 224.86400604248047, + "p95": 232.89600014686584, + "p99": 260.0640058517456 + }, + "isolatedSum": { + "p50": 241.05599522590637, + "p90": 321.4400038123131, + "p95": 343.03999692201614, + "p99": 367.0079857110977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 145.34400403499603, + "p90": 164.95999693870544, + "p95": 170.46399414539337, + "p99": 177.47199535369873 + }, + "combine": { + "p50": 116.67200177907944, + "p90": 129.7599971294403, + "p95": 135.1040005683899, + "p99": 160.92799603939056 + }, + "roundtrip": { + "p50": 223.68000447750092, + "p90": 251.3279914855957, + "p95": 262.65600323677063, + "p99": 300.8959889411926 + }, + "isolatedSum": { + "p50": 262.01600581407547, + "p90": 294.71999406814575, + "p95": 305.56799471378326, + "p99": 338.3999913930893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-725365db", + "identity": "b200|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b200_ca64a747", + "comparisonKey": "40f7aab10d22eda9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:49:04.505477+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 116.15999788045883, + "p90": 149.1200029850006, + "p95": 161.6639941930771, + "p99": 181.08800053596497 + }, + "combine": { + "p50": 79.80799674987793, + "p90": 88.86399865150452, + "p95": 98.24000298976898, + "p99": 108.96000266075134 + }, + "roundtrip": { + "p50": 180.7039976119995, + "p90": 213.72799575328827, + "p95": 226.27200186252594, + "p99": 252.3840069770813 + }, + "isolatedSum": { + "p50": 195.96799463033676, + "p90": 237.98400163650513, + "p95": 259.90399718284607, + "p99": 290.0480031967163 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 116.60800129175186, + "p90": 139.13600146770477, + "p95": 144.896000623703, + "p99": 155.61600029468536 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 94.2080020904541, + "p95": 101.6319990158081, + "p99": 115.61600118875504 + }, + "roundtrip": { + "p50": 182.40000307559967, + "p90": 206.91199600696564, + "p95": 217.56799519062042, + "p99": 235.71200668811798 + }, + "isolatedSum": { + "p50": 198.2080042362213, + "p90": 233.34400355815887, + "p95": 246.5279996395111, + "p99": 271.2320014834404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 123.19999933242798, + "p90": 142.0159935951233, + "p95": 147.5519984960556, + "p99": 159.8079949617386 + }, + "combine": { + "p50": 86.97599917650223, + "p90": 92.16000139713287, + "p95": 98.43199700117111, + "p99": 108.41599851846695 + }, + "roundtrip": { + "p50": 185.08799374103546, + "p90": 208.67200195789337, + "p95": 214.59199488162994, + "p99": 224.63999688625336 + }, + "isolatedSum": { + "p50": 210.1759985089302, + "p90": 234.17599499225616, + "p95": 245.98399549722672, + "p99": 268.22399348020554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.31200051307678, + "p90": 145.37599682807922, + "p95": 
149.98400211334229, + "p99": 163.7440025806427 + }, + "combine": { + "p50": 90.01599997282028, + "p90": 97.24800288677216, + "p95": 107.10400342941284, + "p99": 116.48000031709671 + }, + "roundtrip": { + "p50": 187.391996383667, + "p90": 219.55199539661407, + "p95": 227.61599719524384, + "p99": 257.2160065174103 + }, + "isolatedSum": { + "p50": 215.32800048589706, + "p90": 242.62399971485138, + "p95": 257.0880055427551, + "p99": 280.2240028977394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 124.86399710178375, + "p90": 149.02399480342865, + "p95": 152.3520052433014, + "p99": 184.76800620555878 + }, + "combine": { + "p50": 90.4960036277771, + "p90": 97.28000313043594, + "p95": 103.39199751615524, + "p99": 110.52799969911575 + }, + "roundtrip": { + "p50": 184.86399948596954, + "p90": 210.07999777793884, + "p95": 220.2879935503006, + "p99": 229.0239930152893 + }, + "isolatedSum": { + "p50": 215.36000072956085, + "p90": 246.3039979338646, + "p95": 255.74400275945663, + "p99": 295.29600590467453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 130.20800054073334, + "p90": 153.82400155067444, + "p95": 168.38400065898895, + "p99": 196.19199633598328 + }, + "combine": { + "p50": 91.87199920415878, + "p90": 104.25599664449692, + "p95": 111.23199760913849, + "p99": 120.83200365304947 + }, + "roundtrip": { + "p50": 190.2720034122467, + "p90": 227.32800245285034, + "p95": 232.41600394248962, + "p99": 260.25599241256714 + }, + "isolatedSum": { + "p50": 
222.07999974489212, + "p90": 258.07999819517136, + "p95": 279.61599826812744, + "p99": 317.02399998903275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.14400136470795, + "p90": 165.27999937534332, + "p95": 175.55199563503265, + "p99": 213.6320024728775 + }, + "combine": { + "p50": 105.24799674749374, + "p90": 118.46400052309036, + "p95": 123.29600006341934, + "p99": 132.51200318336487 + }, + "roundtrip": { + "p50": 207.48800039291382, + "p90": 238.8480007648468, + "p95": 244.76799368858337, + "p99": 280.89600801467896 + }, + "isolatedSum": { + "p50": 243.3919981122017, + "p90": 283.7439998984337, + "p95": 298.847995698452, + "p99": 346.14400565624237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 150.7200002670288, + "p90": 170.0800061225891, + "p95": 176.60799622535706, + "p99": 196.0960030555725 + }, + "combine": { + "p50": 125.82400441169739, + "p90": 135.0719928741455, + "p95": 138.5599970817566, + "p99": 147.5200057029724 + }, + "roundtrip": { + "p50": 237.85600066184998, + "p90": 258.39999318122864, + "p95": 265.9200131893158, + "p99": 279.7439992427826 + }, + "isolatedSum": { + "p50": 276.5440046787262, + "p90": 305.1519989967346, + "p95": 315.16799330711365, + "p99": 343.6160087585449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + 
] + }, + { + "id": "cx-779fb99e", + "identity": "b200|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_ca64a747", + "comparisonKey": "47c52f6f817f08a6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:51:25.348798+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 109.15199667215347, + "p90": 134.49600338935852, + "p95": 138.87999951839447, + "p99": 152.8320014476776 + }, + "combine": { + "p50": 79.80799674987793, + "p90": 88.03199976682663, + "p95": 89.82399851083755, + "p99": 95.36000341176987 + }, + "roundtrip": { + "p50": 185.31200289726257, + "p90": 194.87999379634857, + "p95": 200.8640021085739, + "p99": 213.18399906158447 + }, + "isolatedSum": { + "p50": 188.9599934220314, + "p90": 222.52800315618515, + "p95": 228.70399802923203, + "p99": 248.19200485944748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 112.70400136709213, + "p90": 124.03199821710587, + "p95": 132.4159950017929, + "p99": 146.01600170135498 + }, + "combine": { + "p50": 83.77599716186523, + "p90": 90.68799763917923, + "p95": 92.54399687051773, + "p99": 102.94400155544281 + }, + "roundtrip": { + "p50": 186.11200153827667, + "p90": 217.8560048341751, + "p95": 224.12799298763275, + "p99": 248.28800559043884 + }, + "isolatedSum": { + "p50": 196.47999852895737, + "p90": 214.7199958562851, + "p95": 224.95999187231064, + "p99": 248.9600032567978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 124.83199685811996, + "p90": 129.37599420547485, + "p95": 133.91999900341034, + "p99": 139.64800536632538 + }, + "combine": { + "p50": 89.91999924182892, + "p90": 94.11200135946274, + "p95": 96.92800045013428, + "p99": 103.74400019645691 + 
}, + "roundtrip": { + "p50": 181.66400492191315, + "p90": 195.48800587654114, + "p95": 199.48799908161163, + "p99": 208.80000293254852 + }, + "isolatedSum": { + "p50": 214.75199609994888, + "p90": 223.4879955649376, + "p95": 230.84799945354462, + "p99": 243.3920055627823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.59999525547028, + "p90": 130.5920034646988, + "p95": 134.0479999780655, + "p99": 143.36000382900238 + }, + "combine": { + "p50": 90.94399958848953, + "p90": 96.44799679517746, + "p95": 101.34399682283401, + "p99": 107.26399719715118 + }, + "roundtrip": { + "p50": 182.52800405025482, + "p90": 195.74399292469025, + "p95": 198.4959989786148, + "p99": 207.0399969816208 + }, + "isolatedSum": { + "p50": 216.5439948439598, + "p90": 227.04000025987625, + "p95": 235.3919968008995, + "p99": 250.62400102615356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 125.37600100040436, + "p90": 135.00800728797913, + "p95": 139.29599523544312, + "p99": 164.000004529953 + }, + "combine": { + "p50": 91.48799628019333, + "p90": 100.76799988746643, + "p95": 103.64799946546555, + "p99": 109.0880036354065 + }, + "roundtrip": { + "p50": 184.83200669288635, + "p90": 197.02400267124176, + "p95": 200.76799392700195, + "p99": 212.67199516296387 + }, + "isolatedSum": { + "p50": 216.8639972805977, + "p90": 235.77600717544556, + "p95": 242.94399470090866, + "p99": 273.0880081653595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + 
"combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 119.74400281906128, + "p90": 137.63199746608734, + "p95": 142.07999408245087, + "p99": 153.53600680828094 + }, + "combine": { + "p50": 99.39199686050415, + "p90": 107.93600231409073, + "p95": 110.78400164842606, + "p99": 124.79999661445618 + }, + "roundtrip": { + "p50": 197.85599410533905, + "p90": 210.55999398231506, + "p95": 215.42400121688843, + "p99": 226.17599368095398 + }, + "isolatedSum": { + "p50": 219.13599967956543, + "p90": 245.56799978017807, + "p95": 252.86399573087692, + "p99": 278.3360034227371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.53600418567657, + "p90": 152.22400426864624, + "p95": 156.6080003976822, + "p99": 173.8239973783493 + }, + "combine": { + "p50": 109.56799983978271, + "p90": 115.64800143241882, + "p95": 117.85600334405899, + "p99": 122.17599898576736 + }, + "roundtrip": { + "p50": 212.51200139522552, + "p90": 230.46399652957916, + "p95": 239.23200368881226, + "p99": 257.31199979782104 + }, + "isolatedSum": { + "p50": 247.1040040254593, + "p90": 267.87200570106506, + "p95": 274.4640037417412, + "p99": 295.99999636411667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 144.9279934167862, + "p90": 154.14400398731232, + "p95": 158.9439958333969, + "p99": 168.60799491405487 + 
}, + "combine": { + "p50": 128.9599984884262, + "p90": 136.35200262069702, + "p95": 138.8159990310669, + "p99": 143.23200285434723 + }, + "roundtrip": { + "p50": 243.03999543190002, + "p90": 255.295991897583, + "p95": 259.93600487709045, + "p99": 278.49599719047546 + }, + "isolatedSum": { + "p50": 273.8879919052124, + "p90": 290.49600660800934, + "p95": 297.7599948644638, + "p99": 311.8399977684021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-05b25a41", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_d6b08783", + "comparisonKey": "98d44d75d612d29e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:13.473073+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.0000034570694, + "p90": 137.82399892807007, + "p95": 145.63199877738953, + "p99": 152.70400047302246 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 96.47999703884125, + "p95": 106.72000050544739, + "p99": 116.99199676513672 + }, + "roundtrip": { + "p50": 187.51999735832214, + "p90": 222.75200486183167, + "p95": 228.03199291229248, + "p99": 258.5600018501282 + }, + "isolatedSum": { + "p50": 202.2080048918724, + "p90": 234.30399596691132, + "p95": 252.3519992828369, + "p99": 269.6959972381592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.91999638080597, + "p90": 139.0720009803772, + "p95": 147.039994597435, + "p99": 166.81599617004395 + }, + "combine": { + "p50": 91.58399701118469, + "p90": 103.4879982471466, + "p95": 110.88000237941742, + "p99": 128.38399410247803 + }, + "roundtrip": { + "p50": 185.15199422836304, + "p90": 217.056006193161, + "p95": 224.38399493694305, + "p99": 239.58399891853333 + }, + "isolatedSum": { + "p50": 
209.50399339199066, + "p90": 242.5599992275238, + "p95": 257.9199969768524, + "p99": 295.199990272522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 126.36800110340118, + "p90": 143.71199905872345, + "p95": 150.176003575325, + "p99": 161.21600568294525 + }, + "combine": { + "p50": 92.51199662685394, + "p90": 104.89600151777267, + "p95": 110.07999628782272, + "p99": 120.7360029220581 + }, + "roundtrip": { + "p50": 191.67999923229218, + "p90": 217.98400580883026, + "p95": 224.83199834823608, + "p99": 238.24000358581543 + }, + "isolatedSum": { + "p50": 218.87999773025513, + "p90": 248.60800057649612, + "p95": 260.25599986314774, + "p99": 281.95200860500336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.3919939994812, + "p90": 147.10399508476257, + "p95": 156.38400614261627, + "p99": 169.21600699424744 + }, + "combine": { + "p50": 94.04800087213516, + "p90": 105.0880029797554, + "p95": 111.80800199508667, + "p99": 122.43200093507767 + }, + "roundtrip": { + "p50": 196.76800072193146, + "p90": 228.19200158119202, + "p95": 235.03999412059784, + "p99": 252.0959973335266 + }, + "isolatedSum": { + "p50": 221.43999487161636, + "p90": 252.19199806451797, + "p95": 268.19200813770294, + "p99": 291.6480079293251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
16, + "globalTokens": 128, + "dispatch": { + "p50": 126.75200402736664, + "p90": 148.03199470043182, + "p95": 156.8640023469925, + "p99": 167.42399334907532 + }, + "combine": { + "p50": 94.71999853849411, + "p90": 110.33599823713303, + "p95": 118.33599954843521, + "p99": 138.59200477600098 + }, + "roundtrip": { + "p50": 199.2959976196289, + "p90": 232.86400735378265, + "p95": 240.447998046875, + "p99": 282.9119861125946 + }, + "isolatedSum": { + "p50": 221.47200256586075, + "p90": 258.36799293756485, + "p95": 275.2000018954277, + "p99": 306.0159981250763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.17599898576736, + "p90": 143.8719928264618, + "p95": 149.4400054216385, + "p99": 158.52800011634827 + }, + "combine": { + "p50": 103.00800204277039, + "p90": 110.944002866745, + "p95": 121.18399888277054, + "p99": 132.38400220870972 + }, + "roundtrip": { + "p50": 197.31199741363525, + "p90": 223.58399629592896, + "p95": 231.77599906921387, + "p99": 239.74399268627167 + }, + "isolatedSum": { + "p50": 225.18400102853775, + "p90": 254.8159956932068, + "p95": 270.624004304409, + "p99": 290.912002325058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.2399946451187, + "p90": 171.00800573825836, + "p95": 178.75200510025024, + "p99": 197.56799936294556 + }, + "combine": { + "p50": 118.68800222873688, + "p90": 130.17599284648895, + "p95": 140.70400595664978, + "p99": 149.72800016403198 + }, + "roundtrip": { + "p50": 225.18399357795715, + "p90": 
255.295991897583, + "p95": 264.3519937992096, + "p99": 290.6560003757477 + }, + "isolatedSum": { + "p50": 256.9279968738556, + "p90": 301.1839985847473, + "p95": 319.4560110569, + "p99": 347.29599952697754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 149.4400054216385, + "p90": 173.5360026359558, + "p95": 179.19999361038208, + "p99": 194.5600062608719 + }, + "combine": { + "p50": 139.77600634098053, + "p90": 149.31200444698334, + "p95": 153.6960005760193, + "p99": 162.52799332141876 + }, + "roundtrip": { + "p50": 256.25601410865784, + "p90": 276.99199318885803, + "p95": 281.43998980522156, + "p99": 312.4159872531891 + }, + "isolatedSum": { + "p50": 289.216011762619, + "p90": 322.84800708293915, + "p95": 332.89599418640137, + "p99": 357.08799958229065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0c1e37c9", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_ca64a747", + "comparisonKey": "276cf2ec74fc457f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:38:29.536173+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 
8, + "label": "B200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 114.78400230407715, + "p90": 141.53599739074707, + "p95": 152.41600573062897, + "p99": 162.11199760437012 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 101.18400305509567, + "p95": 108.67200046777725, + "p99": 117.11999773979187 + }, + "roundtrip": { + "p50": 189.56799805164337, + "p90": 238.20799589157104, + "p95": 268.0000066757202, + "p99": 304.9600124359131 + }, + "isolatedSum": { + "p50": 204.99200373888016, + "p90": 242.72000044584274, + "p95": 261.0880061984062, + "p99": 279.231995344162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + 
"fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 119.84000355005264, + "p90": 142.65599846839905, + "p95": 162.432000041008, + "p99": 179.1040003299713 + }, + "combine": { + "p50": 91.20000153779984, + "p90": 103.45599800348282, + "p95": 110.11199653148651, + "p99": 119.45600062608719 + }, + "roundtrip": { + "p50": 188.63999843597412, + "p90": 216.15999937057495, + "p95": 226.6560047864914, + "p99": 242.71999299526215 + }, + "isolatedSum": { + "p50": 211.04000508785248, + "p90": 246.11199647188187, + "p95": 272.5439965724945, + "p99": 298.5600009560585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 125.63200294971466, + "p90": 144.70399916172028, + "p95": 153.31199765205383, + "p99": 176.2239933013916 + }, + "combine": { + "p50": 92.28800237178802, + "p90": 100.3199964761734, + "p95": 107.29599744081497, + "p99": 114.52800035476685 + }, + "roundtrip": { + "p50": 193.34399700164795, + "p90": 226.30399465560913, + "p95": 232.86400735378265, + "p99": 257.24801421165466 + }, + "isolatedSum": { + "p50": 217.92000532150269, + "p90": 245.02399563789368, + "p95": 260.6079950928688, + "p99": 290.75199365615845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.59999525547028, + "p90": 143.00799369812012, + "p95": 154.65599298477173, + "p99": 170.49600183963776 + }, + "combine": { + "p50": 93.34400296211243, + "p90": 
104.38399761915207, + "p95": 110.3999987244606, + "p99": 120.38400024175644 + }, + "roundtrip": { + "p50": 197.11999595165253, + "p90": 224.44799542427063, + "p95": 230.6559979915619, + "p99": 245.40799856185913 + }, + "isolatedSum": { + "p50": 218.9439982175827, + "p90": 247.3919913172722, + "p95": 265.05599170923233, + "p99": 290.8800020813942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 123.71200323104858, + "p90": 148.83199334144592, + "p95": 155.13600409030914, + "p99": 169.8240041732788 + }, + "combine": { + "p50": 94.78399902582169, + "p90": 108.73600095510483, + "p95": 113.24799805879593, + "p99": 127.87200510501862 + }, + "roundtrip": { + "p50": 201.75999402999878, + "p90": 231.9359928369522, + "p95": 254.8159956932068, + "p99": 278.0480086803436 + }, + "isolatedSum": { + "p50": 218.49600225687027, + "p90": 257.56799429655075, + "p95": 268.3840021491051, + "p99": 297.6960092782974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 121.98399752378464, + "p90": 139.16799426078796, + "p95": 146.65600657463074, + "p99": 156.38400614261627 + }, + "combine": { + "p50": 102.94400155544281, + "p90": 110.59200018644333, + "p95": 118.1119978427887, + "p99": 122.84799665212631 + }, + "roundtrip": { + "p50": 198.14400374889374, + "p90": 233.2800030708313, + "p95": 238.49600553512573, + "p99": 257.3759853839874 + }, + "isolatedSum": { + "p50": 224.92799907922745, + "p90": 249.7599944472313, + "p95": 264.76800441741943, + "p99": 279.2320027947426 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 146.59200608730316, + "p90": 169.855996966362, + "p95": 187.6160055398941, + "p99": 209.75999534130096 + }, + "combine": { + "p50": 116.95999652147293, + "p90": 130.8159977197647, + "p95": 136.03200018405914, + "p99": 148.83199334144592 + }, + "roundtrip": { + "p50": 222.6559966802597, + "p90": 247.8400021791458, + "p95": 255.48800826072693, + "p99": 266.9439911842346 + }, + "isolatedSum": { + "p50": 263.5520026087761, + "p90": 300.6719946861267, + "p95": 323.64800572395325, + "p99": 358.5919886827469 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 152.54400670528412, + "p90": 185.08799374103546, + "p95": 196.1279958486557, + "p99": 227.10399329662323 + }, + "combine": { + "p50": 139.20000195503235, + "p90": 144.6080058813095, + "p95": 146.91199362277985, + "p99": 159.10400450229645 + }, + "roundtrip": { + "p50": 255.90398907661438, + "p90": 279.9359858036041, + "p95": 287.87198662757874, + "p99": 302.39999294281006 + }, + "isolatedSum": { + "p50": 291.74400866031647, + "p90": 329.69599962234497, + "p95": 343.03998947143555, + "p99": 386.2079977989197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56a777bc", + "identity": 
"b200|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b200_ca64a747", + "comparisonKey": "6b1cdc6cc5442c5e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:44:25.841208+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 113.8560026884079, + "p90": 135.83999872207642, + "p95": 139.23199474811554, + "p99": 148.54399859905243 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 95.64799815416336, + "p95": 101.43999755382538, + "p99": 110.20799726247787 + }, + "roundtrip": { + "p50": 188.06399405002594, + "p90": 218.9439982175827, + "p95": 226.04799270629883, + "p99": 261.9200050830841 + }, + "isolatedSum": { + "p50": 201.152004301548, + "p90": 231.48799687623978, + "p95": 240.67199230194092, + "p99": 258.7519958615303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 118.27199906110764, + "p90": 137.7599984407425, + "p95": 141.92000031471252, + "p99": 162.62400150299072 + }, + "combine": { + "p50": 91.0400003194809, + "p90": 96.96000069379807, + "p95": 104.00000214576721, + "p99": 115.74400216341019 + }, + "roundtrip": { + "p50": 184.76800620555878, + "p90": 212.51200139522552, + "p95": 218.46400201320648, + "p99": 226.0800004005432 + }, + "isolatedSum": { + "p50": 209.31199938058853, + "p90": 234.71999913454056, + "p95": 245.92000246047974, + "p99": 278.3680036664009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 126.78399682044983, + "p90": 142.14399456977844, + "p95": 148.19200336933136, + "p99": 156.6080003976822 + }, + "combine": { + "p50": 91.61599725484848, + "p90": 102.20800340175629, + "p95": 110.6560006737709, + "p99": 118.56000125408173 + }, + "roundtrip": { + "p50": 192.00000166893005, + "p90": 
223.03999960422516, + "p95": 228.64000499248505, + "p99": 244.32000517845154 + }, + "isolatedSum": { + "p50": 218.3999940752983, + "p90": 244.35199797153473, + "p95": 258.84800404310226, + "p99": 275.1680016517639 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.10399925708771, + "p90": 145.34400403499603, + "p95": 151.296004652977, + "p99": 176.57600343227386 + }, + "combine": { + "p50": 92.47999638319016, + "p90": 102.49599814414978, + "p95": 109.63200032711029, + "p99": 116.80000275373459 + }, + "roundtrip": { + "p50": 196.76800072193146, + "p90": 220.8320051431656, + "p95": 226.78400576114655, + "p99": 237.31200397014618 + }, + "isolatedSum": { + "p50": 219.58399564027786, + "p90": 247.8400021791458, + "p95": 260.9280049800873, + "p99": 293.37600618600845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 125.2480000257492, + "p90": 142.46399700641632, + "p95": 148.0640023946762, + "p99": 158.4639996290207 + }, + "combine": { + "p50": 94.40000355243683, + "p90": 108.19199681282043, + "p95": 112.5440001487732, + "p99": 125.5359947681427 + }, + "roundtrip": { + "p50": 198.88000190258026, + "p90": 221.95200622081757, + "p95": 229.0560007095337, + "p99": 237.98400163650513 + }, + "isolatedSum": { + "p50": 219.64800357818604, + "p90": 250.65599381923676, + "p95": 260.6080025434494, + "p99": 283.9999943971634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 
91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.23999947309494, + "p90": 141.184002161026, + "p95": 147.0080018043518, + "p99": 153.98399531841278 + }, + "combine": { + "p50": 102.94400155544281, + "p90": 108.44799876213074, + "p95": 114.88000303506851, + "p99": 122.6240023970604 + }, + "roundtrip": { + "p50": 197.50399887561798, + "p90": 224.38399493694305, + "p95": 230.335995554924, + "p99": 237.05600202083588 + }, + "isolatedSum": { + "p50": 225.18400102853775, + "p90": 249.63200092315674, + "p95": 261.8880048394203, + "p99": 276.6079977154732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 135.903999209404, + "p90": 160.76800227165222, + "p95": 163.455992937088, + "p99": 171.424001455307 + }, + "combine": { + "p50": 116.92799627780914, + "p90": 128.83199751377106, + "p95": 134.3040019273758, + "p99": 140.86399972438812 + }, + "roundtrip": { + "p50": 224.2880016565323, + "p90": 245.34399807453156, + "p95": 250.8159875869751, + "p99": 260.25599241256714 + }, + "isolatedSum": { + "p50": 252.83199548721313, + "p90": 289.5999997854233, + "p95": 297.7599948644638, + "p99": 312.28800117969513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 151.7760008573532, + "p90": 175.10400712490082, + "p95": 180.38399517536163, + "p99": 198.2399970293045 + }, + "combine": { + "p50": 138.75199854373932, + "p90": 146.97599411010742, + "p95": 
151.74399316310883, + "p99": 160.19199788570404 + }, + "roundtrip": { + "p50": 254.59200143814087, + "p90": 274.7200131416321, + "p95": 281.18398785591125, + "p99": 305.4400086402893 + }, + "isolatedSum": { + "p50": 290.52799940109253, + "p90": 322.08000123500824, + "p95": 332.12798833847046, + "p99": 358.43199491500854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-62b4f808", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b200_6f2700ad", + "comparisonKey": "9d18677db65d12ca", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:52:33.446793+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 
1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.09599673748016, + "p90": 137.1839940547943, + "p95": 144.22400295734406, + "p99": 157.27999806404114 + }, + "combine": { + "p50": 91.07200056314468, + "p90": 98.14400225877762, + "p95": 106.9440022110939, + "p99": 116.80000275373459 + }, + "roundtrip": { + "p50": 185.2799952030182, + "p90": 223.55200350284576, + "p95": 228.32000255584717, + "p99": 261.3759934902191 + }, + "isolatedSum": { + "p50": 203.16799730062485, + "p90": 235.32799631357193, + "p95": 251.16800516843796, + "p99": 274.0800008177757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 118.04799735546112, + "p90": 133.7919980287552, + "p95": 140.06400108337402, + "p99": 152.3520052433014 + }, + "combine": { + "p50": 91.839998960495, + "p90": 104.5759990811348, + "p95": 110.88000237941742, + "p99": 120.12799829244614 + }, + "roundtrip": { + "p50": 191.71200692653656, + "p90": 219.64800357818604, + "p95": 226.0800004005432, + "p99": 250.2720057964325 + }, + "isolatedSum": { + "p50": 209.88799631595612, + "p90": 238.36799710988998, + "p95": 250.94400346279144, + "p99": 
272.4800035357475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 125.47199428081512, + "p90": 147.32800424098969, + "p95": 156.2879979610443, + "p99": 171.29600048065186 + }, + "combine": { + "p50": 93.02400052547455, + "p90": 102.81600058078766, + "p95": 110.23999750614166, + "p99": 121.95199728012085 + }, + "roundtrip": { + "p50": 194.75199282169342, + "p90": 218.84800493717194, + "p95": 226.4000028371811, + "p99": 237.40799725055695 + }, + "isolatedSum": { + "p50": 218.49599480628967, + "p90": 250.14400482177734, + "p95": 266.527995467186, + "p99": 293.2479977607727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.85599720478058, + "p90": 145.37599682807922, + "p95": 153.1199961900711, + "p99": 167.32800006866455 + }, + "combine": { + "p50": 96.8639999628067, + "p90": 107.07200318574905, + "p95": 117.88800358772278, + "p99": 122.97599762678146 + }, + "roundtrip": { + "p50": 199.35999810695648, + "p90": 237.5359982252121, + "p95": 244.159996509552, + "p99": 293.2800054550171 + }, + "isolatedSum": { + "p50": 222.71999716758728, + "p90": 252.44800001382828, + "p95": 271.0079997777939, + "p99": 290.303997695446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 124.70400333404541, + "p90": 140.73599874973297, + 
"p95": 149.27999675273895, + "p99": 163.00800442695618 + }, + "combine": { + "p50": 102.39999741315842, + "p90": 112.47999966144562, + "p95": 119.99999731779099, + "p99": 134.17600095272064 + }, + "roundtrip": { + "p50": 198.08000326156616, + "p90": 223.29600155353546, + "p95": 229.44000363349915, + "p99": 240.22400379180908 + }, + "isolatedSum": { + "p50": 227.10400074720383, + "p90": 253.2159984111786, + "p95": 269.27999407052994, + "p99": 297.1840053796768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 135.74400544166565, + "p90": 151.5520066022873, + "p95": 158.24000537395477, + "p99": 168.92799735069275 + }, + "combine": { + "p50": 104.25599664449692, + "p90": 119.99999731779099, + "p95": 123.55200201272964, + "p99": 132.06399977207184 + }, + "roundtrip": { + "p50": 213.44000101089478, + "p90": 251.8720030784607, + "p95": 270.7520127296448, + "p99": 297.3119914531708 + }, + "isolatedSum": { + "p50": 240.00000208616257, + "p90": 271.5520039200783, + "p95": 281.7920073866844, + "p99": 300.9919971227646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 151.36000514030457, + "p90": 165.50399363040924, + "p95": 173.40800166130066, + "p99": 193.05600225925446 + }, + "combine": { + "p50": 124.51200187206268, + "p90": 135.0719928741455, + "p95": 143.00799369812012, + "p99": 148.22399616241455 + }, + "roundtrip": { + "p50": 233.15200209617615, + "p90": 242.0479953289032, + "p95": 248.6400008201599, + "p99": 262.65600323677063 + }, + "isolatedSum": { + "p50": 
275.87200701236725, + "p90": 300.57598650455475, + "p95": 316.4159953594208, + "p99": 341.279998421669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 169.3120002746582, + "p90": 184.2239946126938, + "p95": 189.88800048828125, + "p99": 201.75999402999878 + }, + "combine": { + "p50": 156.12800419330597, + "p90": 167.64800250530243, + "p95": 172.28800058364868, + "p99": 181.60000443458557 + }, + "roundtrip": { + "p50": 290.23998975753784, + "p90": 315.13598561286926, + "p95": 320.3839957714081, + "p99": 340.9599959850311 + }, + "isolatedSum": { + "p50": 325.4400044679642, + "p90": 351.8719971179962, + "p95": 362.17600107192993, + "p99": 383.35999846458435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5b4b22c1", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b200_bb7a559c", + "comparisonKey": "a321ebc3cea9e0ac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:53:42.716540+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + 
"shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 110.75200140476227, + "p90": 124.09599870443344, + "p95": 127.80800461769104, + "p99": 134.2719942331314 + }, + "combine": { + "p50": 82.84799754619598, + "p90": 88.79999816417694, + "p95": 92.44800359010696, + "p99": 99.58399832248688 + }, + "roundtrip": { + "p50": 183.32800269126892, + "p90": 195.8400011062622, + "p95": 201.05600357055664, + "p99": 225.69599747657776 + }, + "isolatedSum": { + "p50": 193.59999895095825, + "p90": 212.89599686861038, + "p95": 220.256008207798, + "p99": 233.8559925556183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 124.32000041007996, + "p90": 131.55199587345123, + "p95": 135.00800728797913, + "p99": 162.56000101566315 + }, + "combine": { + "p50": 79.74400371313095, + "p90": 86.36800199747086, + "p95": 89.15200084447861, + "p99": 94.17600184679031 + }, + "roundtrip": { + "p50": 174.68799650669098, + "p90": 186.75200641155243, + "p95": 189.95200097560883, + "p99": 197.9839950799942 + }, + "isolatedSum": { + "p50": 204.0640041232109, + "p90": 217.9199978709221, + "p95": 224.16000813245773, + "p99": 256.73600286245346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.81599640846252, + "p90": 136.86400651931763, + "p95": 145.50399780273438, + "p99": 156.09599649906158 + }, + "combine": { + "p50": 84.86399799585342, + "p90": 97.47199714183807, + "p95": 104.16000336408615, + "p99": 111.10399663448334 + }, + "roundtrip": { + "p50": 187.23200261592865, + "p90": 202.72000133991241, + "p95": 209.05600488185883, + "p99": 236.06400191783905 + }, + "isolatedSum": { + "p50": 207.67999440431595, + "p90": 234.3360036611557, + "p95": 249.66400116682053, + "p99": 267.1999931335449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 134.62400436401367, + "p90": 145.91999351978302, + "p95": 148.92800152301788, + "p99": 154.78399395942688 + }, + "combine": { + "p50": 94.11200135946274, + "p90": 102.33599692583084, + "p95": 106.36799782514572, + 
"p99": 113.18399757146835 + }, + "roundtrip": { + "p50": 192.19200313091278, + "p90": 213.02400529384613, + "p95": 228.19200158119202, + "p99": 246.8159943819046 + }, + "isolatedSum": { + "p50": 228.7360057234764, + "p90": 248.25599044561386, + "p95": 255.2959993481636, + "p99": 267.96799153089523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f77432bc", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b200_c0c61e71", + "comparisonKey": "72fb03c306c83aa4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:59:20.940968+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 110.43199896812439, + "p90": 131.9040060043335, + "p95": 136.83199882507324, + "p99": 150.30400454998016 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 87.23200112581253, + "p95": 93.34400296211243, + "p99": 99.64799880981445 + }, + "roundtrip": { + "p50": 176.64000391960144, + "p90": 203.77600193023682, + "p95": 207.58399367332458, + "p99": 223.1999933719635 + }, + "isolatedSum": { + "p50": 189.59999829530716, + "p90": 219.13600713014603, + "p95": 230.17600178718567, + "p99": 249.95200335979462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.4559999704361, + "p90": 137.11999356746674, + "p95": 142.14399456977844, + "p99": 155.61600029468536 + }, + "combine": { + "p50": 79.39200103282928, + "p90": 90.04800021648407, + "p95": 97.28000313043594, + "p99": 99.2640033364296 + }, + "roundtrip": { + "p50": 180.4800033569336, + "p90": 205.56800067424774, + "p95": 213.76000344753265, + "p99": 258.7200105190277 + }, + "isolatedSum": { + "p50": 194.84800100326538, + "p90": 227.1679937839508, + "p95": 239.4239977002144, + "p99": 
254.88000363111496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 125.5359947681427, + "p90": 139.16799426078796, + "p95": 146.04799449443817, + "p99": 155.61600029468536 + }, + "combine": { + "p50": 79.8719972372055, + "p90": 88.57599645853043, + "p95": 95.45599669218063, + "p99": 99.58399832248688 + }, + "roundtrip": { + "p50": 182.20800161361694, + "p90": 199.20000433921814, + "p95": 206.2399983406067, + "p99": 213.72799575328827 + }, + "isolatedSum": { + "p50": 205.4079920053482, + "p90": 227.7439907193184, + "p95": 241.5039911866188, + "p99": 255.19999861717224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.88000500202179, + "p90": 141.85599982738495, + "p95": 148.83199334144592, + "p99": 156.031996011734 + }, + "combine": { + "p50": 86.94399893283844, + "p90": 97.34400361776352, + "p95": 105.34399747848511, + "p99": 123.74400347471237 + }, + "roundtrip": { + "p50": 185.59999763965607, + "p90": 204.96000349521637, + "p95": 214.9759978055954, + "p99": 222.3680019378662 + }, + "isolatedSum": { + "p50": 213.82400393486023, + "p90": 239.20000344514847, + "p95": 254.17599081993103, + "p99": 279.7759994864464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 126.65599584579468, + "p90": 140.3840035200119, + "p95": 
145.1839953660965, + "p99": 152.51199901103973 + }, + "combine": { + "p50": 81.28000050783157, + "p90": 90.84799885749817, + "p95": 97.75999933481216, + "p99": 105.0880029797554 + }, + "roundtrip": { + "p50": 185.02399325370789, + "p90": 204.99199628829956, + "p95": 211.5519940853119, + "p99": 219.61599588394165 + }, + "isolatedSum": { + "p50": 207.93599635362625, + "p90": 231.23200237751007, + "p95": 242.94399470090866, + "p99": 257.60000199079514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 126.17599964141846, + "p90": 147.39200472831726, + "p95": 153.6960005760193, + "p99": 178.5919964313507 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 102.27199643850327, + "p95": 107.4879989027977, + "p99": 126.43200159072876 + }, + "roundtrip": { + "p50": 183.6480051279068, + "p90": 209.98400449752808, + "p95": 217.66400337219238, + "p99": 232.9919934272766 + }, + "isolatedSum": { + "p50": 216.51200205087662, + "p90": 249.66400116682053, + "p95": 261.183999478817, + "p99": 305.02399802207947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 122.36800044775009, + "p90": 139.42399621009827, + "p95": 145.21600306034088, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 92.06400066614151, + "p90": 103.58399897813797, + "p95": 109.0560033917427, + "p99": 117.95199662446976 + }, + "roundtrip": { + "p50": 193.95199418067932, + "p90": 215.58399498462677, + "p95": 221.0880070924759, + "p99": 229.95199263095856 + }, + "isolatedSum": { + "p50": 214.4320011138916, + 
"p90": 243.00799518823624, + "p95": 254.2720064520836, + "p99": 271.9679996371269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 138.62399756908417, + "p90": 164.99200463294983, + "p95": 179.3919950723648, + "p99": 206.68800175189972 + }, + "combine": { + "p50": 112.22399771213531, + "p90": 122.20799922943115, + "p95": 133.15199315547943, + "p99": 151.10400319099426 + }, + "roundtrip": { + "p50": 214.56000208854675, + "p90": 246.24000489711761, + "p95": 257.31199979782104, + "p99": 281.823992729187 + }, + "isolatedSum": { + "p50": 250.84799528121948, + "p90": 287.200003862381, + "p95": 312.54398822784424, + "p99": 357.792004942894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-06b95b37", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b200_f6d2df4b", + "comparisonKey": "d6748c0d445f2c00", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:57:02.270571+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + 
"topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 109.95200276374817, + "p90": 138.97599279880524, + "p95": 149.3760049343109, + "p99": 178.56000363826752 + }, + "combine": { + "p50": 87.39200234413147, + "p90": 93.47199648618698, + "p95": 99.2640033364296, + "p99": 110.17599701881409 + }, + "roundtrip": { + "p50": 185.5040043592453, + "p90": 220.09600698947906, + "p95": 225.95199942588806, + "p99": 262.84798979759216 + }, + "isolatedSum": { + "p50": 197.34400510787964, + "p90": 232.44798928499222, + "p95": 248.6400082707405, + "p99": 288.7360006570816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.2239990234375, + "p90": 155.2319973707199, + "p95": 166.24000668525696, + "p99": 186.62400543689728 + }, + "combine": { + "p50": 92.32000261545181, + "p90": 109.02400314807892, + "p95": 113.76000195741653, + "p99": 121.85599654912949 + }, + "roundtrip": { + "p50": 186.65599822998047, + "p90": 220.19200026988983, + "p95": 228.06400060653687, + "p99": 257.4079930782318 + }, + "isolatedSum": { + "p50": 212.5440016388893, + "p90": 264.2560005187988, + "p95": 280.0000086426735, + "p99": 308.48000198602676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 126.20800733566284, + "p90": 141.05600118637085, + "p95": 148.44800531864166, + "p99": 156.67200088500977 + }, + "combine": { + "p50": 91.5519967675209, + "p90": 103.16800326108932, + "p95": 109.92000252008438, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 189.02400135993958, + "p90": 211.84000372886658, + "p95": 220.22399306297302, + "p99": 228.70400547981262 + }, + "isolatedSum": { + "p50": 217.76000410318375, + "p90": 244.22400444746017, + "p95": 258.36800783872604, + "p99": 270.7519978284836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.97599828243256, + "p90": 139.64800536632538, + "p95": 145.56799829006195, + "p99": 154.55999970436096 + }, + "combine": { + "p50": 92.25600212812424, + "p90": 100.63999891281128, + "p95": 103.42399775981903, + "p99": 115.03999680280685 + }, + 
"roundtrip": { + "p50": 194.36800479888916, + "p90": 218.27200055122375, + "p95": 226.1119931936264, + "p99": 242.3039972782135 + }, + "isolatedSum": { + "p50": 219.2320004105568, + "p90": 240.28800427913666, + "p95": 248.99199604988098, + "p99": 269.5999965071678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 126.0479986667633, + "p90": 141.37600362300873, + "p95": 148.95999431610107, + "p99": 167.9680049419403 + }, + "combine": { + "p50": 92.70399808883667, + "p90": 101.82400047779083, + "p95": 104.86400127410889, + "p99": 118.94399672746658 + }, + "roundtrip": { + "p50": 195.8400011062622, + "p90": 217.3440009355545, + "p95": 224.5440036058426, + "p99": 234.81599986553192 + }, + "isolatedSum": { + "p50": 218.75199675559998, + "p90": 243.20000410079956, + "p95": 253.82399559020996, + "p99": 286.9120016694069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.84799665212631, + "p90": 143.93599331378937, + "p95": 149.1519957780838, + "p99": 166.55999422073364 + }, + "combine": { + "p50": 102.81600058078766, + "p90": 111.7120012640953, + "p95": 117.44000017642975, + "p99": 128.22400033473969 + }, + "roundtrip": { + "p50": 199.0399956703186, + "p90": 222.84799814224243, + "p95": 228.19200158119202, + "p99": 246.59200012683868 + }, + "isolatedSum": { + "p50": 225.66399723291397, + "p90": 255.64799457788467, + "p95": 266.59199595451355, + "p99": 294.7839945554733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + 
"combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 135.6160044670105, + "p90": 154.52800691127777, + "p95": 163.39200735092163, + "p99": 178.9119988679886 + }, + "combine": { + "p50": 117.3119992017746, + "p90": 128.92800569534302, + "p95": 134.0160071849823, + "p99": 138.17599415779114 + }, + "roundtrip": { + "p50": 224.83199834823608, + "p90": 243.23199689388275, + "p95": 253.05598974227905, + "p99": 275.4879891872406 + }, + "isolatedSum": { + "p50": 252.9280036687851, + "p90": 283.4560126066208, + "p95": 297.40801453590393, + "p99": 317.0879930257797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 164.48000073432922, + "p90": 188.12799453735352, + "p95": 197.11999595165253, + "p99": 223.7440049648285 + }, + "combine": { + "p50": 151.61600708961487, + "p90": 158.62399339675903, + "p95": 159.90400314331055, + "p99": 168.09600591659546 + }, + "roundtrip": { + "p50": 275.1680016517639, + "p90": 290.0480031967163, + "p95": 300.0960052013397, + "p99": 324.319988489151 + }, + "isolatedSum": { + "p50": 316.0960078239441, + "p90": 346.75198793411255, + "p95": 357.0239990949631, + "p99": 391.84001088142395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-816067e4", + "identity": 
"b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b200_0ae30f33", + "comparisonKey": "db6077c812a0cd60", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:07:20.549996+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": 
"2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 110.91200262308121, + "p90": 132.9919993877411, + "p95": 139.93600010871887, + "p99": 147.2640037536621 + }, + "combine": { + "p50": 89.34400230646133, + "p90": 95.87199985980988, + "p95": 101.47199779748917, + "p99": 115.68000167608261 + }, + "roundtrip": { + "p50": 183.52000415325165, + "p90": 208.73600244522095, + "p95": 213.44000101089478, + "p99": 226.1119931936264 + }, + "isolatedSum": { + "p50": 200.25600492954254, + "p90": 228.86399924755096, + "p95": 241.40799790620804, + "p99": 262.9440054297447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 118.68800222873688, + "p90": 135.23200154304504, + "p95": 141.7279988527298, + "p99": 151.16800367832184 + }, + "combine": { + "p50": 90.4960036277771, + "p90": 96.92800045013428, + "p95": 107.13600367307663, + "p99": 116.06399714946747 + }, + "roundtrip": { + "p50": 183.96799266338348, + "p90": 218.1439995765686, + "p95": 228.5120040178299, + "p99": 249.1839975118637 + }, + "isolatedSum": { + "p50": 209.18400585651398, + "p90": 232.16000199317932, + "p95": 248.86400252580643, + "p99": 267.2320008277893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 127.10399925708771, + "p90": 145.28000354766846, + "p95": 153.76000106334686, + "p99": 173.88799786567688 + }, + "combine": { + "p50": 91.10400080680847, + "p90": 98.11200201511383, + "p95": 
106.55999928712845, + "p99": 117.18399822711945 + }, + "roundtrip": { + "p50": 186.97600066661835, + "p90": 210.55999398231506, + "p95": 215.32799303531647, + "p99": 224.16000068187714 + }, + "isolatedSum": { + "p50": 218.20800006389618, + "p90": 243.3920055627823, + "p95": 260.3200003504753, + "p99": 291.0719960927963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.4720023870468, + "p90": 160.863995552063, + "p95": 181.88799917697906, + "p99": 203.45599949359894 + }, + "combine": { + "p50": 95.13600170612335, + "p90": 108.86400192975998, + "p95": 118.56000125408173, + "p99": 136.31999492645264 + }, + "roundtrip": { + "p50": 199.3280053138733, + "p90": 231.55200481414795, + "p95": 250.7840096950531, + "p99": 272.0000147819519 + }, + "isolatedSum": { + "p50": 224.60800409317017, + "p90": 269.72799748182297, + "p95": 300.4480004310608, + "p99": 339.7759944200516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.3919939994812, + "p90": 145.47200500965118, + "p95": 155.16799688339233, + "p99": 176.38400197029114 + }, + "combine": { + "p50": 92.25600212812424, + "p90": 101.40799731016159, + "p95": 105.69600015878677, + "p99": 119.77600306272507 + }, + "roundtrip": { + "p50": 197.1839964389801, + "p90": 227.84000635147095, + "p95": 234.3679964542389, + "p99": 267.8079903125763 + }, + "isolatedSum": { + "p50": 219.64799612760544, + "p90": 246.88000231981277, + "p95": 260.8639970421791, + "p99": 296.1600050330162 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.5920021533966, + "p90": 141.184002161026, + "p95": 148.99200201034546, + "p99": 158.720001578331 + }, + "combine": { + "p50": 103.39199751615524, + "p90": 115.52000045776367, + "p95": 120.4800009727478, + "p99": 129.08799946308136 + }, + "roundtrip": { + "p50": 199.52000677585602, + "p90": 224.41600263118744, + "p95": 231.90400004386902, + "p99": 243.03999543190002 + }, + "isolatedSum": { + "p50": 225.98399966955185, + "p90": 256.7040026187897, + "p95": 269.47200298309326, + "p99": 287.80800104141235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.48000359535217, + "p90": 160.92799603939056, + "p95": 170.78399658203125, + "p99": 185.59999763965607 + }, + "combine": { + "p50": 116.41599982976913, + "p90": 125.5359947681427, + "p95": 134.11200046539307, + "p99": 146.94400131702423 + }, + "roundtrip": { + "p50": 225.15200078487396, + "p90": 257.1839988231659, + "p95": 262.4639868736267, + "p99": 303.48798632621765 + }, + "isolatedSum": { + "p50": 252.8960034251213, + "p90": 286.46399080753326, + "p95": 304.8959970474243, + "p99": 332.5439989566803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 158.81599485874176, + "p90": 184.76800620555878, + "p95": 
190.08000195026398, + "p99": 207.8080028295517 + }, + "combine": { + "p50": 139.77600634098053, + "p90": 147.2640037536621, + "p95": 152.38399803638458, + "p99": 159.71200168132782 + }, + "roundtrip": { + "p50": 258.36798548698425, + "p90": 270.33600211143494, + "p95": 276.70401334762573, + "p99": 287.1679961681366 + }, + "isolatedSum": { + "p50": 298.5920011997223, + "p90": 332.0320099592209, + "p95": 342.46399998664856, + "p99": 367.5200045108795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bcc74b65", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b200_6d51ecbb", + "comparisonKey": "545fb184c1c531c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:58:12.798718+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.5440001487732, + "p90": 134.65599715709686, + "p95": 141.63200557231903, + "p99": 160.67199409008026 + }, + "combine": { + "p50": 87.07199990749359, + "p90": 92.96000003814697, + "p95": 98.24000298976898, + "p99": 106.75200074911118 + }, + "roundtrip": { + "p50": 186.27199530601501, + "p90": 211.39200031757355, + "p95": 217.72800385951996, + "p99": 249.34400618076324 + }, + "isolatedSum": { + "p50": 199.61600005626678, + "p90": 227.61599719524384, + "p95": 239.872008562088, + "p99": 267.42399483919144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 121.11999839544296, + "p90": 136.4160031080246, + "p95": 144.6399986743927, + "p99": 155.4879993200302 + }, + "combine": { + "p50": 90.7839983701706, + "p90": 97.69599884748459, + "p95": 103.84000092744827, + "p99": 117.98399686813354 + }, + "roundtrip": { + "p50": 188.06399405002594, + "p90": 211.10400557518005, + 
"p95": 216.89599752426147, + "p99": 229.40799593925476 + }, + "isolatedSum": { + "p50": 211.90399676561356, + "p90": 234.1120019555092, + "p95": 248.47999960184097, + "p99": 273.47199618816376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 126.68800354003906, + "p90": 142.59199798107147, + "p95": 149.1840034723282, + "p99": 159.8079949617386 + }, + "combine": { + "p50": 91.61599725484848, + "p90": 102.04800218343735, + "p95": 106.52799904346466, + "p99": 120.60800194740295 + }, + "roundtrip": { + "p50": 186.49600446224213, + "p90": 211.87199652194977, + "p95": 219.7120040655136, + "p99": 236.12800240516663 + }, + "isolatedSum": { + "p50": 218.30400079488754, + "p90": 244.64000016450882, + "p95": 255.71200251579285, + "p99": 280.41599690914154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.09599936008453, + "p90": 143.26399564743042, + "p95": 150.59199929237366, + "p99": 163.61600160598755 + }, + "combine": { + "p50": 92.6079973578453, + "p90": 104.70400005578995, + "p95": 108.19199681282043, + "p99": 117.85600334405899 + }, + "roundtrip": { + "p50": 194.75199282169342, + "p90": 217.75999665260315, + "p95": 226.97600722312927, + "p99": 323.35999608039856 + }, + "isolatedSum": { + "p50": 220.70399671792984, + "p90": 247.96799570322037, + "p95": 258.7839961051941, + "p99": 281.47200495004654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.20000743865967, + "p90": 144.22400295734406, + "p95": 151.13599598407745, + "p99": 161.50400042533875 + }, + "combine": { + "p50": 92.57599711418152, + "p90": 102.14400291442871, + "p95": 108.38399827480316, + "p99": 118.46400052309036 + }, + "roundtrip": { + "p50": 198.71999323368073, + "p90": 230.335995554924, + "p95": 237.72799968719482, + "p99": 265.02400636672974 + }, + "isolatedSum": { + "p50": 219.7760045528412, + "p90": 246.36800587177277, + "p95": 259.5199942588806, + "p99": 279.9680009484291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 123.61600250005722, + "p90": 141.34399592876434, + "p95": 148.25600385665894, + "p99": 156.3519984483719 + }, + "combine": { + "p50": 102.91200131177902, + "p90": 109.69600081443787, + "p95": 114.30399864912033, + "p99": 124.09599870443344 + }, + "roundtrip": { + "p50": 200.22399723529816, + "p90": 222.24000096321106, + "p95": 229.72799837589264, + "p99": 242.33600497245789 + }, + "isolatedSum": { + "p50": 226.52800381183624, + "p90": 251.0399967432022, + "p95": 262.56000250577927, + "p99": 280.4479971528053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.472003698349, + "p90": 156.19200468063354, + "p95": 161.43999993801117, + "p99": 173.95199835300446 + }, + "combine": { + "p50": 116.70400202274323, + "p90": 124.35200065374374, + "p95": 
131.00799918174744, + "p99": 141.95199310779572 + }, + "roundtrip": { + "p50": 221.79199755191803, + "p90": 250.94398856163025, + "p95": 258.7200105190277, + "p99": 285.5679988861084 + }, + "isolatedSum": { + "p50": 254.17600572109222, + "p90": 280.5440053343773, + "p95": 292.4479991197586, + "p99": 315.90399146080017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 151.99999511241913, + "p90": 184.89600718021393, + "p95": 193.15199553966522, + "p99": 248.48000705242157 + }, + "combine": { + "p50": 139.39200341701508, + "p90": 148.8640010356903, + "p95": 153.6320000886917, + "p99": 166.9120043516159 + }, + "roundtrip": { + "p50": 253.1520128250122, + "p90": 268.0000066757202, + "p95": 273.75999093055725, + "p99": 284.7999930381775 + }, + "isolatedSum": { + "p50": 291.3919985294342, + "p90": 333.76000821590424, + "p95": 346.78399562835693, + "p99": 415.3920114040375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5d881e47", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_aad6d4e6", + "comparisonKey": "6d0a4f557b97db6f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:54:42.104540+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 109.40799862146378, + "p90": 133.215993642807, + "p95": 159.32799875736237, + "p99": 179.1040003299713 + }, + "combine": { + "p50": 81.4720019698143, + "p90": 89.37600255012512, + "p95": 90.65599739551544, + "p99": 95.83999961614609 + }, + "roundtrip": { + "p50": 183.1360012292862, + "p90": 217.79200434684753, + "p95": 227.4239957332611, + "p99": 242.71999299526215 + }, + "isolatedSum": { + "p50": 190.88000059127808, + "p90": 222.59199619293213, + "p95": 
249.9839961528778, + "p99": 274.9439999461174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 110.84800213575363, + "p90": 125.11999905109406, + "p95": 129.98400628566742, + "p99": 138.91200721263885 + }, + "combine": { + "p50": 87.23200112581253, + "p90": 95.29600292444229, + "p95": 101.21600329875946, + "p99": 108.41599851846695 + }, + "roundtrip": { + "p50": 187.1359944343567, + "p90": 228.19200158119202, + "p95": 240.31999707221985, + "p99": 257.4720084667206 + }, + "isolatedSum": { + "p50": 198.08000326156616, + "p90": 220.41600197553635, + "p95": 231.20000958442688, + "p99": 247.3280057311058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 125.15200674533844, + "p90": 133.37600231170654, + "p95": 138.11199367046356, + "p99": 144.06399428844452 + }, + "combine": { + "p50": 90.94399958848953, + "p90": 102.88000106811523, + "p95": 112.96000331640244, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 184.2239946126938, + "p90": 197.37599790096283, + "p95": 200.3840059041977, + "p99": 206.84799551963806 + }, + "isolatedSum": { + "p50": 216.09600633382797, + "p90": 236.25600337982178, + "p95": 251.071996986866, + "p99": 261.4719942212105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
126.27199292182922, + "p90": 132.32000172138214, + "p95": 136.06399297714233, + "p99": 142.62400567531586 + }, + "combine": { + "p50": 91.32800251245499, + "p90": 100.0640019774437, + "p95": 102.75200009346008, + "p99": 109.47199910879135 + }, + "roundtrip": { + "p50": 183.9040070772171, + "p90": 197.34400510787964, + "p95": 200.6720006465912, + "p99": 207.8399956226349 + }, + "isolatedSum": { + "p50": 217.5999954342842, + "p90": 232.38400369882584, + "p95": 238.81599307060242, + "p99": 252.0960047841072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.10399925708771, + "p90": 139.29599523544312, + "p95": 143.26399564743042, + "p99": 160.3199988603592 + }, + "combine": { + "p50": 92.28800237178802, + "p90": 104.032002389431, + "p95": 105.95200210809708, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 190.40000438690186, + "p90": 201.79200172424316, + "p95": 205.9520035982132, + "p99": 216.0000056028366 + }, + "isolatedSum": { + "p50": 219.39200162887573, + "p90": 243.32799762487411, + "p95": 249.2159977555275, + "p99": 274.399995803833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 120.64000219106674, + "p90": 139.52000439167023, + "p95": 146.84799313545227, + "p99": 155.90399503707886 + }, + "combine": { + "p50": 99.90400075912476, + "p90": 106.52799904346466, + "p95": 112.5119999051094, + "p99": 117.91999638080597 + }, + "roundtrip": { + "p50": 198.81600141525269, + "p90": 208.8319957256317, + "p95": 213.59999477863312, + "p99": 
219.4879949092865 + }, + "isolatedSum": { + "p50": 220.5440029501915, + "p90": 246.0480034351349, + "p95": 259.3599930405617, + "p99": 273.8239914178848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.0160003900528, + "p90": 145.82400023937225, + "p95": 149.63200688362122, + "p99": 154.81600165367126 + }, + "combine": { + "p50": 115.167997777462, + "p90": 121.98399752378464, + "p95": 125.02400577068329, + "p99": 130.72000443935394 + }, + "roundtrip": { + "p50": 217.15199947357178, + "p90": 235.48799753189087, + "p95": 251.6160011291504, + "p99": 287.1040105819702 + }, + "isolatedSum": { + "p50": 253.1839981675148, + "p90": 267.8079977631569, + "p95": 274.6560126543045, + "p99": 285.5360060930252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 153.34400534629822, + "p90": 162.4000072479248, + "p95": 165.40800034999847, + "p99": 174.01599884033203 + }, + "combine": { + "p50": 144.06399428844452, + "p90": 152.54400670528412, + "p95": 155.13600409030914, + "p99": 158.24000537395477 + }, + "roundtrip": { + "p50": 267.7119970321655, + "p90": 276.0320007801056, + "p95": 280.70399165153503, + "p99": 287.48801350593567 + }, + "isolatedSum": { + "p50": 297.40799963474274, + "p90": 314.9440139532089, + "p95": 320.5440044403076, + "p99": 332.2560042142868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d11f5074", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b200_7832f204", + "comparisonKey": "7b2f078eced8a3ba", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:55:52.258629+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 110.59200018644333, + "p90": 145.60000598430634, + "p95": 157.151997089386, + "p99": 214.52799439430237 + }, + "combine": { + "p50": 81.91999793052673, + "p90": 94.84799951314926, + "p95": 101.34399682283401, + "p99": 108.60799998044968 + }, + "roundtrip": { + "p50": 179.83999848365784, + "p90": 203.39199900627136, + "p95": 211.35999262332916, + "p99": 223.39199483394623 + }, + "isolatedSum": { + "p50": 192.51199811697006, + "p90": 240.4480054974556, + "p95": 258.49599391222, + "p99": 323.13599437475204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 109.76000130176544, + "p90": 134.8160058259964, + "p95": 140.1280015707016, + "p99": 149.85600113868713 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 89.12000060081482, + "p95": 97.72799909114838, + "p99": 108.44799876213074 + }, + "roundtrip": { + "p50": 183.1360012292862, + "p90": 218.1439995765686, + "p95": 224.2559939622879, + "p99": 242.40000545978546 + }, + "isolatedSum": { + "p50": 190.8480003476143, + "p90": 223.93600642681122, + "p95": 237.85600066184998, + "p99": 258.30399990081787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 123.07199835777283, + "p90": 139.67999815940857, + "p95": 147.32800424098969, + "p99": 160.09600460529327 + }, + "combine": 
{ + "p50": 81.69600367546082, + "p90": 95.8079993724823, + "p95": 104.44799810647964, + "p99": 120.03199756145477 + }, + "roundtrip": { + "p50": 185.63200533390045, + "p90": 207.5520008802414, + "p95": 214.20800685882568, + "p99": 230.3999960422516 + }, + "isolatedSum": { + "p50": 204.76800203323364, + "p90": 235.48799753189087, + "p95": 251.77600234746933, + "p99": 280.12800216674805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.02400577068329, + "p90": 143.0400013923645, + "p95": 148.0640023946762, + "p99": 158.49600732326508 + }, + "combine": { + "p50": 81.69600367546082, + "p90": 94.01600062847137, + "p95": 99.10400211811066, + "p99": 108.44799876213074 + }, + "roundtrip": { + "p50": 184.4480037689209, + "p90": 209.72800254821777, + "p95": 218.01599860191345, + "p99": 228.89600694179535 + }, + "isolatedSum": { + "p50": 206.7200094461441, + "p90": 237.05600202083588, + "p95": 247.16800451278687, + "p99": 266.9440060853958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 125.98399817943573, + "p90": 144.48000490665436, + "p95": 149.72800016403198, + "p99": 158.65600109100342 + }, + "combine": { + "p50": 89.59999680519104, + "p90": 101.75999999046326, + "p95": 109.18399691581726, + "p99": 121.95199728012085 + }, + "roundtrip": { + "p50": 185.56800484657288, + "p90": 221.66399657726288, + "p95": 227.07200050354004, + "p99": 267.0400142669678 + }, + "isolatedSum": { + "p50": 215.58399498462677, + "p90": 246.24000489711761, + "p95": 258.91199707984924, + 
"p99": 280.60799837112427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 125.88800489902496, + "p90": 148.99200201034546, + "p95": 153.60000729560852, + "p99": 178.97599935531616 + }, + "combine": { + "p50": 91.839998960495, + "p90": 100.19200295209885, + "p95": 106.88000172376633, + "p99": 115.52000045776367 + }, + "roundtrip": { + "p50": 190.94400107860565, + "p90": 217.56799519062042, + "p95": 224.57599639892578, + "p99": 234.1119945049286 + }, + "isolatedSum": { + "p50": 217.72800385951996, + "p90": 249.1840049624443, + "p95": 260.48000901937485, + "p99": 294.49599981307983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 132.38400220870972, + "p90": 149.1840034723282, + "p95": 153.9199948310852, + "p99": 161.05599701404572 + }, + "combine": { + "p50": 110.88000237941742, + "p90": 118.78400295972824, + "p95": 121.85599654912949, + "p99": 132.76800513267517 + }, + "roundtrip": { + "p50": 209.34399962425232, + "p90": 233.0559939146042, + "p95": 240.28800427913666, + "p99": 252.31999158859253 + }, + "isolatedSum": { + "p50": 243.26400458812714, + "p90": 267.9680064320564, + "p95": 275.7759913802147, + "p99": 293.8240021467209 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
157.53600001335144, + "p90": 183.55199694633484, + "p95": 197.9839950799942, + "p99": 233.60000550746918 + }, + "combine": { + "p50": 142.56000518798828, + "p90": 159.5200002193451, + "p95": 164.32000696659088, + "p99": 181.536003947258 + }, + "roundtrip": { + "p50": 261.0880136489868, + "p90": 283.29598903656006, + "p95": 291.1039888858795, + "p99": 323.7760066986084 + }, + "isolatedSum": { + "p50": 300.0960052013397, + "p90": 343.07199716567993, + "p95": 362.3040020465851, + "p99": 415.1360094547272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ea66942c", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b200_81086a2a", + "comparisonKey": "35299def21a2b371", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:06:11.907564+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + 
"configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 110.78400164842606, + "p90": 139.93600010871887, + "p95": 147.45600521564484, + "p99": 166.84800386428833 + }, + "combine": { + "p50": 82.04799890518188, + "p90": 94.78399902582169, + "p95": 101.56799852848053, + "p99": 114.17599767446518 + }, + "roundtrip": { + "p50": 184.54399704933167, + "p90": 210.68799495697021, + "p95": 218.07999908924103, + "p99": 228.7359982728958 + }, + "isolatedSum": { + "p50": 192.83200055360794, + "p90": 234.71999913454056, + "p95": 249.02400374412537, + "p99": 281.0240015387535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.59200084209442, + "p90": 136.1600011587143, + "p95": 142.07999408245087, + "p99": 147.8080004453659 + }, + "combine": { + "p50": 90.36800265312195, + "p90": 97.28000313043594, + "p95": 109.24799740314484, + "p99": 120.60800194740295 + }, + 
"roundtrip": { + "p50": 187.00799345970154, + "p90": 228.99200022220612, + "p95": 235.74399948120117, + "p99": 269.76001262664795 + }, + "isolatedSum": { + "p50": 204.96000349521637, + "p90": 233.44000428915024, + "p95": 251.3279914855957, + "p99": 268.41600239276886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 125.82400441169739, + "p90": 142.11200177669525, + "p95": 150.81599354743958, + "p99": 157.8879952430725 + }, + "combine": { + "p50": 91.10400080680847, + "p90": 99.04000163078308, + "p95": 109.66400057077408, + "p99": 121.44000083208084 + }, + "roundtrip": { + "p50": 190.5599981546402, + "p90": 217.3440009355545, + "p95": 224.2559939622879, + "p99": 236.1920028924942 + }, + "isolatedSum": { + "p50": 216.92800521850586, + "p90": 241.15200340747833, + "p95": 260.47999411821365, + "p99": 279.32799607515335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.75200402736664, + "p90": 145.08800208568573, + "p95": 154.27200496196747, + "p99": 168.06399822235107 + }, + "combine": { + "p50": 92.16000139713287, + "p90": 102.30399668216705, + "p95": 109.95200276374817, + "p99": 121.44000083208084 + }, + "roundtrip": { + "p50": 195.96800208091736, + "p90": 221.47199511528015, + "p95": 230.24000227451324, + "p99": 240.35200476646423 + }, + "isolatedSum": { + "p50": 218.9120054244995, + "p90": 247.39199876785278, + "p95": 264.22400772571564, + "p99": 289.5039990544319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 
4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 126.65599584579468, + "p90": 144.67200636863708, + "p95": 153.53600680828094, + "p99": 165.79200327396393 + }, + "combine": { + "p50": 93.28000247478485, + "p90": 108.38399827480316, + "p95": 112.89600282907486, + "p99": 122.30399996042252 + }, + "roundtrip": { + "p50": 197.4399983882904, + "p90": 225.50399601459503, + "p95": 233.63199830055237, + "p99": 253.12000513076782 + }, + "isolatedSum": { + "p50": 219.93599832057953, + "p90": 253.05600464344025, + "p95": 266.4320096373558, + "p99": 288.09600323438644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 121.44000083208084, + "p90": 142.5279974937439, + "p95": 150.94399452209473, + "p99": 160.60799360275269 + }, + "combine": { + "p50": 102.04800218343735, + "p90": 108.06400328874588, + "p95": 120.09599804878235, + "p99": 124.76799637079239 + }, + "roundtrip": { + "p50": 198.5280066728592, + "p90": 227.26400196552277, + "p95": 233.37599635124207, + "p99": 241.02400243282318 + }, + "isolatedSum": { + "p50": 223.4880030155182, + "p90": 250.59200078248978, + "p95": 271.0399925708771, + "p99": 285.3759899735451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 135.42400300502777, + "p90": 156.22399747371674, + "p95": 162.4000072479248, + "p99": 170.59199512004852 + }, + "combine": { + 
"p50": 115.9679964184761, + "p90": 125.11999905109406, + "p95": 133.5040032863617, + "p99": 142.14399456977844 + }, + "roundtrip": { + "p50": 220.22399306297302, + "p90": 245.60000002384186, + "p95": 250.94398856163025, + "p99": 268.2879865169525 + }, + "isolatedSum": { + "p50": 251.39199942350388, + "p90": 281.3439965248108, + "p95": 295.9040105342865, + "p99": 312.73598968982697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 150.33599734306335, + "p90": 179.6800047159195, + "p95": 185.248002409935, + "p99": 201.12000405788422 + }, + "combine": { + "p50": 138.8159990310669, + "p90": 146.7519998550415, + "p95": 153.24799716472626, + "p99": 161.5999937057495 + }, + "roundtrip": { + "p50": 256.00001215934753, + "p90": 280.92798590660095, + "p95": 297.2480058670044, + "p99": 322.4639892578125 + }, + "isolatedSum": { + "p50": 289.15199637413025, + "p90": 326.432004570961, + "p95": 338.49599957466125, + "p99": 362.7199977636337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d5db2edf", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b200_d9a24cef", + "comparisonKey": "bbbf60d54620d685", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:01:33.993692+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.32799899578094, + "p90": 145.91999351978302, + "p95": 151.87199413776398, + "p99": 160.96000373363495 + }, + "combine": { + "p50": 87.87199854850769, + "p90": 93.66399794816971, + "p95": 96.70399874448776, + "p99": 102.88000106811523 + }, + "roundtrip": { + "p50": 186.24000251293182, + "p90": 196.73599302768707, + "p95": 200.22399723529816, + "p99": 206.40000700950623 + }, + "isolatedSum": 
{ + "p50": 203.19999754428864, + "p90": 239.58399146795273, + "p95": 248.57599288225174, + "p99": 263.8400048017502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.63200163841248, + "p90": 125.44000148773193, + "p95": 132.60799646377563, + "p99": 143.36000382900238 + }, + "combine": { + "p50": 90.7519981265068, + "p90": 95.61599791049957, + "p95": 99.96800124645233, + "p99": 112.0000034570694 + }, + "roundtrip": { + "p50": 185.05600094795227, + "p90": 203.77600193023682, + "p95": 224.99200701713562, + "p99": 245.95199525356293 + }, + "isolatedSum": { + "p50": 208.38399976491928, + "p90": 221.0559993982315, + "p95": 232.57599771022797, + "p99": 255.36000728607178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 126.0479986667633, + "p90": 139.3599957227707, + "p95": 145.53600549697876, + "p99": 156.67200088500977 + }, + "combine": { + "p50": 91.77599847316742, + "p90": 102.14400291442871, + "p95": 104.44799810647964, + "p99": 113.63200098276138 + }, + "roundtrip": { + "p50": 189.11999464035034, + "p90": 227.2000014781952, + "p95": 233.95200073719025, + "p99": 257.3759853839874 + }, + "isolatedSum": { + "p50": 217.82399713993073, + "p90": 241.5039986371994, + "p95": 249.9840036034584, + "p99": 270.30400186777115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.85599720478058, + "p90": 134.62400436401367, + "p95": 139.16799426078796, + "p99": 147.2959965467453 + }, + "combine": { + "p50": 92.06400066614151, + "p90": 98.88000041246414, + "p95": 101.50399804115295, + "p99": 105.18400371074677 + }, + "roundtrip": { + "p50": 194.2719966173172, + "p90": 203.45599949359894, + "p95": 208.22399854660034, + "p99": 226.23999416828156 + }, + "isolatedSum": { + "p50": 217.9199978709221, + "p90": 233.5040047764778, + "p95": 240.67199230194092, + "p99": 252.48000025749207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.07200646400452, + "p90": 142.33599603176117, + "p95": 146.2399959564209, + "p99": 161.43999993801117 + }, + "combine": { + "p50": 93.98400038480759, + "p90": 102.88000106811523, + "p95": 105.6319996714592, + "p99": 110.75200140476227 + }, + "roundtrip": { + "p50": 198.46400618553162, + "p90": 212.351992726326, + "p95": 220.38400173187256, + "p99": 236.09599471092224 + }, + "isolatedSum": { + "p50": 221.0560068488121, + "p90": 245.2159970998764, + "p95": 251.8719956278801, + "p99": 272.19200134277344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.20799922943115, + "p90": 129.95199859142303, + "p95": 135.3600025177002, + "p99": 154.6880006790161 + }, + "combine": { + "p50": 101.69599950313568, + "p90": 105.34399747848511, + "p95": 107.87200182676315, + "p99": 116.64000153541565 + }, + "roundtrip": { + "p50": 197.05599546432495, + 
"p90": 210.36800742149353, + "p95": 212.99199759960175, + "p99": 218.1439995765686 + }, + "isolatedSum": { + "p50": 223.90399873256683, + "p90": 235.29599606990814, + "p95": 243.23200434446335, + "p99": 271.32800221443176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 135.16800105571747, + "p90": 144.9279934167862, + "p95": 148.51200580596924, + "p99": 152.38399803638458 + }, + "combine": { + "p50": 116.41599982976913, + "p90": 122.56000190973282, + "p95": 124.57600235939026, + "p99": 131.9359987974167 + }, + "roundtrip": { + "p50": 222.1119999885559, + "p90": 234.46400463581085, + "p95": 251.45599246025085, + "p99": 266.9439911842346 + }, + "isolatedSum": { + "p50": 251.5840008854866, + "p90": 267.487995326519, + "p95": 273.0880081653595, + "p99": 284.31999683380127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 159.07199680805206, + "p90": 170.49600183963776, + "p95": 174.81599748134613, + "p99": 189.5039975643158 + }, + "combine": { + "p50": 149.56800639629364, + "p90": 154.84799444675446, + "p95": 157.69599378108978, + "p99": 164.60800170898438 + }, + "roundtrip": { + "p50": 274.399995803833, + "p90": 286.3680124282837, + "p95": 294.75200176239014, + "p99": 354.3039858341217 + }, + "isolatedSum": { + "p50": 308.6400032043457, + "p90": 325.3439962863922, + "p95": 332.5119912624359, + "p99": 354.11199927330017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 
4.779296875, + "recvTokensMax": 987, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c2f7d405", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b200_cc9dc50f", + "comparisonKey": "810846223e7221ef", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:02:04.547785+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.29599940776825, + "p90": 137.5039964914322, + "p95": 144.19199526309967, + "p99": 163.03999722003937 + }, + "combine": { + "p50": 89.79199826717377, + "p90": 94.27200257778168, + "p95": 98.24000298976898, + "p99": 107.4879989027977 + }, + "roundtrip": { + "p50": 187.32799589633942, + "p90": 207.61600136756897, + "p95": 217.50399470329285, + "p99": 239.9359941482544 + }, + "isolatedSum": { + "p50": 209.08799767494202, + "p90": 231.77599906921387, + "p95": 242.43199825286865, + "p99": 270.52799612283707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.54400146007538, + "p90": 139.0720009803772, + "p95": 148.44800531864166, + "p99": 204.22400534152985 + }, + "combine": { + "p50": 90.43200314044952, + "p90": 95.23200243711472, + "p95": 97.37599641084671, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 185.69600582122803, + "p90": 220.12799978256226, + "p95": 229.47199642658234, + "p99": 243.6479926109314 + }, + "isolatedSum": { + "p50": 210.9760046005249, + "p90": 234.3040034174919, + "p95": 245.82400172948837, + "p99": 312.8640055656433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 124.67200309038162, + "p90": 
130.3360015153885, + "p95": 133.66399705410004, + "p99": 142.56000518798828 + }, + "combine": { + "p50": 92.22400188446045, + "p90": 104.60799932479858, + "p95": 117.69600212574005, + "p99": 133.56800377368927 + }, + "roundtrip": { + "p50": 190.2720034122467, + "p90": 221.15199267864227, + "p95": 229.98400032520294, + "p99": 247.80799448490143 + }, + "isolatedSum": { + "p50": 216.89600497484207, + "p90": 234.94400084018707, + "p95": 251.3599991798401, + "p99": 276.12800896167755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.52799487113953, + "p90": 134.97599959373474, + "p95": 140.1599943637848, + "p99": 147.48799800872803 + }, + "combine": { + "p50": 96.67199850082397, + "p90": 103.5199984908104, + "p95": 105.59999942779541, + "p99": 115.74400216341019 + }, + "roundtrip": { + "p50": 198.0160027742386, + "p90": 217.79200434684753, + "p95": 223.80800545215607, + "p99": 243.8720017671585 + }, + "isolatedSum": { + "p50": 223.1999933719635, + "p90": 238.49599808454514, + "p95": 245.7599937915802, + "p99": 263.2320001721382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.58399546146393, + "p90": 141.34399592876434, + "p95": 151.16800367832184, + "p99": 176.32000148296356 + }, + "combine": { + "p50": 92.73599833250046, + "p90": 101.24800354242325, + "p95": 102.88000106811523, + "p99": 108.99200290441513 + }, + "roundtrip": { + "p50": 200.47999918460846, + "p90": 215.96799790859222, + "p95": 221.0880070924759, + "p99": 246.94399535655975 + }, + 
"isolatedSum": { + "p50": 220.3199937939644, + "p90": 242.5919994711876, + "p95": 254.04800474643707, + "p99": 285.3120043873787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.52800166606903, + "p90": 131.04000687599182, + "p95": 134.5919966697693, + "p99": 143.93599331378937 + }, + "combine": { + "p50": 103.16800326108932, + "p90": 113.6000007390976, + "p95": 116.03199690580368, + "p99": 123.87199699878693 + }, + "roundtrip": { + "p50": 197.34400510787964, + "p90": 211.19999885559082, + "p95": 213.79199624061584, + "p99": 220.15999257564545 + }, + "isolatedSum": { + "p50": 225.69600492715836, + "p90": 244.64000761508942, + "p95": 250.62399357557297, + "p99": 267.8079903125763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 134.75200533866882, + "p90": 147.87200093269348, + "p95": 152.25599706172943, + "p99": 162.11199760437012 + }, + "combine": { + "p50": 116.38399958610535, + "p90": 122.72000312805176, + "p95": 125.2799928188324, + "p99": 131.071999669075 + }, + "roundtrip": { + "p50": 224.83199834823608, + "p90": 234.6239984035492, + "p95": 239.99999463558197, + "p99": 262.7519965171814 + }, + "isolatedSum": { + "p50": 251.13600492477417, + "p90": 270.59200406074524, + "p95": 277.53598988056183, + "p99": 293.18399727344513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 147.90399372577667, + "p90": 158.30400586128235, + "p95": 165.72800278663635, + "p99": 178.01600694656372 + }, + "combine": { + "p50": 138.047993183136, + "p90": 144.03200149536133, + "p95": 147.39200472831726, + "p99": 153.76000106334686 + }, + "roundtrip": { + "p50": 249.66399371623993, + "p90": 260.09601354599, + "p95": 263.35999369621277, + "p99": 273.3440101146698 + }, + "isolatedSum": { + "p50": 285.95198690891266, + "p90": 302.3360073566437, + "p95": 313.1200075149536, + "p99": 331.7760080099106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4db176c7", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b200_dcc1ac6c", + "comparisonKey": "058df6601596fd4e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:03:52.829538+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 109.76000130176544, + "p90": 138.33600282669067, + "p95": 143.99999380111694, + "p99": 155.74400126934052 + }, + "combine": { + "p50": 79.96799796819687, + "p90": 92.57599711418152, + "p95": 97.79199957847595, + "p99": 108.47999900579453 + }, + "roundtrip": { + "p50": 182.8799992799759, + "p90": 212.16000616550446, + "p95": 219.9680060148239, + "p99": 261.1519992351532 + }, + "isolatedSum": { + "p50": 189.7279992699623, + "p90": 230.9119999408722, + "p95": 241.7919933795929, + "p99": 264.22400027513504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.14399743080139, + "p90": 137.2479945421219, + "p95": 143.23200285434723, + "p99": 155.008003115654 + }, + "combine": { + "p50": 88.32000195980072, + "p90": 95.64799815416336, + 
"p95": 103.20000350475311, + "p99": 112.89600282907486 + }, + "roundtrip": { + "p50": 187.80800700187683, + "p90": 224.2240011692047, + "p95": 231.00799322128296, + "p99": 291.1680042743683 + }, + "isolatedSum": { + "p50": 202.4639993906021, + "p90": 232.89599269628525, + "p95": 246.43200635910034, + "p99": 267.90400594472885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 127.16799974441528, + "p90": 145.4080045223236, + "p95": 154.52800691127777, + "p99": 177.15199291706085 + }, + "combine": { + "p50": 91.26400202512741, + "p90": 102.9760017991066, + "p95": 110.97600311040878, + "p99": 121.05599790811539 + }, + "roundtrip": { + "p50": 185.18400192260742, + "p90": 218.81599724292755, + "p95": 224.60800409317017, + "p99": 257.1200132369995 + }, + "isolatedSum": { + "p50": 218.4320017695427, + "p90": 248.3840063214302, + "p95": 265.50401002168655, + "p99": 298.20799082517624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.49600207805634, + "p90": 147.5200057029724, + "p95": 156.2879979610443, + "p99": 176.57600343227386 + }, + "combine": { + "p50": 91.80799871683121, + "p90": 106.46399855613708, + "p95": 111.7120012640953, + "p99": 121.79200351238251 + }, + "roundtrip": { + "p50": 196.51199877262115, + "p90": 228.7680059671402, + "p95": 242.78399348258972, + "p99": 274.01599287986755 + }, + "isolatedSum": { + "p50": 218.30400079488754, + "p90": 253.9840042591095, + "p95": 267.9999992251396, + "p99": 298.3680069446564 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 125.72799623012543, + "p90": 143.61600577831268, + "p95": 147.5840061903, + "p99": 159.55199301242828 + }, + "combine": { + "p50": 92.54399687051773, + "p90": 102.4319976568222, + "p95": 108.96000266075134, + "p99": 124.15999919176102 + }, + "roundtrip": { + "p50": 193.40799748897552, + "p90": 219.84000504016876, + "p95": 224.89599883556366, + "p99": 237.2480034828186 + }, + "isolatedSum": { + "p50": 218.27199310064316, + "p90": 246.0480034351349, + "p95": 256.54400885105133, + "p99": 283.7119922041893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 121.05599790811539, + "p90": 143.5520052909851, + "p95": 149.21599626541138, + "p99": 158.91200304031372 + }, + "combine": { + "p50": 94.11200135946274, + "p90": 105.66399991512299, + "p95": 110.91200262308121, + "p99": 122.72000312805176 + }, + "roundtrip": { + "p50": 199.45600628852844, + "p90": 230.75200617313385, + "p95": 236.32000386714935, + "p99": 258.4959864616394 + }, + "isolatedSum": { + "p50": 215.16799926757812, + "p90": 249.2160052061081, + "p95": 260.1279988884926, + "p99": 281.6320061683655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 139.74399864673615, + "p90": 156.5759927034378, + "p95": 160.64000129699707, + 
"p99": 174.27200078964233 + }, + "combine": { + "p50": 115.48800021409988, + "p90": 123.32800030708313, + "p95": 132.7359974384308, + "p99": 144.31999623775482 + }, + "roundtrip": { + "p50": 216.44799411296844, + "p90": 238.75199258327484, + "p95": 245.37600576877594, + "p99": 256.8640112876892 + }, + "isolatedSum": { + "p50": 255.23199886083603, + "p90": 279.90399301052094, + "p95": 293.37599873542786, + "p99": 318.59199702739716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 155.10399639606476, + "p90": 178.30400168895721, + "p95": 183.16799402236938, + "p99": 192.1599954366684 + }, + "combine": { + "p50": 145.60000598430634, + "p90": 159.87199544906616, + "p95": 169.08800601959229, + "p99": 177.824005484581 + }, + "roundtrip": { + "p50": 269.3440020084381, + "p90": 284.31999683380127, + "p95": 290.0480031967163, + "p99": 299.00801181793213 + }, + "isolatedSum": { + "p50": 300.7040023803711, + "p90": 338.1759971380234, + "p95": 352.25600004196167, + "p99": 369.9840009212494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f08062a5", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_d68ea102", + "comparisonKey": "cbf2b752d6216616", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:04:23.073159+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": 
"normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 111.10399663448334, + "p90": 133.15199315547943, + "p95": 138.7840062379837, + "p99": 150.81599354743958 + }, + "combine": { + "p50": 82.36800134181976, + "p90": 92.6079973578453, + "p95": 97.21600264310837, + "p99": 107.26399719715118 + }, + "roundtrip": { + "p50": 188.960000872612, + "p90": 
216.99200570583344, + "p95": 222.97599911689758, + "p99": 240.60800671577454 + }, + "isolatedSum": { + "p50": 193.4719979763031, + "p90": 225.75999051332474, + "p95": 236.00000888109207, + "p99": 258.07999074459076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.99199742078781, + "p90": 149.53599870204926, + "p95": 163.42400014400482, + "p99": 201.53599977493286 + }, + "combine": { + "p50": 91.23200178146362, + "p90": 99.23200309276581, + "p95": 109.92000252008438, + "p99": 121.21599912643433 + }, + "roundtrip": { + "p50": 185.95199286937714, + "p90": 214.04799818992615, + "p95": 224.2559939622879, + "p99": 238.11200261116028 + }, + "isolatedSum": { + "p50": 212.22399920225143, + "p90": 248.76800179481506, + "p95": 273.3440026640892, + "p99": 322.7519989013672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 127.36000120639801, + "p90": 139.0399932861328, + "p95": 147.93600142002106, + "p99": 157.47199952602386 + }, + "combine": { + "p50": 91.64799749851227, + "p90": 101.6319990158081, + "p95": 110.30399799346924, + "p99": 115.68000167608261 + }, + "roundtrip": { + "p50": 189.34400379657745, + "p90": 217.28000044822693, + "p95": 225.5679965019226, + "p99": 244.22399699687958 + }, + "isolatedSum": { + "p50": 219.00799870491028, + "p90": 240.67199230194092, + "p95": 258.2399994134903, + "p99": 273.1520012021065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.2800009250641, + "p90": 146.08000218868256, + "p95": 151.2639969587326, + "p99": 159.4880074262619 + }, + "combine": { + "p50": 92.96000003814697, + "p90": 102.39999741315842, + "p95": 108.89600217342377, + "p99": 119.9679970741272 + }, + "roundtrip": { + "p50": 195.77600061893463, + "p90": 220.12799978256226, + "p95": 225.8239984512329, + "p99": 237.66399919986725 + }, + "isolatedSum": { + "p50": 222.24000096321106, + "p90": 248.47999960184097, + "p95": 260.1599991321564, + "p99": 279.4560045003891 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 128.76799702644348, + "p90": 145.6640064716339, + "p95": 153.28000485897064, + "p99": 168.2559996843338 + }, + "combine": { + "p50": 94.91200000047684, + "p90": 104.76800054311752, + "p95": 110.43199896812439, + "p99": 118.27199906110764 + }, + "roundtrip": { + "p50": 200.44800639152527, + "p90": 222.97599911689758, + "p95": 228.38400304317474, + "p99": 235.77600717544556 + }, + "isolatedSum": { + "p50": 223.67999702692032, + "p90": 250.43200701475143, + "p95": 263.71200382709503, + "p99": 286.52799874544144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.75200337171555, + "p90": 140.35199582576752, + "p95": 148.67199957370758, + "p99": 157.8879952430725 + }, + "combine": { + "p50": 103.10400277376175, + "p90": 115.74400216341019, + "p95": 
120.67200243473053, + "p99": 138.5599970817566 + }, + "roundtrip": { + "p50": 201.02399587631226, + "p90": 225.24799406528473, + "p95": 234.01600122451782, + "p99": 250.71999430656433 + }, + "isolatedSum": { + "p50": 225.8560061454773, + "p90": 256.0959979891777, + "p95": 269.3440020084381, + "p99": 296.4479923248291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.5039964914322, + "p90": 163.455992937088, + "p95": 170.43200135231018, + "p99": 199.13600385189056 + }, + "combine": { + "p50": 117.53600090742111, + "p90": 127.83999741077423, + "p95": 135.16800105571747, + "p99": 144.1279947757721 + }, + "roundtrip": { + "p50": 219.4879949092865, + "p90": 244.9280023574829, + "p95": 252.16001272201538, + "p99": 260.96001267433167 + }, + "isolatedSum": { + "p50": 255.0399973988533, + "p90": 291.29599034786224, + "p95": 305.60000240802765, + "p99": 343.26399862766266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 151.74399316310883, + "p90": 168.19199919700623, + "p95": 175.32800137996674, + "p99": 187.00799345970154 + }, + "combine": { + "p50": 140.76800644397736, + "p90": 153.28000485897064, + "p95": 158.49600732326508, + "p99": 170.84799706935883 + }, + "roundtrip": { + "p50": 257.82400369644165, + "p90": 271.13598585128784, + "p95": 278.11199426651, + "p99": 289.4720137119293 + }, + "isolatedSum": { + "p50": 292.5119996070862, + "p90": 321.47200405597687, + "p95": 333.8240087032318, + "p99": 357.85599052906036 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2fcb4b0f", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b200_9a321028", + "comparisonKey": "04f19a82a93a5370", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:00:25.331240+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.44799941778183, + "p90": 123.03999811410904, + "p95": 125.63200294971466, + "p99": 137.15200126171112 + }, + "combine": { + "p50": 89.02399986982346, + "p90": 93.18400174379349, + "p95": 96.76799923181534, + "p99": 106.04800283908844 + }, + "roundtrip": { + "p50": 188.9919936656952, + "p90": 201.4400064945221, + "p95": 205.28000593185425, + "p99": 218.52800250053406 + }, + "isolatedSum": { + "p50": 201.47199928760529, + "p90": 216.22399985790253, + "p95": 222.40000218153, + "p99": 243.20000410079956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 122.65600264072418, + "p90": 145.11999487876892, + "p95": 153.53600680828094, + "p99": 201.664000749588 + }, + "combine": { + "p50": 91.61599725484848, + "p90": 96.28800302743912, + "p95": 99.45599734783173, + "p99": 105.59999942779541 + }, + "roundtrip": { + "p50": 186.97600066661835, + "p90": 205.79199492931366, + "p95": 228.7359982728958, + "p99": 245.92000246047974 + }, + "isolatedSum": { + "p50": 214.27199989557266, + "p90": 241.40799790620804, + "p95": 252.99200415611267, + "p99": 307.2640001773834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 126.17599964141846, + "p90": 139.77600634098053, + "p95": 144.19199526309967, + "p99": 155.29599785804749 + }, + "combine": { + "p50": 92.51199662685394, + "p90": 97.4079966545105, + "p95": 98.91200065612793, + "p99": 105.34399747848511 + }, + "roundtrip": { + "p50": 191.103994846344, + "p90": 203.45599949359894, + "p95": 208.28799903392792, + "p99": 216.12800657749176 + }, + "isolatedSum": { + "p50": 218.6879962682724, + "p90": 237.18400299549103, + "p95": 243.1039959192276, + "p99": 260.6399953365326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.23200023174286, + "p90": 134.11200046539307, + "p95": 137.60000467300415, + "p99": 148.92800152301788 + }, + "combine": { + "p50": 93.50399672985077, + "p90": 100.47999769449234, + "p95": 102.78400033712387, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 198.2720047235489, + "p90": 211.45600080490112, + "p95": 215.5199944972992, + "p99": 222.20799326896667 + }, + "isolatedSum": { + "p50": 220.73599696159363, + "p90": 234.5919981598854, + "p95": 240.38400501012802, + "p99": 258.9760050177574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 128.4160017967224, + "p90": 140.99200069904327, + "p95": 145.31199634075165, + "p99": 159.90400314331055 + }, + "combine": { + "p50": 99.71199929714203, + "p90": 103.61599922180176, + "p95": 105.66399991512299, + "p99": 113.79200220108032 + }, + "roundtrip": { + "p50": 210.04800498485565, + "p90": 
233.72800648212433, + "p95": 238.65599930286407, + "p99": 254.33599948883057 + }, + "isolatedSum": { + "p50": 228.12800109386444, + "p90": 244.60799992084503, + "p95": 250.97599625587463, + "p99": 273.69600534439087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 123.45600128173828, + "p90": 133.69600474834442, + "p95": 136.9599997997284, + "p99": 141.82400703430176 + }, + "combine": { + "p50": 102.81600058078766, + "p90": 107.68000036478043, + "p95": 112.06399649381638, + "p99": 119.9679970741272 + }, + "roundtrip": { + "p50": 200.6080001592636, + "p90": 214.49600160121918, + "p95": 219.80799734592438, + "p99": 240.31999707221985 + }, + "isolatedSum": { + "p50": 226.27200186252594, + "p90": 241.37600511312485, + "p95": 249.02399629354477, + "p99": 261.79200410842896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.9839926958084, + "p90": 147.67999947071075, + "p95": 151.74399316310883, + "p99": 157.98400342464447 + }, + "combine": { + "p50": 117.60000139474869, + "p90": 124.28800016641617, + "p95": 127.03999876976013, + "p99": 130.40000200271606 + }, + "roundtrip": { + "p50": 221.5999960899353, + "p90": 234.52800512313843, + "p95": 249.5039999485016, + "p99": 271.13598585128784 + }, + "isolatedSum": { + "p50": 255.5839940905571, + "p90": 271.9679996371269, + "p95": 278.78399193286896, + "p99": 288.38400542736053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 
5.197265625, + "recvTokensMax": 350, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 153.50399911403656, + "p90": 161.18399798870087, + "p95": 164.35199975967407, + "p99": 171.6800034046173 + }, + "combine": { + "p50": 139.77600634098053, + "p90": 144.54400539398193, + "p95": 146.62399888038635, + "p99": 155.16799688339233 + }, + "roundtrip": { + "p50": 258.39999318122864, + "p90": 267.8079903125763, + "p95": 271.87201380729675, + "p99": 278.2079875469208 + }, + "isolatedSum": { + "p50": 293.2800054550171, + "p90": 305.7280033826828, + "p95": 310.9759986400604, + "p99": 326.84800028800964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a5167155", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_aefbb2bb", + "comparisonKey": "f953ded8b200d4b7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:37:21.264134+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": 
"n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.79199957847595, + "p90": 104.032002389431, + "p95": 107.42399841547012, + "p99": 117.18399822711945 + }, + "combine": { + "p50": 89.1840010881424, + "p90": 92.8959995508194, + "p95": 95.74399888515472, + "p99": 100.99200159311295 + }, + "roundtrip": { + "p50": 166.30400717258453, + "p90": 191.96799397468567, + "p95": 198.40000569820404, + "p99": 213.56800198554993 + }, + "isolatedSum": { + "p50": 186.97600066661835, + "p90": 196.9280019402504, + "p95": 203.16799730062485, + "p99": 218.1759998202324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 103.29599678516388, + "p90": 109.66400057077408, + "p95": 112.8000020980835, + "p99": 120.4800009727478 + }, + 
"combine": { + "p50": 91.23200178146362, + "p90": 96.38399630784988, + "p95": 99.20000284910202, + "p99": 106.52799904346466 + }, + "roundtrip": { + "p50": 162.62400150299072, + "p90": 174.6560037136078, + "p95": 177.91999876499176, + "p99": 186.65599822998047 + }, + "isolatedSum": { + "p50": 194.5279985666275, + "p90": 206.04799687862396, + "p95": 212.00000494718552, + "p99": 227.00800001621246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 105.02400249242783, + "p90": 109.95200276374817, + "p95": 112.76800185441971, + "p99": 119.64800208806992 + }, + "combine": { + "p50": 91.71199798583984, + "p90": 97.75999933481216, + "p95": 100.38399696350098, + "p99": 107.00800269842148 + }, + "roundtrip": { + "p50": 169.3120002746582, + "p90": 178.43200266361237, + "p95": 183.9359998703003, + "p99": 196.16000354290009 + }, + "isolatedSum": { + "p50": 196.73600047826767, + "p90": 207.71200209856033, + "p95": 213.15199881792068, + "p99": 226.6560047864914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 106.55999928712845, + "p90": 118.14399808645248, + "p95": 123.32800030708313, + "p99": 133.760005235672 + }, + "combine": { + "p50": 97.63199836015701, + "p90": 104.99200224876404, + "p95": 108.41599851846695, + "p99": 114.78400230407715 + }, + "roundtrip": { + "p50": 177.95200645923615, + "p90": 188.4160041809082, + "p95": 193.1840032339096, + "p99": 199.96799528598785 + }, + "isolatedSum": { + "p50": 204.19199764728546, + "p90": 223.13600033521652, + "p95": 
231.74399882555008, + "p99": 248.54400753974915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 102.88000106811523, + "p90": 110.01600325107574, + "p95": 113.76000195741653, + "p99": 122.91199713945389 + }, + "combine": { + "p50": 94.08000111579895, + "p90": 104.51199859380722, + "p95": 107.87200182676315, + "p99": 114.49600011110306 + }, + "roundtrip": { + "p50": 179.61600422859192, + "p90": 195.23200392723083, + "p95": 200.6720006465912, + "p99": 213.3760005235672 + }, + "isolatedSum": { + "p50": 196.96000218391418, + "p90": 214.52800184488297, + "p95": 221.6320037841797, + "p99": 237.40799725055695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.24000364542007, + "p90": 123.83999675512314, + "p95": 129.95199859142303, + "p99": 139.96799290180206 + }, + "combine": { + "p50": 102.75200009346008, + "p90": 106.6880002617836, + "p95": 110.30399799346924, + "p99": 120.92799693346024 + }, + "roundtrip": { + "p50": 175.64800381660461, + "p90": 188.60800564289093, + "p95": 192.73599982261658, + "p99": 201.21599733829498 + }, + "isolatedSum": { + "p50": 204.99200373888016, + "p90": 230.52799701690674, + "p95": 240.25599658489227, + "p99": 260.8959898352623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { 
+ "p50": 113.27999830245972, + "p90": 120.89599668979645, + "p95": 125.40799379348755, + "p99": 129.40800189971924 + }, + "combine": { + "p50": 116.41599982976913, + "p90": 122.68800288438797, + "p95": 124.60800260305405, + "p99": 129.88799810409546 + }, + "roundtrip": { + "p50": 202.62399315834045, + "p90": 208.8319957256317, + "p95": 211.67999505996704, + "p99": 217.1200066804886 + }, + "isolatedSum": { + "p50": 229.69599813222885, + "p90": 243.58399957418442, + "p95": 250.0159963965416, + "p99": 259.2960000038147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.5040032863617, + "p90": 140.9280002117157, + "p95": 142.94399321079254, + "p99": 149.82399344444275 + }, + "combine": { + "p50": 139.16799426078796, + "p90": 145.47200500965118, + "p95": 150.4960060119629, + "p99": 154.6880006790161 + }, + "roundtrip": { + "p50": 235.00800132751465, + "p90": 242.97599494457245, + "p95": 246.0159957408905, + "p99": 263.35999369621277 + }, + "isolatedSum": { + "p50": 272.67199754714966, + "p90": 286.4000052213669, + "p95": 293.43999922275543, + "p99": 304.51199412345886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-34da4f41", + "identity": "b200|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b200_ef6002c6", + "comparisonKey": "9fc18c8ab654efd6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:45:36.569656+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", 
+ "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 244.47999894618988, + "p90": 284.0000092983246, + "p95": 306.11199140548706, + "p99": 339.80798721313477 + }, + "combine": { + "p50": 70.0799971818924, + "p90": 75.48800110816956, + "p95": 78.07999849319458, + "p99": 86.496002972126 + }, + "roundtrip": { + "p50": 301.5359938144684, + "p90": 
320.76799869537354, + "p95": 323.39200377464294, + "p99": 329.6000063419342 + }, + "isolatedSum": { + "p50": 314.5599961280823, + "p90": 359.48801040649414, + "p95": 384.19198989868164, + "p99": 426.3039901852608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 241.2160038948059, + "p90": 261.21601462364197, + "p95": 265.4719948768616, + "p99": 284.5759987831116 + }, + "combine": { + "p50": 71.96799665689468, + "p90": 75.99999755620956, + "p95": 79.80799674987793, + "p99": 88.60799670219421 + }, + "roundtrip": { + "p50": 308.1600069999695, + "p90": 337.18401193618774, + "p95": 358.46400260925293, + "p99": 405.2479863166809 + }, + "isolatedSum": { + "p50": 313.1840005517006, + "p90": 337.21601217985153, + "p95": 345.2799916267395, + "p99": 373.1839954853058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 242.23999679088593, + "p90": 266.7520046234131, + "p95": 274.9119997024536, + "p99": 304.25599217414856 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 79.1039988398552, + "p95": 81.37600123882294, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 305.88799715042114, + "p90": 327.58399844169617, + "p95": 336.4799916744232, + "p99": 406.5920114517212 + }, + "isolatedSum": { + "p50": 317.1199932694435, + "p90": 345.8560034632683, + "p95": 356.28800094127655, + "p99": 396.12799137830734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 
5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 243.3920055627823, + "p90": 273.21600914001465, + "p95": 305.5360019207001, + "p99": 337.3439908027649 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 82.8159973025322, + "p95": 85.05599945783615, + "p99": 94.55999732017517 + }, + "roundtrip": { + "p50": 309.59999561309814, + "p90": 332.89599418640137, + "p95": 362.5600039958954, + "p99": 406.20800852775574 + }, + "isolatedSum": { + "p50": 321.9840079545975, + "p90": 356.03200644254684, + "p95": 390.5920013785362, + "p99": 431.90398812294006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 245.95199525356293, + "p90": 297.21599817276, + "p95": 318.56000423431396, + "p99": 378.27199697494507 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 84.63999629020691, + "p95": 86.81599795818329, + "p99": 92.38400310277939 + }, + "roundtrip": { + "p50": 310.016006231308, + "p90": 340.831995010376, + "p95": 387.3920142650604, + "p99": 408.6720049381256 + }, + "isolatedSum": { + "p50": 325.24799555540085, + "p90": 381.8559944629669, + "p95": 405.37600219249725, + "p99": 470.65600007772446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 241.5360063314438, + "p90": 262.04800605773926, + "p95": 268.22400093078613, + "p99": 327.61600613594055 + }, + "combine": { + "p50": 81.727996468544, + "p90": 86.87999844551086, + "p95": 89.9519994854927, + "p99": 
98.43199700117111 + }, + "roundtrip": { + "p50": 311.2640082836151, + "p90": 331.64799213409424, + "p95": 335.32801270484924, + "p99": 344.1280126571655 + }, + "isolatedSum": { + "p50": 323.2640027999878, + "p90": 348.9280045032501, + "p95": 358.17600041627884, + "p99": 426.04800313711166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 243.96799504756927, + "p90": 263.8719975948334, + "p95": 269.21600103378296, + "p99": 296.1919903755188 + }, + "combine": { + "p50": 95.36000341176987, + "p90": 100.00000149011612, + "p95": 101.40799731016159, + "p99": 108.35199803113937 + }, + "roundtrip": { + "p50": 330.01598715782166, + "p90": 354.43198680877686, + "p95": 370.07999420166016, + "p99": 406.3040018081665 + }, + "isolatedSum": { + "p50": 339.32799845933914, + "p90": 363.8719990849495, + "p95": 370.62399834394455, + "p99": 404.5439884066582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 251.19999051094055, + "p90": 270.9439992904663, + "p95": 279.00800108909607, + "p99": 300.4480004310608 + }, + "combine": { + "p50": 111.93600296974182, + "p90": 116.73600226640701, + "p95": 118.59200149774551, + "p99": 125.31200051307678 + }, + "roundtrip": { + "p50": 352.512001991272, + "p90": 402.1120071411133, + "p95": 416.1919951438904, + "p99": 440.5120015144348 + }, + "isolatedSum": { + "p50": 363.1359934806824, + "p90": 387.6800015568733, + "p95": 397.6000025868416, + "p99": 425.7600009441376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9f81b482", + "identity": "b200|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b200_ef6002c6", + "comparisonKey": "e7e200e921d61955", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:47:54.047590+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": 
{ + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 254.46400046348572, + "p90": 294.75200176239014, + "p95": 314.2080008983612, + "p99": 358.4960103034973 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 91.23200178146362, + "p95": 96.92800045013428, + "p99": 108.92800241708755 + }, + "roundtrip": { + "p50": 315.10400772094727, + "p90": 360.79999804496765, + "p95": 387.00801134109497, + "p99": 431.67999386787415 + }, + "isolatedSum": { + "p50": 328.41599732637405, + "p90": 385.98400354385376, + "p95": 411.1360013484955, + "p99": 467.42401272058487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 252.70399451255798, + "p90": 307.45598673820496, + "p95": 333.21601152420044, + "p99": 360.8959913253784 + }, + "combine": { + "p50": 76.4480009675026, + "p90": 82.97599852085114, + "p95": 93.63199770450592, + "p99": 102.4319976568222 + }, + "roundtrip": { + "p50": 317.82400608062744, + "p90": 361.1840009689331, + "p95": 372.22400307655334, + "p99": 399.23200011253357 + }, + "isolatedSum": { + "p50": 329.1519954800606, + "p90": 390.4319852590561, + "p95": 426.84800922870636, + "p99": 463.3279889822006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 251.96799635887146, + "p90": 298.4960079193115, + "p95": 307.48799443244934, + "p99": 
356.9279909133911 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 86.17600053548813, + "p95": 95.61599791049957, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 320.6399977207184, + "p90": 363.8080060482025, + "p95": 387.8079950809479, + "p99": 428.0320107936859 + }, + "isolatedSum": { + "p50": 330.7199999690056, + "p90": 384.67200845479965, + "p95": 403.1039923429489, + "p99": 461.8559926748276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 250.43201446533203, + "p90": 294.5919930934906, + "p95": 311.5839958190918, + "p99": 358.271986246109 + }, + "combine": { + "p50": 82.65600353479385, + "p90": 89.12000060081482, + "p95": 94.36800330877304, + "p99": 106.88000172376633 + }, + "roundtrip": { + "p50": 328.2560110092163, + "p90": 391.2000060081482, + "p95": 410.7840061187744, + "p99": 454.94401454925537 + }, + "isolatedSum": { + "p50": 333.0880180001259, + "p90": 383.7119936943054, + "p95": 405.95199912786484, + "p99": 465.15198796987534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 253.85600328445435, + "p90": 297.1520125865936, + "p95": 310.59199571609497, + "p99": 347.9360044002533 + }, + "combine": { + "p50": 82.91199803352356, + "p90": 89.05600011348724, + "p95": 99.04000163078308, + "p99": 105.18400371074677 + }, + "roundtrip": { + "p50": 327.39201188087463, + "p90": 368.80001425743103, + "p95": 388.35200667381287, + "p99": 436.383992433548 + }, + "isolatedSum": { + "p50": 336.7680013179779, + "p90": 386.2080127000809, + 
"p95": 409.63199734687805, + "p99": 453.12000811100006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 269.47200298309326, + "p90": 322.9120075702667, + "p95": 361.85601353645325, + "p99": 384.12800431251526 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 97.31200337409973, + "p95": 105.8880016207695, + "p99": 116.38399958610535 + }, + "roundtrip": { + "p50": 327.61600613594055, + "p90": 364.0640079975128, + "p95": 373.4399974346161, + "p99": 397.8239893913269 + }, + "isolatedSum": { + "p50": 355.8720052242279, + "p90": 420.22401094436646, + "p95": 467.74401515722275, + "p99": 500.5120038986206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 250.8159875869751, + "p90": 285.0880026817322, + "p95": 293.2800054550171, + "p99": 302.43200063705444 + }, + "combine": { + "p50": 101.1200025677681, + "p90": 109.56799983978271, + "p95": 117.69600212574005, + "p99": 126.30400061607361 + }, + "roundtrip": { + "p50": 360.9920144081116, + "p90": 439.8399889469147, + "p95": 467.23198890686035, + "p99": 534.6879959106445 + }, + "isolatedSum": { + "p50": 351.9359901547432, + "p90": 394.6560025215149, + "p95": 410.97600758075714, + "p99": 428.73600125312805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 262.9759907722473, + "p90": 298.2720136642456, + "p95": 318.4320032596588, + "p99": 356.8960130214691 + }, + "combine": { + "p50": 120.4800009727478, + "p90": 129.92000579833984, + "p95": 136.03200018405914, + "p99": 143.0400013923645 + }, + "roundtrip": { + "p50": 387.10400462150574, + "p90": 450.0479996204376, + "p95": 471.0719883441925, + "p99": 514.303982257843 + }, + "isolatedSum": { + "p50": 383.4559917449951, + "p90": 428.19201946258545, + "p95": 454.46400344371796, + "p99": 499.9360144138336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-de7fbd25", + "identity": "b200|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_ef6002c6", + "comparisonKey": "f9a86a8c86641a4b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:50:13.832912+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 246.33599817752838, + "p90": 270.55999636650085, + "p95": 310.3039860725403, + "p99": 339.61600065231323 + }, + "combine": { + "p50": 77.85599678754807, + "p90": 81.69600367546082, + "p95": 84.44800227880478, + "p99": 92.6079973578453 + }, + "roundtrip": { + "p50": 315.36000967025757, + "p90": 333.50399136543274, + "p95": 337.5680148601532, + "p99": 342.9439961910248 + }, + "isolatedSum": { + "p50": 324.19199496507645, + "p90": 352.25600004196167, + "p95": 394.75198835134506, + "p99": 432.22399801015854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 245.05600333213806, + "p90": 266.7520046234131, + "p95": 269.72800493240356, + "p99": 294.14400458335876 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 84.927998483181, + "p95": 87.45600283145905, + "p99": 95.83999961614609 + }, + "roundtrip": { + "p50": 316.6719973087311, + "p90": 351.6800105571747, + 
"p95": 387.10400462150574, + "p99": 422.04800248146057 + }, + "isolatedSum": { + "p50": 326.08000189065933, + "p90": 351.6800031065941, + "p95": 357.1840077638626, + "p99": 389.98400419950485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 250.59199333190918, + "p90": 277.50399708747864, + "p95": 291.04000329971313, + "p99": 309.53601002693176 + }, + "combine": { + "p50": 83.52000266313553, + "p90": 88.28800171613693, + "p95": 91.45600348711014, + "p99": 99.96800124645233 + }, + "roundtrip": { + "p50": 328.5120129585266, + "p90": 382.01600313186646, + "p95": 395.9999978542328, + "p99": 425.9519875049591 + }, + "isolatedSum": { + "p50": 334.1119959950447, + "p90": 365.79199880361557, + "p95": 382.4960067868233, + "p99": 409.5040112733841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 246.07999622821808, + "p90": 268.8960134983063, + "p95": 273.50398898124695, + "p99": 313.05599212646484 + }, + "combine": { + "p50": 87.00799942016602, + "p90": 96.3520035147667, + "p95": 99.45599734783173, + "p99": 110.944002866745 + }, + "roundtrip": { + "p50": 322.2079873085022, + "p90": 343.968003988266, + "p95": 355.80798983573914, + "p99": 415.48800468444824 + }, + "isolatedSum": { + "p50": 333.0879956483841, + "p90": 365.24801701307297, + "p95": 372.9599863290787, + "p99": 423.99999499320984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 246.87999486923218, + "p90": 270.08000016212463, + "p95": 274.9119997024536, + "p99": 327.4880051612854 + }, + "combine": { + "p50": 87.00799942016602, + "p90": 93.34400296211243, + "p95": 98.78399968147278, + "p99": 112.70400136709213 + }, + "roundtrip": { + "p50": 323.5200047492981, + "p90": 342.3680067062378, + "p95": 346.0800051689148, + "p99": 359.391987323761 + }, + "isolatedSum": { + "p50": 333.8879942893982, + "p90": 363.42400312423706, + "p95": 373.6959993839264, + "p99": 440.19200652837753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 245.82399427890778, + "p90": 268.0639922618866, + "p95": 271.807998418808, + "p99": 286.3360047340393 + }, + "combine": { + "p50": 89.50400352478027, + "p90": 94.17600184679031, + "p95": 96.00000083446503, + "p99": 104.44799810647964 + }, + "roundtrip": { + "p50": 328.92799377441406, + "p90": 352.86399722099304, + "p95": 364.0959858894348, + "p99": 406.68800473213196 + }, + "isolatedSum": { + "p50": 335.32799780368805, + "p90": 362.2399941086769, + "p95": 367.807999253273, + "p99": 390.78400284051895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 247.3279982805252, + "p90": 266.07999205589294, + "p95": 269.4399952888489, + "p99": 279.61599826812744 + }, + "combine": { + "p50": 105.05600273609161, + "p90": 109.95200276374817, + "p95": 114.17599767446518, + "p99": 
120.99199742078781 + }, + "roundtrip": { + "p50": 343.3600068092346, + "p90": 361.7280125617981, + "p95": 367.3279881477356, + "p99": 400.06399154663086 + }, + "isolatedSum": { + "p50": 352.3840010166168, + "p90": 376.0319948196411, + "p95": 383.61599296331406, + "p99": 400.60799568891525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 264.6400034427643, + "p90": 282.20799565315247, + "p95": 285.72800755500793, + "p99": 296.09599709510803 + }, + "combine": { + "p50": 124.83199685811996, + "p90": 132.38400220870972, + "p95": 135.55200397968292, + "p99": 140.6719982624054 + }, + "roundtrip": { + "p50": 373.4399974346161, + "p90": 441.0879909992218, + "p95": 475.96800327301025, + "p99": 636.1600160598755 + }, + "isolatedSum": { + "p50": 389.47200030088425, + "p90": 414.5919978618622, + "p95": 421.28001153469086, + "p99": 436.7679953575134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f73bee3c", + "identity": "b200|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_3dc89f1a", + "comparisonKey": "3edad89339f2ce7a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:39:38.122795+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.61600118875504, + "p90": 141.63200557231903, + "p95": 148.41599762439728, + "p99": 154.23999726772308 + }, + "combine": { + "p50": 83.64800363779068, + "p90": 91.20000153779984, + "p95": 100.63999891281128, + "p99": 108.2879975438118 + }, + "roundtrip": { + "p50": 224.48000311851501, + "p90": 255.16799092292786, + "p95": 260.99199056625366, + "p99": 279.9679934978485 + }, + "isolatedSum": { + "p50": 199.26400482654572, + "p90": 232.83200711011887, + "p95": 249.05599653720856, + "p99": 262.5279948115349 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 116.41599982976913, + "p90": 140.99200069904327, + "p95": 145.82400023937225, + "p99": 152.5759994983673 + }, + "combine": { + "p50": 86.2400010228157, + "p90": 99.07200187444687, + "p95": 104.51199859380722, + "p99": 118.52800101041794 + }, + "roundtrip": { + "p50": 228.2239943742752, + "p90": 261.50399446487427, + "p95": 270.27198672294617, + "p99": 296.06398940086365 + }, + "isolatedSum": { + "p50": 202.65600085258484, + "p90": 240.06400257349014, + "p95": 250.33599883317947, + "p99": 271.10400050878525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 119.55200135707855, + "p90": 143.19999516010284, + "p95": 148.54399859905243, + "p99": 156.22399747371674 + }, + "combine": { + "p50": 87.0399996638298, + "p90": 96.73599898815155, + "p95": 102.59199887514114, + "p99": 112.60800063610077 + }, + "roundtrip": { + "p50": 230.04800081253052, + "p90": 259.552001953125, + "p95": 266.88000559806824, + "p99": 283.55199098587036 + }, + "isolatedSum": { + "p50": 206.59200102090836, + "p90": 239.9359941482544, + "p95": 251.13599747419357, + "p99": 268.8319981098175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 121.40800058841705, + "p90": 146.36799693107605, + "p95": 
156.5759927034378, + "p99": 195.39199769496918 + }, + "combine": { + "p50": 90.55999666452408, + "p90": 99.67999905347824, + "p95": 110.72000116109848, + "p99": 117.15199798345566 + }, + "roundtrip": { + "p50": 233.88800024986267, + "p90": 268.70399713516235, + "p95": 274.78399872779846, + "p99": 287.23201155662537 + }, + "isolatedSum": { + "p50": 211.96799725294113, + "p90": 246.0479959845543, + "p95": 267.2959938645363, + "p99": 312.54399567842484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 123.55200201272964, + "p90": 152.76800096035004, + "p95": 162.9759967327118, + "p99": 197.66399264335632 + }, + "combine": { + "p50": 99.07200187444687, + "p90": 113.50400000810623, + "p95": 122.65600264072418, + "p99": 134.14399325847626 + }, + "roundtrip": { + "p50": 233.43999683856964, + "p90": 272.38398790359497, + "p95": 280.7680070400238, + "p99": 295.2960133552551 + }, + "isolatedSum": { + "p50": 222.6240038871765, + "p90": 266.27200096845627, + "p95": 285.631999373436, + "p99": 331.8079859018326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 125.2799928188324, + "p90": 148.0959951877594, + "p95": 154.7520011663437, + "p99": 162.23999857902527 + }, + "combine": { + "p50": 96.12800180912018, + "p90": 104.63999956846237, + "p95": 113.92000317573547, + "p99": 120.80000340938568 + }, + "roundtrip": { + "p50": 239.29600417613983, + "p90": 281.40801191329956, + "p95": 288.89599442481995, + "p99": 327.10400223731995 + }, + "isolatedSum": { + "p50": 
221.40799462795258, + "p90": 252.73599475622177, + "p95": 268.67200434207916, + "p99": 283.04000198841095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 133.18400084972382, + "p90": 152.8960019350052, + "p95": 157.53600001335144, + "p99": 164.06400501728058 + }, + "combine": { + "p50": 112.44799941778183, + "p90": 121.88799679279327, + "p95": 130.14400005340576, + "p99": 136.31999492645264 + }, + "roundtrip": { + "p50": 261.1840069293976, + "p90": 300.83200335502625, + "p95": 308.54400992393494, + "p99": 343.29599142074585 + }, + "isolatedSum": { + "p50": 245.63200026750565, + "p90": 274.78399872779846, + "p95": 287.6800000667572, + "p99": 300.3839999437332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 143.77599954605103, + "p90": 164.60800170898438, + "p95": 169.69600319862366, + "p99": 177.72799730300903 + }, + "combine": { + "p50": 133.56800377368927, + "p90": 146.17599546909332, + "p95": 150.04800260066986, + "p99": 158.24000537395477 + }, + "roundtrip": { + "p50": 284.35200452804565, + "p90": 310.9759986400604, + "p95": 316.25598669052124, + "p99": 328.73600721359253 + }, + "isolatedSum": { + "p50": 277.3440033197403, + "p90": 310.7839971780777, + "p95": 319.7440057992935, + "p99": 335.9680026769638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] 
+ }, + { + "id": "cx-1c5ad008", + "identity": "b200|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_ef6002c6", + "comparisonKey": "223c77003a48078a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:41:58.942577+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 241.98399484157562, + "p90": 263.71198892593384, + "p95": 270.7520127296448, + "p99": 318.11198592185974 + }, + "combine": { + "p50": 82.11199939250946, + "p90": 86.20800077915192, + "p95": 87.99999952316284, + "p99": 93.79199892282486 + }, + "roundtrip": { + "p50": 315.36000967025757, + "p90": 333.98398756980896, + "p95": 336.4799916744232, + "p99": 342.6559865474701 + }, + "isolatedSum": { + "p50": 324.0959942340851, + "p90": 349.91998970508575, + "p95": 358.7520122528076, + "p99": 411.9039848446846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 244.9599951505661, + "p90": 281.3760042190552, + "p95": 306.5919876098633, + "p99": 340.5120074748993 + }, + "combine": { + "p50": 85.4400023818016, + "p90": 89.21600133180618, + "p95": 91.39200299978256, + "p99": 98.65599870681763 + }, + "roundtrip": { + "p50": 317.3759877681732, + "p90": 336.38399839401245, + "p95": 342.272013425827, + "p99": 382.04801082611084 + }, + "isolatedSum": { + "p50": 330.3999975323677, + "p90": 370.59200555086136, + "p95": 397.98399060964584, + "p99": 439.1680061817169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 242.62399971485138, + "p90": 265.4399871826172, + "p95": 309.53601002693176, + "p99": 352.09599137306213 + }, + "combine": { + "p50": 85.56800335645676, + "p90": 90.30400216579437, + "p95": 93.18400174379349, + "p99": 100.63999891281128 + }, + 
"roundtrip": { + "p50": 320.0959861278534, + "p90": 338.0480110645294, + "p95": 341.21599793434143, + "p99": 350.75199604034424 + }, + "isolatedSum": { + "p50": 328.19200307130814, + "p90": 355.74398934841156, + "p95": 402.72001177072525, + "p99": 452.7359902858734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 244.00000274181366, + "p90": 268.92799139022827, + "p95": 277.44001150131226, + "p99": 301.1839985847473 + }, + "combine": { + "p50": 89.63199704885483, + "p90": 94.14400160312653, + "p95": 95.93600034713745, + "p99": 103.39199751615524 + }, + "roundtrip": { + "p50": 327.1999955177307, + "p90": 376.15999579429626, + "p95": 407.6800048351288, + "p99": 479.0079891681671 + }, + "isolatedSum": { + "p50": 333.6319997906685, + "p90": 363.0719929933548, + "p95": 373.3760118484497, + "p99": 404.57599610090256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 243.9039945602417, + "p90": 272.41599559783936, + "p95": 305.37599325180054, + "p99": 335.3919982910156 + }, + "combine": { + "p50": 91.13600105047226, + "p90": 99.20000284910202, + "p95": 101.53599828481674, + "p99": 112.12799698114395 + }, + "roundtrip": { + "p50": 329.8879861831665, + "p90": 393.44000816345215, + "p95": 405.56800365448, + "p99": 433.6639940738678 + }, + "isolatedSum": { + "p50": 335.03999561071396, + "p90": 371.6159984469414, + "p95": 406.9119915366173, + "p99": 447.5199952721596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, 
+ "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 241.4720058441162, + "p90": 261.4719867706299, + "p95": 267.0080065727234, + "p99": 275.58401226997375 + }, + "combine": { + "p50": 94.78399902582169, + "p90": 98.94400089979172, + "p95": 100.63999891281128, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 331.84000849723816, + "p90": 359.2959940433502, + "p95": 395.2319920063019, + "p99": 421.4400053024292 + }, + "isolatedSum": { + "p50": 336.2560048699379, + "p90": 360.4159876704216, + "p95": 367.64800548553467, + "p99": 383.29601287841797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 245.27999758720398, + "p90": 265.7279968261719, + "p95": 302.592009305954, + "p99": 332.4800133705139 + }, + "combine": { + "p50": 111.48799955844879, + "p90": 116.12799763679504, + "p95": 118.9119964838028, + "p99": 125.47199428081512 + }, + "roundtrip": { + "p50": 347.680002450943, + "p90": 364.6079897880554, + "p95": 368.6720132827759, + "p99": 379.1680037975311 + }, + "isolatedSum": { + "p50": 356.76799714565277, + "p90": 381.8559944629669, + "p95": 421.5040057897568, + "p99": 457.95200765132904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 265.28000831604004, + "p90": 281.76000714302063, + "p95": 289.3120050430298, + "p99": 356.1919927597046 + }, + "combine": { + "p50": 132.35199451446533, + 
"p90": 141.66399836540222, + "p95": 144.19199526309967, + "p99": 151.74399316310883 + }, + "roundtrip": { + "p50": 379.8080086708069, + "p90": 396.8319892883301, + "p95": 402.49601006507874, + "p99": 414.46399688720703 + }, + "isolatedSum": { + "p50": 397.63200283050537, + "p90": 423.42400550842285, + "p95": 433.50400030612946, + "p99": 507.9359859228134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6660a729", + "identity": "b200|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b200_ef6002c6", + "comparisonKey": "362d42973eafe11e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:43:13.198747+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + 
"nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 270.3680098056793, + "p90": 357.34400153160095, + "p95": 388.9920115470886, + "p99": 474.68799352645874 + }, + "combine": { + "p50": 81.18399977684021, + "p90": 94.52799707651138, + "p95": 101.47199779748917, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 330.27198910713196, + "p90": 399.1999924182892, + "p95": 437.18400597572327, + "p99": 474.016010761261 + }, + "isolatedSum": { + "p50": 351.55200958251953, + "p90": 451.87199860811234, + "p95": 490.4640093445778, + "p99": 584.4479948282242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 251.71199440956116, + "p90": 286.9119942188263, + "p95": 294.2720055580139, + "p99": 322.9439854621887 + }, + "combine": { + "p50": 84.03199911117554, + "p90": 92.73599833250046, + "p95": 103.90400141477585, + "p99": 108.8000014424324 + }, + "roundtrip": { + "p50": 328.15998792648315, + "p90": 362.2719943523407, + "p95": 372.0960021018982, + "p99": 428.0320107936859 + }, + "isolatedSum": { + "p50": 335.7439935207367, + "p90": 379.64799255132675, + "p95": 398.17600697278976, + 
"p99": 431.7439869046211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 265.1839852333069, + "p90": 334.1760039329529, + "p95": 360.00001430511475, + "p99": 475.16798973083496 + }, + "combine": { + "p50": 86.30400151014328, + "p90": 91.96799993515015, + "p95": 101.9200012087822, + "p99": 114.17599767446518 + }, + "roundtrip": { + "p50": 333.50399136543274, + "p90": 395.3920006752014, + "p95": 420.54399847984314, + "p99": 457.5999975204468 + }, + "isolatedSum": { + "p50": 351.48798674345016, + "p90": 426.144003868103, + "p95": 461.92001551389694, + "p99": 589.3439874053001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 254.20799851417542, + "p90": 288.4480059146881, + "p95": 295.6160008907318, + "p99": 307.13599920272827 + }, + "combine": { + "p50": 89.9839997291565, + "p90": 98.52799773216248, + "p95": 108.64000022411346, + "p99": 115.03999680280685 + }, + "roundtrip": { + "p50": 350.5600094795227, + "p90": 445.279985666275, + "p95": 470.62399983406067, + "p99": 566.5280222892761 + }, + "isolatedSum": { + "p50": 344.1919982433319, + "p90": 386.9760036468506, + "p95": 404.2560011148453, + "p99": 422.1759960055351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 253.56799364089966, + "p90": 
294.8159873485565, + "p95": 318.2399868965149, + "p99": 377.53599882125854 + }, + "combine": { + "p50": 92.12800115346909, + "p90": 100.41599720716476, + "p95": 108.83200168609619, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 335.6800079345703, + "p90": 368.47999691963196, + "p95": 375.93600153923035, + "p99": 423.5199987888336 + }, + "isolatedSum": { + "p50": 345.69599479436874, + "p90": 395.2319845557213, + "p95": 427.0719885826111, + "p99": 494.9439987540245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 252.48000025749207, + "p90": 289.92000222206116, + "p95": 322.7840065956116, + "p99": 351.0720133781433 + }, + "combine": { + "p50": 96.19200229644775, + "p90": 102.39999741315842, + "p95": 106.59199953079224, + "p99": 122.23999947309494 + }, + "roundtrip": { + "p50": 340.67198634147644, + "p90": 395.26399970054626, + "p95": 415.8079922199249, + "p99": 460.4159891605377 + }, + "isolatedSum": { + "p50": 348.6720025539398, + "p90": 392.3199996352196, + "p95": 429.3760061264038, + "p99": 473.31201285123825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 255.96800446510315, + "p90": 287.23201155662537, + "p95": 302.0159900188446, + "p99": 349.5999872684479 + }, + "combine": { + "p50": 113.47199976444244, + "p90": 122.72000312805176, + "p95": 129.56799566745758, + "p99": 138.2399946451187 + }, + "roundtrip": { + "p50": 358.0799996852875, + "p90": 396.64000272750854, + "p95": 419.67999935150146, + "p99": 480.1599979400635 + }, + 
"isolatedSum": { + "p50": 369.4400042295456, + "p90": 409.9520146846771, + "p95": 431.5839856863022, + "p99": 487.8399819135666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 272.0000147819519, + "p90": 309.34399366378784, + "p95": 325.3119885921478, + "p99": 348.25599193573 + }, + "combine": { + "p50": 134.39999520778656, + "p90": 143.99999380111694, + "p95": 150.65599977970123, + "p99": 171.26399278640747 + }, + "roundtrip": { + "p50": 385.98400354385376, + "p90": 419.0720021724701, + "p95": 448.1920003890991, + "p99": 492.73601174354553 + }, + "isolatedSum": { + "p50": 406.40000998973846, + "p90": 453.3439874649048, + "p95": 475.96798837184906, + "p99": 519.5199847221375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a02b2d94", + "identity": "b200|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b200_9ccbd052", + "comparisonKey": "ba471c1333f57b96", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:40:48.257380+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8 [cl]", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.98400038480759, + "p90": 114.3999993801117, + "p95": 122.72000312805176, + "p99": 139.29599523544312 + }, + "combine": { + "p50": 82.30400085449219, + "p90": 89.31200206279755, + "p95": 96.57599776983261, + "p99": 106.20799660682678 + }, + "roundtrip": { + "p50": 203.0400037765503, + "p90": 243.6479926109314, + "p95": 253.1520128250122, + "p99": 282.81599283218384 + }, + "isolatedSum": { + "p50": 176.28800123929977, + "p90": 203.71200144290924, + "p95": 219.29600089788437, + "p99": 245.5039918422699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 95.61599791049957, + "p90": 120.35199999809265, + "p95": 129.66400384902954, + "p99": 145.4080045223236 + }, + "combine": { + "p50": 86.33600175380707, + "p90": 97.6639986038208, + "p95": 106.72000050544739, + "p99": 115.35999923944473 + }, + "roundtrip": { + "p50": 206.496000289917, + "p90": 248.83200228214264, + "p95": 260.672003030777, + "p99": 294.17601227760315 + }, + "isolatedSum": { + "p50": 181.95199966430664, + "p90": 218.01599860191345, + "p95": 236.38400435447693, + "p99": 260.76800376176834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 99.0080013871193, + "p90": 123.48800152540207, + "p95": 130.2720010280609, + "p99": 148.00000190734863 + }, + "combine": { + "p50": 87.87199854850769, + "p90": 100.5759984254837, + "p95": 107.84000158309937, + "p99": 123.32800030708313 + }, + "roundtrip": { + "p50": 210.07999777793884, + "p90": 253.2159984111786, + "p95": 265.5999958515167, + "p99": 280.8000147342682 + }, + "isolatedSum": { + "p50": 186.87999993562698, + "p90": 224.06399995088577, + "p95": 238.11200261116028, + "p99": 271.32800221443176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.35999661684036, + "p90": 119.07199770212173, + "p95": 125.2799928188324, + "p99": 136.54400408267975 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 103.00800204277039, + "p95": 111.93600296974182, + 
"p99": 120.15999853610992 + }, + "roundtrip": { + "p50": 209.53600108623505, + "p90": 242.0479953289032, + "p95": 248.44799935817719, + "p99": 266.431987285614 + }, + "isolatedSum": { + "p50": 189.69599902629852, + "p90": 222.07999974489212, + "p95": 237.21599578857422, + "p99": 256.7040026187897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 100.47999769449234, + "p90": 117.47200042009354, + "p95": 125.66399574279785, + "p99": 138.14400136470795 + }, + "combine": { + "p50": 91.45600348711014, + "p90": 103.26399654150009, + "p95": 110.49599945545197, + "p99": 121.18399888277054 + }, + "roundtrip": { + "p50": 210.1760059595108, + "p90": 240.03200232982635, + "p95": 246.07999622821808, + "p99": 265.855997800827 + }, + "isolatedSum": { + "p50": 191.93600118160248, + "p90": 220.73599696159363, + "p95": 236.15999519824982, + "p99": 259.3280002474785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.06400263309479, + "p90": 125.08800625801086, + "p95": 130.0159990787506, + "p99": 143.8080072402954 + }, + "combine": { + "p50": 95.77599912881851, + "p90": 104.41599786281586, + "p95": 110.55999994277954, + "p99": 121.34400010108948 + }, + "roundtrip": { + "p50": 218.72000396251678, + "p90": 297.95199632644653, + "p95": 310.016006231308, + "p99": 334.9440097808838 + }, + "isolatedSum": { + "p50": 199.8400017619133, + "p90": 229.50400412082672, + "p95": 240.57599902153015, + "p99": 265.1520073413849 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.03200370073318, + "p90": 127.42400169372559, + "p95": 136.03200018405914, + "p99": 144.86399292945862 + }, + "combine": { + "p50": 112.31999844312668, + "p90": 122.97599762678146, + "p95": 129.2479932308197, + "p99": 136.31999492645264 + }, + "roundtrip": { + "p50": 232.4800044298172, + "p90": 263.35999369621277, + "p95": 273.9199995994568, + "p99": 290.46401381492615 + }, + "isolatedSum": { + "p50": 224.35200214385986, + "p90": 250.39999932050705, + "p95": 265.27999341487885, + "p99": 281.18398785591125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.30399996042252, + "p90": 139.615997672081, + "p95": 146.2399959564209, + "p99": 158.49600732326508 + }, + "combine": { + "p50": 132.1280002593994, + "p90": 142.20799505710602, + "p95": 151.90400183200836, + "p99": 158.55999290943146 + }, + "roundtrip": { + "p50": 263.68001103401184, + "p90": 287.9039943218231, + "p95": 295.48799991607666, + "p99": 312.48000264167786 + }, + "isolatedSum": { + "p50": 254.43200021982193, + "p90": 281.823992729187, + "p95": 298.14399778842926, + "p99": 317.05600023269653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b2b2d23b", + "identity": "b200|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + 
"colorKey": "b200_ca64a747", + "comparisonKey": "e473613e407e3f31", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:47:21.986497+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 143.8719928264618, + "p90": 163.5199934244156, + "p95": 
168.89600455760956, + "p99": 175.90400576591492 + }, + "combine": { + "p50": 116.19199812412262, + "p90": 128.4479945898056, + "p95": 133.4719955921173, + "p99": 149.1519957780838 + }, + "roundtrip": { + "p50": 223.7440049648285, + "p90": 248.54399263858795, + "p95": 252.9599964618683, + "p99": 283.00800919532776 + }, + "isolatedSum": { + "p50": 260.0639909505844, + "p90": 291.9679880142212, + "p95": 302.36800014972687, + "p99": 325.0560015439987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 173.0560064315796, + "p90": 191.03999435901642, + "p95": 196.79999351501465, + "p99": 207.87200331687927 + }, + "combine": { + "p50": 153.08800339698792, + "p90": 161.9199961423874, + "p95": 165.3120070695877, + "p99": 172.38399386405945 + }, + "roundtrip": { + "p50": 290.5600070953369, + "p90": 304.1920065879822, + "p95": 311.67998909950256, + "p99": 321.8559920787811 + }, + "isolatedSum": { + "p50": 326.1440098285675, + "p90": 352.9599905014038, + "p95": 362.11200058460236, + "p99": 380.2559971809387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 229.79199886322021, + "p90": 250.04801154136658, + "p95": 254.46400046348572, + "p99": 266.55998826026917 + }, + "combine": { + "p50": 217.47200191020966, + "p90": 229.0560007095337, + "p95": 232.63999819755554, + "p99": 245.56800723075867 + }, + "roundtrip": { + "p50": 409.824013710022, + "p90": 427.2319972515106, + "p95": 432.48000741004944, + "p99": 441.15200638771057 + }, + "isolatedSum": { + 
"p50": 447.26400077342987, + "p90": 479.10401225090027, + "p95": 487.10399866104126, + "p99": 512.1279954910278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 351.00799798965454, + "p90": 362.7519905567169, + "p95": 370.4639971256256, + "p99": 381.47199153900146 + }, + "combine": { + "p50": 408.54400396347046, + "p90": 416.159987449646, + "p95": 418.5920059680939, + "p99": 424.6079921722412 + }, + "roundtrip": { + "p50": 661.6960167884827, + "p90": 674.5280027389526, + "p95": 681.7920207977295, + "p99": 704.0960192680359 + }, + "isolatedSum": { + "p50": 759.552001953125, + "p90": 778.9119780063629, + "p95": 789.0560030937195, + "p99": 806.0799837112427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 578.3360004425049, + "p90": 593.4399962425232, + "p95": 600.9600162506104, + "p99": 612.2239828109741 + }, + "combine": { + "p50": 775.5200266838074, + "p90": 785.9200239181519, + "p95": 788.3840203285217, + "p99": 797.4399924278259 + }, + "roundtrip": { + "p50": 1311.9360208511353, + "p90": 1326.1120319366455, + "p95": 1331.1359882354736, + "p99": 1339.359998703003 + }, + "isolatedSum": { + "p50": 1353.8560271263123, + "p90": 1379.360020160675, + "p95": 1389.344036579132, + "p99": 1409.6639752388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1047.7440357208252, + "p90": 1061.2479448318481, + "p95": 1066.5279626846313, + "p99": 1109.4399690628052 + }, + "combine": { + "p50": 1463.744044303894, + "p90": 1475.5840301513672, + "p95": 1481.8559885025024, + "p99": 1492.1280145645142 + }, + "roundtrip": { + "p50": 2457.2160243988037, + "p90": 2470.0798988342285, + "p95": 2473.79207611084, + "p99": 2491.1999702453613 + }, + "isolatedSum": { + "p50": 2511.4880800247192, + "p90": 2536.8319749832153, + "p95": 2548.383951187134, + "p99": 2601.5679836273193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8877f901", + "identity": "b200|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b200_ca64a747", + "comparisonKey": "7ebad7942f631841", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:49:41.453005+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 151.13599598407745, + "p90": 155.68000078201294, + "p95": 159.45599973201752, + "p99": 165.3759926557541 + }, + "combine": { + "p50": 125.76000392436981, + "p90": 130.20800054073334, + "p95": 133.34399461746216, + "p99": 141.82400703430176 + }, + "roundtrip": { + "p50": 237.95199394226074, + "p90": 245.12000381946564, + "p95": 247.67999351024628, + "p99": 256.54399394989014 + }, + "isolatedSum": { + "p50": 276.89599990844727, + "p90": 285.8880013227463, + "p95": 292.7999943494797, + "p99": 307.19999969005585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 177.72799730300903, + "p90": 192.3840045928955, + "p95": 195.99999487400055, + "p99": 203.2639980316162 + }, + "combine": { + "p50": 162.62400150299072, + "p90": 
169.18399930000305, + "p95": 174.04800653457642, + "p99": 180.12799322605133 + }, + "roundtrip": { + "p50": 302.11201310157776, + "p90": 311.8079900741577, + "p95": 314.7520124912262, + "p99": 320.22398710250854 + }, + "isolatedSum": { + "p50": 340.35199880599976, + "p90": 361.56800389289856, + "p95": 370.04800140857697, + "p99": 383.39199125766754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 240.57599902153015, + "p90": 251.45599246025085, + "p95": 256.03199005126953, + "p99": 263.42400908470154 + }, + "combine": { + "p50": 238.78400027751923, + "p90": 245.85600197315216, + "p95": 249.79199469089508, + "p99": 260.8320116996765 + }, + "roundtrip": { + "p50": 430.9439957141876, + "p90": 442.6240026950836, + "p95": 446.6879963874817, + "p99": 459.1040015220642 + }, + "isolatedSum": { + "p50": 479.3599992990494, + "p90": 497.311994433403, + "p95": 505.8239847421646, + "p99": 524.256020784378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 357.1839928627014, + "p90": 379.2960047721863, + "p95": 394.1760063171387, + "p99": 413.567990064621 + }, + "combine": { + "p50": 461.69599890708923, + "p90": 469.9519872665405, + "p95": 475.48800706863403, + "p99": 492.8640127182007 + }, + "roundtrip": { + "p50": 757.7279806137085, + "p90": 769.2800164222717, + "p95": 776.3519883155823, + "p99": 788.1280183792114 + }, + "isolatedSum": { + "p50": 818.8799917697906, + "p90": 849.2479920387268, + "p95": 869.6640133857727, + "p99": 
906.4320027828217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 593.824028968811, + "p90": 603.7759780883789, + "p95": 606.656014919281, + "p99": 617.247998714447 + }, + "combine": { + "p50": 806.9760203361511, + "p90": 812.9600286483765, + "p95": 815.5199885368347, + "p99": 823.6160278320312 + }, + "roundtrip": { + "p50": 1358.6560487747192, + "p90": 1372.2560405731201, + "p95": 1381.4079761505127, + "p99": 1396.1280584335327 + }, + "isolatedSum": { + "p50": 1400.8000493049622, + "p90": 1416.7360067367554, + "p95": 1422.1760034561157, + "p99": 1440.8640265464783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1069.4080591201782, + "p90": 1078.5599946975708, + "p95": 1083.1680297851562, + "p99": 1099.6479988098145 + }, + "combine": { + "p50": 1503.7120580673218, + "p90": 1513.856053352356, + "p95": 1518.9119577407837, + "p99": 1524.4799852371216 + }, + "roundtrip": { + "p50": 2522.239923477173, + "p90": 2532.991886138916, + "p95": 2536.5118980407715, + "p99": 2551.5520572662354 + }, + "isolatedSum": { + "p50": 2573.1201171875, + "p90": 2592.4160480499268, + "p95": 2602.07998752594, + "p99": 2624.127984046936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fa92fad3", + "identity": 
"b200|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_ca64a747", + "comparisonKey": "e8ca56ea39c3415f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:52:03.103986+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 153.9520025253296, + "p90": 176.256000995636, + "p95": 185.69600582122803, + "p99": 203.16800475120544 + }, + "combine": { + "p50": 130.3360015153885, + "p90": 140.3840035200119, + "p95": 147.77599275112152, + "p99": 159.0079963207245 + }, + "roundtrip": { + "p50": 249.37599897384644, + "p90": 270.81599831581116, + "p95": 279.1680097579956, + "p99": 309.28000807762146 + }, + "isolatedSum": { + "p50": 284.2880040407181, + "p90": 316.6400045156479, + "p95": 333.47199857234955, + "p99": 362.17600107192993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 190.08000195026398, + "p90": 198.40000569820404, + "p95": 204.8960030078888, + "p99": 215.96799790859222 + }, + "combine": { + "p50": 168.09600591659546, + "p90": 179.00800704956055, + "p95": 182.78400599956512, + "p99": 192.47999787330627 + }, + "roundtrip": { + "p50": 316.5439963340759, + "p90": 336.5119993686676, + "p95": 344.1280126571655, + "p99": 353.66401076316833 + }, + "isolatedSum": { + "p50": 358.17600786685944, + "p90": 377.4080127477646, + "p95": 387.6800090074539, + "p99": 408.4479957818985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 252.22399830818176, + "p90": 266.6879892349243, + "p95": 276.0320007801056, + "p99": 298.3039915561676 + }, + "combine": { + "p50": 268.8960134983063, + "p90": 280.2560031414032, + "p95": 282.49600529670715, + "p99": 291.77600145339966 + }, + "roundtrip": { + "p50": 
465.2160108089447, + "p90": 485.0560128688812, + "p95": 490.59200286865234, + "p99": 504.4479966163635 + }, + "isolatedSum": { + "p50": 521.120011806488, + "p90": 546.9439923763275, + "p95": 558.5280060768127, + "p99": 590.0799930095673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.36801195144653, + "p90": 386.6559863090515, + "p95": 391.0079896450043, + "p99": 400.12800693511963 + }, + "combine": { + "p50": 472.6080000400543, + "p90": 482.40000009536743, + "p95": 488.0959987640381, + "p99": 500.09602308273315 + }, + "roundtrip": { + "p50": 804.0639758110046, + "p90": 824.7359991073608, + "p95": 831.3279747962952, + "p99": 841.0239815711975 + }, + "isolatedSum": { + "p50": 846.9760119915009, + "p90": 869.055986404419, + "p95": 879.1039884090424, + "p99": 900.2240300178528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 613.6959791183472, + "p90": 629.5679807662964, + "p95": 637.3760104179382, + "p99": 650.9760022163391 + }, + "combine": { + "p50": 829.1839957237244, + "p90": 838.1440043449402, + "p95": 843.392014503479, + "p99": 850.0480055809021 + }, + "roundtrip": { + "p50": 1401.568055152893, + "p90": 1419.0720319747925, + "p95": 1427.5840520858765, + "p99": 1448.799967765808 + }, + "isolatedSum": { + "p50": 1442.8799748420715, + "p90": 1467.7119851112366, + "p95": 1480.7680249214172, + "p99": 1501.0240077972412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 
1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1101.5360355377197, + "p90": 1112.4160289764404, + "p95": 1118.0800199508667, + "p99": 1127.7120113372803 + }, + "combine": { + "p50": 1525.920033454895, + "p90": 1537.727952003479, + "p95": 1540.8960580825806, + "p99": 1552.1600246429443 + }, + "roundtrip": { + "p50": 2590.6879901885986, + "p90": 2607.680082321167, + "p95": 2611.8080615997314, + "p99": 2621.3760375976562 + }, + "isolatedSum": { + "p50": 2627.4560689926147, + "p90": 2650.1439809799194, + "p95": 2658.9760780334473, + "p99": 2679.8720359802246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a475aa1e", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_d6b08783", + "comparisonKey": "c3d242e0212b950e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:36:51.194495+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 149.79200065135956, + "p90": 164.2560064792633, + "p95": 171.2000072002411, + "p99": 186.0159933567047 + }, + "combine": { + "p50": 140.00000059604645, + "p90": 147.5519984960556, + "p95": 154.4319987297058, + "p99": 164.51199352741241 + }, + "roundtrip": { + "p50": 254.40001487731934, + "p90": 272.92799949645996, + "p95": 280.64000606536865, + "p99": 306.304007768631 + }, + "isolatedSum": { + "p50": 289.792001247406, + "p90": 311.8080049753189, + "p95": 325.6320059299469, + "p99": 350.5279868841171 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.81600689888, + "p90": 
197.2160041332245, + "p95": 203.64800095558167, + "p99": 218.46400201320648 + }, + "combine": { + "p50": 180.03199994564056, + "p90": 191.77600741386414, + "p95": 196.51199877262115, + "p99": 208.19200575351715 + }, + "roundtrip": { + "p50": 333.0560028553009, + "p90": 355.52000999450684, + "p95": 360.8640134334564, + "p99": 374.1759955883026 + }, + "isolatedSum": { + "p50": 366.84800684452057, + "p90": 388.9920115470886, + "p95": 400.1599997282028, + "p99": 426.65600776672363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 253.1520128250122, + "p90": 271.2000012397766, + "p95": 280.09599447250366, + "p99": 290.97598791122437 + }, + "combine": { + "p50": 299.6160089969635, + "p90": 309.31198596954346, + "p95": 314.9760067462921, + "p99": 335.6480002403259 + }, + "roundtrip": { + "p50": 500.7680058479309, + "p90": 523.1999754905701, + "p95": 531.6479802131653, + "p99": 552.7679920196533 + }, + "isolatedSum": { + "p50": 552.7680218219757, + "p90": 580.5119872093201, + "p95": 595.0720012187958, + "p99": 626.6239881515503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 390.01598954200745, + "p90": 412.7359986305237, + "p95": 423.7760007381439, + "p99": 465.5359983444214 + }, + "combine": { + "p50": 484.22399163246155, + "p90": 490.01601338386536, + "p95": 491.456001996994, + "p99": 493.21600794792175 + }, + "roundtrip": { + "p50": 825.5360126495361, + "p90": 838.6880159378052, + "p95": 847.7439880371094, + "p99": 
859.3599796295166 + }, + "isolatedSum": { + "p50": 874.239981174469, + "p90": 902.752012014389, + "p95": 915.2320027351379, + "p99": 958.7520062923431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 621.4079856872559, + "p90": 657.7919721603394, + "p95": 688.0319714546204, + "p99": 752.5119781494141 + }, + "combine": { + "p50": 848.2879996299744, + "p90": 859.2320084571838, + "p95": 862.9440069198608, + "p99": 953.6960124969482 + }, + "roundtrip": { + "p50": 1427.3600578308105, + "p90": 1439.6480321884155, + "p95": 1443.552017211914, + "p99": 1452.3839950561523 + }, + "isolatedSum": { + "p50": 1469.6959853172302, + "p90": 1517.0239806175232, + "p95": 1550.9759783744812, + "p99": 1706.2079906463623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1115.488052368164, + "p90": 1126.2400150299072, + "p95": 1131.4239501953125, + "p99": 1147.3599672317505 + }, + "combine": { + "p50": 1558.8799715042114, + "p90": 1567.4240589141846, + "p95": 1571.455955505371, + "p99": 1578.0800580978394 + }, + "roundtrip": { + "p50": 2635.7760429382324, + "p90": 2647.808074951172, + "p95": 2651.6799926757812, + "p99": 2660.831928253174 + }, + "isolatedSum": { + "p50": 2674.3680238723755, + "p90": 2693.664073944092, + "p95": 2702.8799057006836, + "p99": 2725.44002532959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2f470d56", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_ca64a747", + "comparisonKey": "ae56ad174b80002a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:39:07.278446+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 149.02399480342865, + "p90": 163.00800442695618, + "p95": 167.23200678825378, + "p99": 178.97599935531616 + }, + "combine": { + "p50": 139.71200585365295, + "p90": 146.88000082969666, + "p95": 151.74399316310883, + "p99": 157.56799280643463 + }, + "roundtrip": { + "p50": 252.41601467132568, + "p90": 261.34398579597473, + "p95": 264.99199867248535, + "p99": 276.1920094490051 + }, + "isolatedSum": { + "p50": 288.7360006570816, + "p90": 309.88800525665283, + "p95": 318.9759999513626, + "p99": 336.5439921617508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 184.03199315071106, + "p90": 202.2079974412918, + "p95": 217.75999665260315, + "p99": 223.58399629592896 + }, + "combine": { + "p50": 180.54400384426117, + "p90": 191.26400351524353, + "p95": 193.7279999256134, + "p99": 202.84800231456757 + }, + "roundtrip": { + "p50": 332.38399028778076, + "p90": 343.7120020389557, + "p95": 351.4559864997864, + "p99": 363.647997379303 + }, + "isolatedSum": { + "p50": 364.57599699497223, + "p90": 393.47200095653534, + "p95": 411.48799657821655, + "p99": 426.4319986104965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 252.6719868183136, + "p90": 265.1520073413849, + "p95": 277.6319980621338, + 
"p99": 290.71998596191406 + }, + "combine": { + "p50": 299.19999837875366, + "p90": 307.8719973564148, + "p95": 312.76801228523254, + "p99": 323.3279883861542 + }, + "roundtrip": { + "p50": 497.8879988193512, + "p90": 507.6479911804199, + "p95": 512.5759840011597, + "p99": 527.0400047302246 + }, + "isolatedSum": { + "p50": 551.8719851970673, + "p90": 573.0240046977997, + "p95": 590.4000103473663, + "p99": 614.0479743480682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.375990152359, + "p90": 398.17601442337036, + "p95": 405.023992061615, + "p99": 417.31199622154236 + }, + "combine": { + "p50": 485.76000332832336, + "p90": 503.77601385116577, + "p95": 540.9600138664246, + "p99": 842.6240086555481 + }, + "roundtrip": { + "p50": 826.0160088539124, + "p90": 834.3039751052856, + "p95": 837.8239870071411, + "p99": 855.679988861084 + }, + "isolatedSum": { + "p50": 863.1359934806824, + "p90": 901.9520282745361, + "p95": 945.9840059280396, + "p99": 1259.9360048770905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 620.9279894828796, + "p90": 629.5999884605408, + "p95": 632.8960061073303, + "p99": 639.9999856948853 + }, + "combine": { + "p50": 848.1919765472412, + "p90": 859.3599796295166, + "p95": 863.4880185127258, + "p99": 867.904007434845 + }, + "roundtrip": { + "p50": 1426.2720346450806, + "p90": 1435.8400106430054, + "p95": 1439.6159648895264, + "p99": 1446.7840194702148 + }, + "isolatedSum": { + "p50": 1469.1199660301208, 
+ "p90": 1488.9599680900574, + "p95": 1496.3840246200562, + "p99": 1507.9039931297302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1117.184042930603, + "p90": 1123.6159801483154, + "p95": 1126.9760131835938, + "p99": 1132.3519945144653 + }, + "combine": { + "p50": 1559.2960119247437, + "p90": 1568.0639743804932, + "p95": 1575.3920078277588, + "p99": 1634.2079639434814 + }, + "roundtrip": { + "p50": 2637.216091156006, + "p90": 2648.0960845947266, + "p95": 2652.031898498535, + "p99": 2660.7680320739746 + }, + "isolatedSum": { + "p50": 2676.4800548553467, + "p90": 2691.6799545288086, + "p95": 2702.3680210113525, + "p99": 2766.559958457947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-130c4235", + "identity": "b200|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b200_ca64a747", + "comparisonKey": "921fc8b06269e395", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:45:04.012642+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 150.52799880504608, + "p90": 167.39200055599213, + "p95": 172.28800058364868, + "p99": 179.36000227928162 + }, + "combine": { + "p50": 138.49599659442902, + "p90": 146.17599546909332, + "p95": 148.15999567508698, + "p99": 156.44800662994385 + }, + "roundtrip": { + "p50": 253.91998887062073, + "p90": 273.9520072937012, + "p95": 281.823992729187, + "p99": 304.9600124359131 + }, + "isolatedSum": { + "p50": 289.0239953994751, + "p90": 313.56799602508545, + "p95": 320.44799625873566, + "p99": 335.80800890922546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.0159933567047, + "p90": 205.24799823760986, + "p95": 208.76799523830414, + "p99": 220.0320065021515 + }, + "combine": { + "p50": 182.97599256038666, + "p90": 195.3279972076416, + "p95": 197.952002286911, + "p99": 212.67199516296387 + }, + "roundtrip": { + "p50": 340.1919901371002, + "p90": 356.9920063018799, + "p95": 362.62398958206177, + "p99": 382.6240003108978 + }, + "isolatedSum": { + "p50": 368.99198591709137, + "p90": 400.57599544525146, + "p95": 406.71999752521515, + "p99": 432.70400166511536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 254.68799471855164, + "p90": 277.44001150131226, + "p95": 282.8800082206726, + "p99": 300.5119860172272 + }, + "combine": { + "p50": 295.5839931964874, + "p90": 306.17600679397583, + "p95": 310.36800146102905, + "p99": 320.67200541496277 + }, + "roundtrip": { + "p50": 502.81602144241333, + "p90": 514.7839784622192, + "p95": 521.0239887237549, + "p99": 530.4960012435913 + }, + "isolatedSum": { + "p50": 550.2719879150391, + "p90": 583.6160182952881, + "p95": 593.2480096817017, + "p99": 621.1839914321899 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 386.04798913002014, + "p90": 404.63998913764954, + "p95": 410.8479917049408, + "p99": 426.2079894542694 + }, + "combine": { + "p50": 485.79201102256775, + "p90": 491.2639856338501, + "p95": 493.6000108718872, + 
"p99": 502.7199983596802 + }, + "roundtrip": { + "p50": 825.0560164451599, + "p90": 840.9919738769531, + "p95": 848.4479784965515, + "p99": 862.9760146141052 + }, + "isolatedSum": { + "p50": 871.8400001525879, + "p90": 895.9039747714996, + "p95": 904.448002576828, + "p99": 928.9279878139496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 618.9119815826416, + "p90": 635.7439756393433, + "p95": 642.3680186271667, + "p99": 650.1759886741638 + }, + "combine": { + "p50": 838.1760120391846, + "p90": 848.9279747009277, + "p95": 851.8720269203186, + "p99": 864.0959858894348 + }, + "roundtrip": { + "p50": 1418.6559915542603, + "p90": 1434.0159893035889, + "p95": 1441.7279958724976, + "p99": 1450.9119987487793 + }, + "isolatedSum": { + "p50": 1457.0879936218262, + "p90": 1484.671950340271, + "p95": 1494.2400455474854, + "p99": 1514.2719745635986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1112.3839616775513, + "p90": 1122.7840185165405, + "p95": 1128.5760402679443, + "p99": 1142.143964767456 + }, + "combine": { + "p50": 1552.2880554199219, + "p90": 1567.1039819717407, + "p95": 1577.888011932373, + "p99": 1668.511986732483 + }, + "roundtrip": { + "p50": 2618.783950805664, + "p90": 2630.592107772827, + "p95": 2637.376070022583, + "p99": 2706.9759368896484 + }, + "isolatedSum": { + "p50": 2664.672017097473, + "p90": 2689.8880004882812, + "p95": 2706.4640522003174, + "p99": 2810.655951499939 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e89912d", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b200_6f2700ad", + "comparisonKey": "b2cad1dfa4502abf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:53:14.074337+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 168.44800114631653, + "p90": 189.98399376869202, + "p95": 196.54400646686554, + "p99": 208.38400721549988 + }, + "combine": { + "p50": 155.64799308776855, + "p90": 164.38399255275726, + "p95": 167.52000153064728, + "p99": 176.4799952507019 + }, + "roundtrip": { + "p50": 288.1920039653778, + "p90": 305.88799715042114, + "p95": 312.9599988460541, + "p99": 320.99199295043945 + }, + "isolatedSum": { + "p50": 324.0959942340851, + "p90": 354.3679863214493, + "p95": 364.0640079975128, + "p99": 384.8640024662018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 213.24799954891205, + "p90": 225.8239984512329, + "p95": 228.64000499248505, + "p99": 238.81599307060242 + }, + "combine": { + "p50": 226.33600234985352, + "p90": 232.80000686645508, + "p95": 236.86400055885315, + "p99": 241.37599766254425 + }, + "roundtrip": { + "p50": 395.83998918533325, + "p90": 407.1039855480194, + "p95": 413.31198811531067, + "p99": 421.88799381256104 + }, + "isolatedSum": { + "p50": 439.58400189876556, + "p90": 458.624005317688, + "p95": 465.5040055513382, + "p99": 480.19199073314667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 298.4960079193115, + "p90": 306.14399909973145, + "p95": 309.28000807762146, + "p99": 317.56800413131714 + }, + "combine": { + "p50": 368.4160113334656, + "p90": 377.375990152359, + "p95": 380.16000390052795, + "p99": 388.0319893360138 + }, + "roundtrip": { + "p50": 614.080011844635, + "p90": 622.8799819946289, + "p95": 626.9760131835938, + "p99": 641.3440108299255 + }, + "isolatedSum": { + "p50": 666.9120192527771, + "p90": 683.5199892520905, + "p95": 689.4400119781494, + "p99": 705.5999934673309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 472.8640019893646, + "p90": 487.7760112285614, + "p95": 493.6319887638092, + "p99": 504.800021648407 + }, + "combine": { + "p50": 619.1999912261963, + "p90": 626.0480284690857, + "p95": 627.232015132904, + "p99": 632.6720118522644 + }, + "roundtrip": { + "p50": 1054.2720556259155, + "p90": 1064.8640394210815, + "p95": 1074.7519731521606, + "p99": 1089.9840593338013 + }, + "isolatedSum": { + "p50": 1092.063993215561, + "p90": 1113.824039697647, + "p95": 1120.8640038967133, + "p99": 1137.4720335006714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 823.3280181884766, + "p90": 844.4799780845642, + "p95": 848.5440015792847, + "p99": 856.000006198883 + }, + "combine": { + "p50": 1141.8559551239014, + "p90": 1145.535945892334, + "p95": 1148.6719846725464, + "p99": 1154.2400121688843 + }, + "roundtrip": { + "p50": 1924.9600172042847, + 
"p90": 1951.8719911575317, + "p95": 2045.6318855285645, + "p99": 2085.279941558838 + }, + "isolatedSum": { + "p50": 1965.183973312378, + "p90": 1990.0159239768982, + "p95": 1997.215986251831, + "p99": 2010.2400183677673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1530.6880474090576, + "p90": 1541.375994682312, + "p95": 1545.2799797058105, + "p99": 1556.22398853302 + }, + "combine": { + "p50": 2161.344051361084, + "p90": 2168.12801361084, + "p95": 2169.696092605591, + "p99": 2177.3440837860107 + }, + "roundtrip": { + "p50": 3648.063898086548, + "p90": 3659.104108810425, + "p95": 3664.2239093780518, + "p99": 3693.4399604797363 + }, + "isolatedSum": { + "p50": 3692.0320987701416, + "p90": 3709.504008293152, + "p95": 3714.9760723114014, + "p99": 3733.5680723190308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5352d199", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b200_bb7a559c", + "comparisonKey": "8f5d21502bfc6643", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:54:11.733343+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.05600666999817, + "p90": 145.6959992647171, + "p95": 151.96800231933594, + "p99": 163.16799819469452 + }, + "combine": { + "p50": 92.47999638319016, + "p90": 106.11200332641602, + "p95": 111.07199639081955, + "p99": 128.7039965391159 + }, + "roundtrip": { + "p50": 191.96799397468567, + "p90": 215.58399498462677, + "p95": 223.1680005788803, + "p99": 240.48000574111938 + }, + "isolatedSum": { + "p50": 221.53600305318832, + "p90": 251.80800259113312, + "p95": 263.0399987101555, + "p99": 291.8719947338104 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 148.0640023946762, + "p90": 167.32800006866455, + "p95": 176.7680048942566, + "p99": 186.0480010509491 + }, + "combine": { + "p50": 141.50400459766388, + "p90": 154.36799824237823, + "p95": 160.41600704193115, + "p99": 169.0559983253479 + }, + "roundtrip": { + "p50": 259.64799523353577, + "p90": 280.35199642181396, + "p95": 287.6800000667572, + "p99": 306.87999725341797 + }, + "isolatedSum": { + "p50": 289.5680069923401, + "p90": 321.6959983110428, + "p95": 337.18401193618774, + "p99": 355.103999376297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 237.34399676322937, + "p90": 262.2080147266388, + "p95": 269.6639895439148, + "p99": 287.7120077610016 + }, + "combine": { + "p50": 287.7120077610016, + "p90": 294.68798637390137, + "p95": 298.8480031490326, + "p99": 307.48799443244934 + }, + "roundtrip": { + "p50": 502.8799772262573, + "p90": 515.2639746665955, + "p95": 522.0479965209961, + "p99": 534.4640016555786 + }, + "isolatedSum": { + "p50": 525.056004524231, + "p90": 556.8960011005402, + "p95": 568.5119926929474, + "p99": 595.2000021934509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-54e30f90", + "identity": 
"b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b200_c0c61e71", + "comparisonKey": "a81985d607bc9d69", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:59:54.629481+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 135.6479972600937, + "p90": 144.96000111103058, + "p95": 150.14399588108063, + "p99": 153.6960005760193 + }, + "combine": { + "p50": 108.2879975438118, + "p90": 117.60000139474869, + "p95": 121.60000205039978, + "p99": 128.12800705432892 + }, + "roundtrip": { + "p50": 213.72799575328827, + "p90": 239.3919974565506, + "p95": 247.96800315380096, + "p99": 260.70401072502136 + }, + "isolatedSum": { + "p50": 243.9359948039055, + "p90": 262.56000250577927, + "p95": 271.7439979314804, + "p99": 281.8240076303482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.71199357509613, + "p90": 169.0240055322647, + "p95": 175.04000663757324, + "p99": 187.391996383667 + }, + "combine": { + "p50": 129.5360028743744, + "p90": 136.3839954137802, + "p95": 139.16799426078796, + "p99": 144.3520039319992 + }, + "roundtrip": { + "p50": 249.82400238513947, + "p90": 255.42399287223816, + "p95": 259.2639923095703, + "p99": 263.0400061607361 + }, + "isolatedSum": { + "p50": 285.2479964494705, + "p90": 305.4080009460449, + "p95": 314.2080008983612, + "p99": 331.7440003156662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 186.39999628067017, + "p90": 196.73599302768707, + "p95": 199.42399859428406, + "p99": 211.74399554729462 + }, + "combine": { + "p50": 169.27999258041382, + "p90": 177.12000012397766, + "p95": 178.52799594402313, + "p99": 
182.20800161361694 + }, + "roundtrip": { + "p50": 324.6079981327057, + "p90": 333.0560028553009, + "p95": 341.5679931640625, + "p99": 351.6159951686859 + }, + "isolatedSum": { + "p50": 355.679988861084, + "p90": 373.85599315166473, + "p95": 377.9519945383072, + "p99": 393.95199716091156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 255.77598810195923, + "p90": 271.1679935455322, + "p95": 276.89599990844727, + "p99": 283.9680016040802 + }, + "combine": { + "p50": 288.09601068496704, + "p90": 295.1039969921112, + "p95": 298.68799448013306, + "p99": 305.31200766563416 + }, + "roundtrip": { + "p50": 505.69599866867065, + "p90": 532.3839783668518, + "p95": 542.5919890403748, + "p99": 610.368013381958 + }, + "isolatedSum": { + "p50": 543.8719987869263, + "p90": 566.2719905376434, + "p95": 575.5839943885803, + "p99": 589.2800092697144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 414.0160083770752, + "p90": 422.1760034561157, + "p95": 425.59999227523804, + "p99": 437.1519982814789 + }, + "combine": { + "p50": 489.4079864025116, + "p90": 494.1760003566742, + "p95": 497.0879852771759, + "p99": 500.9920001029968 + }, + "roundtrip": { + "p50": 859.1039776802063, + "p90": 866.4000034332275, + "p95": 872.54399061203, + "p99": 887.1999979019165 + }, + "isolatedSum": { + "p50": 903.4239947795868, + "p90": 916.3520038127899, + "p95": 922.6879775524139, + "p99": 938.1439983844757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + 
"combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 718.9120054244995, + "p90": 731.6480278968811, + "p95": 737.7600073814392, + "p99": 753.5359859466553 + }, + "combine": { + "p50": 861.8559837341309, + "p90": 873.8240003585815, + "p95": 876.800000667572, + "p99": 909.1200232505798 + }, + "roundtrip": { + "p50": 1538.7519598007202, + "p90": 1549.8559474945068, + "p95": 1554.3040037155151, + "p99": 1562.0160102844238 + }, + "isolatedSum": { + "p50": 1580.7679891586304, + "p90": 1605.4720282554626, + "p95": 1614.5600080490112, + "p99": 1662.656009197235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e207201", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b200_f6d2df4b", + "comparisonKey": "d2cffd83af69fec4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:57:42.398383+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 159.7760021686554, + "p90": 171.7119961977005, + "p95": 179.45599555969238, + "p99": 191.16799533367157 + }, + "combine": { + "p50": 151.296004652977, + "p90": 157.24800527095795, + "p95": 159.0079963207245, + "p99": 165.6319946050644 + }, + "roundtrip": { + "p50": 273.6639976501465, + "p90": 283.32799673080444, + "p95": 287.07200288772583, + "p99": 291.48799180984497 + }, + "isolatedSum": { + "p50": 311.0720068216324, + "p90": 328.96000146865845, + "p95": 338.46399188041687, + "p99": 356.79998993873596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
203.45599949359894, + "p90": 214.1759991645813, + "p95": 221.79199755191803, + "p99": 230.6240051984787 + }, + "combine": { + "p50": 210.30400693416595, + "p90": 216.92800521850586, + "p95": 220.57600319385529, + "p99": 226.43199563026428 + }, + "roundtrip": { + "p50": 374.2719888687134, + "p90": 382.7199935913086, + "p95": 385.18399000167847, + "p99": 391.32800698280334 + }, + "isolatedSum": { + "p50": 413.7600064277649, + "p90": 431.10400438308716, + "p95": 442.3680007457733, + "p99": 457.056000828743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 280.5759906768799, + "p90": 292.2239899635315, + "p95": 297.95199632644653, + "p99": 310.8159899711609 + }, + "combine": { + "p50": 356.6719889640808, + "p90": 365.9200072288513, + "p95": 368.0639863014221, + "p99": 379.2000114917755 + }, + "roundtrip": { + "p50": 591.7119979858398, + "p90": 607.4240207672119, + "p95": 615.776002407074, + "p99": 630.4000020027161 + }, + "isolatedSum": { + "p50": 637.2479796409607, + "p90": 658.1439971923828, + "p95": 666.0159826278687, + "p99": 690.0160014629364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 429.4080138206482, + "p90": 456.6720128059387, + "p95": 460.06399393081665, + "p99": 488.0639910697937 + }, + "combine": { + "p50": 600.6399989128113, + "p90": 608.4480285644531, + "p95": 612.5119924545288, + "p99": 620.1279759407043 + }, + "roundtrip": { + "p50": 995.9040284156799, + "p90": 1007.8400373458862, + "p95": 1017.3759460449219, + 
"p99": 1030.56001663208 + }, + "isolatedSum": { + "p50": 1030.0480127334595, + "p90": 1065.1200413703918, + "p95": 1072.5759863853455, + "p99": 1108.191967010498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 742.6559925079346, + "p90": 762.9119753837585, + "p95": 770.3999876976013, + "p99": 805.5040240287781 + }, + "combine": { + "p50": 1106.1760187149048, + "p90": 1115.71204662323, + "p95": 1117.2480583190918, + "p99": 1125.5359649658203 + }, + "roundtrip": { + "p50": 1808.9599609375, + "p90": 1821.727991104126, + "p95": 1827.6480436325073, + "p99": 1839.4559621810913 + }, + "isolatedSum": { + "p50": 1848.8320112228394, + "p90": 1878.6240220069885, + "p95": 1887.6480460166931, + "p99": 1931.0399889945984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1441.1200284957886, + "p90": 1460.319995880127, + "p95": 1466.048002243042, + "p99": 1478.0479669570923 + }, + "combine": { + "p50": 2093.5680866241455, + "p90": 2100.5120277404785, + "p95": 2103.519916534424, + "p99": 2111.327886581421 + }, + "roundtrip": { + "p50": 3488.4800910949707, + "p90": 3501.4400482177734, + "p95": 3506.079912185669, + "p99": 3527.9040336608887 + }, + "isolatedSum": { + "p50": 3534.688115119934, + "p90": 3560.8320236206055, + "p95": 3569.567918777466, + "p99": 3589.375853538513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + 
"recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-61613760", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b200_0ae30f33", + "comparisonKey": "c46d11b69467bcd9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:07:58.566117+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.23999726772308, + "p90": 172.60800302028656, + "p95": 182.65600502490997, + "p99": 190.49599766731262 + }, + "combine": { + "p50": 140.1599943637848, + "p90": 145.05599439144135, + "p95": 148.00000190734863, + "p99": 155.32800555229187 + }, + "roundtrip": { + "p50": 259.16799902915955, + "p90": 266.6879892349243, + "p95": 269.6959972381592, + "p99": 274.944007396698 + }, + "isolatedSum": { + "p50": 294.3999916315079, + "p90": 317.6639974117279, + "p95": 330.6560069322586, + "p99": 345.8240032196045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 189.18399512767792, + "p90": 209.75999534130096, + "p95": 214.9440050125122, + "p99": 226.84800624847412 + }, + "combine": { + "p50": 179.77599799633026, + "p90": 188.09600174427032, + "p95": 192.03199446201324, + "p99": 196.51199877262115 + }, + "roundtrip": { + "p50": 336.92800998687744, + "p90": 349.4719862937927, + "p95": 354.68798875808716, + "p99": 364.76799845695496 + }, + "isolatedSum": { + "p50": 368.9599931240082, + "p90": 397.8559970855713, + "p95": 406.97599947452545, + "p99": 423.3600050210953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
253.6959946155548, + "p90": 262.33598589897156, + "p95": 266.04801416397095, + "p99": 277.21598744392395 + }, + "combine": { + "p50": 293.98399591445923, + "p90": 302.592009305954, + "p95": 305.1519989967346, + "p99": 312.5759959220886 + }, + "roundtrip": { + "p50": 500.5760192871094, + "p90": 511.680006980896, + "p95": 516.2240266799927, + "p99": 540.224015712738 + }, + "isolatedSum": { + "p50": 547.679990530014, + "p90": 564.9279952049255, + "p95": 571.2000131607056, + "p99": 589.7919833660126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 376.73598527908325, + "p90": 386.9439959526062, + "p95": 390.24001359939575, + "p99": 396.86399698257446 + }, + "combine": { + "p50": 488.7999892234802, + "p90": 497.44001030921936, + "p95": 500.7359981536865, + "p99": 505.0560235977173 + }, + "roundtrip": { + "p50": 826.4319896697998, + "p90": 836.7360234260559, + "p95": 842.3359990119934, + "p99": 857.6639890670776 + }, + "isolatedSum": { + "p50": 865.5359745025635, + "p90": 884.3840062618256, + "p95": 890.9760117530823, + "p99": 901.9200205802917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 619.871973991394, + "p90": 631.6480040550232, + "p95": 637.2799873352051, + "p99": 647.4239826202393 + }, + "combine": { + "p50": 836.9600176811218, + "p90": 846.239984035492, + "p95": 848.0319976806641, + "p99": 856.6719889640808 + }, + "roundtrip": { + "p50": 1419.3919897079468, + "p90": 1437.2479915618896, + "p95": 1446.5279579162598, 
+ "p99": 1493.567943572998 + }, + "isolatedSum": { + "p50": 1456.8319916725159, + "p90": 1477.8879880905151, + "p95": 1485.3119850158691, + "p99": 1504.09597158432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1107.7439785003662, + "p90": 1114.9120330810547, + "p95": 1118.623971939087, + "p99": 1128.1280517578125 + }, + "combine": { + "p50": 1549.280047416687, + "p90": 1558.9760541915894, + "p95": 1562.5920295715332, + "p99": 1574.3680000305176 + }, + "roundtrip": { + "p50": 2617.8879737854004, + "p90": 2629.6958923339844, + "p95": 2633.120059967041, + "p99": 2643.00799369812 + }, + "isolatedSum": { + "p50": 2657.024025917053, + "p90": 2673.888087272644, + "p95": 2681.21600151062, + "p99": 2702.49605178833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-71db1865", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b200_6d51ecbb", + "comparisonKey": "dfea21b14732d33f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:58:50.722854+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, 
+ "label": "B200 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 147.93600142002106, + "p90": 173.5360026359558, + "p95": 181.0240000486374, + "p99": 194.14399564266205 + }, + "combine": { + "p50": 139.16799426078796, + "p90": 145.34400403499603, + "p95": 147.74399995803833, + "p99": 158.6879938840866 + }, + "roundtrip": { + "p50": 255.77598810195923, + "p90": 267.0080065727234, + "p95": 272.2879946231842, + "p99": 283.00800919532776 + }, + "isolatedSum": { + "p50": 287.103995680809, + "p90": 318.88000667095184, + "p95": 328.7680000066757, + "p99": 352.83198952674866 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 191.00800156593323, + "p90": 218.62399578094482, + "p95": 239.04000222682953, + "p99": 314.07999992370605 + }, + "combine": { + "p50": 186.27199530601501, + "p90": 200.57600736618042, + "p95": 208.064004778862, + "p99": 218.52800250053406 + }, + "roundtrip": { + "p50": 340.4799997806549, + "p90": 365.05600810050964, + "p95": 383.83999466896057, + "p99": 406.8480134010315 + }, + "isolatedSum": { + "p50": 377.27999687194824, + "p90": 419.20000314712524, + "p95": 447.1040070056915, + "p99": 532.6080024242401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 256.9600045681, + "p90": 280.95999360084534, + "p95": 284.8320007324219, + "p99": 294.97599601745605 + }, + "combine": { + "p50": 294.0160036087036, + "p90": 302.4640083312988, + "p95": 305.34398555755615, + "p99": 312.76801228523254 + }, + "roundtrip": { + "p50": 508.38398933410645, + "p90": 518.4000134468079, + "p95": 525.2159833908081, + "p99": 540.0959849357605 + }, + "isolatedSum": { + "p50": 550.9760081768036, + "p90": 583.4240019321442, + "p95": 590.175986289978, + "p99": 607.7440083026886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 376.3520121574402, + "p90": 391.0079896450043, + "p95": 
396.5440094470978, + "p99": 409.7599983215332 + }, + "combine": { + "p50": 486.1440062522888, + "p90": 491.4880096912384, + "p95": 496.0319995880127, + "p99": 503.80802154541016 + }, + "roundtrip": { + "p50": 826.7840147018433, + "p90": 837.8880023956299, + "p95": 847.5840091705322, + "p99": 868.9919710159302 + }, + "isolatedSum": { + "p50": 862.496018409729, + "p90": 882.4959993362427, + "p95": 892.5760090351105, + "p99": 913.5680198669434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 612.9599809646606, + "p90": 633.1200003623962, + "p95": 641.0560011863708, + "p99": 656.2240123748779 + }, + "combine": { + "p50": 833.6640000343323, + "p90": 840.2559757232666, + "p95": 844.4160223007202, + "p99": 854.1759848594666 + }, + "roundtrip": { + "p50": 1410.2400541305542, + "p90": 1428.9920330047607, + "p95": 1439.4880533218384, + "p99": 1469.5039987564087 + }, + "isolatedSum": { + "p50": 1446.623980998993, + "p90": 1473.3759760856628, + "p95": 1485.472023487091, + "p99": 1510.3999972343445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1111.7119789123535, + "p90": 1126.5599727630615, + "p95": 1131.1359405517578, + "p99": 1146.623969078064 + }, + "combine": { + "p50": 1547.8399991989136, + "p90": 1556.9920539855957, + "p95": 1560.1600408554077, + "p99": 1569.7599649429321 + }, + "roundtrip": { + "p50": 2618.43204498291, + "p90": 2629.6000480651855, + "p95": 2633.0559253692627, + "p99": 2648.319959640503 + }, + 
"isolatedSum": { + "p50": 2659.551978111267, + "p90": 2683.552026748657, + "p95": 2691.2959814071655, + "p99": 2716.383934020996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-851f7066", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_aad6d4e6", + "comparisonKey": "e06045d75e004c73", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:55:21.955869+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 155.96799552440643, + "p90": 174.04800653457642, + "p95": 180.03199994564056, + "p99": 186.3040030002594 + }, + "combine": { + "p50": 147.16799557209015, + "p90": 158.04800391197205, + "p95": 165.12000560760498, + "p99": 172.12800681591034 + }, + "roundtrip": { + "p50": 268.8319981098175, + "p90": 287.6479923725128, + "p95": 293.66400837898254, + "p99": 322.6560056209564 + }, + "isolatedSum": { + "p50": 303.1359910964966, + "p90": 332.09601044654846, + "p95": 345.15200555324554, + "p99": 358.43200981616974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 193.66399943828583, + "p90": 209.6640020608902, + "p95": 214.75200355052948, + "p99": 226.52800381183624 + }, + "combine": { + "p50": 202.39999890327454, + "p90": 211.71200275421143, + "p95": 218.04800629615784, + "p99": 230.68800568580627 + }, + "roundtrip": { + "p50": 358.5599958896637, + "p90": 375.8080005645752, + "p95": 382.6560080051422, + "p99": 396.2559998035431 + }, + "isolatedSum": { + "p50": 396.06399834156036, + "p90": 421.3760048151016, + "p95": 432.8000098466873, + "p99": 457.2160094976425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 
3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 259.64799523353577, + "p90": 278.2079875469208, + "p95": 283.61600637435913, + "p99": 291.83998703956604 + }, + "combine": { + "p50": 351.83998942375183, + "p90": 362.4640107154846, + "p95": 367.2960102558136, + "p99": 376.9280016422272 + }, + "roundtrip": { + "p50": 575.2320289611816, + "p90": 588.8320207595825, + "p95": 594.6879982948303, + "p99": 602.7519702911377 + }, + "isolatedSum": { + "p50": 611.4879846572876, + "p90": 640.6719982624054, + "p95": 650.9120166301727, + "p99": 668.7679886817932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 406.6559970378876, + "p90": 421.2160110473633, + "p95": 427.19998955726624, + "p99": 439.90400433540344 + }, + "combine": { + "p50": 612.5760078430176, + "p90": 623.6159801483154, + "p95": 627.776026725769, + "p99": 642.8160071372986 + }, + "roundtrip": { + "p50": 976.2880206108093, + "p90": 990.2719855308533, + "p95": 998.5920190811157, + "p99": 1015.5199766159058 + }, + "isolatedSum": { + "p50": 1019.2320048809052, + "p90": 1044.8319911956787, + "p95": 1054.9760162830353, + "p99": 1082.720011472702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 710.5600237846375, + "p90": 733.5039973258972, + "p95": 740.0000095367432, + "p99": 751.3599991798401 + }, + "combine": { + "p50": 
1085.4719877243042, + "p90": 1095.5840349197388, + "p95": 1101.472020149231, + "p99": 1114.8799657821655 + }, + "roundtrip": { + "p50": 1759.071946144104, + "p90": 1775.8079767227173, + "p95": 1783.7120294570923, + "p99": 1814.2399787902832 + }, + "isolatedSum": { + "p50": 1796.0320115089417, + "p90": 1829.088032245636, + "p95": 1841.4720296859741, + "p99": 1866.2399649620056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1373.695969581604, + "p90": 1388.0000114440918, + "p95": 1393.504023551941, + "p99": 1402.8799533843994 + }, + "combine": { + "p50": 2062.335968017578, + "p90": 2072.416067123413, + "p95": 2076.5440464019775, + "p99": 2088.5438919067383 + }, + "roundtrip": { + "p50": 3401.535987854004, + "p90": 3421.2799072265625, + "p95": 3429.824113845825, + "p99": 3478.816032409668 + }, + "isolatedSum": { + "p50": 3436.031937599182, + "p90": 3460.416078567505, + "p95": 3470.0480699539185, + "p99": 3491.4238452911377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aa70c934", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b200_7832f204", + "comparisonKey": "ec3ab317dbdb81d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:56:31.660081+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + 
"suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 151.7760008573532, + "p90": 164.73600268363953, + "p95": 173.12000691890717, + "p99": 190.62399864196777 + }, + "combine": { + "p50": 141.37600362300873, + "p90": 150.07999539375305, + "p95": 152.92799472808838, + "p99": 158.4320068359375 + }, + "roundtrip": { + "p50": 259.64799523353577, + "p90": 270.24000883102417, + "p95": 276.2559950351715, + "p99": 284.06399488449097 + }, 
+ "isolatedSum": { + "p50": 293.15200448036194, + "p90": 314.8159980773926, + "p95": 326.04800164699554, + "p99": 349.0560054779053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 182.78400599956512, + "p90": 192.00000166893005, + "p95": 195.10400295257568, + "p99": 200.3840059041977 + }, + "combine": { + "p50": 194.07999515533447, + "p90": 202.17600464820862, + "p95": 204.6400010585785, + "p99": 212.67199516296387 + }, + "roundtrip": { + "p50": 344.5119857788086, + "p90": 357.5359880924225, + "p95": 364.8639917373657, + "p99": 382.176011800766 + }, + "isolatedSum": { + "p50": 376.8640011548996, + "p90": 394.1760063171387, + "p95": 399.7440040111542, + "p99": 413.05600106716156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 243.23199689388275, + "p90": 261.7279887199402, + "p95": 270.27198672294617, + "p99": 285.8560085296631 + }, + "combine": { + "p50": 334.9120020866394, + "p90": 341.8239951133728, + "p95": 343.6160087585449, + "p99": 349.727988243103 + }, + "roundtrip": { + "p50": 544.1920161247253, + "p90": 557.7920079231262, + "p95": 568.0959820747375, + "p99": 581.9200277328491 + }, + "isolatedSum": { + "p50": 578.1439989805222, + "p90": 603.551983833313, + "p95": 613.8879954814911, + "p99": 635.5839967727661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.6319921016693, + "p90": 396.3199853897095, + "p95": 405.37598729133606, + "p99": 422.65599966049194 + }, + "combine": { + "p50": 612.671971321106, + "p90": 618.8160181045532, + "p95": 622.4319934844971, + "p99": 629.8879981040955 + }, + "roundtrip": { + "p50": 950.7840275764465, + "p90": 958.4959745407104, + "p95": 964.8320078849792, + "p99": 983.5839867591858 + }, + "isolatedSum": { + "p50": 990.3039634227753, + "p90": 1015.1360034942627, + "p95": 1027.8079807758331, + "p99": 1052.5439977645874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 649.728000164032, + "p90": 673.1520295143127, + "p95": 680.8639764785767, + "p99": 692.031979560852 + }, + "combine": { + "p50": 1118.399977684021, + "p90": 1127.135992050171, + "p95": 1129.9200057983398, + "p99": 1140.9599781036377 + }, + "roundtrip": { + "p50": 1689.6320581436157, + "p90": 1704.7359943389893, + "p95": 1711.583971977234, + "p99": 1722.8800058364868 + }, + "isolatedSum": { + "p50": 1768.127977848053, + "p90": 1800.2880215644836, + "p95": 1810.7839822769165, + "p99": 1832.9919576644897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1246.4640140533447, + "p90": 1261.8240118026733, + "p95": 1268.3520317077637, + "p99": 1282.5920581817627 + }, + "combine": { + "p50": 2077.4080753326416, + "p90": 2084.5119953155518, + "p95": 2087.4879360198975, + "p99": 
2094.719886779785 + }, + "roundtrip": { + "p50": 3241.312026977539, + "p90": 3254.9118995666504, + "p95": 3259.200096130371, + "p99": 3311.7759227752686 + }, + "isolatedSum": { + "p50": 3323.8720893859863, + "p90": 3346.336007118225, + "p95": 3355.839967727661, + "p99": 3377.311944961548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9d49a8df", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b200_81086a2a", + "comparisonKey": "7bfa830de8dfb96c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:06:49.884058+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.19200468063354, + "p90": 180.57599663734436, + "p95": 205.1520049571991, + "p99": 232.7679991722107 + }, + "combine": { + "p50": 142.20799505710602, + "p90": 158.87999534606934, + "p95": 166.143998503685, + "p99": 178.20799350738525 + }, + "roundtrip": { + "p50": 255.295991897583, + "p90": 271.90399169921875, + "p95": 277.21598744392395, + "p99": 283.90398621559143 + }, + "isolatedSum": { + "p50": 298.39999973773956, + "p90": 339.4559919834137, + "p95": 371.2960034608841, + "p99": 410.97599267959595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.5040043592453, + "p90": 198.59200716018677, + "p95": 206.08000457286835, + "p99": 219.10400688648224 + }, + "combine": { + "p50": 179.80800569057465, + "p90": 189.82400000095367, + "p95": 193.95199418067932, + "p99": 204.44799959659576 + }, + "roundtrip": { + "p50": 334.3360126018524, + "p90": 358.14398527145386, + "p95": 365.05600810050964, + "p99": 389.2480134963989 + }, + "isolatedSum": { + "p50": 
365.31201004981995, + "p90": 388.41600716114044, + "p95": 400.03199875354767, + "p99": 423.552006483078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 250.71999430656433, + "p90": 265.1520073413849, + "p95": 272.44800329208374, + "p99": 282.0799946784973 + }, + "combine": { + "p50": 298.20799827575684, + "p90": 305.4080009460449, + "p95": 308.351993560791, + "p99": 318.7839984893799 + }, + "roundtrip": { + "p50": 503.35997343063354, + "p90": 518.6560153961182, + "p95": 526.0480046272278, + "p99": 546.4000105857849 + }, + "isolatedSum": { + "p50": 548.9279925823212, + "p90": 570.5600082874298, + "p95": 580.7999968528748, + "p99": 600.8639931678772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 378.9759874343872, + "p90": 394.78400349617004, + "p95": 400.03201365470886, + "p99": 410.17600893974304 + }, + "combine": { + "p50": 489.6000027656555, + "p90": 500, + "p95": 502.27200984954834, + "p99": 510.591983795166 + }, + "roundtrip": { + "p50": 838.5279774665833, + "p90": 858.3040237426758, + "p95": 865.3119802474976, + "p99": 883.4879994392395 + }, + "isolatedSum": { + "p50": 868.5759902000427, + "p90": 894.78400349617, + "p95": 902.3040235042572, + "p99": 920.7679927349091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 627.3919939994812, + "p90": 642.4959897994995, + "p95": 650.816023349762, + "p99": 660.2879762649536 + }, + "combine": { + "p50": 862.6239895820618, + "p90": 872.4480271339417, + "p95": 874.4959831237793, + "p99": 881.3120126724243 + }, + "roundtrip": { + "p50": 1452.6079893112183, + "p90": 1472.6719856262207, + "p95": 1479.9679517745972, + "p99": 1516.800045967102 + }, + "isolatedSum": { + "p50": 1490.015983581543, + "p90": 1514.9440169334412, + "p95": 1525.3120064735413, + "p99": 1541.599988937378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1136.064052581787, + "p90": 1150.0799655914307, + "p95": 1155.6799411773682, + "p99": 1163.8400554656982 + }, + "combine": { + "p50": 1608.9919805526733, + "p90": 1618.783950805664, + "p95": 1621.4079856872559, + "p99": 1630.9759616851807 + }, + "roundtrip": { + "p50": 2704.5440673828125, + "p90": 2722.2719192504883, + "p95": 2729.4719219207764, + "p99": 2767.3919200897217 + }, + "isolatedSum": { + "p50": 2745.0560331344604, + "p90": 2768.8639163970947, + "p95": 2777.087926864624, + "p99": 2794.816017150879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-61fbc6ac", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b200_d9a24cef", + "comparisonKey": "5343c640bf4d8c97", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:02:44.338670+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 159.64800119400024, + "p90": 184.60799753665924, + "p95": 188.7039989233017, + "p99": 199.13600385189056 + }, + "combine": { + "p50": 150.2400040626526, + "p90": 
157.02399611473083, + "p95": 159.36000645160675, + "p99": 167.4560010433197 + }, + "roundtrip": { + "p50": 274.4640111923218, + "p90": 285.6000065803528, + "p95": 288.83200883865356, + "p99": 300.57600140571594 + }, + "isolatedSum": { + "p50": 309.88800525665283, + "p90": 341.6319936513901, + "p95": 348.06400537490845, + "p99": 366.59200489521027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 196.19199633598328, + "p90": 203.36000621318817, + "p95": 206.40000700950623, + "p99": 211.0079973936081 + }, + "combine": { + "p50": 204.22400534152985, + "p90": 213.47199380397797, + "p95": 216.06400609016418, + "p99": 220.92799842357635 + }, + "roundtrip": { + "p50": 365.9839928150177, + "p90": 374.39998984336853, + "p95": 377.344012260437, + "p99": 383.61600041389465 + }, + "isolatedSum": { + "p50": 400.4160016775131, + "p90": 416.83200001716614, + "p95": 422.4640130996704, + "p99": 431.93599581718445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 272.3200023174286, + "p90": 291.9999957084656, + "p95": 299.1679906845093, + "p99": 312.4479949474335 + }, + "combine": { + "p50": 354.94399070739746, + "p90": 362.39999532699585, + "p95": 364.8639917373657, + "p99": 368.2880103588104 + }, + "roundtrip": { + "p50": 580.2559852600098, + "p90": 590.4639959335327, + "p95": 594.7200059890747, + "p99": 604.4480204582214 + }, + "isolatedSum": { + "p50": 627.263993024826, + "p90": 654.3999910354614, + "p95": 664.031982421875, + "p99": 
680.7360053062439 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 415.008008480072, + "p90": 424.0640103816986, + "p95": 427.839994430542, + "p99": 433.79199504852295 + }, + "combine": { + "p50": 591.5520191192627, + "p90": 601.3759970664978, + "p95": 603.6480069160461, + "p99": 610.2399826049805 + }, + "roundtrip": { + "p50": 981.6319942474365, + "p90": 997.7920055389404, + "p95": 1003.9360523223877, + "p99": 1044.2880392074585 + }, + "isolatedSum": { + "p50": 1006.5600275993347, + "p90": 1025.4400074481964, + "p95": 1031.4880013465881, + "p99": 1044.0319776535034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 717.9840207099915, + "p90": 730.1120162010193, + "p95": 736.5440130233765, + "p99": 751.4240145683289 + }, + "combine": { + "p50": 1069.5680379867554, + "p90": 1079.0079832077026, + "p95": 1081.3759565353394, + "p99": 1085.3439569473267 + }, + "roundtrip": { + "p50": 1750.2399682998657, + "p90": 1762.1439695358276, + "p95": 1767.7119970321655, + "p99": 1779.8399925231934 + }, + "isolatedSum": { + "p50": 1787.5520586967468, + "p90": 1809.119999408722, + "p95": 1817.9199695587158, + "p99": 1836.7679715156555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + 
"dispatch": { + "p50": 1403.7760496139526, + "p90": 1420.9280014038086, + "p95": 1427.456021308899, + "p99": 1440.8960342407227 + }, + "combine": { + "p50": 2031.872034072876, + "p90": 2038.9759540557861, + "p95": 2041.3439273834229, + "p99": 2050.4961013793945 + }, + "roundtrip": { + "p50": 3400.9599685668945, + "p90": 3415.584087371826, + "p95": 3418.6880588531494, + "p99": 3427.135944366455 + }, + "isolatedSum": { + "p50": 3435.6480836868286, + "p90": 3459.9039554595947, + "p95": 3468.799948692322, + "p99": 3491.392135620117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a7a4b5d4", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b200_cc9dc50f", + "comparisonKey": "a18efd78533c3026", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:03:22.297232+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 160.67199409008026, + "p90": 179.23200130462646, + "p95": 186.75200641155243, + "p99": 203.48800718784332 + }, + "combine": { + "p50": 140.03199338912964, + "p90": 149.02399480342865, + "p95": 158.59200060367584, + "p99": 169.76000368595123 + }, + "roundtrip": { + "p50": 260.6079876422882, + "p90": 278.49599719047546, + "p95": 287.1679961681366, + "p99": 298.20799827575684 + }, + "isolatedSum": { + "p50": 300.7039874792099, + "p90": 328.2559961080551, + "p95": 345.34400701522827, + "p99": 373.24801087379456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.5359971523285, + "p90": 199.3280053138733, + "p95": 208.5759937763214, + "p99": 217.56799519062042 + }, + "combine": { + "p50": 183.71200561523438, + "p90": 216.89599752426147, + 
"p95": 221.11999988555908, + "p99": 231.32799565792084 + }, + "roundtrip": { + "p50": 338.23999762535095, + "p90": 412.0959937572479, + "p95": 423.007994890213, + "p99": 437.855988740921 + }, + "isolatedSum": { + "p50": 369.24800276756287, + "p90": 416.22400283813477, + "p95": 429.6959936618805, + "p99": 448.89599084854126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 255.0399899482727, + "p90": 276.8639922142029, + "p95": 283.2320034503937, + "p99": 308.03200602531433 + }, + "combine": { + "p50": 299.4239926338196, + "p90": 306.87999725341797, + "p95": 311.13600730895996, + "p99": 317.27999448776245 + }, + "roundtrip": { + "p50": 505.7920217514038, + "p90": 527.072012424469, + "p95": 531.1040282249451, + "p99": 544.2879796028137 + }, + "isolatedSum": { + "p50": 554.4639825820923, + "p90": 583.7439894676208, + "p95": 594.3680107593536, + "p99": 625.3120005130768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.82400846481323, + "p90": 399.04001355171204, + "p95": 404.12798523902893, + "p99": 420.03199458122253 + }, + "combine": { + "p50": 486.8159890174866, + "p90": 493.4079945087433, + "p95": 497.3439872264862, + "p99": 502.81602144241333 + }, + "roundtrip": { + "p50": 826.2400031089783, + "p90": 839.6160006523132, + "p95": 847.3280072212219, + "p99": 855.8400273323059 + }, + "isolatedSum": { + "p50": 864.6399974822998, + "p90": 892.4480080604553, + "p95": 901.4719724655151, + "p99": 922.8480160236359 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 622.5600242614746, + "p90": 642.9439783096313, + "p95": 653.6960005760193, + "p99": 762.175977230072 + }, + "combine": { + "p50": 846.1120128631592, + "p90": 858.0480217933655, + "p95": 863.3599877357483, + "p99": 907.1999788284302 + }, + "roundtrip": { + "p50": 1426.0159730911255, + "p90": 1452.288031578064, + "p95": 1461.567997932434, + "p99": 1553.0240535736084 + }, + "isolatedSum": { + "p50": 1468.6720371246338, + "p90": 1500.9920001029968, + "p95": 1517.0559883117676, + "p99": 1669.3759560585022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1119.1999912261963, + "p90": 1132.863998413086, + "p95": 1139.296054840088, + "p99": 1203.8719654083252 + }, + "combine": { + "p50": 1558.9120388031006, + "p90": 1566.6240453720093, + "p95": 1571.3599920272827, + "p99": 1590.656042098999 + }, + "roundtrip": { + "p50": 2631.5200328826904, + "p90": 2644.0000534057617, + "p95": 2649.3759155273438, + "p99": 2658.911943435669 + }, + "isolatedSum": { + "p50": 2678.112030029297, + "p90": 2699.488043785095, + "p95": 2710.6560468673706, + "p99": 2794.528007507324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1635c1e6", + "identity": 
"b200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b200_dcc1ac6c", + "comparisonKey": "226f44b06081542d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:05:03.082150+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.3200056552887, + "p90": 172.0000058412552, + "p95": 177.63200402259827, + "p99": 185.12000143527985 + }, + "combine": { + "p50": 143.74400675296783, + "p90": 156.22399747371674, + "p95": 160.67199409008026, + "p99": 170.9440052509308 + }, + "roundtrip": { + "p50": 268.8319981098175, + "p90": 287.3919904232025, + "p95": 293.3120131492615, + "p99": 315.90399146080017 + }, + "isolatedSum": { + "p50": 300.06401240825653, + "p90": 328.2240033149719, + "p95": 338.3039981126785, + "p99": 356.06400668621063 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 192.7040070295334, + "p90": 204.8639953136444, + "p95": 211.2639993429184, + "p99": 221.40799462795258 + }, + "combine": { + "p50": 202.30400562286377, + "p90": 209.24800634384155, + "p95": 213.919997215271, + "p99": 223.93600642681122 + }, + "roundtrip": { + "p50": 357.2799861431122, + "p90": 374.11201000213623, + "p95": 379.93600964546204, + "p99": 392.12799072265625 + }, + "isolatedSum": { + "p50": 395.00801265239716, + "p90": 414.11200165748596, + "p95": 425.1839965581894, + "p99": 445.3440010547638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 260.672003030777, + "p90": 276.8639922142029, + "p95": 281.8560004234314, + "p99": 289.6000146865845 + }, + "combine": { + "p50": 352.03200578689575, + "p90": 362.68800497055054, + "p95": 
367.64800548553467, + "p99": 380.0640106201172 + }, + "roundtrip": { + "p50": 577.5039792060852, + "p90": 601.6960144042969, + "p95": 609.9200248718262, + "p99": 625.4720091819763 + }, + "isolatedSum": { + "p50": 612.7040088176727, + "p90": 639.5519971847534, + "p95": 649.5040059089661, + "p99": 669.6640253067017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 407.9039990901947, + "p90": 421.63199186325073, + "p95": 425.6640076637268, + "p99": 437.3120069503784 + }, + "combine": { + "p50": 611.9040250778198, + "p90": 621.0560202598572, + "p95": 625.7280111312866, + "p99": 636.2559795379639 + }, + "roundtrip": { + "p50": 976.0000109672546, + "p90": 992.5439953804016, + "p95": 996.6400265693665, + "p99": 1008.0640316009521 + }, + "isolatedSum": { + "p50": 1019.8080241680145, + "p90": 1042.688012123108, + "p95": 1051.3920187950134, + "p99": 1073.5679864883423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 713.3439779281616, + "p90": 729.3760180473328, + "p95": 733.7599992752075, + "p99": 747.1039891242981 + }, + "combine": { + "p50": 1090.175986289978, + "p90": 1099.8079776763916, + "p95": 1105.4719686508179, + "p99": 1116.8320178985596 + }, + "roundtrip": { + "p50": 1760.640025138855, + "p90": 1782.7199697494507, + "p95": 1792.799949645996, + "p99": 1820.3519582748413 + }, + "isolatedSum": { + "p50": 1803.5199642181396, + "p90": 1829.1839957237244, + "p95": 1839.2319679260254, + "p99": 1863.9360070228577 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1377.7600526809692, + "p90": 1394.7839736938477, + "p95": 1400.7999897003174, + "p99": 1413.1200313568115 + }, + "combine": { + "p50": 2062.3679161071777, + "p90": 2072.3519325256348, + "p95": 2079.5199871063232, + "p99": 2092.8640365600586 + }, + "roundtrip": { + "p50": 3401.4720916748047, + "p90": 3420.448064804077, + "p95": 3426.208019256592, + "p99": 3438.33589553833 + }, + "isolatedSum": { + "p50": 3440.127968788147, + "p90": 3467.1359062194824, + "p95": 3480.3199768066406, + "p99": 3505.98406791687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c3a47ca4", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_d68ea102", + "comparisonKey": "f88dee80d1118d7d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:05:41.486442+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + 
"routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.67200088500977, + "p90": 177.91999876499176, + "p95": 185.44000387191772, + "p99": 203.96800339221954 + }, + "combine": { + "p50": 139.26400244235992, + "p90": 147.64800667762756, + "p95": 155.16799688339233, + "p99": 166.27199947834015 + }, + "roundtrip": { + "p50": 258.11201333999634, + "p90": 272.7999985218048, + "p95": 278.4639894962311, + "p99": 287.9360020160675 + }, + "isolatedSum": { + "p50": 295.9360033273697, + "p90": 325.5680054426193, + "p95": 340.60800075531006, + "p99": 370.2400028705597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 1, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 188.73600661754608, + "p90": 211.90400421619415, + "p95": 215.7440036535263, + "p99": 230.56000471115112 + }, + "combine": { + "p50": 181.92000687122345, + "p90": 194.33599710464478, + "p95": 197.50399887561798, + "p99": 207.0080041885376 + }, + "roundtrip": { + "p50": 335.61599254608154, + "p90": 348.83201122283936, + "p95": 354.8800051212311, + "p99": 363.93600702285767 + }, + "isolatedSum": { + "p50": 370.65601348876953, + "p90": 406.2400013208389, + "p95": 413.2480025291443, + "p99": 437.5680088996887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 256.1280131340027, + "p90": 280.0000011920929, + "p95": 285.6000065803528, + "p99": 302.8480112552643 + }, + "combine": { + "p50": 298.43199253082275, + "p90": 306.0159981250763, + "p95": 311.74400448799133, + "p99": 325.82399249076843 + }, + "roundtrip": { + "p50": 506.75201416015625, + "p90": 522.4320292472839, + "p95": 528.8959741592407, + "p99": 543.6800122261047 + }, + "isolatedSum": { + "p50": 554.5600056648254, + "p90": 586.0159993171692, + "p95": 597.3440110683441, + "p99": 628.6720037460327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.6639997959137, + "p90": 390.5920088291168, + "p95": 396.7039883136749, + "p99": 409.9520146846771 + }, + "combine": { + "p50": 485.50400137901306, + "p90": 504.38398122787476, + "p95": 513.9840245246887, + 
"p99": 542.1440005302429 + }, + "roundtrip": { + "p50": 821.5039968490601, + "p90": 835.6480002403259, + "p95": 842.2080278396606, + "p99": 851.9359827041626 + }, + "isolatedSum": { + "p50": 863.1680011749268, + "p90": 894.9759900569916, + "p95": 910.6880128383636, + "p99": 952.09601521492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 624.8639822006226, + "p90": 639.2959952354431, + "p95": 646.4319825172424, + "p99": 665.7599806785583 + }, + "combine": { + "p50": 852.895975112915, + "p90": 866.3039803504944, + "p95": 873.6960291862488, + "p99": 887.2960209846497 + }, + "roundtrip": { + "p50": 1437.567949295044, + "p90": 1451.6799449920654, + "p95": 1458.847999572754, + "p99": 1481.9200038909912 + }, + "isolatedSum": { + "p50": 1477.7599573135376, + "p90": 1505.5999755859375, + "p95": 1520.1280117034912, + "p99": 1553.056001663208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1115.8080101013184, + "p90": 1127.8400421142578, + "p95": 1131.9999694824219, + "p99": 1139.9999856948853 + }, + "combine": { + "p50": 1561.087965965271, + "p90": 1570.688009262085, + "p95": 1572.4159479141235, + "p99": 1579.4880390167236 + }, + "roundtrip": { + "p50": 2644.767999649048, + "p90": 2654.560089111328, + "p95": 2658.30397605896, + "p99": 2667.840003967285 + }, + "isolatedSum": { + "p50": 2676.8959760665894, + "p90": 2698.528051376343, + "p95": 2704.4159173965454, + "p99": 2719.488024711609 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-459017fa", + "identity": "b200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b200_9a321028", + "comparisonKey": "59e26e3da3b9ea8c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:01:03.481665+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 150.2400040626526, + "p90": 168.70400309562683, + "p95": 173.21600019931793, + "p99": 200.06400346755981 + }, + "combine": { + "p50": 140.00000059604645, + "p90": 148.80000054836273, + "p95": 152.76800096035004, + "p99": 160.89600324630737 + }, + "roundtrip": { + "p50": 256.51198625564575, + "p90": 265.24800062179565, + "p95": 269.0240144729614, + "p99": 278.2079875469208 + }, + "isolatedSum": { + "p50": 290.24000465869904, + "p90": 317.50400364398956, + "p95": 325.98400115966797, + "p99": 360.9600067138672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 189.95200097560883, + "p90": 219.7439968585968, + "p95": 226.27200186252594, + "p99": 261.85598969459534 + }, + "combine": { + "p50": 179.58399653434753, + "p90": 196.1279958486557, + "p95": 198.4959989786148, + "p99": 206.1759978532791 + }, + "roundtrip": { + "p50": 336.41600608825684, + "p90": 353.2480001449585, + "p95": 360.9600067138672, + "p99": 371.071994304657 + }, + "isolatedSum": { + "p50": 369.53599750995636, + "p90": 415.8719927072525, + "p95": 424.76800084114075, + "p99": 468.03198754787445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 252.9279887676239, + "p90": 275.4879891872406, + "p95": 282.27201104164124, + "p99": 302.68800258636475 + }, + "combine": { + "p50": 298.6240088939667, + "p90": 307.93601274490356, + "p95": 316.51198863983154, + "p99": 325.28001070022583 + }, + "roundtrip": { + "p50": 509.37598943710327, + "p90": 529.0240049362183, + "p95": 534.0480208396912, + "p99": 552.3840188980103 + }, + "isolatedSum": { + "p50": 551.5519976615906, + "p90": 583.4240019321442, + "p95": 598.7839996814728, + "p99": 627.9680132865906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.07201051712036, + "p90": 399.7440040111542, + "p95": 404.57600355148315, + "p99": 422.2719967365265 + }, + "combine": { + "p50": 480.0640046596527, + "p90": 492.000013589859, + "p95": 493.9199984073639, + "p99": 502.1439790725708 + }, + "roundtrip": { + "p50": 818.7519907951355, + "p90": 832.4159979820251, + "p95": 840.1280045509338, + "p99": 850.5600094795227 + }, + "isolatedSum": { + "p50": 859.1360151767731, + "p90": 891.7440176010132, + "p95": 898.496001958847, + "p99": 924.4159758090973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 623.0080127716064, + "p90": 633.5039734840393, + "p95": 637.7599835395813, + "p99": 648.2239961624146 + }, + "combine": { + "p50": 848.9919900894165, + "p90": 859.4880104064941, + "p95": 863.2320165634155, + "p99": 869.5679903030396 + }, + "roundtrip": { + 
"p50": 1434.8479509353638, + "p90": 1450.592041015625, + "p95": 1457.919955253601, + "p99": 1469.0239429473877 + }, + "isolatedSum": { + "p50": 1472.000002861023, + "p90": 1492.9919838905334, + "p95": 1500.9920001029968, + "p99": 1517.791986465454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1118.0800199508667, + "p90": 1129.472017288208, + "p95": 1134.81605052948, + "p99": 1142.9439783096313 + }, + "combine": { + "p50": 1561.3759756088257, + "p90": 1573.4080076217651, + "p95": 1577.023983001709, + "p99": 1588.8960361480713 + }, + "roundtrip": { + "p50": 2641.37601852417, + "p90": 2654.9439430236816, + "p95": 2660.223960876465, + "p99": 2669.1200733184814 + }, + "isolatedSum": { + "p50": 2679.4559955596924, + "p90": 2702.880024909973, + "p95": 2711.840033531189, + "p99": 2731.8400144577026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c5ee7c42", + "identity": "b200|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_aefbb2bb", + "comparisonKey": "9aa00139d8119d59", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:37:58.949190+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + 
"topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 136.80000603199005, + "p90": 157.82399475574493, + "p95": 162.23999857902527, + "p99": 179.77599799633026 + }, + "combine": { + "p50": 139.5840048789978, + "p90": 147.90399372577667, + "p95": 157.18400478363037, + "p99": 169.15200650691986 + }, + "roundtrip": { + "p50": 234.592005610466, + "p90": 241.95200204849243, + "p95": 249.1839975118637, + "p99": 261.6640031337738 + }, + "isolatedSum": { + "p50": 276.38401091098785, + "p90": 305.7279884815216, + "p95": 319.42400336265564, + "p99": 348.9280045032501 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 165.3120070695877, + "p90": 182.65600502490997, + "p95": 188.4160041809082, + "p99": 198.43199849128723 + }, + "combine": { + "p50": 180.86400628089905, + "p90": 194.07999515533447, + "p95": 197.1520036458969, + "p99": 206.2080055475235 + }, + "roundtrip": { + "p50": 314.4319951534271, + "p90": 327.2959887981415, + "p95": 330.0800025463104, + "p99": 337.50399947166443 + }, + "isolatedSum": { + "p50": 346.17601335048676, + "p90": 376.73600018024445, + "p95": 385.5680078268051, + "p99": 404.64000403881073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 234.27200317382812, + "p90": 243.68000030517578, + "p95": 248.89600276947021, + "p99": 259.20000672340393 + }, + "combine": { + "p50": 299.23200607299805, + "p90": 309.9839985370636, + "p95": 319.5199966430664, + "p99": 332.09601044654846 + }, + "roundtrip": { + "p50": 480.3839921951294, + "p90": 493.696004152298, + "p95": 496.8000054359436, + "p99": 505.15198707580566 + }, + "isolatedSum": { + "p50": 533.5040092468262, + "p90": 553.6639988422394, + "p95": 568.4159994125366, + "p99": 591.2960171699524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 359.6160113811493, 
+ "p90": 373.56799840927124, + "p95": 378.04800271987915, + "p99": 386.9760036468506 + }, + "combine": { + "p50": 484.47999358177185, + "p90": 491.39198660850525, + "p95": 494.2399859428406, + "p99": 499.6800124645233 + }, + "roundtrip": { + "p50": 807.3599934577942, + "p90": 821.5360045433044, + "p95": 827.7760148048401, + "p99": 842.6240086555481 + }, + "isolatedSum": { + "p50": 844.0960049629211, + "p90": 864.9599850177765, + "p95": 872.2879886627197, + "p99": 886.6560161113739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 602.7200222015381, + "p90": 614.1759753227234, + "p95": 618.6559796333313, + "p99": 631.55198097229 + }, + "combine": { + "p50": 847.2639918327332, + "p90": 856.8639755249023, + "p95": 858.8799834251404, + "p99": 867.8399920463562 + }, + "roundtrip": { + "p50": 1408.511996269226, + "p90": 1420.192003250122, + "p95": 1424.5760440826416, + "p99": 1434.559941291809 + }, + "isolatedSum": { + "p50": 1449.9840140342712, + "p90": 1471.0399508476257, + "p95": 1477.5359630584717, + "p99": 1499.3919730186462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1088.1600379943848, + "p90": 1100.4799604415894, + "p95": 1108.5439920425415, + "p99": 1116.1919832229614 + }, + "combine": { + "p50": 1559.2639446258545, + "p90": 1567.7440166473389, + "p95": 1571.4240074157715, + "p99": 1575.3599405288696 + }, + "roundtrip": { + "p50": 2607.3598861694336, + "p90": 2624.9918937683105, + "p95": 2632.960081100464, + 
"p99": 2651.3280868530273 + }, + "isolatedSum": { + "p50": 2647.4239826202393, + "p90": 2668.223977088928, + "p95": 2679.967999458313, + "p99": 2691.551923751831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ae9c997e", + "identity": "b200|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b200_ef6002c6", + "comparisonKey": "e30be189e1e5f825", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:46:15.351922+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 253.31199169158936, + "p90": 287.9360020160675, + "p95": 294.5919930934906, + "p99": 318.1760013103485 + }, + "combine": { + "p50": 111.64800077676773, + "p90": 123.74400347471237, + "p95": 131.80799782276154, + "p99": 149.3760049343109 + }, + "roundtrip": { + "p50": 349.11999106407166, + "p90": 387.00801134109497, + "p95": 398.75200390815735, + "p99": 456.64000511169434 + }, + "isolatedSum": { + "p50": 364.9599924683571, + "p90": 411.6800054907799, + "p95": 426.39999091625214, + "p99": 467.5520062446594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 293.1840121746063, + "p90": 327.87200808525085, + "p95": 339.6480083465576, + "p99": 374.1439878940582 + }, + "combine": { + "p50": 147.77599275112152, + "p90": 162.27200627326965, + "p95": 169.50400173664093, + "p99": 174.01599884033203 + }, + "roundtrip": { + "p50": 418.14398765563965, + "p90": 444.2560076713562, + "p95": 448.7679898738861, + "p99": 463.6479914188385 + }, + "isolatedSum": { + "p50": 440.96000492572784, + "p90": 490.1440143585205, + "p95": 509.15201008319855, + "p99": 548.1599867343903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + 
"recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 375.61601400375366, + "p90": 402.0160138607025, + "p95": 407.71201252937317, + "p99": 417.279988527298 + }, + "combine": { + "p50": 213.24799954891205, + "p90": 223.23200106620789, + "p95": 227.58400440216064, + "p99": 237.08799481391907 + }, + "roundtrip": { + "p50": 561.7920160293579, + "p90": 585.6000185012817, + "p95": 599.7440218925476, + "p99": 636.7359757423401 + }, + "isolatedSum": { + "p50": 588.8640135526657, + "p90": 625.2480149269104, + "p95": 635.2960169315338, + "p99": 654.367983341217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 562.2079968452454, + "p90": 586.3999724388123, + "p95": 600.0639796257019, + "p99": 636.1280083656311 + }, + "combine": { + "p50": 419.0079867839813, + "p90": 430.01601099967957, + "p95": 433.6000084877014, + "p99": 445.43999433517456 + }, + "roundtrip": { + "p50": 956.4480185508728, + "p90": 988.2240295410156, + "p95": 999.6479749679565, + "p99": 1016.9919729232788 + }, + "isolatedSum": { + "p50": 981.2159836292267, + "p90": 1016.4159834384918, + "p95": 1033.6639881134033, + "p99": 1081.5680027008057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 915.7440066337585, + "p90": 934.6879720687866, + "p95": 943.5840249061584, + "p99": 963.9679789543152 + }, + "combine": { + "p50": 772.9600071907043, + 
"p90": 781.1520099639893, + "p95": 784.2239737510681, + "p99": 793.503999710083 + }, + "roundtrip": { + "p50": 1658.944010734558, + "p90": 1744.8960542678833, + "p95": 1774.7199535369873, + "p99": 1818.336009979248 + }, + "isolatedSum": { + "p50": 1688.704013824463, + "p90": 1715.8399820327759, + "p95": 1727.8079986572266, + "p99": 1757.4719786643982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1710.1119756698608, + "p90": 1731.9680452346802, + "p95": 1740.0319576263428, + "p99": 1766.752004623413 + }, + "combine": { + "p50": 1464.2239809036255, + "p90": 1477.6959419250488, + "p95": 1483.199954032898, + "p99": 1494.9760437011719 + }, + "roundtrip": { + "p50": 3126.3039112091064, + "p90": 3142.2719955444336, + "p95": 3149.247884750366, + "p99": 3175.6160259246826 + }, + "isolatedSum": { + "p50": 3174.3359565734863, + "p90": 3209.663987159729, + "p95": 3223.2319116592407, + "p99": 3261.728048324585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d8d0d772", + "identity": "b200|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b200_ef6002c6", + "comparisonKey": "00d6cd82747e4f10", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:48:34.021021+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 259.8080039024353, + "p90": 294.94398832321167, + "p95": 305.02399802207947, + "p99": 322.1440017223358 + }, + "combine": { + "p50": 119.29599940776825, + "p90": 129.4720023870468, + "p95": 135.77599823474884, + "p99": 143.8719928264618 + }, + "roundtrip": { + "p50": 357.56799578666687, + "p90": 403.4239947795868, + "p95": 432.0319890975952, + "p99": 511.680006980896 + }, + "isolatedSum": { + "p50": 379.10400331020355, + "p90": 
424.4159907102585, + "p95": 440.7999962568283, + "p99": 466.0159945487976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 304.4160008430481, + "p90": 341.5679931640625, + "p95": 351.83998942375183, + "p99": 370.4639971256256 + }, + "combine": { + "p50": 156.8319946527481, + "p90": 166.07999801635742, + "p95": 172.83199727535248, + "p99": 181.98400735855103 + }, + "roundtrip": { + "p50": 442.75200366973877, + "p90": 476.7040014266968, + "p95": 487.13600635528564, + "p99": 523.3920216560364 + }, + "isolatedSum": { + "p50": 461.2479954957962, + "p90": 507.6479911804199, + "p95": 524.6719866991043, + "p99": 552.4480044841766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 404.83200550079346, + "p90": 440.70398807525635, + "p95": 452.7040123939514, + "p99": 496.5440034866333 + }, + "combine": { + "p50": 231.1680018901825, + "p90": 244.47999894618988, + "p95": 248.83200228214264, + "p99": 254.30399179458618 + }, + "roundtrip": { + "p50": 633.3119869232178, + "p90": 703.4239768981934, + "p95": 732.1919798851013, + "p99": 1099.2000102996826 + }, + "isolatedSum": { + "p50": 636.000007390976, + "p90": 685.1839870214462, + "p95": 701.536014676094, + "p99": 750.8479952812195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
1024, + "globalTokens": 8192, + "dispatch": { + "p50": 609.2479825019836, + "p90": 650.0160098075867, + "p95": 677.40797996521, + "p99": 702.6879787445068 + }, + "combine": { + "p50": 456.7039906978607, + "p90": 463.5840058326721, + "p95": 466.3360118865967, + "p99": 471.3279902935028 + }, + "roundtrip": { + "p50": 1024.57594871521, + "p90": 1049.4400262832642, + "p95": 1072.4159479141235, + "p99": 1099.9360084533691 + }, + "isolatedSum": { + "p50": 1065.9519731998444, + "p90": 1113.6000156402588, + "p95": 1143.7439918518066, + "p99": 1174.0159690380096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1017.5360441207886, + "p90": 1047.1680164337158, + "p95": 1061.8239641189575, + "p99": 1093.34397315979 + }, + "combine": { + "p50": 803.6800026893616, + "p90": 812.7999901771545, + "p95": 816.9919848442078, + "p99": 826.1439800262451 + }, + "roundtrip": { + "p50": 1782.2400331497192, + "p90": 1799.3600368499756, + "p95": 1808.9280128479004, + "p99": 1830.847978591919 + }, + "isolatedSum": { + "p50": 1821.2160468101501, + "p90": 1859.9680066108704, + "p95": 1878.8159489631653, + "p99": 1919.4879531860352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1914.3040180206299, + "p90": 1926.1759519577026, + "p95": 1931.7760467529297, + "p99": 1945.0880289077759 + }, + "combine": { + "p50": 1505.247950553894, + "p90": 1514.7839784622192, + "p95": 1518.7519788742065, + "p99": 1529.5679569244385 + }, + "roundtrip": { + "p50": 
3379.456043243408, + "p90": 3395.359992980957, + "p95": 3399.1360664367676, + "p99": 3408.479928970337 + }, + "isolatedSum": { + "p50": 3419.551968574524, + "p90": 3440.959930419922, + "p95": 3450.5280256271362, + "p99": 3474.6559858322144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a04d392", + "identity": "b200|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_ef6002c6", + "comparisonKey": "ec78d9e3945aea2f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:50:54.995879+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": 
"64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 267.61600375175476, + "p90": 308.19201469421387, + "p95": 314.7520124912262, + "p99": 347.84001111984253 + }, + "combine": { + "p50": 125.02400577068329, + "p90": 131.74399733543396, + "p95": 137.95199990272522, + "p99": 144.9279934167862 + }, + "roundtrip": { + "p50": 370.2720105648041, + "p90": 406.39999508857727, + "p95": 414.11200165748596, + "p99": 424.73599314689636 + }, + "isolatedSum": { + "p50": 392.64000952243805, + "p90": 439.9360120296478, + "p95": 452.7040123939514, + "p99": 492.7680045366287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 318.08000802993774, + "p90": 360.22400856018066, + "p95": 369.7600066661835, + "p99": 413.4080111980438 + }, + "combine": { + "p50": 164.86400365829468, + "p90": 176.06399953365326, + "p95": 181.05599284172058, + "p99": 201.50400698184967 + }, + "roundtrip": { + "p50": 472.896009683609, + "p90": 498.4639883041382, + "p95": 507.26401805877686, + "p99": 521.5680003166199 + }, + "isolatedSum": { + "p50": 482.9440116882324, + "p90": 536.2880080938339, + "p95": 550.815999507904, + "p99": 614.9120181798935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 440.7680034637451, + "p90": 470.91200947761536, + "p95": 485.6640100479126, + "p99": 515.0399804115295 + }, + "combine": { + "p50": 265.0560140609741, + "p90": 271.93599939346313, + "p95": 275.7120132446289, + "p99": 282.8480005264282 + }, + "roundtrip": { + "p50": 697.7599859237671, + "p90": 725.2159714698792, + "p95": 739.4239902496338, + "p99": 772.8639841079712 + }, + "isolatedSum": { + "p50": 705.8240175247192, + "p90": 742.8480088710785, + "p95": 761.3760232925415, + "p99": 797.8879809379578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 671.392023563385, + "p90": 706.9759964942932, + "p95": 734.5600128173828, + "p99": 777.567982673645 + }, + "combine": { + "p50": 465.9839868545532, + "p90": 478.94400358200073, + "p95": 487.13600635528564, + "p99": 499.9360144138336 + }, + "roundtrip": { + "p50": 1106.3679456710815, + "p90": 1135.807991027832, + "p95": 1151.6480445861816, + "p99": 1173.375964164734 + }, + "isolatedSum": { + "p50": 1137.3760104179382, + "p90": 1185.920000076294, + "p95": 1221.6960191726685, + "p99": 1277.5039970874786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1153.3440351486206, + "p90": 1170.7520484924316, + "p95": 1176.416039466858, + 
"p99": 1206.976056098938 + }, + "combine": { + "p50": 824.4479894638062, + "p90": 832.863986492157, + "p95": 836.031973361969, + "p99": 847.104012966156 + }, + "roundtrip": { + "p50": 1945.0880289077759, + "p90": 2039.2639636993408, + "p95": 2086.4639282226562, + "p99": 2114.0799522399902 + }, + "isolatedSum": { + "p50": 1977.7920246124268, + "p90": 2003.6160349845886, + "p95": 2012.448012828827, + "p99": 2054.080069065094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2176.8319606781006, + "p90": 2191.551923751831, + "p95": 2199.455976486206, + "p99": 2238.464117050171 + }, + "combine": { + "p50": 1524.7679948806763, + "p90": 1535.7760190963745, + "p95": 1541.0239696502686, + "p99": 1559.2319965362549 + }, + "roundtrip": { + "p50": 3672.2240447998047, + "p90": 3686.4960193634033, + "p95": 3692.0320987701416, + "p99": 3706.2079906463623 + }, + "isolatedSum": { + "p50": 3701.599955558777, + "p90": 3727.3279428482056, + "p95": 3740.4799461364746, + "p99": 3797.696113586426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7734a1c9", + "identity": "b200|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_3dc89f1a", + "comparisonKey": "2f7771a826e97fee", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:40:17.252965+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 141.40799641609192, + "p90": 163.13600540161133, + "p95": 169.24799978733063, + "p99": 181.18399381637573 + }, + "combine": { + "p50": 131.6159963607788, + "p90": 141.4400041103363, + "p95": 151.36000514030457, + "p99": 159.743994474411 + }, + "roundtrip": { + "p50": 282.943993806839, + "p90": 307.42400884628296, + "p95": 314.84800577163696, + "p99": 
328.38401198387146 + }, + "isolatedSum": { + "p50": 273.0239927768707, + "p90": 304.57600951194763, + "p95": 320.6080049276352, + "p99": 340.92798829078674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 175.26400089263916, + "p90": 193.82399320602417, + "p95": 200.06400346755981, + "p99": 210.11200547218323 + }, + "combine": { + "p50": 176.09600722789764, + "p90": 188.6720061302185, + "p95": 196.79999351501465, + "p99": 205.6960016489029 + }, + "roundtrip": { + "p50": 389.7919952869415, + "p90": 411.6800129413605, + "p95": 416.03198647499084, + "p99": 437.47198581695557 + }, + "isolatedSum": { + "p50": 351.3600081205368, + "p90": 382.4959993362427, + "p95": 396.86399698257446, + "p99": 415.8080071210861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.51999843120575, + "p90": 254.2400062084198, + "p95": 260.5760097503662, + "p99": 267.96799898147583 + }, + "combine": { + "p50": 291.0720109939575, + "p90": 301.15199089050293, + "p95": 305.63199520111084, + "p99": 316.6719973087311 + }, + "roundtrip": { + "p50": 624.671995639801, + "p90": 639.4559741020203, + "p95": 644.1599726676941, + "p99": 651.5840291976929 + }, + "isolatedSum": { + "p50": 530.5920094251633, + "p90": 555.3919970989227, + "p95": 566.208004951477, + "p99": 584.6399962902069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 356.7039966583252, + "p90": 374.2719888687134, + "p95": 379.7439932823181, + "p99": 389.75998759269714 + }, + "combine": { + "p50": 481.56800866127014, + "p90": 492.0639991760254, + "p95": 494.62398886680603, + "p99": 500.7680058479309 + }, + "roundtrip": { + "p50": 1055.456042289734, + "p90": 1069.6959495544434, + "p95": 1077.6959657669067, + "p99": 1089.5999670028687 + }, + "isolatedSum": { + "p50": 838.2720053195953, + "p90": 866.3359880447388, + "p95": 874.3679821491241, + "p99": 890.527993440628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 592.1919941902161, + "p90": 609.6640229225159, + "p95": 616.7359948158264, + "p99": 630.7839751243591 + }, + "combine": { + "p50": 842.9759740829468, + "p90": 852.1599769592285, + "p95": 855.2640080451965, + "p99": 860.0320219993591 + }, + "roundtrip": { + "p50": 1896.1600065231323, + "p90": 1913.7920141220093, + "p95": 1920.6080436706543, + "p99": 1930.624008178711 + }, + "isolatedSum": { + "p50": 1435.1679682731628, + "p90": 1461.8239998817444, + "p95": 1472.000002861023, + "p99": 1490.8159971237183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1063.6800527572632, + "p90": 1076.6079425811768, + "p95": 1081.503987312317, + "p99": 1088.096022605896 + }, + "combine": { + "p50": 1553.4720420837402, + "p90": 1563.2319450378418, + "p95": 
1566.6879415512085, + "p99": 1575.1359462738037 + }, + "roundtrip": { + "p50": 3566.8160915374756, + "p90": 3582.9761028289795, + "p95": 3589.440107345581, + "p99": 3605.3121089935303 + }, + "isolatedSum": { + "p50": 2617.1520948410034, + "p90": 2639.8398876190186, + "p95": 2648.1919288635254, + "p99": 2663.2319688796997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-70d48515", + "identity": "b200|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_ef6002c6", + "comparisonKey": "e526a4d21d2b1619", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:42:40.906747+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 271.7759907245636, + "p90": 313.56799602508545, + "p95": 323.4879970550537, + "p99": 367.5200045108795 + }, + "combine": { + "p50": 133.2480013370514, + "p90": 145.11999487876892, + "p95": 152.54400670528412, + "p99": 166.30400717258453 + }, + "roundtrip": { + "p50": 380.7680010795593, + "p90": 414.7520065307617, + "p95": 420.22401094436646, + "p99": 443.4880018234253 + }, + "isolatedSum": { + "p50": 405.023992061615, + "p90": 458.68799090385437, + "p95": 476.03200376033783, + "p99": 533.824011683464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 324.9279856681824, + "p90": 363.20000886917114, + "p95": 373.63201379776, + "p99": 408.9919924736023 + }, + "combine": { + "p50": 175.26400089263916, + "p90": 182.65600502490997, + "p95": 187.29600310325623, + "p99": 198.43199849128723 + }, + "roundtrip": { + "p50": 484.41600799560547, + "p90": 512.7679705619812, + "p95": 528.6080241203308, + "p99": 564.8639798164368 + }, + "isolatedSum": { + "p50": 500.19198656082153, + "p90": 545.8560138940811, + "p95": 
560.9280169010162, + "p99": 607.4239909648895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 457.18398690223694, + "p90": 487.96799778938293, + "p95": 495.61598896980286, + "p99": 532.7039957046509 + }, + "combine": { + "p50": 288.5439991950989, + "p90": 297.2480058670044, + "p95": 301.1839985847473, + "p99": 311.7760121822357 + }, + "roundtrip": { + "p50": 727.6800274848938, + "p90": 744.9280023574829, + "p95": 750.7519721984863, + "p99": 763.9359831809998 + }, + "isolatedSum": { + "p50": 745.7279860973358, + "p90": 785.2160036563873, + "p95": 796.7999875545502, + "p99": 844.4800078868866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 703.1999826431274, + "p90": 729.2479872703552, + "p95": 737.7600073814392, + "p99": 766.3999795913696 + }, + "combine": { + "p50": 480.89599609375, + "p90": 490.04799127578735, + "p95": 492.70400404930115, + "p99": 499.07198548316956 + }, + "roundtrip": { + "p50": 1153.056025505066, + "p90": 1170.9760427474976, + "p95": 1184.0319633483887, + "p99": 1210.2400064468384 + }, + "isolatedSum": { + "p50": 1184.0959787368774, + "p90": 1219.2959785461426, + "p95": 1230.4640114307404, + "p99": 1265.4719650745392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
16384, + "dispatch": { + "p50": 1252.0960569381714, + "p90": 1271.3919878005981, + "p95": 1285.4399681091309, + "p99": 1342.1440124511719 + }, + "combine": { + "p50": 843.999981880188, + "p90": 853.5360097885132, + "p95": 856.5120100975037, + "p99": 864.6399974822998 + }, + "roundtrip": { + "p50": 2062.016010284424, + "p90": 2075.8399963378906, + "p95": 2081.5041065216064, + "p99": 2096.6079235076904 + }, + "isolatedSum": { + "p50": 2096.0960388183594, + "p90": 2124.9279975891113, + "p95": 2141.9519782066345, + "p99": 2206.7840099334717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2362.3039722442627, + "p90": 2380.9919357299805, + "p95": 2386.9760036468506, + "p99": 2404.6719074249268 + }, + "combine": { + "p50": 1563.264012336731, + "p90": 1575.4239559173584, + "p95": 1583.008050918579, + "p99": 1594.1439867019653 + }, + "roundtrip": { + "p50": 3895.2319622039795, + "p90": 3916.383981704712, + "p95": 3923.680067062378, + "p99": 3939.7759437561035 + }, + "isolatedSum": { + "p50": 3925.5679845809937, + "p90": 3956.415891647339, + "p95": 3969.9840545654297, + "p99": 3998.815894126892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7c44d446", + "identity": "b200|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b200_ef6002c6", + "comparisonKey": "4c5802f8327e0681", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:43:55.344938+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 271.9680070877075, + "p90": 307.13599920272827, + "p95": 318.84801387786865, + "p99": 335.55200695991516 + }, + "combine": { + "p50": 132.83200562000275, + "p90": 144.54400539398193, + "p95": 151.39199793338776, + "p99": 168.57600212097168 + }, + 
"roundtrip": { + "p50": 379.5199990272522, + "p90": 410.4959964752197, + "p95": 418.14398765563965, + "p99": 440.7680034637451 + }, + "isolatedSum": { + "p50": 404.80001270771027, + "p90": 451.6800045967102, + "p95": 470.2400118112564, + "p99": 504.12800908088684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 325.9199857711792, + "p90": 369.28001046180725, + "p95": 382.207989692688, + "p99": 413.7279987335205 + }, + "combine": { + "p50": 175.90400576591492, + "p90": 182.3360025882721, + "p95": 187.391996383667, + "p99": 199.64799284934998 + }, + "roundtrip": { + "p50": 483.7760031223297, + "p90": 500.70399045944214, + "p95": 507.58397579193115, + "p99": 517.408013343811 + }, + "isolatedSum": { + "p50": 501.8239915370941, + "p90": 551.6160130500793, + "p95": 569.599986076355, + "p99": 613.3759915828705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 461.7280066013336, + "p90": 506.5600275993347, + "p95": 521.888017654419, + "p99": 586.9439840316772 + }, + "combine": { + "p50": 290.49599170684814, + "p90": 299.8720109462738, + "p95": 304.4799864292145, + "p99": 315.13598561286926 + }, + "roundtrip": { + "p50": 721.1520075798035, + "p90": 740.4800057411194, + "p95": 748.0000257492065, + "p99": 760.703980922699 + }, + "isolatedSum": { + "p50": 752.2239983081818, + "p90": 806.4320385456085, + "p95": 826.3680040836334, + "p99": 902.0799696445465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + 
"combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 697.7599859237671, + "p90": 716.2879705429077, + "p95": 739.1359806060791, + "p99": 760.1280212402344 + }, + "combine": { + "p50": 482.04800486564636, + "p90": 490.5279874801636, + "p95": 493.151992559433, + "p99": 498.01599979400635 + }, + "roundtrip": { + "p50": 1153.216004371643, + "p90": 1180.9279918670654, + "p95": 1198.1439590454102, + "p99": 1230.0159931182861 + }, + "isolatedSum": { + "p50": 1179.8079907894135, + "p90": 1206.8159580230713, + "p95": 1232.287973165512, + "p99": 1258.1440210342407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1241.0880327224731, + "p90": 1255.4880380630493, + "p95": 1261.1839771270752, + "p99": 1273.8560438156128 + }, + "combine": { + "p50": 834.6880078315735, + "p90": 842.3680067062378, + "p95": 845.7599878311157, + "p99": 851.9359827041626 + }, + "roundtrip": { + "p50": 2042.4320697784424, + "p90": 2056.2241077423096, + "p95": 2062.432050704956, + "p99": 2075.1678943634033 + }, + "isolatedSum": { + "p50": 2075.7760405540466, + "p90": 2097.856044769287, + "p95": 2106.943964958191, + "p99": 2125.7920265197754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2348.83189201355, + "p90": 2360.2240085601807, + "p95": 
2364.9919033050537, + "p99": 2373.215913772583 + }, + "combine": { + "p50": 1546.9759702682495, + "p90": 1556.9599866867065, + "p95": 1560.1919889450073, + "p99": 1572.9279518127441 + }, + "roundtrip": { + "p50": 3865.760087966919, + "p90": 3878.943920135498, + "p95": 3883.4879398345947, + "p99": 3892.47989654541 + }, + "isolatedSum": { + "p50": 3895.8078622817993, + "p90": 3917.183995246887, + "p95": 3925.183892250061, + "p99": 3946.143865585327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dd8c06f1", + "identity": "b200|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b200_9ccbd052", + "comparisonKey": "4fd3fbadf32eb664", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:41:27.085734+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b200-dgxc_00", + "sku": "b200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B200 EP8 · uccl · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577796059", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577796059", + "createdAt": "2026-07-02T08:53:50Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.94399738311768, + "p90": 132.47999548912048, + "p95": 141.85599982738495, + "p99": 154.7199934720993 + }, + "combine": { + "p50": 132.192000746727, + "p90": 138.94400000572205, + "p95": 147.23199605941772, + "p99": 157.8879952430725 + }, + "roundtrip": { + "p50": 263.45598697662354, + "p90": 300.25601387023926, + "p95": 303.99999022483826, + "p99": 341.40801429748535 + }, + "isolatedSum": { + "p50": 255.13599812984467, + "p90": 271.42399549484253, + "p95": 289.0879958868027, + "p99": 312.6079887151718 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 156.44800662994385, + "p90": 175.29599368572235, + "p95": 179.9039989709854, + "p99": 192.54399836063385 + }, + "combine": { + "p50": 175.74399709701538, + "p90": 188.76799941062927, + "p95": 193.82399320602417, + "p99": 209.24800634384155 + }, + "roundtrip": { + "p50": 370.7840144634247, + "p90": 390.1120126247406, + 
"p95": 396.2559998035431, + "p99": 411.42401099205017 + }, + "isolatedSum": { + "p50": 332.19200372695923, + "p90": 364.0639930963516, + "p95": 373.7279921770096, + "p99": 401.7920047044754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 218.1120067834854, + "p90": 227.80799865722656, + "p95": 234.72000658512115, + "p99": 242.23999679088593 + }, + "combine": { + "p50": 290.97598791122437, + "p90": 296.86400294303894, + "p95": 300.6719946861267, + "p99": 307.8719973564148 + }, + "roundtrip": { + "p50": 604.960024356842, + "p90": 616.1919832229614, + "p95": 623.3919858932495, + "p99": 629.9840211868286 + }, + "isolatedSum": { + "p50": 509.0879946947098, + "p90": 524.6720016002655, + "p95": 535.3920012712479, + "p99": 550.1119941473007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 336.5119993686676, + "p90": 349.98399019241333, + "p95": 354.68798875808716, + "p99": 362.8480136394501 + }, + "combine": { + "p50": 482.1760058403015, + "p90": 493.24798583984375, + "p95": 495.743989944458, + "p99": 501.9199848175049 + }, + "roundtrip": { + "p50": 1034.5920324325562, + "p90": 1046.4320182800293, + "p95": 1053.1840324401855, + "p99": 1066.2399530410767 + }, + "isolatedSum": { + "p50": 818.6880052089691, + "p90": 843.2319760322571, + "p95": 850.4319787025452, + "p99": 864.767998456955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + 
"recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.8479881286621, + "p90": 586.8800282478333, + "p95": 594.7520136833191, + "p99": 604.960024356842 + }, + "combine": { + "p50": 845.1840281486511, + "p90": 854.6239733695984, + "p95": 858.3999872207642, + "p99": 864.8639917373657 + }, + "roundtrip": { + "p50": 1877.7600526809692, + "p90": 1893.5999870300293, + "p95": 1900.6400108337402, + "p99": 1962.048053741455 + }, + "isolatedSum": { + "p50": 1416.0320162773132, + "p90": 1441.5040016174316, + "p95": 1453.1520009040833, + "p99": 1469.8240160942078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1032.960057258606, + "p90": 1046.7519760131836, + "p95": 1051.1360168457031, + "p99": 1063.3599758148193 + }, + "combine": { + "p50": 1555.9359788894653, + "p90": 1566.4960145950317, + "p95": 1568.5440301895142, + "p99": 1577.6959657669067 + }, + "roundtrip": { + "p50": 3536.639928817749, + "p90": 3549.66402053833, + "p95": 3554.527997970581, + "p99": 3573.280096054077 + }, + "isolatedSum": { + "p50": 2588.8960361480713, + "p90": 2613.2479906082153, + "p95": 2619.6800470352173, + "p99": 2641.055941581726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e8fb3c8", + "identity": "b300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_4328f415", + 
"comparisonKey": "3ec49f5595796fb0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:12.098481+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.671999394893646, + "p90": 58.04799869656563, + "p95": 60.35200133919716, + "p99": 
72.09599763154984 + }, + "combine": { + "p50": 55.52000179886818, + "p90": 57.21599981188774, + "p95": 58.6559996008873, + "p99": 64.92800265550613 + }, + "roundtrip": { + "p50": 94.2080020904541, + "p90": 97.24800288677216, + "p95": 99.67999905347824, + "p99": 118.01599711179733 + }, + "isolatedSum": { + "p50": 112.19200119376183, + "p90": 115.26399850845337, + "p95": 119.00800094008446, + "p99": 137.02400028705597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.63999915122986, + "p90": 58.111999183893204, + "p95": 60.5119988322258, + "p99": 72.35199958086014 + }, + "combine": { + "p50": 55.39200082421303, + "p90": 57.023998349905014, + "p95": 58.04799869656563, + "p99": 66.17599725723267 + }, + "roundtrip": { + "p50": 95.96800059080124, + "p90": 102.62399911880493, + "p95": 103.67999970912933, + "p99": 112.0000034570694 + }, + "isolatedSum": { + "p50": 112.03199997544289, + "p90": 115.13599753379822, + "p95": 118.55999752879143, + "p99": 138.5279968380928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.17599967122078, + "p90": 67.84000247716904, + "p95": 71.26399874687195, + "p99": 74.72000271081924 + }, + "combine": { + "p50": 56.671999394893646, + "p90": 65.31199812889099, + "p95": 66.30399823188782, + "p99": 70.88000327348709 + }, + "roundtrip": { + "p50": 112.06399649381638, + "p90": 114.62400108575821, + "p95": 115.61600118875504, + "p99": 126.08000636100769 + }, + "isolatedSum": { + "p50": 114.84799906611443, + "p90": 133.15200060606003, + "p95": 
137.56799697875977, + "p99": 145.60000598430634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.111999183893204, + "p90": 59.93599817156792, + "p95": 60.80000102519989, + "p99": 65.76000154018402 + }, + "combine": { + "p50": 65.72800129652023, + "p90": 66.49599969387054, + "p95": 66.72000139951706, + "p99": 78.5600021481514 + }, + "roundtrip": { + "p50": 106.39999806880951, + "p90": 111.16799712181091, + "p95": 113.02399635314941, + "p99": 118.07999759912491 + }, + "isolatedSum": { + "p50": 123.84000048041344, + "p90": 126.43199786543846, + "p95": 127.52000242471695, + "p99": 144.32000368833542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.4559982419014, + "p90": 62.33600154519081, + "p95": 65.43999910354614, + "p99": 80.83199709653854 + }, + "combine": { + "p50": 65.8240020275116, + "p90": 66.65600091218948, + "p95": 66.880002617836, + "p99": 70.3359991312027 + }, + "roundtrip": { + "p50": 107.00800269842148, + "p90": 109.76000130176544, + "p95": 112.09599673748016, + "p99": 120.99199742078781 + }, + "isolatedSum": { + "p50": 125.280000269413, + "p90": 128.9920024573803, + "p95": 132.32000172138214, + "p99": 151.16799622774124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 
73.05599749088287, + "p90": 76.1599987745285, + "p95": 77.2159993648529, + "p99": 94.46399658918381 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 67.19999760389328, + "p95": 67.77600198984146, + "p99": 71.19999825954437 + }, + "roundtrip": { + "p50": 109.21599715948105, + "p90": 117.0559972524643, + "p95": 123.19999933242798, + "p99": 144.06399428844452 + }, + "isolatedSum": { + "p50": 139.3279954791069, + "p90": 143.35999637842178, + "p95": 144.99200135469437, + "p99": 165.66399484872818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 70.36799937486649, + "p90": 72.9919970035553, + "p95": 76.48000121116638, + "p99": 118.52800101041794 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 79.8719972372055, + "p95": 80.60800284147263, + "p99": 85.31200140714645 + }, + "roundtrip": { + "p50": 131.77600502967834, + "p90": 140.3840035200119, + "p95": 146.94400131702423, + "p99": 159.5200002193451 + }, + "isolatedSum": { + "p50": 149.05600249767303, + "p90": 152.8639942407608, + "p95": 157.088004052639, + "p99": 203.8400024175644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.49600231647491, + "p90": 87.5839963555336, + "p95": 90.52799642086029, + "p99": 101.56799852848053 + }, + "combine": { + "p50": 92.38400310277939, + "p90": 96.57599776983261, + "p95": 102.14400291442871, + "p99": 104.86400127410889 + }, + "roundtrip": { + "p50": 165.3439998626709, + "p90": 173.63199591636658, + "p95": 175.20000040531158, + "p99": 
180.38399517536163 + }, + "isolatedSum": { + "p50": 174.8800054192543, + "p90": 184.1599941253662, + "p95": 192.671999335289, + "p99": 206.43199980258942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2903ab0d", + "identity": "b300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b300_4328f415", + "comparisonKey": "417ddd603bb642e0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:59.407226+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.18399956822395, + "p90": 60.416001826524734, + "p95": 63.680000603199005, + "p99": 96.76799923181534 + }, + "combine": { + "p50": 56.51199817657471, + "p90": 65.11999666690826, + "p95": 65.8240020275116, + "p99": 81.40800148248672 + }, + "roundtrip": { + "p50": 96.38399630784988, + "p90": 103.10400277376175, + "p95": 105.95200210809708, + "p99": 134.5600038766861 + }, + "isolatedSum": { + "p50": 113.69599774479866, + "p90": 125.535998493433, + "p95": 129.5040026307106, + "p99": 178.17600071430206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.151999324560165, + "p90": 58.94400179386139, + "p95": 61.3120011985302, + "p99": 71.80800288915634 + }, + "combine": { + "p50": 56.86400085687637, + "p90": 66.17599725723267, + "p95": 66.3359984755516, + "p99": 71.19999825954437 + }, + "roundtrip": { + "p50": 108.41599851846695, + "p90": 112.8000020980835, + "p95": 113.98400366306305, + "p99": 152.16000378131866 + }, + "isolatedSum": { + "p50": 114.01600018143654, + "p90": 125.11999905109406, + "p95": 127.6479996740818, + "p99": 143.0080011487007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.42400172352791, + "p90": 84.54400300979614, + "p95": 86.75199747085571, + "p99": 143.13599467277527 + }, + "combine": { + "p50": 66.81600213050842, + "p90": 77.504001557827, + "p95": 78.015998005867, + "p99": 79.32800054550171 + }, + "roundtrip": { + "p50": 108.35199803113937, + "p90": 125.31200051307678, + "p95": 127.00800597667694, + "p99": 144.70399916172028 + }, + "isolatedSum": { + "p50": 126.24000385403633, + "p90": 162.04800456762314, + "p95": 164.76799547672272, + "p99": 222.46399521827698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.52799862623215, + "p90": 60.95999851822853, + "p95": 62.752000987529755, + "p99": 86.11200004816055 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 67.29599833488464, + "p95": 68.51200014352798, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 106.97600245475769, + "p90": 109.6000000834465, + "p95": 111.16799712181091, + "p99": 143.0719941854477 + }, + "isolatedSum": { + "p50": 124.76799637079239, + "p90": 128.25599685311317, + "p95": 131.26400113105774, + "p99": 176.38400197029114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.19200012087822, + "p90": 62.6240000128746, + "p95": 66.49599969387054, + "p99": 91.45600348711014 + }, + "combine": { + "p50": 66.30399823188782, + "p90": 67.55200028419495, + "p95": 68.12799721956253, + "p99": 
76.83199644088745 + }, + "roundtrip": { + "p50": 108.8000014424324, + "p90": 113.3119985461235, + "p95": 115.93600362539291, + "p99": 129.15199995040894 + }, + "isolatedSum": { + "p50": 126.49599835276604, + "p90": 130.17600029706955, + "p95": 134.62399691343307, + "p99": 168.2879999279976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.80800354480743, + "p90": 77.85599678754807, + "p95": 79.00799810886383, + "p99": 96.38399630784988 + }, + "combine": { + "p50": 67.9360032081604, + "p90": 77.15199887752533, + "p95": 77.63200253248215, + "p99": 78.52800190448761 + }, + "roundtrip": { + "p50": 123.45600128173828, + "p90": 126.97599828243256, + "p95": 128.57599556446075, + "p99": 148.3519971370697 + }, + "isolatedSum": { + "p50": 143.74400675296783, + "p90": 155.0079956650734, + "p95": 156.64000064134598, + "p99": 174.9119982123375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 73.27999919652939, + "p90": 93.56799721717834, + "p95": 95.36000341176987, + "p99": 100.92800110578537 + }, + "combine": { + "p50": 81.31200075149536, + "p90": 91.10400080680847, + "p95": 92.41600334644318, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 149.08799529075623, + "p90": 157.79200196266174, + "p95": 160.64000129699707, + "p99": 180.41600286960602 + }, + "isolatedSum": { + "p50": 154.59199994802475, + "p90": 184.67199802398682, + "p95": 187.77600675821304, + "p99": 202.81600207090378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.96000003814697, + "p90": 103.84000092744827, + "p95": 110.30399799346924, + "p99": 121.21599912643433 + }, + "combine": { + "p50": 103.16800326108932, + "p90": 104.96000200510025, + "p95": 106.11200332641602, + "p99": 115.32799899578094 + }, + "roundtrip": { + "p50": 176.41599476337433, + "p90": 180.1919937133789, + "p95": 185.5040043592453, + "p99": 204.48000729084015 + }, + "isolatedSum": { + "p50": 196.1280032992363, + "p90": 208.80000293254852, + "p95": 216.41600131988525, + "p99": 236.54399812221527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4fa5e840", + "identity": "b300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_4328f415", + "comparisonKey": "a2aff6899d2f4b00", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:50.155269+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.86400085687637, + "p90": 58.97599831223488, + "p95": 60.80000102519989, + "p99": 72.57600128650665 + }, + "combine": { + "p50": 66.0799965262413, + "p90": 66.75200164318085, + "p95": 67.07199662923813, + "p99": 69.92000341415405 + }, + "roundtrip": { + "p50": 106.6880002617836, + "p90": 113.24799805879593, + "p95": 114.30399864912033, + "p99": 125.15200674533844 + }, + "isolatedSum": { + "p50": 122.94399738311768, + "p90": 125.72799995541573, + "p95": 127.87199765443802, + "p99": 142.4960047006607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.671999394893646, + "p90": 58.52799862623215, + "p95": 
60.864001512527466, + "p99": 73.60000163316727 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 67.16799736022949, + "p95": 67.87200272083282, + "p99": 89.88799899816513 + }, + "roundtrip": { + "p50": 106.20799660682678, + "p90": 113.56800049543381, + "p95": 114.17599767446518, + "p99": 119.52000111341476 + }, + "isolatedSum": { + "p50": 122.91199713945389, + "p90": 125.69599598646164, + "p95": 128.7360042333603, + "p99": 163.4880006313324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.10399928689003, + "p90": 61.055999249219894, + "p95": 62.68800050020218, + "p99": 79.6160027384758 + }, + "combine": { + "p50": 67.55200028419495, + "p90": 69.37599927186966, + "p95": 76.76800340414047, + "p99": 79.6160027384758 + }, + "roundtrip": { + "p50": 109.50399935245514, + "p90": 115.84000289440155, + "p95": 118.33599954843521, + "p99": 132.22399353981018 + }, + "isolatedSum": { + "p50": 126.65599957108498, + "p90": 130.43199852108955, + "p95": 139.45600390434265, + "p99": 159.2320054769516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.13599953055382, + "p90": 61.11999973654747, + "p95": 62.3680017888546, + "p99": 72.35199958086014 + }, + "combine": { + "p50": 67.45599955320358, + "p90": 69.18399780988693, + "p95": 70.56000083684921, + "p99": 79.32800054550171 + }, + "roundtrip": { + "p50": 115.167997777462, + "p90": 119.00799721479416, + "p95": 121.5360015630722, + "p99": 125.76000392436981 + }, + "isolatedSum": { + "p50": 126.5919990837574, + "p90": 
130.3039975464344, + "p95": 132.9280026257038, + "p99": 151.68000012636185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.63999980688095, + "p90": 63.040003180503845, + "p95": 64.83200192451477, + "p99": 81.56800270080566 + }, + "combine": { + "p50": 67.71200150251389, + "p90": 69.47200000286102, + "p95": 70.62400132417679, + "p99": 80.54400235414505 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 126.68800354003906, + "p95": 127.42400169372559, + "p99": 131.74399733543396 + }, + "isolatedSum": { + "p50": 128.35200130939484, + "p90": 132.51200318336487, + "p95": 135.45600324869156, + "p99": 162.1120050549507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.0479975938797, + "p90": 77.11999863386154, + "p95": 77.82399654388428, + "p99": 93.37600320577621 + }, + "combine": { + "p50": 70.27199864387512, + "p90": 78.72000336647034, + "p95": 79.29600030183792, + "p99": 90.68799763917923 + }, + "roundtrip": { + "p50": 123.4240010380745, + "p90": 128.4160017967224, + "p95": 131.23199343681335, + "p99": 145.63199877738953 + }, + "isolatedSum": { + "p50": 144.31999623775482, + "p90": 155.84000200033188, + "p95": 157.1199968457222, + "p99": 184.06400084495544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + 
"dispatch": { + "p50": 87.61599659919739, + "p90": 90.55999666452408, + "p95": 91.51999652385712, + "p99": 102.65599936246872 + }, + "combine": { + "p50": 90.7519981265068, + "p90": 91.42400324344635, + "p95": 91.67999774217606, + "p99": 93.59999746084213 + }, + "roundtrip": { + "p50": 147.16799557209015, + "p90": 153.6639928817749, + "p95": 154.55999970436096, + "p99": 163.93600404262543 + }, + "isolatedSum": { + "p50": 178.3679947257042, + "p90": 181.98399990797043, + "p95": 183.19999426603317, + "p99": 196.25599682331085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.1200025677681, + "p90": 103.26399654150009, + "p95": 104.00000214576721, + "p99": 107.42399841547012 + }, + "combine": { + "p50": 105.12000322341919, + "p90": 113.88800293207169, + "p95": 114.81600254774094, + "p99": 119.61600184440613 + }, + "roundtrip": { + "p50": 185.5359971523285, + "p90": 190.0479942560196, + "p95": 191.6159987449646, + "p99": 209.75999534130096 + }, + "isolatedSum": { + "p50": 206.2400057911873, + "p90": 217.15199947357178, + "p95": 218.81600469350815, + "p99": 227.04000025987625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b6a1bd0b", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_822258bd", + "comparisonKey": "6f9b85440328ac2f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:56.172756+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.96000158786774, + "p90": 58.97599831223488, + "p95": 61.72800064086914, + "p99": 73.11999797821045 + }, + "combine": { + "p50": 66.46399945020676, + "p90": 68.12799721956253, + "p95": 68.86400282382965, + "p99": 71.58400118350983 + }, + "roundtrip": { + "p50": 
107.04000294208527, + "p90": 112.47999966144562, + "p95": 113.66400122642517, + "p99": 121.98399752378464 + }, + "isolatedSum": { + "p50": 123.4240010380745, + "p90": 127.10399553179741, + "p95": 130.5920034646988, + "p99": 144.70399916172028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.5999990105629, + "p90": 59.74400043487549, + "p95": 60.99199876189232, + "p99": 68.03199648857117 + }, + "combine": { + "p50": 67.87200272083282, + "p90": 69.21599805355072, + "p95": 76.1599987745285, + "p99": 77.91999727487564 + }, + "roundtrip": { + "p50": 108.60799998044968, + "p90": 111.23199760913849, + "p95": 113.79200220108032, + "p99": 127.58399546146393 + }, + "isolatedSum": { + "p50": 125.47200173139572, + "p90": 128.9599984884262, + "p95": 137.15199753642082, + "p99": 145.9519937634468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.647999703884125, + "p90": 65.69600105285645, + "p95": 67.07199662923813, + "p99": 70.0799971818924 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 77.18399912118912, + "p95": 77.56800204515457, + "p99": 78.84799689054489 + }, + "roundtrip": { + "p50": 123.87199699878693, + "p90": 126.81600451469421, + "p95": 127.61600315570831, + "p99": 140.09599387645721 + }, + "isolatedSum": { + "p50": 127.71199643611908, + "p90": 142.88000017404556, + "p95": 144.6399986743927, + "p99": 148.9279940724373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + 
"recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.67999994754791, + "p90": 61.69600039720535, + "p95": 63.48799914121628, + "p99": 69.76000219583511 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 75.93599706888199, + "p95": 78.04799824953079, + "p99": 80.12799918651581 + }, + "roundtrip": { + "p50": 120.03199756145477, + "p90": 125.66399574279785, + "p95": 126.56000256538391, + "p99": 130.91200590133667 + }, + "isolatedSum": { + "p50": 127.74399667978287, + "p90": 137.63199746608734, + "p95": 141.53599739074707, + "p99": 149.88800138235092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.111999839544296, + "p90": 68.86400282382965, + "p95": 70.56000083684921, + "p99": 91.39200299978256 + }, + "combine": { + "p50": 77.34400033950806, + "p90": 78.65600287914276, + "p95": 78.84799689054489, + "p99": 81.50400221347809 + }, + "roundtrip": { + "p50": 120.09599804878235, + "p90": 122.27199971675873, + "p95": 124.54400211572647, + "p99": 141.7279988527298 + }, + "isolatedSum": { + "p50": 139.45600017905235, + "p90": 147.5200057029724, + "p95": 149.4079977273941, + "p99": 172.89600521326065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.21599805355072, + "p90": 72.15999811887741, + "p95": 75.07199794054031, + "p99": 83.42400193214417 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 80.09599894285202, + 
"p95": 80.60800284147263, + "p99": 83.26400071382523 + }, + "roundtrip": { + "p50": 131.20000064373016, + "p90": 135.96799969673157, + "p95": 137.79200613498688, + "p99": 155.45600652694702 + }, + "isolatedSum": { + "p50": 147.90400117635727, + "p90": 152.25599706172943, + "p95": 155.68000078201294, + "p99": 166.6880026459694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.67999643087387, + "p90": 89.85599875450134, + "p95": 91.42400324344635, + "p99": 103.42399775981903 + }, + "combine": { + "p50": 92.19200164079666, + "p90": 94.27200257778168, + "p95": 101.27999633550644, + "p99": 103.00800204277039 + }, + "roundtrip": { + "p50": 159.39199924468994, + "p90": 164.15999829769135, + "p95": 165.3439998626709, + "p99": 173.47200214862823 + }, + "isolatedSum": { + "p50": 175.87199807167053, + "p90": 184.12800133228302, + "p95": 192.7039995789528, + "p99": 206.43199980258942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.85599941015244, + "p90": 98.84800016880035, + "p95": 100.19200295209885, + "p99": 105.12000322341919 + }, + "combine": { + "p50": 115.4559999704361, + "p90": 116.12799763679504, + "p95": 117.98399686813354, + "p99": 131.71200454235077 + }, + "roundtrip": { + "p50": 193.6960071325302, + "p90": 199.0399956703186, + "p95": 200.41599869728088, + "p99": 209.60000157356262 + }, + "isolatedSum": { + "p50": 209.31199938058853, + "p90": 214.9759978055954, + "p95": 218.1759998202324, + "p99": 236.83200776576996 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-af8b996f", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_4328f415", + "comparisonKey": "3e4078a9694a13b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:44.586588+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.631999254226685, + "p90": 79.48800176382065, + "p95": 82.65600353479385, + "p99": 121.88799679279327 + }, + "combine": { + "p50": 66.81600213050842, + "p90": 68.03199648857117, + "p95": 68.7360018491745, + "p99": 76.12799853086472 + }, + "roundtrip": { + "p50": 106.65600001811981, + "p90": 112.41599917411804, + "p95": 113.79200220108032, + "p99": 138.08000087738037 + }, + "isolatedSum": { + "p50": 124.44800138473511, + "p90": 147.51999825239182, + "p95": 151.39200538396835, + "p99": 198.015995323658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.440001517534256, + "p90": 59.7120001912117, + "p95": 62.84800171852112, + "p99": 81.7599967122078 + }, + "combine": { + "p50": 67.26399809122086, + "p90": 68.76800209283829, + "p95": 69.34399902820587, + "p99": 72.57600128650665 + }, + "roundtrip": { + "p50": 107.55199939012527, + "p90": 119.90399658679962, + "p95": 128.09599936008453, + "p99": 148.95999431610107 + }, + "isolatedSum": { + "p50": 124.70399960875511, + "p90": 128.48000228405, + "p95": 132.192000746727, + "p99": 154.33599799871445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 58.88000130653381, + "p90": 61.15199998021126, + "p95": 63.4239986538887, + "p99": 84.86399799585342 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 76.4480009675026, + "p95": 77.504001557827, + "p99": 90.46400338411331 + }, + "roundtrip": { + "p50": 123.99999797344208, + "p90": 126.78399682044983, + "p95": 127.93600559234619, + "p99": 158.6879938840866 + }, + "isolatedSum": { + "p50": 126.848004758358, + "p90": 137.60000094771385, + "p95": 140.9280002117157, + "p99": 175.32800137996674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.487998485565186, + "p90": 61.983998864889145, + "p95": 64.31999802589417, + "p99": 76.25599950551987 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 77.11999863386154, + "p95": 77.91999727487564, + "p99": 79.16799932718277 + }, + "roundtrip": { + "p50": 120.35199999809265, + "p90": 125.59999525547028, + "p95": 127.20000743865967, + "p99": 147.39200472831726 + }, + "isolatedSum": { + "p50": 127.45600193738937, + "p90": 139.1039974987507, + "p95": 142.2399953007698, + "p99": 155.42399883270264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.3120011985302, + "p90": 67.48799979686737, + "p95": 68.51200014352798, + "p99": 72.60800153017044 + }, + "combine": { + "p50": 76.86399668455124, + "p90": 78.65600287914276, + "p95": 79.00799810886383, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 119.64800208806992, + "p90": 122.36800044775009, + "p95": 
125.08800625801086, + "p99": 138.49599659442902 + }, + "isolatedSum": { + "p50": 138.17599788308144, + "p90": 146.14400267601013, + "p95": 147.51999825239182, + "p99": 153.82400155067444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.37599927186966, + "p90": 73.21599870920181, + "p95": 75.77600330114365, + "p99": 94.4959968328476 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 79.83999699354172, + "p95": 80.38400113582611, + "p99": 82.5280025601387 + }, + "roundtrip": { + "p50": 131.32800161838531, + "p90": 137.85600662231445, + "p95": 138.87999951839447, + "p99": 164.57599401474 + }, + "isolatedSum": { + "p50": 148.09600263834, + "p90": 153.05599570274353, + "p95": 156.16000443696976, + "p99": 177.0239993929863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.28000116348267, + "p90": 90.14400094747543, + "p95": 91.39200299978256, + "p99": 100.35199671983719 + }, + "combine": { + "p50": 92.25600212812424, + "p90": 94.11200135946274, + "p95": 100.99200159311295, + "p99": 115.23199826478958 + }, + "roundtrip": { + "p50": 158.6879938840866, + "p90": 162.81600296497345, + "p95": 164.8319959640503, + "p99": 180.7679980993271 + }, + "isolatedSum": { + "p50": 177.5360032916069, + "p90": 184.25600230693817, + "p95": 192.3840045928955, + "p99": 215.58399498462677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.2080020904541, + "p90": 96.41599655151367, + "p95": 100.19200295209885, + "p99": 116.57600104808807 + }, + "combine": { + "p50": 115.39199948310852, + "p90": 116.2559986114502, + "p95": 117.44000017642975, + "p99": 139.26400244235992 + }, + "roundtrip": { + "p50": 194.30400431156158, + "p90": 200.28799772262573, + "p95": 201.50400698184967, + "p99": 215.5199944972992 + }, + "isolatedSum": { + "p50": 209.60000157356262, + "p90": 212.67199516296387, + "p95": 217.6320031285286, + "p99": 255.840003490448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8d36769e", + "identity": "b300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_4328f415", + "comparisonKey": "322ad7dc2efd1844", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:24.114727+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.82400071620941, + "p90": 60.92799827456474, + "p95": 63.61600011587143, + "p99": 97.69599884748459 + }, + "combine": { + "p50": 66.30399823188782, + "p90": 67.90400296449661, + "p95": 69.76000219583511, + "p99": 93.44000369310379 + }, + "roundtrip": { + "p50": 107.90400207042694, + "p90": 114.43199962377548, + "p95": 116.19199812412262, + "p99": 172.7360039949417 + }, + "isolatedSum": { + "p50": 124.12799894809723, + "p90": 128.83200123906136, + "p95": 133.37600231170654, + "p99": 191.13600254058838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.98399820923805, + "p90": 61.184000223875046, + "p95": 65.40799885988235, + "p99": 128.12800705432892 + }, + "combine": { + "p50": 66.880002617836, + "p90": 68.80000233650208, + 
"p95": 70.97599655389786, + "p99": 115.77600240707397 + }, + "roundtrip": { + "p50": 107.55199939012527, + "p90": 111.58400028944016, + "p95": 115.03999680280685, + "p99": 130.75199723243713 + }, + "isolatedSum": { + "p50": 124.86400082707405, + "p90": 129.98400256037712, + "p95": 136.3839954137802, + "p99": 243.9040094614029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.51999872922897, + "p90": 62.04799935221672, + "p95": 64.38399851322174, + "p99": 94.17600184679031 + }, + "combine": { + "p50": 68.35199892520905, + "p90": 75.45600086450577, + "p95": 77.34400033950806, + "p99": 81.37600123882294 + }, + "roundtrip": { + "p50": 123.07199835777283, + "p90": 127.23200023174286, + "p95": 128.1919926404953, + "p99": 145.08800208568573 + }, + "isolatedSum": { + "p50": 127.87199765443802, + "p90": 137.5040002167225, + "p95": 141.7279988527298, + "p99": 175.55200308561325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.93599817156792, + "p90": 63.13599646091461, + "p95": 66.65600091218948, + "p99": 122.17599898576736 + }, + "combine": { + "p50": 67.9360032081604, + "p90": 76.80000364780426, + "p95": 77.91999727487564, + "p99": 90.33600240945816 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 127.3919939994812, + "p95": 129.56799566745758, + "p99": 152.79999375343323 + }, + "isolatedSum": { + "p50": 127.87200137972832, + "p90": 139.93600010871887, + "p95": 144.57599818706512, + "p99": 212.51200139522552 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.6240000128746, + "p90": 68.51200014352798, + "p95": 71.55200093984604, + "p99": 89.63199704885483 + }, + "combine": { + "p50": 68.60800087451935, + "p90": 78.36800068616867, + "p95": 78.68800312280655, + "p99": 102.9760017991066 + }, + "roundtrip": { + "p50": 120.86399644613266, + "p90": 124.1919994354248, + "p95": 126.71999633312225, + "p99": 146.5280055999756 + }, + "isolatedSum": { + "p50": 131.23200088739395, + "p90": 146.88000082969666, + "p95": 150.2400040626526, + "p99": 192.60799884796143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.43199986219406, + "p90": 76.25599950551987, + "p95": 77.504001557827, + "p99": 108.06400328874588 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 80.57600259780884, + "p95": 81.37600123882294, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 133.69600474834442, + "p90": 138.14400136470795, + "p95": 140.44800400733948, + "p99": 172.41600155830383 + }, + "isolatedSum": { + "p50": 149.1520032286644, + "p90": 156.8320021033287, + "p95": 158.88000279664993, + "p99": 210.24000644683838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.39200234413147, + "p90": 90.7839983701706, + "p95": 91.64799749851227, + "p99": 
98.4639972448349 + }, + "combine": { + "p50": 92.3520028591156, + "p90": 95.93600034713745, + "p95": 102.08000242710114, + "p99": 127.83999741077423 + }, + "roundtrip": { + "p50": 161.0880047082901, + "p90": 165.75999557971954, + "p95": 168.47999393939972, + "p99": 185.98400056362152 + }, + "isolatedSum": { + "p50": 179.74400520324707, + "p90": 186.71999871730804, + "p95": 193.7279999256134, + "p99": 226.30399465560913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.7519987821579, + "p90": 100.03200173377991, + "p95": 102.08000242710114, + "p99": 160.38399934768677 + }, + "combine": { + "p50": 115.42399972677231, + "p90": 116.54400080442429, + "p95": 118.1119978427887, + "p99": 157.21599757671356 + }, + "roundtrip": { + "p50": 196.16000354290009, + "p90": 201.1519968509674, + "p95": 202.65600085258484, + "p99": 229.8240065574646 + }, + "isolatedSum": { + "p50": 210.1759985089302, + "p90": 216.5760025382042, + "p95": 220.19200026988983, + "p99": 317.59999692440033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5b315862", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b300_e8841c1f", + "comparisonKey": "97de77dd5086da22", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:44.048684+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.023998349905014, + "p90": 58.6559996008873, + "p95": 60.7680007815361, + "p99": 71.26399874687195 + }, + "combine": { + "p50": 67.84000247716904, + "p90": 69.72800195217133, + "p95": 76.80000364780426, + "p99": 78.49600166082382 + }, + "roundtrip": { + "p50": 107.32799768447876, + "p90": 110.944002866745, + "p95": 114.97599631547928, + "p99": 
171.55200242996216 + }, + "isolatedSum": { + "p50": 124.86400082707405, + "p90": 128.38400155305862, + "p95": 137.56800442934036, + "p99": 149.76000040769577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.111999183893204, + "p90": 75.93599706888199, + "p95": 79.77599650621414, + "p99": 123.52000176906586 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 78.8159966468811, + "p95": 82.30400085449219, + "p99": 92.16000139713287 + }, + "roundtrip": { + "p50": 115.42399972677231, + "p90": 120.41600048542023, + "p95": 122.94399738311768, + "p99": 144.67200636863708 + }, + "isolatedSum": { + "p50": 126.78400054574013, + "p90": 154.7519937157631, + "p95": 162.07999736070633, + "p99": 215.68000316619873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.43200162053108, + "p90": 60.447998344898224, + "p95": 62.94400244951248, + "p99": 73.53600114583969 + }, + "combine": { + "p50": 77.60000228881836, + "p90": 78.75200361013412, + "p95": 79.03999835252762, + "p99": 81.40800148248672 + }, + "roundtrip": { + "p50": 119.03999745845795, + "p90": 125.37600100040436, + "p95": 126.49600207805634, + "p99": 143.2960033416748 + }, + "isolatedSum": { + "p50": 136.03200390934944, + "p90": 139.20000195503235, + "p95": 141.9840008020401, + "p99": 154.94400262832642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.007998555898666, + "p90": 61.76000088453293, + "p95": 63.74400109052658, + "p99": 108.5439994931221 + }, + "combine": { + "p50": 78.20799946784973, + "p90": 78.87999713420868, + "p95": 79.23199981451035, + "p99": 90.62399715185165 + }, + "roundtrip": { + "p50": 118.9119964838028, + "p90": 121.88799679279327, + "p95": 125.98399817943573, + "p99": 143.68000626564026 + }, + "isolatedSum": { + "p50": 137.2159980237484, + "p90": 140.6399980187416, + "p95": 142.97600090503693, + "p99": 199.16799664497375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 68.2239979505539, + "p90": 75.00799745321274, + "p95": 76.54400169849396, + "p99": 136.4479959011078 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 79.00799810886383, + "p95": 79.64800298213959, + "p99": 93.85599941015244 + }, + "roundtrip": { + "p50": 121.08799815177917, + "p90": 123.29600006341934, + "p95": 125.98399817943573, + "p99": 141.53599739074707 + }, + "isolatedSum": { + "p50": 146.62399888038635, + "p90": 154.01599556207657, + "p95": 156.19200468063354, + "p99": 230.30399531126022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.49600034952164, + "p90": 72.67200201749802, + "p95": 75.1039981842041, + "p99": 97.98400104045868 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 82.17599987983704, + "p95": 89.40800279378891, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 133.59999656677246, + "p90": 
138.17599415779114, + "p95": 140.70400595664978, + "p99": 152.99199521541595 + }, + "isolatedSum": { + "p50": 151.0080024600029, + "p90": 154.84800189733505, + "p95": 164.512000977993, + "p99": 189.92000073194504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 84.89599823951721, + "p90": 87.61599659919739, + "p95": 91.16800129413605, + "p99": 109.3439981341362 + }, + "combine": { + "p50": 93.63199770450592, + "p90": 103.00800204277039, + "p95": 103.35999727249146, + "p99": 115.35999923944473 + }, + "roundtrip": { + "p50": 168.92799735069275, + "p90": 175.135999917984, + "p95": 176.57600343227386, + "p99": 193.63200664520264 + }, + "isolatedSum": { + "p50": 178.52799594402313, + "p90": 190.62399864196777, + "p95": 194.5279985666275, + "p99": 224.70399737358093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.84800148010254, + "p90": 122.17599898576736, + "p95": 131.42399489879608, + "p99": 156.12800419330597 + }, + "combine": { + "p50": 131.00799918174744, + "p90": 139.8719996213913, + "p95": 140.22399485111237, + "p99": 152.25599706172943 + }, + "roundtrip": { + "p50": 229.95199263095856, + "p90": 237.8239929676056, + "p95": 244.54399943351746, + "p99": 288.5439991950989 + }, + "isolatedSum": { + "p50": 237.85600066184998, + "p90": 262.04799860715866, + "p95": 271.64798974990845, + "p99": 308.3840012550354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 
1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-08bd8f8b", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_3b4b5c66", + "comparisonKey": "a398d62fa20efd49", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:38.576359+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.17600032687187, + "p90": 64.89600241184235, + "p95": 65.79200178384781, + "p99": 72.64000177383423 + }, + "combine": { + "p50": 54.23999950289726, + "p90": 56.28800019621849, + "p95": 57.95200169086456, + "p99": 83.45600217580795 + }, + "roundtrip": { + "p50": 94.52799707651138, + "p90": 98.55999797582626, + "p95": 102.1760031580925, + "p99": 142.17600226402283 + }, + "isolatedSum": { + "p50": 116.41599982976913, + "p90": 121.18400260806084, + "p95": 123.74400347471237, + "p99": 156.09600394964218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.49599838256836, + "p90": 61.664000153541565, + "p95": 65.24799764156342, + "p99": 87.39200234413147 + }, + "combine": { + "p50": 56.703999638557434, + "p90": 65.85600227117538, + "p95": 66.30399823188782, + "p99": 67.80800223350525 + }, + "roundtrip": { + "p50": 111.77600175142288, + "p90": 115.03999680280685, + "p95": 116.73600226640701, + "p99": 147.5200057029724 + }, + "isolatedSum": { + "p50": 115.1999980211258, + "p90": 127.52000242471695, + "p95": 131.55199587345123, + "p99": 155.20000457763672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.34399902820587, + "p90": 74.20799881219864, + "p95": 76.4480009675026, + "p99": 104.12800312042236 + }, 
+ "combine": { + "p50": 66.23999774456024, + "p90": 67.1359971165657, + "p95": 67.6800012588501, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 119.07199770212173, + "p90": 124.22399967908859, + "p95": 125.2799928188324, + "p99": 163.5199934244156 + }, + "isolatedSum": { + "p50": 135.5839967727661, + "p90": 141.34399592876434, + "p95": 144.1280022263527, + "p99": 195.0400024652481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.21599805355072, + "p90": 73.05599749088287, + "p95": 75.32799988985062, + "p99": 97.9200005531311 + }, + "combine": { + "p50": 67.74400174617767, + "p90": 69.34399902820587, + "p95": 70.592001080513, + "p99": 79.29600030183792 + }, + "roundtrip": { + "p50": 121.0239976644516, + "p90": 128.38399410247803, + "p95": 132.4480026960373, + "p99": 152.319997549057 + }, + "isolatedSum": { + "p50": 136.9599997997284, + "p90": 142.39999651908875, + "p95": 145.92000097036362, + "p99": 177.21600085496902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e51fa676", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b300_3dc9ebbf", + "comparisonKey": "5e3459e68c3d8224", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:02.659545+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.52799862623215, + "p90": 62.04799935221672, + "p95": 63.968002796173096, + "p99": 80.25600016117096 + }, + "combine": { + "p50": 55.55199831724167, + "p90": 57.472001761198044, + "p95": 64.4799992442131, + "p99": 65.98400324583054 + }, + "roundtrip": { + "p50": 96.63999825716019, + "p90": 102.49599814414978, + "p95": 105.56799918413162, + "p99": 124.09599870443344 + }, + "isolatedSum": { + 
"p50": 114.07999694347382, + "p90": 119.52000111341476, + "p95": 128.4480020403862, + "p99": 146.2400034070015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.88000130653381, + "p90": 61.91999837756157, + "p95": 67.4239993095398, + "p99": 112.0000034570694 + }, + "combine": { + "p50": 56.60799890756607, + "p90": 66.04799628257751, + "p95": 69.63200122117996, + "p99": 92.22400188446045 + }, + "roundtrip": { + "p50": 111.42399907112122, + "p90": 115.9679964184761, + "p95": 122.40000069141388, + "p99": 162.04799711704254 + }, + "isolatedSum": { + "p50": 115.48800021409988, + "p90": 127.96799466013908, + "p95": 137.05600053071976, + "p99": 204.22400534152985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 60.864001512527466, + "p90": 67.19999760389328, + "p95": 69.31199878454208, + "p99": 87.36000210046768 + }, + "combine": { + "p50": 65.24799764156342, + "p90": 66.56000018119812, + "p95": 67.00800359249115, + "p99": 69.95200365781784 + }, + "roundtrip": { + "p50": 108.38399827480316, + "p90": 113.3119985461235, + "p95": 115.07199704647064, + "p99": 141.59999787807465 + }, + "isolatedSum": { + "p50": 126.11199915409088, + "p90": 133.7599977850914, + "p95": 136.32000237703323, + "p99": 157.31200575828552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, 
+ "dispatch": { + "p50": 60.575999319553375, + "p90": 63.07200342416763, + "p95": 65.72800129652023, + "p99": 78.40000092983246 + }, + "combine": { + "p50": 65.92000275850296, + "p90": 66.52799993753433, + "p95": 66.880002617836, + "p99": 70.0799971818924 + }, + "roundtrip": { + "p50": 110.30399799346924, + "p90": 121.69600278139114, + "p95": 126.81600451469421, + "p99": 152.79999375343323 + }, + "isolatedSum": { + "p50": 126.49600207805634, + "p90": 129.60000336170197, + "p95": 132.60800391435623, + "p99": 148.47999811172485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.591999769210815, + "p90": 84.35200154781342, + "p95": 90.36800265312195, + "p99": 109.82400178909302 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 67.10399687290192, + "p95": 67.77600198984146, + "p99": 78.11199873685837 + }, + "roundtrip": { + "p50": 108.92800241708755, + "p90": 111.61600053310394, + "p95": 114.62400108575821, + "p99": 144.6080058813095 + }, + "isolatedSum": { + "p50": 128.7359967827797, + "p90": 151.45599842071533, + "p95": 158.1440046429634, + "p99": 187.93600052595139 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 62.55999952554703, + "p90": 68.96000355482101, + "p95": 71.58400118350983, + "p99": 129.50399518013 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 68.06399673223495, + "p95": 69.023996591568, + "p99": 78.36800068616867 + }, + "roundtrip": { + "p50": 111.04000359773636, + "p90": 118.33599954843521, + "p95": 119.80800330638885, + "p99": 
158.24000537395477 + }, + "isolatedSum": { + "p50": 128.89599800109863, + "p90": 137.02400028705597, + "p95": 140.60799777507782, + "p99": 207.87199586629868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 77.2159993648529, + "p90": 79.83999699354172, + "p95": 80.99199831485748, + "p99": 97.69599884748459 + }, + "combine": { + "p50": 67.9360032081604, + "p90": 70.3359991312027, + "p95": 77.27999985218048, + "p99": 81.82399719953537 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 128.48000228405, + "p95": 130.23999333381653, + "p99": 133.7279975414276 + }, + "isolatedSum": { + "p50": 145.1520025730133, + "p90": 150.17599612474442, + "p95": 158.27199816703796, + "p99": 179.51999604701996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.57600325345993, + "p90": 100.47999769449234, + "p95": 103.61599922180176, + "p99": 114.9120032787323 + }, + "combine": { + "p50": 89.56799656152725, + "p90": 91.0400003194809, + "p95": 91.48799628019333, + "p99": 104.73600029945374 + }, + "roundtrip": { + "p50": 146.59200608730316, + "p90": 150.27199685573578, + "p95": 152.6080071926117, + "p99": 177.59999632835388 + }, + "isolatedSum": { + "p50": 174.14399981498718, + "p90": 191.51999801397324, + "p95": 195.1039955019951, + "p99": 219.64800357818604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + } + ] + }, + { + "id": "cx-3f5b0149", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b300_9b10df89", + "comparisonKey": "b2c350e019977bd8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:14.865595+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + 
"createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.37600103020668, + "p90": 63.968002796173096, + "p95": 66.23999774456024, + "p99": 82.62400329113007 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 66.65600091218948, + "p95": 67.29599833488464, + "p99": 70.14399766921997 + }, + "roundtrip": { + "p50": 106.08000308275223, + "p90": 113.3119985461235, + "p95": 116.5120005607605, + "p99": 140.47999680042267 + }, + "isolatedSum": { + "p50": 123.51999804377556, + "p90": 130.62400370836258, + "p95": 133.53599607944489, + "p99": 152.76800096035004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.671999394893646, + "p90": 58.079998940229416, + "p95": 59.10399928689003, + "p99": 69.40799951553345 + }, + "combine": { + "p50": 66.3679987192154, + "p90": 68.1919977068901, + "p95": 69.92000341415405, + "p99": 85.95199882984161 + }, + "roundtrip": { + "p50": 106.01600259542465, + "p90": 109.66400057077408, + "p95": 111.55200004577637, + "p99": 116.95999652147293 + }, + "isolatedSum": { + "p50": 123.03999811410904, + "p90": 126.27199664711952, + "p95": 129.02400270104408, + "p99": 155.35999834537506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.61599946022034, + "p90": 61.91999837756157, + "p95": 63.4239986538887, + "p99": 69.24799829721451 + }, + "combine": { + "p50": 68.57600063085556, + "p90": 78.84799689054489, + "p95": 
81.24800026416779, + "p99": 105.0880029797554 + }, + "roundtrip": { + "p50": 123.64800274372101, + "p90": 127.3919939994812, + "p95": 129.4720023870468, + "p99": 142.33599603176117 + }, + "isolatedSum": { + "p50": 128.1920000910759, + "p90": 140.76799526810646, + "p95": 144.6719989180565, + "p99": 174.3360012769699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.36000123620033, + "p90": 61.24800071120262, + "p95": 62.72000074386597, + "p99": 76.57600194215775 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 78.65600287914276, + "p95": 79.83999699354172, + "p99": 102.04800218343735 + }, + "roundtrip": { + "p50": 122.84799665212631, + "p90": 129.40800189971924, + "p95": 134.46399569511414, + "p99": 148.3519971370697 + }, + "isolatedSum": { + "p50": 127.32800468802452, + "p90": 139.90400359034538, + "p95": 142.55999773740768, + "p99": 178.6240041255951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 66.14399701356888, + "p90": 82.2720006108284, + "p95": 85.9839990735054, + "p99": 95.0080007314682 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 79.16799932718277, + "p95": 81.05599880218506, + "p99": 102.84800082445145 + }, + "roundtrip": { + "p50": 120.06399780511856, + "p90": 124.76799637079239, + "p95": 126.81600451469421, + "p99": 154.27200496196747 + }, + "isolatedSum": { + "p50": 143.2959958910942, + "p90": 161.43999993801117, + "p95": 167.03999787569046, + "p99": 197.85600155591965 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.11999732255936, + "p90": 76.4160007238388, + "p95": 88.41600269079208, + "p99": 97.63199836015701 + }, + "combine": { + "p50": 78.5600021481514, + "p90": 80.44800162315369, + "p95": 81.56800270080566, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 131.9040060043335, + "p90": 142.59199798107147, + "p95": 146.17599546909332, + "p99": 160.288006067276 + }, + "isolatedSum": { + "p50": 147.67999947071075, + "p90": 156.8640023469925, + "p95": 169.98400539159775, + "p99": 187.9040002822876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.06400001049042, + "p90": 90.4960036277771, + "p95": 91.96799993515015, + "p99": 115.32799899578094 + }, + "combine": { + "p50": 92.47999638319016, + "p90": 102.01600193977356, + "p95": 102.7199998497963, + "p99": 115.52000045776367 + }, + "roundtrip": { + "p50": 159.9999964237213, + "p90": 164.2879992723465, + "p95": 166.30400717258453, + "p99": 183.6480051279068 + }, + "isolatedSum": { + "p50": 180.54399639368057, + "p90": 192.51200556755066, + "p95": 194.68799978494644, + "p99": 230.84799945354462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.16000336408615, + "p90": 105.8880016207695, + "p95": 107.00800269842148, + 
"p99": 110.30399799346924 + }, + "combine": { + "p50": 127.55200266838074, + "p90": 129.66400384902954, + "p95": 130.8480054140091, + "p99": 141.2159949541092 + }, + "roundtrip": { + "p50": 218.84800493717194, + "p90": 226.17599368095398, + "p95": 228.99200022220612, + "p99": 266.01600646972656 + }, + "isolatedSum": { + "p50": 231.7120060324669, + "p90": 235.55200546979904, + "p95": 237.85600811243057, + "p99": 251.51999294757843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a5453372", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b300_1a43c4c5", + "comparisonKey": "af73b2b8ecd6def7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:15.630029+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.98399820923805, + "p90": 60.35200133919716, + "p95": 62.33600154519081, + "p99": 98.52799773216248 + }, + "combine": { + "p50": 66.49599969387054, + "p90": 67.64800101518631, + "p95": 68.80000233650208, + "p99": 91.839998960495 + }, + "roundtrip": { + "p50": 107.68000036478043, + "p90": 111.96800321340561, + "p95": 116.73600226640701, + "p99": 161.6639941930771 + }, + "isolatedSum": { + "p50": 124.4799979031086, + "p90": 128.00000235438347, + "p95": 131.1360038816929, + "p99": 190.36799669265747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.78400057554245, + "p90": 61.5679994225502, + "p95": 62.94400244951248, + "p99": 94.08000111579895 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 69.7920024394989, + "p95": 77.27999985218048, + "p99": 102.36799716949463 + }, + "roundtrip": { + "p50": 108.89600217342377, + "p90": 
114.07999694347382, + "p95": 121.79200351238251, + "p99": 218.176007270813 + }, + "isolatedSum": { + "p50": 126.78400427103043, + "p90": 131.3600018620491, + "p95": 140.22400230169296, + "p99": 196.44799828529358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.74400043487549, + "p90": 68.1919977068901, + "p95": 71.58400118350983, + "p99": 146.17599546909332 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 77.72800326347351, + "p95": 78.17599922418594, + "p99": 90.14400094747543 + }, + "roundtrip": { + "p50": 121.91999703645706, + "p90": 125.98399817943573, + "p95": 131.32800161838531, + "p99": 203.16800475120544 + }, + "isolatedSum": { + "p50": 128.7359967827797, + "p90": 145.92000097036362, + "p95": 149.76000040769577, + "p99": 236.31999641656876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.967998415231705, + "p90": 63.45599889755249, + "p95": 68.15999746322632, + "p99": 123.26399981975555 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 77.69600301980972, + "p95": 78.23999971151352, + "p99": 79.39200103282928 + }, + "roundtrip": { + "p50": 122.36800044775009, + "p90": 128.28800082206726, + "p95": 130.17599284648895, + "p99": 189.31199610233307 + }, + "isolatedSum": { + "p50": 128.73600050807, + "p90": 141.1520019173622, + "p95": 146.39999717473984, + "p99": 202.65600085258484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 64.25599753856659, + "p90": 69.76000219583511, + "p95": 78.97599786520004, + "p99": 142.2719955444336 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 79.00799810886383, + "p95": 80.09599894285202, + "p99": 98.14400225877762 + }, + "roundtrip": { + "p50": 121.11999839544296, + "p90": 125.791996717453, + "p95": 129.60000336170197, + "p99": 187.3600035905838 + }, + "isolatedSum": { + "p50": 141.40799641609192, + "p90": 148.76800030469894, + "p95": 159.07199680805206, + "p99": 240.4159978032112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.19999825954437, + "p90": 75.77600330114365, + "p95": 80.03199845552444, + "p99": 152.96000242233276 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 81.02399855852127, + "p95": 82.07999914884567, + "p99": 91.58399701118469 + }, + "roundtrip": { + "p50": 132.35199451446533, + "p90": 136.9599997997284, + "p95": 139.16799426078796, + "p99": 177.72799730300903 + }, + "isolatedSum": { + "p50": 150.36799758672714, + "p90": 156.80000185966492, + "p95": 162.11199760437012, + "p99": 244.54399943351746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 86.27200126647949, + "p90": 90.97599983215332, + "p95": 96.3520035147667, + "p99": 198.7839937210083 + }, + "combine": { + "p50": 93.40800344944, + "p90": 102.46399790048599, + "p95": 
103.74400019645691, + "p99": 168.44800114631653 + }, + "roundtrip": { + "p50": 161.0880047082901, + "p90": 165.8879965543747, + "p95": 168.2240068912506, + "p99": 183.1360012292862 + }, + "isolatedSum": { + "p50": 179.6800047159195, + "p90": 193.4399977326393, + "p95": 200.0960037112236, + "p99": 367.23199486732483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.16800194978714, + "p90": 105.79200088977814, + "p95": 110.20799726247787, + "p99": 172.2240000963211 + }, + "combine": { + "p50": 115.64800143241882, + "p90": 117.5680011510849, + "p95": 119.45600062608719, + "p99": 176.70400440692902 + }, + "roundtrip": { + "p50": 194.62400674819946, + "p90": 201.92000269889832, + "p95": 208.48000049591064, + "p99": 237.92000114917755 + }, + "isolatedSum": { + "p50": 210.81600338220596, + "p90": 223.36000204086304, + "p95": 229.66399788856506, + "p99": 348.9280045032501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-87d59617", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b300_35838e41", + "comparisonKey": "dfeacf9a44487972", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:10.123542+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.400001376867294, + "p90": 61.40799820423126, + "p95": 64.99200314283371, + "p99": 101.88800096511841 + }, + "combine": { + "p50": 66.20799750089645, + "p90": 67.16799736022949, + "p95": 67.90400296449661, + "p99": 70.30399888753891 + }, + "roundtrip": { + "p50": 106.08000308275223, + "p90": 112.57600039243698, + "p95": 115.39199948310852, + "p99": 181.37599527835846 + }, + "isolatedSum": { + "p50": 
124.60799887776375, + "p90": 128.57599556446075, + "p95": 132.89600610733032, + "p99": 172.19199985265732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.111999183893204, + "p90": 61.216000467538834, + "p95": 65.37599861621857, + "p99": 127.26399302482605 + }, + "combine": { + "p50": 66.84800237417221, + "p90": 68.80000233650208, + "p95": 70.46400010585785, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 108.25599730014801, + "p90": 131.9359987974167, + "p95": 146.7519998550415, + "p99": 177.0240068435669 + }, + "isolatedSum": { + "p50": 124.96000155806541, + "p90": 130.0160028040409, + "p95": 135.83999872207642, + "p99": 218.20799261331558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.93599817156792, + "p90": 62.24000081419945, + "p95": 64.41599875688553, + "p99": 110.07999628782272 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 73.66400212049484, + "p95": 77.85599678754807, + "p99": 115.07199704647064 + }, + "roundtrip": { + "p50": 119.74400281906128, + "p90": 126.65599584579468, + "p95": 127.55200266838074, + "p99": 154.40000593662262 + }, + "isolatedSum": { + "p50": 128.09599563479424, + "p90": 135.9040029346943, + "p95": 142.2719955444336, + "p99": 225.15199333429337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 60.67200005054474, + "p90": 62.81600147485733, + "p95": 66.30399823188782, + "p99": 131.8719983100891 + }, + "combine": { + "p50": 67.80800223350525, + "p90": 69.98399645090103, + "p95": 77.7600035071373, + "p99": 90.7519981265068 + }, + "roundtrip": { + "p50": 120.57600170373917, + "p90": 126.68800354003906, + "p95": 128.03199887275696, + "p99": 150.87999403476715 + }, + "isolatedSum": { + "p50": 128.48000228405, + "p90": 132.79999792575836, + "p95": 144.06400173902512, + "p99": 222.62399643659592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 63.74400109052658, + "p90": 84.09599959850311, + "p95": 89.50400352478027, + "p99": 106.65600001811981 + }, + "combine": { + "p50": 69.34399902820587, + "p90": 78.59200239181519, + "p95": 79.03999835252762, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 120.83200365304947, + "p90": 125.59999525547028, + "p95": 127.74400413036346, + "p99": 155.20000457763672 + }, + "isolatedSum": { + "p50": 133.08800011873245, + "p90": 162.6880019903183, + "p95": 168.5440018773079, + "p99": 196.60799950361252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.46400010585785, + "p90": 76.35200023651123, + "p95": 77.85599678754807, + "p99": 160.92799603939056 + }, + "combine": { + "p50": 78.87999713420868, + "p90": 81.11999928951263, + "p95": 82.20800012350082, + "p99": 128.28800082206726 + }, + "roundtrip": { + "p50": 132.86399841308594, + "p90": 138.5599970817566, + "p95": 
144.25599575042725, + "p99": 187.3600035905838 + }, + "isolatedSum": { + "p50": 149.34399724006653, + "p90": 157.47199952602386, + "p95": 160.0639969110489, + "p99": 289.2159968614578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.63199704885483, + "p90": 93.75999867916107, + "p95": 95.90400010347366, + "p99": 125.82400441169739 + }, + "combine": { + "p50": 92.00000017881393, + "p90": 94.14400160312653, + "p95": 101.31199657917023, + "p99": 107.35999792814255 + }, + "roundtrip": { + "p50": 160.19199788570404, + "p90": 164.86400365829468, + "p95": 166.46400094032288, + "p99": 180.00000715255737 + }, + "isolatedSum": { + "p50": 181.63199722766876, + "p90": 187.9040002822876, + "p95": 197.2159966826439, + "p99": 233.18400233983994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.45599669218063, + "p90": 112.67200112342834, + "p95": 121.40800058841705, + "p99": 246.43200635910034 + }, + "combine": { + "p50": 115.52000045776367, + "p90": 116.80000275373459, + "p95": 117.98399686813354, + "p99": 138.33600282669067 + }, + "roundtrip": { + "p50": 193.56800615787506, + "p90": 200.28799772262573, + "p95": 202.2079974412918, + "p99": 218.6879962682724 + }, + "isolatedSum": { + "p50": 210.9759971499443, + "p90": 229.47200387716293, + "p95": 239.3919974565506, + "p99": 384.768009185791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d687e2b5", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_a17fe97c", + "comparisonKey": "9a39ebb480e8a505", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:23.211831+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.66399949789047, + "p90": 60.15999987721443, + "p95": 62.94400244951248, + "p99": 100.92800110578537 + }, + "combine": { + "p50": 66.11199676990509, + "p90": 66.97600334882736, + "p95": 67.26399809122086, + "p99": 133.18400084972382 + }, + "roundtrip": { + "p50": 107.93600231409073, + "p90": 114.84800279140472, + "p95": 117.91999638080597, + "p99": 193.37600469589233 + }, + "isolatedSum": { + "p50": 123.77599626779556, + "p90": 127.1360032260418, + "p95": 130.20800054073334, + "p99": 234.1120019555092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.69599974155426, + "p90": 60.67200005054474, + "p95": 64.86400216817856, + "p99": 107.71200060844421 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 67.84000247716904, + "p95": 69.47200000286102, + "p99": 117.8240031003952 + }, + "roundtrip": { + "p50": 107.39199817180634, + "p90": 113.69600147008896, + "p95": 115.07199704647064, + "p99": 136.25599443912506 + }, + "isolatedSum": { + "p50": 124.03199821710587, + "p90": 128.51200252771378, + "p95": 134.33600217103958, + "p99": 225.53600370883942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.7120001912117, + "p90": 61.85600161552429, + "p95": 65.98400324583054, + "p99": 90.62399715185165 + }, + "combine": { 
+ "p50": 67.4239993095398, + "p90": 69.63200122117996, + "p95": 72.35199958086014, + "p99": 82.40000158548355 + }, + "roundtrip": { + "p50": 109.98400300741196, + "p90": 116.95999652147293, + "p95": 118.27199906110764, + "p99": 156.8319946527481 + }, + "isolatedSum": { + "p50": 127.1359995007515, + "p90": 131.48800283670425, + "p95": 138.33600282669067, + "p99": 173.0239987373352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.93599817156792, + "p90": 62.6240000128746, + "p95": 66.04799628257751, + "p99": 106.6880002617836 + }, + "combine": { + "p50": 67.87200272083282, + "p90": 76.76800340414047, + "p95": 77.63200253248215, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 118.33599954843521, + "p90": 124.4800016283989, + "p95": 126.49600207805634, + "p99": 176.32000148296356 + }, + "isolatedSum": { + "p50": 127.80800089240074, + "p90": 139.39200341701508, + "p95": 143.67999881505966, + "p99": 196.80000096559525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.92799827456474, + "p90": 64.2239972949028, + "p95": 67.55200028419495, + "p99": 167.7439957857132 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 76.7040029168129, + "p95": 77.69600301980972, + "p99": 90.59199690818787 + }, + "roundtrip": { + "p50": 124.06399846076965, + "p90": 127.29600071907043, + "p95": 129.5360028743744, + "p99": 176.28799378871918 + }, + "isolatedSum": { + "p50": 129.05599549412727, + "p90": 140.9280002117157, + "p95": 145.24800330400467, + "p99": 
258.33599269390106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.51200145483017, + "p90": 79.68000322580338, + "p95": 81.18399977684021, + "p99": 145.4080045223236 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 79.80799674987793, + "p95": 80.99199831485748, + "p99": 102.33599692583084 + }, + "roundtrip": { + "p50": 131.29599392414093, + "p90": 135.55200397968292, + "p95": 139.16799426078796, + "p99": 148.3519971370697 + }, + "isolatedSum": { + "p50": 155.04000335931778, + "p90": 159.4879999756813, + "p95": 162.1759980916977, + "p99": 247.74400144815445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.39200103282928, + "p90": 81.63200318813324, + "p95": 85.21600067615509, + "p99": 145.31199634075165 + }, + "combine": { + "p50": 91.16800129413605, + "p90": 92.67199784517288, + "p95": 94.27200257778168, + "p99": 115.9679964184761 + }, + "roundtrip": { + "p50": 159.64800119400024, + "p90": 163.96799683570862, + "p95": 165.3759926557541, + "p99": 206.62400126457214 + }, + "isolatedSum": { + "p50": 170.56000232696533, + "p90": 174.30400103330612, + "p95": 179.48800325393677, + "p99": 261.27999275922775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.47199779748917, 
+ "p90": 105.6319996714592, + "p95": 109.82400178909302, + "p99": 157.75999426841736 + }, + "combine": { + "p50": 127.13600695133209, + "p90": 128.31999361515045, + "p95": 129.34400141239166, + "p99": 151.96800231933594 + }, + "roundtrip": { + "p50": 210.14399826526642, + "p90": 215.13600647449493, + "p95": 217.15199947357178, + "p99": 243.45600605010986 + }, + "isolatedSum": { + "p50": 228.60800474882126, + "p90": 233.95199328660965, + "p95": 239.16800320148468, + "p99": 309.7279965877533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-175a266e", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b300_a31a9c8e", + "comparisonKey": "0af981852a9b0829", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:20.232200+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 
148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.28000029921532, + "p90": 66.04799628257751, + "p95": 69.85600292682648, + "p99": 82.20800012350082 + }, + "combine": { + "p50": 55.55199831724167, + "p90": 59.487998485565186, + "p95": 65.21599739789963, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 95.74399888515472, + "p90": 102.78400033712387, + "p95": 104.70400005578995, + "p99": 126.27199292182922 + }, + "isolatedSum": { + "p50": 112.83199861645699, + "p90": 125.5359947681427, + "p95": 135.0720003247261, + "p99": 160.15999764204025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.92800134420395, + "p90": 68.76800209283829, + "p95": 100.12800246477127, + "p99": 126.65599584579468 + }, + "combine": { + "p50": 56.543998420238495, + "p90": 66.20799750089645, + "p95": 66.65600091218948, + "p99": 80.64000308513641 + }, + "roundtrip": { + "p50": 96.0640013217926, + "p90": 
102.59199887514114, + "p95": 104.54399883747101, + "p99": 125.44000148773193 + }, + "isolatedSum": { + "p50": 113.47199976444244, + "p90": 134.97599959373474, + "p95": 166.78400337696075, + "p99": 207.2959989309311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.52799862623215, + "p90": 64.80000168085098, + "p95": 67.52000004053116, + "p99": 91.39200299978256 + }, + "combine": { + "p50": 56.03199824690819, + "p90": 65.0240033864975, + "p95": 65.85600227117538, + "p99": 69.92000341415405 + }, + "roundtrip": { + "p50": 107.77600109577179, + "p90": 114.17599767446518, + "p95": 114.88000303506851, + "p99": 119.35999989509583 + }, + "isolatedSum": { + "p50": 114.55999687314034, + "p90": 129.82400506734848, + "p95": 133.37600231170654, + "p99": 161.31200641393661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.49599838256836, + "p90": 69.05599683523178, + "p95": 71.99999690055847, + "p99": 88.0960002541542 + }, + "combine": { + "p50": 65.63200056552887, + "p90": 66.43199920654297, + "p95": 66.84800237417221, + "p99": 77.2479996085167 + }, + "roundtrip": { + "p50": 106.9440022110939, + "p90": 112.5440001487732, + "p95": 114.59200084209442, + "p99": 174.49599504470825 + }, + "isolatedSum": { + "p50": 124.12799894809723, + "p90": 135.48799604177475, + "p95": 138.84799927473068, + "p99": 165.3439998626709 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.95999851822853, + "p90": 68.57600063085556, + "p95": 70.04799693822861, + "p99": 85.85599809885025 + }, + "combine": { + "p50": 66.11199676990509, + "p90": 66.84800237417221, + "p95": 67.58400052785873, + "p99": 70.14399766921997 + }, + "roundtrip": { + "p50": 107.87200182676315, + "p90": 110.944002866745, + "p95": 114.20799791812897, + "p99": 134.24000144004822 + }, + "isolatedSum": { + "p50": 127.07199528813362, + "p90": 135.42400300502777, + "p95": 137.63199746608734, + "p99": 155.99999576807022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 72.09599763154984, + "p90": 76.64000242948532, + "p95": 77.88799703121185, + "p99": 91.36000275611877 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 77.34400033950806, + "p95": 77.98399776220322, + "p99": 80.19199967384338 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 125.18399953842163, + "p95": 127.16799974441528, + "p99": 143.327996134758 + }, + "isolatedSum": { + "p50": 140.06400108337402, + "p90": 153.98400276899338, + "p95": 155.87199479341507, + "p99": 171.55200242996216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.8719972372055, + "p90": 82.07999914884567, + "p95": 84.44800227880478, + "p99": 104.47999835014343 + }, + "combine": { + "p50": 89.82399851083755, + "p90": 91.00800007581711, + "p95": 
91.26400202512741, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 146.33600413799286, + "p90": 154.27200496196747, + "p95": 157.05600380897522, + "p99": 180.35200238227844 + }, + "isolatedSum": { + "p50": 169.69599574804306, + "p90": 173.08799922466278, + "p95": 175.7120043039322, + "p99": 200.44799894094467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.12000125646591, + "p90": 95.32800316810608, + "p95": 96.63999825716019, + "p99": 108.92800241708755 + }, + "combine": { + "p50": 117.24799871444702, + "p90": 126.3359934091568, + "p95": 127.03999876976013, + "p99": 141.50400459766388 + }, + "roundtrip": { + "p50": 197.82400131225586, + "p90": 206.496000289917, + "p95": 209.85600352287292, + "p99": 223.80800545215607 + }, + "isolatedSum": { + "p50": 210.36799997091293, + "p90": 221.66399657726288, + "p95": 223.67999702692032, + "p99": 250.43200701475143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-06d939b6", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b300_714eb7c0", + "comparisonKey": "40ff1eb0115cac75", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:21.724027+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.111999183893204, + "p90": 59.58399921655655, + "p95": 61.503998935222626, + "p99": 68.67200136184692 + }, + "combine": { + "p50": 66.17599725723267, + "p90": 66.72000139951706, + "p95": 67.29599833488464, + "p99": 97.6639986038208 + }, + "roundtrip": { + "p50": 107.58399963378906, + "p90": 114.97599631547928, + "p95": 116.28799885511398, + "p99": 139.0399932861328 + }, + "isolatedSum": { + "p50": 
124.28799644112587, + "p90": 126.30400061607361, + "p95": 128.79999727010727, + "p99": 166.33599996566772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.95200169086456, + "p90": 59.61599946022034, + "p95": 61.76000088453293, + "p99": 71.61600142717361 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 67.84000247716904, + "p95": 69.05599683523178, + "p99": 90.46400338411331 + }, + "roundtrip": { + "p50": 107.16799646615982, + "p90": 109.92000252008438, + "p95": 111.7440015077591, + "p99": 123.07199835777283 + }, + "isolatedSum": { + "p50": 124.28800016641617, + "p90": 127.45600193738937, + "p95": 130.8159977197647, + "p99": 162.08000481128693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 60.19200012087822, + "p90": 62.6240000128746, + "p95": 65.5680000782013, + "p99": 85.02399921417236 + }, + "combine": { + "p50": 67.84000247716904, + "p90": 69.88800317049026, + "p95": 77.02399790287018, + "p99": 80.38400113582611 + }, + "roundtrip": { + "p50": 123.80799651145935, + "p90": 126.94400548934937, + "p95": 127.9039978981018, + "p99": 170.3999936580658 + }, + "isolatedSum": { + "p50": 128.03200259804726, + "p90": 132.51200318336487, + "p95": 142.59199798107147, + "p99": 165.40800034999847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 60.19200012087822, + "p90": 62.78400123119354, + "p95": 66.68800115585327, + "p99": 87.13600039482117 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 77.37600058317184, + "p95": 78.3040001988411, + "p99": 89.63199704885483 + }, + "roundtrip": { + "p50": 120.80000340938568, + "p90": 126.36800110340118, + "p95": 127.32799351215363, + "p99": 177.91999876499176 + }, + "isolatedSum": { + "p50": 128.09600308537483, + "p90": 140.1600018143654, + "p95": 144.99200135469437, + "p99": 176.767997443676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.65600025653839, + "p90": 69.50400024652481, + "p95": 72.80000299215317, + "p99": 127.45599448680878 + }, + "combine": { + "p50": 77.72800326347351, + "p90": 78.72000336647034, + "p95": 79.13599908351898, + "p99": 102.27199643850327 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 124.64000284671783, + "p95": 127.55200266838074, + "p99": 146.01600170135498 + }, + "isolatedSum": { + "p50": 140.3840035200119, + "p90": 148.22400361299515, + "p95": 151.93600207567215, + "p99": 229.72799092531204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.0799971818924, + "p90": 73.47200065851212, + "p95": 77.27999985218048, + "p99": 104.41599786281586 + }, + "combine": { + "p50": 78.59200239181519, + "p90": 79.93599772453308, + "p95": 80.76799660921097, + "p99": 92.79999881982803 + }, + "roundtrip": { + "p50": 131.8719983100891, + "p90": 137.05599308013916, + 
"p95": 137.95199990272522, + "p99": 147.93600142002106 + }, + "isolatedSum": { + "p50": 148.67199957370758, + "p90": 153.4079983830452, + "p95": 158.04799646139145, + "p99": 197.2159966826439 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.52000266313553, + "p90": 90.14400094747543, + "p95": 92.16000139713287, + "p99": 111.07199639081955 + }, + "combine": { + "p50": 92.32000261545181, + "p90": 96.54399752616882, + "p95": 102.33599692583084, + "p99": 107.00800269842148 + }, + "roundtrip": { + "p50": 161.50400042533875, + "p90": 166.01599752902985, + "p95": 167.58400201797485, + "p99": 184.32000279426575 + }, + "isolatedSum": { + "p50": 175.84000527858734, + "p90": 186.68799847364426, + "p95": 194.49599832296371, + "p99": 218.07999908924103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.78399902582169, + "p90": 97.69599884748459, + "p95": 100.73599964380264, + "p99": 116.99199676513672 + }, + "combine": { + "p50": 115.42399972677231, + "p90": 116.64000153541565, + "p95": 117.50400066375732, + "p99": 127.16799974441528 + }, + "roundtrip": { + "p50": 197.05599546432495, + "p90": 200.6720006465912, + "p95": 202.07999646663666, + "p99": 217.21599996089935 + }, + "isolatedSum": { + "p50": 210.207998752594, + "p90": 214.33600038290024, + "p95": 218.24000030755997, + "p99": 244.159996509552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + 
"recvTokensMax": 704, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2beffc19", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b300_0209cdf1", + "comparisonKey": "9d5ae16c831cd96f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:45.296082+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.24800005555153, + "p90": 59.93599817156792, + "p95": 63.13599646091461, + "p99": 72.67200201749802 + }, + "combine": { + "p50": 66.11199676990509, + "p90": 66.84800237417221, + "p95": 67.07199662923813, + "p99": 70.01599669456482 + }, + "roundtrip": { + "p50": 106.81600123643875, + "p90": 113.3119985461235, + "p95": 114.97599631547928, + "p99": 131.071999669075 + }, + "isolatedSum": { + "p50": 123.35999682545662, + "p90": 126.78400054574013, + "p95": 130.20799309015274, + "p99": 142.68799871206284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.56799876689911, + "p90": 59.42400172352791, + "p95": 61.37600168585777, + "p99": 71.1359977722168 + }, + "combine": { + "p50": 66.46399945020676, + "p90": 67.80800223350525, + "p95": 68.51200014352798, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 106.81600123643875, + "p90": 108.99200290441513, + "p95": 110.11199653148651, + "p99": 117.27999895811081 + }, + "isolatedSum": { + "p50": 124.03199821710587, + "p90": 127.23200395703316, + "p95": 129.88800182938576, + "p99": 161.31199896335602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.87200140953064, + "p90": 62.24000081419945, + "p95": 64.19199705123901, + "p99": 84.16000008583069 + }, + "combine": { 
+ "p50": 67.80800223350525, + "p90": 69.56800073385239, + "p95": 76.86399668455124, + "p99": 78.46400141716003 + }, + "roundtrip": { + "p50": 117.91999638080597, + "p90": 124.09599870443344, + "p95": 125.56800246238708, + "p99": 130.0799995660782 + }, + "isolatedSum": { + "p50": 127.68000364303589, + "p90": 131.80800154805183, + "p95": 141.05599373579025, + "p99": 162.62400150299072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.19200012087822, + "p90": 62.33600154519081, + "p95": 63.71200084686279, + "p99": 82.04799890518188 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 76.7040029168129, + "p95": 77.56800204515457, + "p99": 79.8719972372055 + }, + "roundtrip": { + "p50": 123.83999675512314, + "p90": 127.13600695133209, + "p95": 128.35200130939484, + "p99": 162.1759980916977 + }, + "isolatedSum": { + "p50": 128.1920038163662, + "p90": 139.0400044620037, + "p95": 141.28000289201736, + "p99": 161.9199961423874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.272001057863235, + "p90": 69.023996591568, + "p95": 70.20799815654755, + "p99": 88.57599645853043 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 77.504001557827, + "p95": 78.43200117349625, + "p99": 90.40000289678574 + }, + "roundtrip": { + "p50": 120.2239990234375, + "p90": 125.56800246238708, + "p95": 127.9039978981018, + "p99": 144.3520039319992 + }, + "isolatedSum": { + "p50": 130.2720047533512, + "p90": 146.527998149395, + "p95": 148.6399993300438, + "p99": 
178.97599935531616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.3359991312027, + "p90": 77.60000228881836, + "p95": 79.19999957084656, + "p99": 93.75999867916107 + }, + "combine": { + "p50": 78.5600021481514, + "p90": 79.99999821186066, + "p95": 80.99199831485748, + "p99": 89.75999802350998 + }, + "roundtrip": { + "p50": 134.3040019273758, + "p90": 137.37599551677704, + "p95": 138.94400000572205, + "p99": 153.76000106334686 + }, + "isolatedSum": { + "p50": 148.8960012793541, + "p90": 157.60000050067902, + "p95": 160.19199788570404, + "p99": 183.51999670267105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.21600133180618, + "p90": 91.96799993515015, + "p95": 93.44000369310379, + "p99": 118.97599697113037 + }, + "combine": { + "p50": 91.61599725484848, + "p90": 94.04800087213516, + "p95": 96.6079980134964, + "p99": 107.80800133943558 + }, + "roundtrip": { + "p50": 159.7760021686554, + "p90": 164.19200599193573, + "p95": 165.53600132465363, + "p99": 183.20000171661377 + }, + "isolatedSum": { + "p50": 180.83199858665466, + "p90": 186.0160008072853, + "p95": 190.0480017066002, + "p99": 226.78399831056595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.3199971318245, + 
"p90": 107.744000852108, + "p95": 111.1999973654747, + "p99": 121.37600034475327 + }, + "combine": { + "p50": 127.45599448680878, + "p90": 128.60800325870514, + "p95": 130.3360015153885, + "p99": 140.03199338912964 + }, + "roundtrip": { + "p50": 207.35999941825867, + "p90": 215.36000072956085, + "p95": 216.5759950876236, + "p99": 223.7440049648285 + }, + "isolatedSum": { + "p50": 231.77599161863327, + "p90": 236.35200411081314, + "p95": 241.5359988808632, + "p99": 261.4079937338829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db041d13", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b300_d24e13bd", + "comparisonKey": "3de6c26902d242a3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:08.275177+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + 
"deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.46399813890457, + "p90": 61.85600161552429, + "p95": 64.44799900054932, + "p99": 104.54399883747101 + }, + "combine": { + "p50": 66.52799993753433, + "p90": 68.25599819421768, + "p95": 69.31199878454208, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 108.2879975438118, + "p90": 117.40799993276596, + "p95": 124.54400211572647, + "p99": 196.31999731063843 + }, + "isolatedSum": { + "p50": 124.9919980764389, + "p90": 130.11199980974197, + "p95": 133.7599977850914, + "p99": 194.72000002861023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.687999844551086, + "p90": 64.35199826955795, + "p95": 71.80800288915634, + "p99": 169.5999950170517 + }, + "combine": { + "p50": 67.07199662923813, + "p90": 69.40799951553345, + "p95": 77.02399790287018, + "p99": 92.79999881982803 + }, + "roundtrip": { + "p50": 
108.73600095510483, + "p90": 113.88800293207169, + "p95": 118.97599697113037, + "p99": 180.09600043296814 + }, + "isolatedSum": { + "p50": 125.75999647378922, + "p90": 133.7599977850914, + "p95": 148.83200079202652, + "p99": 262.39999383687973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.58399921655655, + "p90": 62.30400130152702, + "p95": 64.51199948787689, + "p99": 86.81599795818329 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 70.49600034952164, + "p95": 77.37600058317184, + "p99": 81.11999928951263 + }, + "roundtrip": { + "p50": 122.11199849843979, + "p90": 126.3359934091568, + "p95": 127.77599692344666, + "p99": 140.22399485111237 + }, + "isolatedSum": { + "p50": 127.80799716711044, + "p90": 132.80000165104866, + "p95": 141.88800007104874, + "p99": 167.93599724769592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.22400036454201, + "p90": 65.34399837255478, + "p95": 71.1359977722168, + "p99": 150.43200552463531 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 77.60000228881836, + "p95": 78.36800068616867, + "p99": 82.24000036716461 + }, + "roundtrip": { + "p50": 122.36800044775009, + "p90": 128.89599800109863, + "p95": 133.27999413013458, + "p99": 199.35999810695648 + }, + "isolatedSum": { + "p50": 128.38399782776833, + "p90": 142.94400066137314, + "p95": 149.50399845838547, + "p99": 232.67200589179993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 
5.296875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.07999959588051, + "p90": 74.14399832487106, + "p95": 81.4720019698143, + "p99": 128.92800569534302 + }, + "combine": { + "p50": 78.27199995517731, + "p90": 79.19999957084656, + "p95": 80.32000064849854, + "p99": 102.94400155544281 + }, + "roundtrip": { + "p50": 121.95199728012085, + "p90": 128.1919926404953, + "p95": 130.8480054140091, + "p99": 183.80799889564514 + }, + "isolatedSum": { + "p50": 140.35199955105782, + "p90": 153.34399789571762, + "p95": 161.79200261831284, + "p99": 231.87200725078583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.68800181150436, + "p90": 76.03199779987335, + "p95": 79.0719985961914, + "p99": 159.87199544906616 + }, + "combine": { + "p50": 79.3600007891655, + "p90": 81.79199695587158, + "p95": 89.79199826717377, + "p99": 130.17599284648895 + }, + "roundtrip": { + "p50": 132.54399597644806, + "p90": 137.15200126171112, + "p95": 140.76800644397736, + "p99": 200.8640021085739 + }, + "isolatedSum": { + "p50": 150.04800260066986, + "p90": 157.82399475574493, + "p95": 168.86399686336517, + "p99": 290.0479882955551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.51200342178345, + "p90": 91.90399944782257, + "p95": 97.9200005531311, + "p99": 165.75999557971954 + }, + "combine": { + "p50": 92.3520028591156, + "p90": 
99.29600358009338, + "p95": 102.11200267076492, + "p99": 116.80000275373459 + }, + "roundtrip": { + "p50": 160.99199652671814, + "p90": 165.8560037612915, + "p95": 168.06399822235107, + "p99": 205.6639939546585 + }, + "isolatedSum": { + "p50": 180.86400628089905, + "p90": 191.20000302791595, + "p95": 200.03200322389603, + "p99": 282.55999833345413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.10400146245956, + "p90": 100.8640006184578, + "p95": 102.52799838781357, + "p99": 119.10399794578552 + }, + "combine": { + "p50": 115.39199948310852, + "p90": 116.48000031709671, + "p95": 117.40799993276596, + "p99": 127.20000743865967 + }, + "roundtrip": { + "p50": 197.1839964389801, + "p90": 201.82399451732635, + "p95": 204.54399287700653, + "p99": 223.93600642681122 + }, + "isolatedSum": { + "p50": 210.49600094556808, + "p90": 217.3440009355545, + "p95": 219.93599832057953, + "p99": 246.3040053844452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9cc81229", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_57af4dde", + "comparisonKey": "8ca5ceea383bd384", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:32.975302+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.472001761198044, + "p90": 60.7680007815361, + "p95": 63.35999816656113, + "p99": 111.10399663448334 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 67.16799736022949, + "p95": 67.80800223350525, + "p99": 86.46400272846222 + }, + "roundtrip": { + "p50": 107.35999792814255, + "p90": 113.82400244474411, + "p95": 116.48000031709671, + "p99": 162.20800578594208 + }, + "isolatedSum": { + "p50": 
123.61599877476692, + "p90": 127.9359981417656, + "p95": 131.16800040006638, + "p99": 197.56799936294556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.631999254226685, + "p90": 60.63999980688095, + "p95": 63.26399743556976, + "p99": 105.82400113344193 + }, + "combine": { + "p50": 66.30399823188782, + "p90": 68.06399673223495, + "p95": 70.39999961853027, + "p99": 114.94400352239609 + }, + "roundtrip": { + "p50": 107.64800012111664, + "p90": 114.17599767446518, + "p95": 116.54400080442429, + "p99": 137.11999356746674 + }, + "isolatedSum": { + "p50": 123.9359974861145, + "p90": 128.7039965391159, + "p95": 133.66399705410004, + "p99": 220.768004655838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.74400043487549, + "p90": 61.91999837756157, + "p95": 64.64000046253204, + "p99": 82.40000158548355 + }, + "combine": { + "p50": 67.26399809122086, + "p90": 69.21599805355072, + "p95": 70.52800059318542, + "p99": 91.51999652385712 + }, + "roundtrip": { + "p50": 109.82400178909302, + "p90": 116.28799885511398, + "p95": 118.94399672746658, + "p99": 133.15199315547943 + }, + "isolatedSum": { + "p50": 127.00799852609634, + "p90": 131.1359964311123, + "p95": 135.16800105571747, + "p99": 173.91999810934067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 59.967998415231705, + "p90": 82.87999778985977, + "p95": 87.71199733018875, + "p99": 104.5759990811348 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 77.60000228881836, + "p95": 78.14399898052216, + "p99": 90.68799763917923 + }, + "roundtrip": { + "p50": 118.20799857378006, + "p90": 125.11999905109406, + "p95": 126.68800354003906, + "p99": 191.71200692653656 + }, + "isolatedSum": { + "p50": 128.51199880242348, + "p90": 160.48000007867813, + "p95": 165.8559963107109, + "p99": 195.26399672031403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.99199876189232, + "p90": 66.75200164318085, + "p95": 68.70400160551071, + "p99": 93.37600320577621 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 77.08799839019775, + "p95": 77.63200253248215, + "p99": 90.68799763917923 + }, + "roundtrip": { + "p50": 123.99999797344208, + "p90": 126.81600451469421, + "p95": 128.06400656700134, + "p99": 173.92000555992126 + }, + "isolatedSum": { + "p50": 129.5039989054203, + "p90": 143.8400000333786, + "p95": 146.33600413799286, + "p99": 184.06400084495544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.7040029168129, + "p90": 79.13599908351898, + "p95": 80.22399991750717, + "p99": 98.43199700117111 + }, + "combine": { + "p50": 78.49600166082382, + "p90": 79.48800176382065, + "p95": 80.4160013794899, + "p99": 102.33599692583084 + }, + "roundtrip": { + "p50": 132.1599930524826, + "p90": 137.66400516033173, + 
"p95": 140.1280015707016, + "p99": 193.59999895095825 + }, + "isolatedSum": { + "p50": 155.20000457763672, + "p90": 158.62400084733963, + "p95": 160.64000129699707, + "p99": 200.76799392700195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.55200225114822, + "p90": 81.34400099515915, + "p95": 84.28800106048584, + "p99": 111.29599809646606 + }, + "combine": { + "p50": 90.97599983215332, + "p90": 92.57599711418152, + "p95": 93.91999989748001, + "p99": 104.51199859380722 + }, + "roundtrip": { + "p50": 159.84000265598297, + "p90": 163.4880006313324, + "p95": 165.02399742603302, + "p99": 186.24000251293182 + }, + "isolatedSum": { + "p50": 170.52800208330154, + "p90": 173.91999810934067, + "p95": 178.20800095796585, + "p99": 215.80799669027328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.2319962978363, + "p90": 108.31999778747559, + "p95": 111.61600053310394, + "p99": 165.92000424861908 + }, + "combine": { + "p50": 127.10399925708771, + "p90": 128.12800705432892, + "p95": 129.15199995040894, + "p99": 139.8400068283081 + }, + "roundtrip": { + "p50": 209.9519968032837, + "p90": 215.2000069618225, + "p95": 217.69599616527557, + "p99": 246.49600684642792 + }, + "isolatedSum": { + "p50": 230.335995554924, + "p90": 236.4480048418045, + "p95": 240.76800048351288, + "p99": 305.7600110769272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 
1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-617b37f7", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_389b1c50", + "comparisonKey": "3558a3dfdd1a7379", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:56.154959+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + 
"run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.559998869895935, + "p90": 63.93600255250931, + "p95": 65.92000275850296, + "p99": 98.01600128412247 + }, + "combine": { + "p50": 66.17599725723267, + "p90": 67.03999638557434, + "p95": 68.31999868154526, + "p99": 78.62400263547897 + }, + "roundtrip": { + "p50": 108.35199803113937, + "p90": 115.26399850845337, + "p95": 116.48000031709671, + "p99": 124.51200187206268 + }, + "isolatedSum": { + "p50": 124.7359961271286, + "p90": 130.97599893808365, + "p95": 134.24000144004822, + "p99": 176.64000391960144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.720000088214874, + "p90": 61.184000223875046, + "p95": 64.86400216817856, + "p99": 121.24799937009811 + }, + "combine": { + "p50": 67.19999760389328, + "p90": 69.15199756622314, + "p95": 70.36799937486649, + "p99": 82.56000280380249 + }, + "roundtrip": { + "p50": 108.41599851846695, + "p90": 112.28799819946289, + "p95": 118.14399808645248, + "p99": 134.94400680065155 + }, + "isolatedSum": { + "p50": 125.91999769210815, + "p90": 130.3359977900982, + "p95": 135.23200154304504, + "p99": 203.8080021739006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 60.63999980688095, + "p90": 63.64800035953522, + "p95": 66.59200042486191, + 
"p99": 104.76800054311752 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 76.86399668455124, + "p95": 77.47200131416321, + "p99": 90.7519981265068 + }, + "roundtrip": { + "p50": 124.60800260305405, + "p90": 139.26400244235992, + "p95": 148.73600006103516, + "p99": 177.34399437904358 + }, + "isolatedSum": { + "p50": 128.92799824476242, + "p90": 140.51199704408646, + "p95": 144.06400173902512, + "p99": 195.51999866962433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.54399907588959, + "p90": 83.61600339412689, + "p95": 89.12000060081482, + "p99": 120.12799829244614 + }, + "combine": { + "p50": 68.86400282382965, + "p90": 78.5600021481514, + "p95": 79.0719985961914, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 125.05599856376648, + "p90": 128.48000228405, + "p95": 132.22399353981018, + "p99": 157.18400478363037 + }, + "isolatedSum": { + "p50": 129.40800189971924, + "p90": 162.1760055422783, + "p95": 168.19199919700623, + "p99": 200.54399967193604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 63.00800293684006, + "p90": 70.39999961853027, + "p95": 74.30399954319, + "p99": 97.28000313043594 + }, + "combine": { + "p50": 77.69600301980972, + "p90": 78.8159966468811, + "p95": 79.16799932718277, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 121.37600034475327, + "p90": 130.62399625778198, + "p95": 136.00000739097595, + "p99": 148.76799285411835 + }, + "isolatedSum": { + "p50": 140.70400595664978, + "p90": 
149.21599626541138, + "p95": 153.47199887037277, + "p99": 187.3920038342476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.75200229883194, + "p90": 77.15199887752533, + "p95": 79.71200346946716, + "p99": 145.02400159835815 + }, + "combine": { + "p50": 78.8159966468811, + "p90": 81.85599744319916, + "p95": 90.36800265312195, + "p99": 105.50399869680405 + }, + "roundtrip": { + "p50": 133.760005235672, + "p90": 138.7840062379837, + "p95": 140.00000059604645, + "p99": 162.9759967327118 + }, + "isolatedSum": { + "p50": 149.56799894571304, + "p90": 159.0079963207245, + "p95": 170.0800061225891, + "p99": 250.5280002951622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.56799656152725, + "p90": 92.16000139713287, + "p95": 93.18400174379349, + "p99": 109.40799862146378 + }, + "combine": { + "p50": 92.44800359010696, + "p90": 94.91200000047684, + "p95": 101.85600072145462, + "p99": 114.30399864912033 + }, + "roundtrip": { + "p50": 162.30399906635284, + "p90": 166.87999665737152, + "p95": 168.32000017166138, + "p99": 220.5439954996109 + }, + "isolatedSum": { + "p50": 182.01600015163422, + "p90": 187.0720013976097, + "p95": 195.0400024652481, + "p99": 223.7119972705841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 95.23200243711472, + "p90": 98.9760011434555, + "p95": 101.85600072145462, + "p99": 116.99199676513672 + }, + "combine": { + "p50": 115.39199948310852, + "p90": 116.80000275373459, + "p95": 118.07999759912491, + "p99": 128.48000228405 + }, + "roundtrip": { + "p50": 194.91200149059296, + "p90": 202.55999267101288, + "p95": 208.6080014705658, + "p99": 222.24000096321106 + }, + "isolatedSum": { + "p50": 210.62400192022324, + "p90": 215.7760038971901, + "p95": 219.93599832057953, + "p99": 245.4719990491867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dcf86785", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_b6ab71d2", + "comparisonKey": "82ae1f6124edd870", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:51.955005+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.720000088214874, + "p90": 76.06399804353714, + "p95": 86.65599673986435, + "p99": 109.92000252008438 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 67.35999882221222, + "p95": 69.08799707889557, + "p99": 90.7839983701706 + }, + "roundtrip": { + "p50": 109.79200154542923, + "p90": 126.20800733566284, + "p95": 137.56799697875977, + "p99": 260.6079876422882 + }, + "isolatedSum": { + "p50": 124.9919980764389, + "p90": 143.42399686574936, + "p95": 155.74399381875992, + "p99": 200.70400089025497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.6559996008873, + "p90": 64.83200192451477, + "p95": 67.84000247716904, + "p99": 120.41600048542023 + }, + "combine": { + "p50": 67.35999882221222, + "p90": 68.9919963479042, + "p95": 69.88800317049026, + "p99": 
80.44800162315369 + }, + "roundtrip": { + "p50": 108.15999656915665, + "p90": 111.87200248241425, + "p95": 116.15999788045883, + "p99": 143.68000626564026 + }, + "isolatedSum": { + "p50": 126.01599842309952, + "p90": 133.82399827241898, + "p95": 137.7280056476593, + "p99": 200.8640021085739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 60.60799956321716, + "p90": 62.84800171852112, + "p95": 64.09599632024765, + "p99": 81.15199953317642 + }, + "combine": { + "p50": 70.30399888753891, + "p90": 79.19999957084656, + "p95": 81.4720019698143, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 124.12799894809723, + "p90": 128.63999605178833, + "p95": 132.35199451446533, + "p99": 189.31199610233307 + }, + "isolatedSum": { + "p50": 130.91199845075607, + "p90": 142.04800128936768, + "p95": 145.56799829006195, + "p99": 172.2240000963211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.95999851822853, + "p90": 71.87200337648392, + "p95": 76.03199779987335, + "p99": 82.5280025601387 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 76.89599692821503, + "p95": 78.07999849319458, + "p99": 79.1039988398552 + }, + "roundtrip": { + "p50": 124.70400333404541, + "p90": 127.87200510501862, + "p95": 128.86400520801544, + "p99": 143.13599467277527 + }, + "isolatedSum": { + "p50": 129.02399525046349, + "p90": 148.76800030469894, + "p95": 154.11199629306793, + "p99": 161.6320013999939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + 
"combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.6240000128746, + "p90": 69.50400024652481, + "p95": 71.6480016708374, + "p99": 86.91199868917465 + }, + "combine": { + "p50": 77.88799703121185, + "p90": 78.72000336647034, + "p95": 79.1039988398552, + "p99": 81.85599744319916 + }, + "roundtrip": { + "p50": 121.72800302505493, + "p90": 130.72000443935394, + "p95": 136.57599687576294, + "p99": 152.99199521541595 + }, + "isolatedSum": { + "p50": 140.51199704408646, + "p90": 148.22400361299515, + "p95": 150.7520005106926, + "p99": 168.7679961323738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.94399631023407, + "p90": 76.48000121116638, + "p95": 77.27999985218048, + "p99": 87.67999708652496 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 80.19199967384338, + "p95": 81.69600367546082, + "p99": 102.08000242710114 + }, + "roundtrip": { + "p50": 134.43200290203094, + "p90": 137.9839926958084, + "p95": 139.67999815940857, + "p99": 155.39200603961945 + }, + "isolatedSum": { + "p50": 149.6639996767044, + "p90": 156.67200088500977, + "p95": 158.9760035276413, + "p99": 189.7599995136261 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.72799777984619, + "p90": 92.22400188446045, + "p95": 93.37600320577621, + "p99": 105.43999820947647 + }, + "combine": 
{ + "p50": 92.70399808883667, + "p90": 102.04800218343735, + "p95": 103.07200253009796, + "p99": 132.7040046453476 + }, + "roundtrip": { + "p50": 163.00800442695618, + "p90": 167.52000153064728, + "p95": 170.78399658203125, + "p99": 204.0960043668747 + }, + "isolatedSum": { + "p50": 182.43199586868286, + "p90": 194.2720040678978, + "p95": 196.44800573587418, + "p99": 238.14400285482407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.64799815416336, + "p90": 98.7199991941452, + "p95": 101.95200145244598, + "p99": 118.04799735546112 + }, + "combine": { + "p50": 115.52000045776367, + "p90": 117.15199798345566, + "p95": 118.07999759912491, + "p99": 127.32799351215363 + }, + "roundtrip": { + "p50": 195.5839991569519, + "p90": 203.2960057258606, + "p95": 205.85599541664124, + "p99": 225.72800517082214 + }, + "isolatedSum": { + "p50": 211.16799861192703, + "p90": 215.87199717760086, + "p95": 220.0319990515709, + "p99": 245.37599086761475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-04661963", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_213466e9", + "comparisonKey": "f6bb20a6281e16f2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:51.318997+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 50.08000135421753, + "p90": 51.67999863624573, + "p95": 52.70399898290634, + "p99": 76.25599950551987 + }, + "combine": { + "p50": 66.43199920654297, + "p90": 68.15999746322632, + "p95": 69.85600292682648, + "p99": 85.91999858617783 + }, + "roundtrip": { + "p50": 99.71199929714203, + "p90": 105.02400249242783, + "p95": 107.04000294208527, + "p99": 123.23199957609177 + }, + "isolatedSum": { + 
"p50": 116.5120005607605, + "p90": 119.83999609947205, + "p95": 122.56000190973282, + "p99": 162.1759980916977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 50.624001771211624, + "p90": 53.44000086188316, + "p95": 57.023998349905014, + "p99": 76.28799974918365 + }, + "combine": { + "p50": 67.64800101518631, + "p90": 69.2799985408783, + "p95": 69.98399645090103, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 102.01600193977356, + "p90": 104.99200224876404, + "p95": 108.57599973678589, + "p99": 157.82399475574493 + }, + "isolatedSum": { + "p50": 118.27200278639793, + "p90": 122.71999940276146, + "p95": 127.00799480080605, + "p99": 154.23999726772308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 53.66399884223938, + "p90": 60.896001756191254, + "p95": 63.77600133419037, + "p99": 77.18399912118912 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 73.11999797821045, + "p95": 77.37600058317184, + "p99": 80.73599636554718 + }, + "roundtrip": { + "p50": 116.80000275373459, + "p90": 120.09599804878235, + "p95": 121.0239976644516, + "p99": 133.5040032863617 + }, + "isolatedSum": { + "p50": 121.66400253772736, + "p90": 134.0159997344017, + "p95": 141.1520019173622, + "p99": 157.9199954867363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 53.119998425245285, + "p90": 56.76800012588501, + "p95": 60.127999633550644, + "p99": 69.85600292682648 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 77.05599814653397, + "p95": 77.91999727487564, + "p99": 78.84799689054489 + }, + "roundtrip": { + "p50": 113.98400366306305, + "p90": 119.6800023317337, + "p95": 120.70400267839432, + "p99": 143.48800480365753 + }, + "isolatedSum": { + "p50": 121.18399515748024, + "p90": 133.82399827241898, + "p95": 138.04799690842628, + "p99": 148.70399981737137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.80000102519989, + "p90": 67.16799736022949, + "p95": 68.03199648857117, + "p99": 71.74400240182877 + }, + "combine": { + "p50": 76.67200267314911, + "p90": 78.52800190448761, + "p95": 78.68800312280655, + "p99": 90.33600240945816 + }, + "roundtrip": { + "p50": 112.92800307273865, + "p90": 115.1999980211258, + "p95": 117.08799749612808, + "p99": 124.25599992275238 + }, + "isolatedSum": { + "p50": 137.472003698349, + "p90": 145.6959992647171, + "p95": 146.71999961137772, + "p99": 162.08000481128693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 62.463998794555664, + "p90": 63.64800035953522, + "p95": 66.04799628257751, + "p99": 73.34399968385696 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 79.96799796819687, + "p95": 80.83199709653854, + "p99": 93.98400038480759 + }, + "roundtrip": { + "p50": 124.09599870443344, + "p90": 126.71999633312225, + 
"p95": 129.08799946308136, + "p99": 149.56800639629364 + }, + "isolatedSum": { + "p50": 141.08800143003464, + "p90": 143.6159983277321, + "p95": 146.87999337911606, + "p99": 167.32800006866455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 75.03999769687653, + "p90": 80.89599758386612, + "p95": 81.91999793052673, + "p99": 122.36800044775009 + }, + "combine": { + "p50": 92.19200164079666, + "p90": 94.36800330877304, + "p95": 101.40799731016159, + "p99": 106.52799904346466 + }, + "roundtrip": { + "p50": 151.71200037002563, + "p90": 155.64799308776855, + "p95": 156.99200332164764, + "p99": 169.11999881267548 + }, + "isolatedSum": { + "p50": 167.2319993376732, + "p90": 175.26400089263916, + "p95": 183.32799524068832, + "p99": 228.89599949121475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.16800063848495, + "p90": 89.72799777984619, + "p95": 92.57599711418152, + "p99": 97.88800030946732 + }, + "combine": { + "p50": 115.42399972677231, + "p90": 116.2559986114502, + "p95": 117.27999895811081, + "p99": 126.91199779510498 + }, + "roundtrip": { + "p50": 186.3040030002594, + "p90": 193.05600225925446, + "p95": 193.95199418067932, + "p99": 209.24800634384155 + }, + "isolatedSum": { + "p50": 202.59200036525726, + "p90": 205.9839963912964, + "p95": 209.85599607229233, + "p99": 224.7999981045723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e900dd8f", + "identity": "b300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_39fa33d8", + "comparisonKey": "93529625e3de8524", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:18.302564+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 111.64800077676773, + "p90": 117.91999638080597, + "p95": 125.11999905109406, + "p99": 201.27999782562256 + }, + "combine": { + "p50": 51.29599943757057, + "p90": 53.02400141954422, + "p95": 53.47200110554695, + "p99": 68.2239979505539 + }, + "roundtrip": { + "p50": 154.23999726772308, + "p90": 160.09600460529327, + "p95": 165.3439998626709, + "p99": 198.40000569820404 + }, + "isolatedSum": { + "p50": 162.9440002143383, + "p90": 170.9439978003502, + "p95": 178.592000156641, + "p99": 269.50399577617645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 112.8000020980835, + "p90": 119.71200257539749, + "p95": 125.02400577068329, + "p99": 253.05598974227905 + }, + "combine": { + "p50": 52.032001316547394, + "p90": 53.37600037455559, + "p95": 53.888000547885895, + "p99": 59.13599953055382 + }, + "roundtrip": { + "p50": 155.83999454975128, + "p90": 160.96000373363495, + "p95": 166.20799899101257, + "p99": 188.25599551200867 + }, + "isolatedSum": { + "p50": 164.8320034146309, + "p90": 173.08800294995308, + "p95": 178.91200631856918, + "p99": 312.19198927283287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 113.53600025177002, + "p90": 117.95199662446976, + "p95": 122.11199849843979, + "p99": 143.327996134758 + }, + 
"combine": { + "p50": 57.312000542879105, + "p90": 63.71200084686279, + "p95": 65.50399959087372, + "p99": 85.91999858617783 + }, + "roundtrip": { + "p50": 162.59199380874634, + "p90": 169.8240041732788, + "p95": 182.01600015163422, + "p99": 262.1760070323944 + }, + "isolatedSum": { + "p50": 170.84800079464912, + "p90": 181.66399747133255, + "p95": 187.6159980893135, + "p99": 229.24799472093582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 114.94400352239609, + "p90": 120.89599668979645, + "p95": 125.88800489902496, + "p99": 251.10399723052979 + }, + "combine": { + "p50": 56.992001831531525, + "p90": 59.328000992536545, + "p95": 60.92799827456474, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 162.91199624538422, + "p90": 168.2559996843338, + "p95": 173.43999445438385, + "p99": 237.63200640678406 + }, + "isolatedSum": { + "p50": 171.9360053539276, + "p90": 180.223997682333, + "p95": 186.8160031735897, + "p99": 330.84800094366074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 115.58400094509125, + "p90": 120.09599804878235, + "p95": 123.64800274372101, + "p99": 143.327996134758 + }, + "combine": { + "p50": 57.53599852323532, + "p90": 64.80000168085098, + "p95": 66.81600213050842, + "p99": 83.83999764919281 + }, + "roundtrip": { + "p50": 164.0319973230362, + "p90": 188.31999599933624, + "p95": 205.76000213623047, + "p99": 242.43199825286865 + }, + "isolatedSum": { + "p50": 173.11999946832657, + "p90": 184.89599972963333, + "p95": 
190.46400487422943, + "p99": 227.1679937839508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 118.23999881744385, + "p90": 122.78400361537933, + "p95": 128.1919926404953, + "p99": 214.81600403785706 + }, + "combine": { + "p50": 60.095999389886856, + "p90": 62.30400130152702, + "p95": 63.19999694824219, + "p99": 71.48800045251846 + }, + "roundtrip": { + "p50": 169.72799599170685, + "p90": 187.48800456523895, + "p95": 201.4400064945221, + "p99": 256.8640112876892 + }, + "isolatedSum": { + "p50": 178.3359982073307, + "p90": 185.08800491690636, + "p95": 191.3919895887375, + "p99": 286.3040044903755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 129.4720023870468, + "p90": 133.88800621032715, + "p95": 139.0399932861328, + "p99": 157.53600001335144 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 76.38400048017502, + "p95": 77.69600301980972, + "p99": 89.63199704885483 + }, + "roundtrip": { + "p50": 193.7599927186966, + "p90": 199.35999810695648, + "p95": 208.6080014705658, + "p99": 305.02399802207947 + }, + "isolatedSum": { + "p50": 202.97600328922272, + "p90": 210.27200669050217, + "p95": 216.73599630594254, + "p99": 247.16799706220627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { 
+ "p50": 144.28800344467163, + "p90": 150.01599490642548, + "p95": 153.53600680828094, + "p99": 192.54399836063385 + }, + "combine": { + "p50": 90.40000289678574, + "p90": 92.73599833250046, + "p95": 94.55999732017517, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 228.2239943742752, + "p90": 243.45600605010986, + "p95": 254.8159956932068, + "p99": 299.1360127925873 + }, + "isolatedSum": { + "p50": 234.68800634145737, + "p90": 242.75199323892593, + "p95": 248.09600412845612, + "p99": 300.25599896907806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-41ae3fd0", + "identity": "b300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b300_39fa33d8", + "comparisonKey": "c34b6faf10aa7909", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:04.175156+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.67200112342834, + "p90": 118.9119964838028, + "p95": 125.76000392436981, + "p99": 252.79998779296875 + }, + "combine": { + "p50": 54.71999943256378, + "p90": 56.51199817657471, + "p95": 57.24800005555153, + "p99": 77.08799839019775 + }, + "roundtrip": { + "p50": 157.8879952430725, + "p90": 163.71199488639832, + "p95": 175.7120043039322, + "p99": 273.27999472618103 + }, + "isolatedSum": { + "p50": 167.39200055599213, + "p90": 175.4239946603775, + "p95": 183.00800397992134, + "p99": 329.8879861831665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 113.69600147008896, + "p90": 118.367999792099, + "p95": 126.81600451469421, + "p99": 271.07200026512146 + }, + "combine": { + "p50": 55.84000051021576, + "p90": 57.312000542879105, + "p95": 57.98399820923805, + "p99": 65.0240033864975 + }, + "roundtrip": { + "p50": 160.73599457740784, + "p90": 
165.98400473594666, + "p95": 174.14399981498718, + "p99": 316.5760040283203 + }, + "isolatedSum": { + "p50": 169.53600198030472, + "p90": 175.6800003349781, + "p95": 184.80000272393227, + "p99": 336.09600365161896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 115.52000045776367, + "p90": 123.64800274372101, + "p95": 128.1919926404953, + "p99": 252.41601467132568 + }, + "combine": { + "p50": 60.32000109553337, + "p90": 62.272001057863235, + "p95": 63.13599646091461, + "p99": 66.3679987192154 + }, + "roundtrip": { + "p50": 167.55199432373047, + "p90": 205.24799823760986, + "p95": 235.32800376415253, + "p99": 384.0000033378601 + }, + "isolatedSum": { + "p50": 175.84000155329704, + "p90": 185.92000380158424, + "p95": 191.3279891014099, + "p99": 318.7840133905411 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 116.57600104808807, + "p90": 121.56800180673599, + "p95": 130.75199723243713, + "p99": 250.5280077457428 + }, + "combine": { + "p50": 60.06399914622307, + "p90": 62.78400123119354, + "p95": 71.16799801588058, + "p99": 94.65599805116653 + }, + "roundtrip": { + "p50": 168.38400065898895, + "p90": 174.01599884033203, + "p95": 180.54400384426117, + "p99": 395.4879939556122 + }, + "isolatedSum": { + "p50": 176.64000019431114, + "p90": 184.35200303792953, + "p95": 201.91999524831772, + "p99": 345.18400579690933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 117.34399944543839, + "p90": 127.16799974441528, + "p95": 135.29600203037262, + "p99": 203.3279985189438 + }, + "combine": { + "p50": 61.5679994225502, + "p90": 63.551999628543854, + "p95": 66.14399701356888, + "p99": 89.66399729251862 + }, + "roundtrip": { + "p50": 169.72799599170685, + "p90": 184.51200425624847, + "p95": 193.59999895095825, + "p99": 275.7759988307953 + }, + "isolatedSum": { + "p50": 178.9119988679886, + "p90": 190.71999937295914, + "p95": 201.4399990439415, + "p99": 292.9919958114624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 121.79200351238251, + "p90": 127.00800597667694, + "p95": 141.27999544143677, + "p99": 296.60800099372864 + }, + "combine": { + "p50": 65.0240033864975, + "p90": 67.10399687290192, + "p95": 67.77600198984146, + "p99": 74.30399954319 + }, + "roundtrip": { + "p50": 177.34399437904358, + "p90": 181.8239986896515, + "p95": 184.9920004606247, + "p99": 262.0159983634949 + }, + "isolatedSum": { + "p50": 186.81600689888, + "p90": 194.11200284957886, + "p95": 209.05599743127823, + "p99": 370.91200053691864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 133.18400084972382, + "p90": 138.0160003900528, + "p95": 141.59999787807465, + "p99": 247.74399399757385 + }, + "combine": { + "p50": 78.3040001988411, + "p90": 81.50400221347809, + "p95": 
82.97599852085114, + "p99": 102.36799716949463 + }, + "roundtrip": { + "p50": 203.36000621318817, + "p90": 207.519993185997, + "p95": 213.05599808692932, + "p99": 254.65598702430725 + }, + "isolatedSum": { + "p50": 211.4880010485649, + "p90": 219.52000260353088, + "p95": 224.57599639892578, + "p99": 350.1119911670685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 153.24799716472626, + "p90": 158.49600732326508, + "p95": 163.42400014400482, + "p99": 236.32000386714935 + }, + "combine": { + "p50": 97.88800030946732, + "p90": 100.47999769449234, + "p95": 101.95200145244598, + "p99": 118.65600198507309 + }, + "roundtrip": { + "p50": 242.49599874019623, + "p90": 248.76800179481506, + "p95": 257.4400007724762, + "p99": 321.79200649261475 + }, + "isolatedSum": { + "p50": 251.13599747419357, + "p90": 258.9760050177574, + "p95": 265.3760015964508, + "p99": 354.97600585222244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ed94da83", + "identity": "b300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_39fa33d8", + "comparisonKey": "d702705af53ed5e4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:53.795833+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.86400258541107, + "p90": 136.9599997997284, + "p95": 180.16000092029572, + "p99": 231.77599906921387 + }, + "combine": { + "p50": 58.62399935722351, + "p90": 61.15199998021126, + "p95": 62.68800050020218, + "p99": 71.3919997215271 + }, + "roundtrip": { + "p50": 161.18399798870087, + "p90": 165.56799411773682, + "p95": 168.60799491405487, + "p99": 199.90399479866028 + }, + "isolatedSum": { + "p50": 171.48800194263458, + "p90": 198.11199977993965, + "p95": 
242.8480014204979, + "p99": 303.16799879074097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 113.37599903345108, + "p90": 119.07199770212173, + "p95": 124.35200065374374, + "p99": 209.9200040102005 + }, + "combine": { + "p50": 60.35200133919716, + "p90": 61.91999837756157, + "p95": 63.00800293684006, + "p99": 71.3919997215271 + }, + "roundtrip": { + "p50": 163.64799439907074, + "p90": 168.73599588871002, + "p95": 171.55200242996216, + "p99": 209.24800634384155 + }, + "isolatedSum": { + "p50": 173.72800037264824, + "p90": 180.9919960796833, + "p95": 187.3600035905838, + "p99": 281.3120037317276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 115.48800021409988, + "p90": 119.99999731779099, + "p95": 122.56000190973282, + "p99": 139.8719996213913 + }, + "combine": { + "p50": 61.88800185918808, + "p90": 63.90400230884552, + "p95": 65.66400080919266, + "p99": 86.62399649620056 + }, + "roundtrip": { + "p50": 169.5680022239685, + "p90": 174.84800517559052, + "p95": 177.91999876499176, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 177.37600207328796, + "p90": 183.9039996266365, + "p95": 188.22400271892548, + "p99": 226.49599611759186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 115.90400338172913, 
+ "p90": 121.85599654912949, + "p95": 125.2799928188324, + "p99": 204.0639966726303 + }, + "combine": { + "p50": 63.231997191905975, + "p90": 65.08799642324448, + "p95": 65.50399959087372, + "p99": 72.51200079917908 + }, + "roundtrip": { + "p50": 169.47199404239655, + "p90": 174.84800517559052, + "p95": 178.0479997396469, + "p99": 202.11200416088104 + }, + "isolatedSum": { + "p50": 179.1360005736351, + "p90": 186.94399297237396, + "p95": 190.78399240970612, + "p99": 276.5759974718094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 116.60800129175186, + "p90": 141.2159949541092, + "p95": 182.01600015163422, + "p99": 233.21600258350372 + }, + "combine": { + "p50": 64.25599753856659, + "p90": 66.14399701356888, + "p95": 67.32799857854843, + "p99": 75.19999891519547 + }, + "roundtrip": { + "p50": 171.00800573825836, + "p90": 176.86399817466736, + "p95": 181.2479943037033, + "p99": 226.27200186252594 + }, + "isolatedSum": { + "p50": 180.86399883031845, + "p90": 207.35999196767807, + "p95": 249.34399873018265, + "p99": 308.4160014986992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 121.08799815177917, + "p90": 126.11199915409088, + "p95": 128.80000472068787, + "p99": 141.24800264835358 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 70.0799971818924, + "p95": 71.3919997215271, + "p99": 76.35200023651123 + }, + "roundtrip": { + "p50": 181.11999332904816, + "p90": 185.98400056362152, + "p95": 189.02400135993958, + "p99": 227.32800245285034 + }, + 
"isolatedSum": { + "p50": 189.7599995136261, + "p90": 196.19199633598328, + "p95": 200.19200444221497, + "p99": 217.6000028848648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.89599931240082, + "p90": 198.11199605464935, + "p95": 201.05600357055664, + "p99": 209.21599864959717 + }, + "combine": { + "p50": 83.71199667453766, + "p90": 90.71999788284302, + "p95": 91.96799993515015, + "p99": 98.27200323343277 + }, + "roundtrip": { + "p50": 210.01599729061127, + "p90": 269.9519991874695, + "p95": 272.19200134277344, + "p99": 292.1279966831207 + }, + "isolatedSum": { + "p50": 220.60799598693848, + "p90": 288.83199393749237, + "p95": 293.0240035057068, + "p99": 307.48800188302994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.63999319076538, + "p90": 197.2160041332245, + "p95": 224.41600263118744, + "p99": 281.0879945755005 + }, + "combine": { + "p50": 102.49599814414978, + "p90": 104.99200224876404, + "p95": 106.46399855613708, + "p99": 123.03999811410904 + }, + "roundtrip": { + "p50": 250.30401349067688, + "p90": 255.77598810195923, + "p95": 258.87998938560486, + "p99": 281.47199749946594 + }, + "isolatedSum": { + "p50": 259.13599133491516, + "p90": 302.2080063819885, + "p95": 330.8800011873245, + "p99": 404.1279926896095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + } + ] + }, + { + "id": "cx-4386a2fa", + "identity": "b300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_89708fb0", + "comparisonKey": "911f63541f159ecc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:38.657767+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + 
"sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.543998420238495, + "p90": 58.94400179386139, + "p95": 60.95999851822853, + "p99": 68.96000355482101 + }, + "combine": { + "p50": 61.63199990987778, + "p90": 63.61600011587143, + "p95": 65.08799642324448, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 122.01599776744843, + "p90": 124.03199821710587, + "p95": 125.02400577068329, + "p99": 133.18400084972382 + }, + "isolatedSum": { + "p50": 118.17599833011627, + "p90": 122.56000190973282, + "p95": 126.04799494147301, + "p99": 155.87200224399567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.760000228881836, + "p90": 60.67200005054474, + "p95": 63.07200342416763, + "p99": 71.29599899053574 + }, + "combine": { + "p50": 64.4799992442131, + "p90": 65.88800251483917, + "p95": 67.10399687290192, + "p99": 78.20799946784973 + }, + "roundtrip": { + "p50": 124.35200065374374, + "p90": 128.57599556446075, + "p95": 132.09599256515503, + "p99": 160.5760008096695 + }, + "isolatedSum": { + "p50": 122.23999947309494, + "p90": 126.56000256538391, + "p95": 130.17600029706955, + "p99": 149.50399845838547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.67999994754791, + "p90": 62.01599910855293, + "p95": 63.10400366783142, + "p99": 84.48000252246857 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 69.08799707889557, + "p95": 69.5360004901886, + "p99": 80.57600259780884 + }, 
+ "roundtrip": { + "p50": 129.37599420547485, + "p90": 140.09599387645721, + "p95": 148.70400726795197, + "p99": 165.8560037612915 + }, + "isolatedSum": { + "p50": 126.78399682044983, + "p90": 131.1039961874485, + "p95": 132.64000415802002, + "p99": 165.0560051202774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.90400165319443, + "p90": 62.72000074386597, + "p95": 65.18399715423584, + "p99": 82.91199803352356 + }, + "combine": { + "p50": 67.48799979686737, + "p90": 69.24799829721451, + "p95": 69.69600170850754, + "p99": 71.68000191450119 + }, + "roundtrip": { + "p50": 131.6159963607788, + "p90": 151.42400562763214, + "p95": 155.87200224399567, + "p99": 207.58399367332458 + }, + "isolatedSum": { + "p50": 127.3920014500618, + "p90": 131.96799904108047, + "p95": 134.87999886274338, + "p99": 154.59199994802475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.99199876189232, + "p90": 63.00800293684006, + "p95": 64.96000289916992, + "p99": 92.38400310277939 + }, + "combine": { + "p50": 69.18399780988693, + "p90": 70.75200229883194, + "p95": 71.45600020885468, + "p99": 82.65600353479385 + }, + "roundtrip": { + "p50": 132.28799402713776, + "p90": 134.783998131752, + "p95": 136.25599443912506, + "p99": 161.9199961423874 + }, + "isolatedSum": { + "p50": 130.17599657177925, + "p90": 133.760005235672, + "p95": 136.4160031080246, + "p99": 175.04000663757324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, 
+ "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 64.19199705123901, + "p90": 66.23999774456024, + "p95": 67.35999882221222, + "p99": 75.45600086450577 + }, + "combine": { + "p50": 73.47200065851212, + "p90": 79.6160027384758, + "p95": 81.50400221347809, + "p99": 84.06399935483932 + }, + "roundtrip": { + "p50": 144.57599818706512, + "p90": 147.5840061903, + "p95": 149.59999918937683, + "p99": 168.83200407028198 + }, + "isolatedSum": { + "p50": 137.66399770975113, + "p90": 145.85600048303604, + "p95": 148.8640010356903, + "p99": 159.5200002193451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 73.05599749088287, + "p90": 74.94399696588516, + "p95": 75.93599706888199, + "p99": 79.96799796819687 + }, + "combine": { + "p50": 88.3840024471283, + "p90": 91.07200056314468, + "p95": 91.93599969148636, + "p99": 100.80000013113022 + }, + "roundtrip": { + "p50": 173.72800409793854, + "p90": 176.5120029449463, + "p95": 179.03999984264374, + "p99": 234.72000658512115 + }, + "isolatedSum": { + "p50": 161.43999993801117, + "p90": 166.01599752902985, + "p95": 167.87199676036835, + "p99": 180.7679980993271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.55200290679932, + "p90": 85.69599688053131, + "p95": 86.84799820184708, + "p99": 107.13600367307663 + }, + "combine": { + "p50": 108.67200046777725, + 
"p90": 110.81600189208984, + "p95": 112.41599917411804, + "p99": 130.048006772995 + }, + "roundtrip": { + "p50": 219.39200162887573, + "p90": 226.8799990415573, + "p95": 231.6800057888031, + "p99": 251.26400589942932 + }, + "isolatedSum": { + "p50": 192.22400337457657, + "p90": 196.51199877262115, + "p95": 199.26399737596512, + "p99": 237.18401044607162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17057b7a", + "identity": "b300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_39fa33d8", + "comparisonKey": "4b2e6c529876735f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:28.668389+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", 
+ "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 113.53600025177002, + "p90": 118.68800222873688, + "p95": 126.49600207805634, + "p99": 297.4399924278259 + }, + "combine": { + "p50": 61.47199869155884, + "p90": 63.231997191905975, + "p95": 63.840001821517944, + "p99": 82.87999778985977 + }, + "roundtrip": { + "p50": 166.4319932460785, + "p90": 172.95999825000763, + "p95": 188.83199989795685, + "p99": 412.00000047683716 + }, + "isolatedSum": { + "p50": 175.00799894332886, + "p90": 181.91999942064285, + "p95": 190.33600389957428, + "p99": 380.3199902176857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.94400352239609, + "p90": 122.52800166606903, + "p95": 131.58400356769562, + "p99": 305.88799715042114 + }, + "combine": { + "p50": 64.19199705123901, + "p90": 65.72800129652023, + "p95": 66.23999774456024, + "p99": 79.19999957084656 + }, + "roundtrip": { + "p50": 171.90399765968323, + "p90": 217.66400337219238, + "p95": 252.00000405311584, + "p99": 411.45598888397217 + }, + "isolatedSum": { + "p50": 179.1360005736351, + "p90": 188.25600296258926, + "p95": 
197.82400131225586, + "p99": 385.0879967212677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 117.60000139474869, + "p90": 122.01599776744843, + "p95": 124.60800260305405, + "p99": 228.4799963235855 + }, + "combine": { + "p50": 67.48799979686737, + "p90": 69.60000097751617, + "p95": 70.91200351715088, + "p99": 75.45600086450577 + }, + "roundtrip": { + "p50": 176.64000391960144, + "p90": 186.68800592422485, + "p95": 197.2160041332245, + "p99": 302.2719919681549 + }, + "isolatedSum": { + "p50": 185.08800119161606, + "p90": 191.6159987449646, + "p95": 195.52000612020493, + "p99": 303.9359971880913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 117.08799749612808, + "p90": 122.78400361537933, + "p95": 130.14400005340576, + "p99": 257.4720084667206 + }, + "combine": { + "p50": 67.48799979686737, + "p90": 69.24799829721451, + "p95": 70.11199742555618, + "p99": 75.93599706888199 + }, + "roundtrip": { + "p50": 176.4799952507019, + "p90": 183.29599499702454, + "p95": 194.20799612998962, + "p99": 275.8080065250397 + }, + "isolatedSum": { + "p50": 184.57599729299545, + "p90": 192.03200191259384, + "p95": 200.25599747896194, + "p99": 333.40800553560257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
118.40000003576279, + "p90": 122.56000190973282, + "p95": 124.9919980764389, + "p99": 228.09599339962006 + }, + "combine": { + "p50": 69.18399780988693, + "p90": 71.00799679756165, + "p95": 71.6480016708374, + "p99": 81.37600123882294 + }, + "roundtrip": { + "p50": 178.94400656223297, + "p90": 198.2080042362213, + "p95": 217.8560048341751, + "p99": 315.96800684928894 + }, + "isolatedSum": { + "p50": 187.58399784564972, + "p90": 193.56799870729446, + "p95": 196.6399997472763, + "p99": 309.471994638443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.7360035777092, + "p90": 130.5920034646988, + "p95": 137.28000223636627, + "p99": 257.63198733329773 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 75.16799867153168, + "p95": 75.87199658155441, + "p99": 84.09599959850311 + }, + "roundtrip": { + "p50": 190.36799669265747, + "p90": 207.0399969816208, + "p95": 218.6879962682724, + "p99": 259.3599855899811 + }, + "isolatedSum": { + "p50": 197.9840025305748, + "p90": 205.76000213623047, + "p95": 213.15199881792068, + "p99": 341.72798693180084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 139.16799426078796, + "p90": 144.1279947757721, + "p95": 148.95999431610107, + "p99": 236.15999519824982 + }, + "combine": { + "p50": 88.639996945858, + "p90": 90.87999910116196, + "p95": 92.28800237178802, + "p99": 112.99200356006622 + }, + "roundtrip": { + "p50": 219.80799734592438, + "p90": 237.88799345493317, + "p95": 257.6960027217865, + "p99": 
309.34399366378784 + }, + "isolatedSum": { + "p50": 227.80799120664597, + "p90": 235.00799387693405, + "p95": 241.2479966878891, + "p99": 349.15199875831604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.69600188732147, + "p90": 167.29600727558136, + "p95": 171.51999473571777, + "p99": 244.83199417591095 + }, + "combine": { + "p50": 109.47199910879135, + "p90": 113.24799805879593, + "p95": 115.1999980211258, + "p99": 132.9279989004135 + }, + "roundtrip": { + "p50": 264.44798707962036, + "p90": 269.9519991874695, + "p95": 278.56001257896423, + "p99": 337.2800052165985 + }, + "isolatedSum": { + "p50": 271.1680009961128, + "p90": 280.5440053343773, + "p95": 286.71999275684357, + "p99": 377.75999307632446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-812b32ff", + "identity": "b300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_39fa33d8", + "comparisonKey": "dc335d7f19e0504b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:27.048754+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + 
"model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 114.68800157308578, + "p90": 119.32799965143204, + "p95": 122.23999947309494, + "p99": 191.103994846344 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 62.33600154519081, + "p95": 63.32799792289734, + "p99": 91.64799749851227 + }, + "roundtrip": { + "p50": 167.58400201797485, + "p90": 173.18400740623474, + "p95": 179.1680008172989, + "p99": 248.51199984550476 + }, + "isolatedSum": { + "p50": 175.36000162363052, + "p90": 181.66400119662285, + "p95": 185.56799739599228, + "p99": 282.75199234485626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.48800021409988, + "p90": 123.48800152540207, + "p95": 139.67999815940857, + "p99": 265.4399871826172 + }, + "combine": { + "p50": 62.111999839544296, + "p90": 71.87200337648392, + "p95": 75.6480023264885, + "p99": 83.96799862384796 + }, + "roundtrip": { + "p50": 169.50400173664093, + "p90": 177.98399925231934, + "p95": 186.75200641155243, + "p99": 259.13599133491516 + }, + "isolatedSum": { + "p50": 177.60000005364418, + "p90": 195.360004901886, + "p95": 215.32800048589706, + "p99": 349.40798580646515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 118.84800344705582, + "p90": 127.16799974441528, + "p95": 132.9600065946579, + "p99": 153.82400155067444 + }, + "combine": { + "p50": 66.78400188684464, + "p90": 69.11999732255936, + "p95": 69.98399645090103, + "p99": 82.30400085449219 + }, + "roundtrip": { + "p50": 174.97600615024567, + "p90": 179.3919950723648, + "p95": 185.92000007629395, + "p99": 227.13600099086761 + }, + "isolatedSum": { + "p50": 185.63200533390045, + "p90": 196.28799706697464, + "p95": 202.94400304555893, + "p99": 236.12800240516663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 118.49600076675415, + "p90": 123.19999933242798, + "p95": 127.96799838542938, + "p99": 258.36798548698425 + }, + "combine": { + "p50": 67.74400174617767, + "p90": 69.43999975919724, + "p95": 69.7920024394989, + 
"p99": 73.18399846553802 + }, + "roundtrip": { + "p50": 178.20799350738525, + "p90": 184.7040057182312, + "p95": 194.30400431156158, + "p99": 245.69599330425262 + }, + "isolatedSum": { + "p50": 186.24000251293182, + "p90": 192.6399990916252, + "p95": 197.76000082492828, + "p99": 331.5519839525223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 119.23199892044067, + "p90": 123.74400347471237, + "p95": 126.78399682044983, + "p99": 175.6799966096878 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 69.82400268316269, + "p95": 71.00799679756165, + "p99": 79.26400005817413 + }, + "roundtrip": { + "p50": 179.55200374126434, + "p90": 192.7040070295334, + "p95": 206.40000700950623, + "p99": 239.9040013551712 + }, + "isolatedSum": { + "p50": 187.51999735832214, + "p90": 193.56800615787506, + "p95": 197.79199361801147, + "p99": 254.94399666786194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.83199685811996, + "p90": 131.23199343681335, + "p95": 136.99199259281158, + "p99": 255.67999482154846 + }, + "combine": { + "p50": 73.79200309515, + "p90": 75.77600330114365, + "p95": 76.35200023651123, + "p99": 93.24800223112106 + }, + "roundtrip": { + "p50": 192.1280026435852, + "p90": 196.4160054922104, + "p95": 199.16799664497375, + "p99": 243.6159998178482 + }, + "isolatedSum": { + "p50": 198.62399995326996, + "p90": 207.007996737957, + "p95": 213.34399282932281, + "p99": 348.9279970526695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.7599984407425, + "p90": 141.59999787807465, + "p95": 145.53600549697876, + "p99": 166.33599996566772 + }, + "combine": { + "p50": 89.47200328111649, + "p90": 91.87199920415878, + "p95": 93.18400174379349, + "p99": 114.3679991364479 + }, + "roundtrip": { + "p50": 220.2560007572174, + "p90": 224.44799542427063, + "p95": 227.55199670791626, + "p99": 339.87200260162354 + }, + "isolatedSum": { + "p50": 227.23200172185898, + "p90": 233.47199708223343, + "p95": 238.72000724077225, + "p99": 280.70399910211563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.61600160598755, + "p90": 168.12799870967865, + "p95": 170.9119975566864, + "p99": 200.3840059041977 + }, + "combine": { + "p50": 108.96000266075134, + "p90": 112.22399771213531, + "p95": 113.08799684047699, + "p99": 126.43200159072876 + }, + "roundtrip": { + "p50": 266.9439911842346, + "p90": 272.19200134277344, + "p95": 276.70401334762573, + "p99": 296.4479923248291 + }, + "isolatedSum": { + "p50": 272.5760042667389, + "p90": 280.35199642181396, + "p95": 283.9999943971634, + "p99": 326.81600749492645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-065b2546", + "identity": 
"b300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_88c2290c", + "comparisonKey": "ba14c442ac75681b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:33.491124+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, 
+ "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 51.35999992489815, + "p90": 52.86400020122528, + "p95": 54.368000477552414, + "p99": 76.76800340414047 + }, + "combine": { + "p50": 61.664000153541565, + "p90": 63.551999628543854, + "p95": 64.2239972949028, + "p99": 72.1919983625412 + }, + "roundtrip": { + "p50": 115.29599875211716, + "p90": 118.27199906110764, + "p95": 122.20799922943115, + "p99": 156.22399747371674 + }, + "isolatedSum": { + "p50": 113.02400007843971, + "p90": 116.41599982976913, + "p95": 118.59199777245522, + "p99": 148.96000176668167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 52.032001316547394, + "p90": 54.23999950289726, + "p95": 57.56799876689911, + "p99": 112.92800307273865 + }, + "combine": { + "p50": 64.86400216817856, + "p90": 66.23999774456024, + "p95": 67.32799857854843, + "p99": 117.76000261306763 + }, + "roundtrip": { + "p50": 118.367999792099, + "p90": 120.95999717712402, + "p95": 127.42400169372559, + "p99": 196.3520050048828 + }, + "isolatedSum": { + "p50": 116.89600348472595, + "p90": 120.4799972474575, + "p95": 124.89599734544754, + "p99": 230.68800568580627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 55.48800155520439, + "p90": 65.18399715423584, + "p95": 66.23999774456024, + "p99": 75.07199794054031 + }, + "combine": { + "p50": 67.71200150251389, + "p90": 75.26399940252304, + "p95": 76.09599828720093, + "p99": 80.44800162315369 + }, + "roundtrip": { + "p50": 123.96799772977829, + 
"p90": 152.5759994983673, + "p95": 154.2080044746399, + "p99": 159.71200168132782 + }, + "isolatedSum": { + "p50": 123.20000305771828, + "p90": 140.44799655675888, + "p95": 142.33599603176117, + "p99": 155.519999563694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 54.4000007212162, + "p90": 56.73599988222122, + "p95": 58.848001062870026, + "p99": 91.16800129413605 + }, + "combine": { + "p50": 67.77600198984146, + "p90": 70.68800181150436, + "p95": 73.7600028514862, + "p99": 131.32800161838531 + }, + "roundtrip": { + "p50": 123.61600250005722, + "p90": 127.00800597667694, + "p95": 131.20000064373016, + "p99": 201.82399451732635 + }, + "isolatedSum": { + "p50": 122.17600271105766, + "p90": 127.42400169372559, + "p95": 132.60800391435623, + "p99": 222.49600291252136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 55.55199831724167, + "p90": 57.760000228881836, + "p95": 60.70400029420853, + "p99": 113.50400000810623 + }, + "combine": { + "p50": 69.21599805355072, + "p90": 71.16799801588058, + "p95": 72.03199714422226, + "p99": 116.44800007343292 + }, + "roundtrip": { + "p50": 125.72799623012543, + "p90": 130.52800297737122, + "p95": 138.87999951839447, + "p99": 192.1280026435852 + }, + "isolatedSum": { + "p50": 124.76799637079239, + "p90": 128.92799824476242, + "p95": 132.7359974384308, + "p99": 229.95200008153915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + 
"recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 58.687999844551086, + "p90": 60.80000102519989, + "p95": 62.30400130152702, + "p99": 79.19999957084656 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 75.39200037717819, + "p95": 76.12799853086472, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 137.9839926958084, + "p90": 141.6960060596466, + "p95": 147.96799421310425, + "p99": 188.6720061302185 + }, + "isolatedSum": { + "p50": 132.03199952840805, + "p90": 136.19200140237808, + "p95": 138.43199983239174, + "p99": 168.67200285196304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 68.64000111818314, + "p90": 78.07999849319458, + "p95": 79.00799810886383, + "p99": 90.7839983701706 + }, + "combine": { + "p50": 89.72799777984619, + "p90": 97.75999933481216, + "p95": 99.2640033364296, + "p99": 114.75200206041336 + }, + "roundtrip": { + "p50": 168.2559996843338, + "p90": 180.63999712467194, + "p95": 182.20800161361694, + "p99": 195.8719938993454 + }, + "isolatedSum": { + "p50": 158.36799889802933, + "p90": 175.83999782800674, + "p95": 178.27200144529343, + "p99": 205.53600043058395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 78.33600044250488, + "p90": 81.18399977684021, + "p95": 84.22400057315826, + "p99": 143.26399564743042 + }, + "combine": { + "p50": 108.83200168609619, + "p90": 111.07199639081955, + 
"p95": 112.03200370073318, + "p99": 116.92799627780914 + }, + "roundtrip": { + "p50": 212.47999370098114, + "p90": 216.0000056028366, + "p95": 220.8320051431656, + "p99": 235.77600717544556 + }, + "isolatedSum": { + "p50": 187.16800212860107, + "p90": 192.25599616765976, + "p95": 196.25600427389145, + "p99": 260.19199192523956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ea8b6d2f", + "identity": "b300|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_f08976e2", + "comparisonKey": "7a7da090db66f8b7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:30.599950+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.40800127387047, + "p90": 60.35200133919716, + "p95": 64.28799778223038, + "p99": 118.75200271606445 + }, + "combine": { + "p50": 56.352000683546066, + "p90": 58.848001062870026, + "p95": 66.30399823188782, + "p99": 115.07199704647064 + }, + "roundtrip": { + "p50": 95.29600292444229, + "p90": 99.71199929714203, + "p95": 105.47199845314026, + "p99": 162.01600432395935 + }, + "isolatedSum": { + "p50": 113.76000195741653, + "p90": 119.20000240206718, + "p95": 130.5919960141182, + "p99": 233.8239997625351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.760000228881836, + "p90": 61.91999837756157, + "p95": 65.98400324583054, + "p99": 145.9839940071106 + }, + "combine": { + "p50": 56.48000165820122, + "p90": 65.18399715423584, + "p95": 66.27199798822403, + "p99": 81.85599744319916 + }, + "roundtrip": { + "p50": 97.43999689817429, + "p90": 108.31999778747559, + "p95": 113.66400122642517, + "p99": 158.1760048866272 + }, + "isolatedSum": { + "p50": 114.24000188708305, + "p90": 127.10399553179741, + "p95": 132.25600123405457, + 
"p99": 227.83999145030975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.8560009598732, + "p90": 60.54399907588959, + "p95": 63.90400230884552, + "p99": 124.15999919176102 + }, + "combine": { + "p50": 57.40800127387047, + "p90": 66.56000018119812, + "p95": 67.10399687290192, + "p99": 78.46400141716003 + }, + "roundtrip": { + "p50": 101.88800096511841, + "p90": 107.45599865913391, + "p95": 111.7440015077591, + "p99": 158.55999290943146 + }, + "isolatedSum": { + "p50": 115.26400223374367, + "p90": 127.10399925708771, + "p95": 131.00799918174744, + "p99": 202.62400060892105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.97599831223488, + "p90": 62.81600147485733, + "p95": 68.57600063085556, + "p99": 152.70400047302246 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 67.26399809122086, + "p95": 67.84000247716904, + "p99": 129.7920048236847 + }, + "roundtrip": { + "p50": 108.12799632549286, + "p90": 114.9120032787323, + "p95": 118.78400295972824, + "p99": 279.1680097579956 + }, + "isolatedSum": { + "p50": 125.31199678778648, + "p90": 130.0799995660782, + "p95": 136.4160031080246, + "p99": 282.49600529670715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.36000123620033, + "p90": 
62.463998794555664, + "p95": 67.45599955320358, + "p99": 112.99200356006622 + }, + "combine": { + "p50": 66.65600091218948, + "p90": 67.32799857854843, + "p95": 67.61600077152252, + "p99": 70.56000083684921 + }, + "roundtrip": { + "p50": 108.03200304508209, + "p90": 111.29599809646606, + "p95": 114.04799669981003, + "p99": 149.50400590896606 + }, + "isolatedSum": { + "p50": 126.01600214838982, + "p90": 129.7919973731041, + "p95": 135.0720003247261, + "p99": 183.55200439691544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 73.98399710655212, + "p90": 82.30400085449219, + "p95": 86.71999722719193, + "p99": 213.53599429130554 + }, + "combine": { + "p50": 67.26399809122086, + "p90": 69.34399902820587, + "p95": 71.48800045251846, + "p99": 153.50399911403656 + }, + "roundtrip": { + "p50": 110.52799969911575, + "p90": 117.21599847078323, + "p95": 119.61600184440613, + "p99": 189.18399512767792 + }, + "isolatedSum": { + "p50": 141.24799519777298, + "p90": 151.64799988269806, + "p95": 158.2079976797104, + "p99": 367.0399934053421 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 70.39999961853027, + "p90": 74.36800003051758, + "p95": 79.26400005817413, + "p99": 148.5760062932968 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 81.727996468544, + "p95": 91.42400324344635, + "p99": 152.73599326610565 + }, + "roundtrip": { + "p50": 132.35199451446533, + "p90": 136.03200018405914, + "p95": 139.55199718475342, + "p99": 176.70400440692902 + }, + 
"isolatedSum": { + "p50": 149.9200016260147, + "p90": 156.09599649906158, + "p95": 170.68800330162048, + "p99": 301.31199955940247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.3280012011528, + "p90": 86.40000224113464, + "p95": 90.62399715185165, + "p99": 139.64800536632538 + }, + "combine": { + "p50": 93.31200271844864, + "p90": 102.7199998497963, + "p95": 103.45599800348282, + "p99": 156.95999562740326 + }, + "roundtrip": { + "p50": 166.9120043516159, + "p90": 175.04000663757324, + "p95": 176.7359972000122, + "p99": 217.056006193161 + }, + "isolatedSum": { + "p50": 176.64000391960144, + "p90": 189.12000209093094, + "p95": 194.07999515533447, + "p99": 296.60800099372864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b476d80a", + "identity": "b300|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b300_f08976e2", + "comparisonKey": "abe481159d6c5253", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:59.352904+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + 
"shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.21599981188774, + "p90": 60.15999987721443, + "p95": 62.3680017888546, + "p99": 75.71200281381607 + }, + "combine": { + "p50": 56.44800141453743, + "p90": 65.8240020275116, + "p95": 66.56000018119812, + "p99": 68.54400038719177 + }, + "roundtrip": { + "p50": 96.22400254011154, + "p90": 100.76799988746643, + "p95": 104.86400127410889, + "p99": 122.01599776744843 + }, + "isolatedSum": { + "p50": 113.66400122642517, + "p90": 125.98400190472603, + "p95": 128.92800197005272, + "p99": 144.25600320100784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 56.992001831531525, + "p90": 59.61599946022034, + "p95": 62.01599910855293, + "p99": 84.54400300979614 + }, + "combine": { + "p50": 57.82400071620941, + "p90": 67.1359971165657, + "p95": 67.45599955320358, + "p99": 78.94399762153625 + }, + "roundtrip": { + "p50": 107.04000294208527, + "p90": 112.89600282907486, + "p95": 113.66400122642517, + "p99": 121.18399888277054 + }, + "isolatedSum": { + "p50": 114.81600254774094, + "p90": 126.75199657678604, + "p95": 129.47199866175652, + "p99": 163.4880006313324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 56.89600110054016, + "p90": 60.127999633550644, + "p95": 63.48799914121628, + "p99": 87.48800307512283 + }, + "combine": { + "p50": 66.6240006685257, + "p90": 67.35999882221222, + "p95": 67.71200150251389, + "p99": 69.85600292682648 + }, + "roundtrip": { + "p50": 107.90400207042694, + "p90": 114.43199962377548, + "p95": 115.74400216341019, + "p99": 131.80799782276154 + }, + "isolatedSum": { + "p50": 123.52000176906586, + "p90": 127.48799845576286, + "p95": 131.20000064373016, + "p99": 157.3440060019493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.33600088953972, + "p90": 60.7680007815361, + "p95": 63.48799914121628, + "p99": 74.52800124883652 + }, + "combine": { + "p50": 66.97600334882736, + "p90": 67.90400296449661, + "p95": 68.96000355482101, + "p99": 91.10400080680847 + }, + 
"roundtrip": { + "p50": 107.26399719715118, + "p90": 110.20799726247787, + "p95": 112.2559979557991, + "p99": 132.76800513267517 + }, + "isolatedSum": { + "p50": 125.31200423836708, + "p90": 128.67200374603271, + "p95": 132.4480026960373, + "p99": 165.632002055645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.74400043487549, + "p90": 63.80800157785416, + "p95": 69.60000097751617, + "p99": 87.39200234413147 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 68.31999868154526, + "p95": 69.21599805355072, + "p99": 79.03999835252762 + }, + "roundtrip": { + "p50": 108.19199681282043, + "p90": 111.42399907112122, + "p95": 114.46399986743927, + "p99": 163.90399634838104 + }, + "isolatedSum": { + "p50": 126.8479973077774, + "p90": 132.1280002593994, + "p95": 138.8159990310669, + "p99": 166.4320006966591 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.68000257015228, + "p90": 78.04799824953079, + "p95": 79.32800054550171, + "p99": 91.0400003194809 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 71.55200093984604, + "p95": 78.52800190448761, + "p99": 80.83199709653854 + }, + "roundtrip": { + "p50": 123.71200323104858, + "p90": 127.55200266838074, + "p95": 128.7360042333603, + "p99": 151.5520066022873 + }, + "isolatedSum": { + "p50": 144.19200271368027, + "p90": 149.59999918937683, + "p95": 157.85600244998932, + "p99": 171.87199741601944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 
13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 72.03199714422226, + "p90": 78.11199873685837, + "p95": 81.28000050783157, + "p99": 96.92800045013428 + }, + "combine": { + "p50": 80.92799782752991, + "p90": 90.7519981265068, + "p95": 91.42400324344635, + "p99": 107.744000852108 + }, + "roundtrip": { + "p50": 148.41599762439728, + "p90": 151.19999647140503, + "p95": 152.41600573062897, + "p99": 183.61599743366241 + }, + "isolatedSum": { + "p50": 152.95999497175217, + "p90": 168.86399686336517, + "p95": 172.70400375127792, + "p99": 204.67200130224228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.98400038480759, + "p90": 101.40799731016159, + "p95": 102.94400155544281, + "p99": 118.20799857378006 + }, + "combine": { + "p50": 103.87200117111206, + "p90": 105.24799674749374, + "p95": 106.55999928712845, + "p99": 127.93600559234619 + }, + "roundtrip": { + "p50": 172.86400496959686, + "p90": 182.14400112628937, + "p95": 186.8479996919632, + "p99": 237.12000250816345 + }, + "isolatedSum": { + "p50": 197.85600155591965, + "p90": 206.65599405765533, + "p95": 209.50400084257126, + "p99": 246.14400416612625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-871ab0cf", + "identity": "b300|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": 
"b300_f08976e2", + "comparisonKey": "a7ec41285a64b800", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:25.287378+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.69599974155426, + "p90": 59.67999994754791, + "p95": 
61.983998864889145, + "p99": 73.11999797821045 + }, + "combine": { + "p50": 66.81600213050842, + "p90": 67.74400174617767, + "p95": 68.1919977068901, + "p99": 80.19199967384338 + }, + "roundtrip": { + "p50": 111.58400028944016, + "p90": 114.46399986743927, + "p95": 115.68000167608261, + "p99": 138.36799561977386 + }, + "isolatedSum": { + "p50": 124.51200187206268, + "p90": 127.42400169372559, + "p95": 130.17599657177925, + "p99": 153.31199765205383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.8560009598732, + "p90": 60.127999633550644, + "p95": 62.84800171852112, + "p99": 70.27199864387512 + }, + "combine": { + "p50": 67.16799736022949, + "p90": 68.03199648857117, + "p95": 68.7360018491745, + "p99": 71.29599899053574 + }, + "roundtrip": { + "p50": 107.96800255775452, + "p90": 114.14399743080139, + "p95": 115.10399729013443, + "p99": 136.4160031080246 + }, + "isolatedSum": { + "p50": 125.02399832010269, + "p90": 128.1599961221218, + "p95": 131.58400356769562, + "p99": 141.56799763441086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.88800120353699, + "p90": 60.22400036454201, + "p95": 62.591999769210815, + "p99": 93.34400296211243 + }, + "combine": { + "p50": 67.35999882221222, + "p90": 69.08799707889557, + "p95": 70.49600034952164, + "p99": 80.1599994301796 + }, + "roundtrip": { + "p50": 107.87200182676315, + "p90": 110.49599945545197, + "p95": 112.19199746847153, + "p99": 131.9040060043335 + }, + "isolatedSum": { + "p50": 125.2480000257492, + "p90": 
129.31199744343758, + "p95": 133.08800011873245, + "p99": 173.50400239229202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.42400172352791, + "p90": 62.463998794555664, + "p95": 66.65600091218948, + "p99": 80.51200211048126 + }, + "combine": { + "p50": 68.03199648857117, + "p90": 70.14399766921997, + "p95": 75.39200037717819, + "p99": 81.98399841785431 + }, + "roundtrip": { + "p50": 111.80800199508667, + "p90": 118.65600198507309, + "p95": 121.44000083208084, + "p99": 133.88800621032715 + }, + "isolatedSum": { + "p50": 127.45599821209908, + "p90": 132.60799646377563, + "p95": 142.04800128936768, + "p99": 162.49600052833557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.03199890255928, + "p90": 62.78400123119354, + "p95": 65.5359998345375, + "p99": 73.37599992752075 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 70.46400010585785, + "p95": 77.44000107049942, + "p99": 80.32000064849854 + }, + "roundtrip": { + "p50": 123.96799772977829, + "p90": 126.36800110340118, + "p95": 127.61600315570831, + "p99": 148.80000054836273 + }, + "isolatedSum": { + "p50": 128.54399904608727, + "p90": 133.2480013370514, + "p95": 142.97600090503693, + "p99": 153.6960005760193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + 
"dispatch": { + "p50": 70.11199742555618, + "p90": 76.4160007238388, + "p95": 77.2479996085167, + "p99": 84.41600203514099 + }, + "combine": { + "p50": 78.14399898052216, + "p90": 79.55200225114822, + "p95": 79.83999699354172, + "p99": 80.51200211048126 + }, + "roundtrip": { + "p50": 123.9359974861145, + "p90": 128.38399410247803, + "p95": 131.00799918174744, + "p99": 145.9839940071106 + }, + "isolatedSum": { + "p50": 148.25599640607834, + "p90": 155.96800297498703, + "p95": 157.0879966020584, + "p99": 164.92800414562225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.35200220346451, + "p90": 91.13600105047226, + "p95": 92.12800115346909, + "p99": 115.10399729013443 + }, + "combine": { + "p50": 91.5519967675209, + "p90": 92.47999638319016, + "p95": 93.91999989748001, + "p99": 115.35999923944473 + }, + "roundtrip": { + "p50": 147.5200057029724, + "p90": 151.8400013446808, + "p95": 155.07200360298157, + "p99": 258.6880028247833 + }, + "isolatedSum": { + "p50": 179.9039989709854, + "p90": 183.61599743366241, + "p95": 186.0480010509491, + "p99": 230.46399652957916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 99.80800002813339, + "p90": 102.46399790048599, + "p95": 103.07200253009796, + "p99": 108.12799632549286 + }, + "combine": { + "p50": 106.08000308275223, + "p90": 115.13599753379822, + "p95": 115.64800143241882, + "p99": 127.45599448680878 + }, + "roundtrip": { + "p50": 186.62400543689728, + "p90": 190.65600633621216, + "p95": 
191.67999923229218, + "p99": 204.70400154590607 + }, + "isolatedSum": { + "p50": 205.88800311088562, + "p90": 217.5999954342842, + "p95": 218.72000396251678, + "p99": 235.58399081230164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ff3a3c6c", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_27a5238e", + "comparisonKey": "c777627e39152404", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:33.804496+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.28000029921532, + "p90": 58.78400057554245, + "p95": 60.22400036454201, + "p99": 64.83200192451477 + }, + "combine": { + "p50": 67.32799857854843, + "p90": 68.83200258016586, + "p95": 70.01599669456482, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 107.51999914646149, + "p90": 113.79200220108032, + "p95": 114.81600254774094, + "p99": 117.69600212574005 + }, + "isolatedSum": { + "p50": 124.60799887776375, + "p90": 127.61600315570831, + "p95": 130.23999705910683, + "p99": 145.24800330400467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.50399827957153, + "p90": 59.039998799562454, + "p95": 60.95999851822853, + "p99": 62.97600269317627 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 70.04799693822861, + "p95": 70.46400010585785, + "p99": 78.33600044250488 + }, + "roundtrip": { + "p50": 107.90400207042694, + "p90": 110.84800213575363, + "p95": 113.21599781513214, + "p99": 123.55200201272964 + }, + "isolatedSum": { + "p50": 125.63199549913406, + "p90": 129.08799573779106, + "p95": 131.42399862408638, + "p99": 141.31200313568115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + 
"fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.792000472545624, + "p90": 60.60799956321716, + "p95": 63.551999628543854, + "p99": 72.60800153017044 + }, + "combine": { + "p50": 69.47200000286102, + "p90": 78.87999713420868, + "p95": 79.32800054550171, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 111.35999858379364, + "p90": 119.1679984331131, + "p95": 122.04799801111221, + "p99": 128.63999605178833 + }, + "isolatedSum": { + "p50": 127.26400047540665, + "p90": 139.48799669742584, + "p95": 142.88000017404556, + "p99": 163.52000087499619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.06399914622307, + "p90": 62.84800171852112, + "p95": 65.8240020275116, + "p99": 73.34399968385696 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 79.00799810886383, + "p95": 79.3600007891655, + "p99": 82.07999914884567 + }, + "roundtrip": { + "p50": 120.67200243473053, + "p90": 126.0479986667633, + "p95": 127.29600071907043, + "p99": 134.5919966697693 + }, + "isolatedSum": { + "p50": 129.98400256037712, + "p90": 141.85599982738495, + "p95": 145.1840028166771, + "p99": 155.42399883270264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 66.81600213050842, + "p90": 74.01599735021591, + "p95": 76.25599950551987, + "p99": 80.22399991750717 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 
79.48800176382065, + "p95": 79.71200346946716, + "p99": 84.60800349712372 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 124.15999919176102, + "p95": 127.36000120639801, + "p99": 145.82400023937225 + }, + "isolatedSum": { + "p50": 145.34400403499603, + "p90": 153.50399911403656, + "p95": 155.96800297498703, + "p99": 164.8320034146309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.85600292682648, + "p90": 73.91999661922455, + "p95": 77.40800082683563, + "p99": 95.77599912881851 + }, + "combine": { + "p50": 79.45600152015686, + "p90": 80.92799782752991, + "p95": 82.24000036716461, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 134.68800485134125, + "p90": 137.37599551677704, + "p95": 139.8400068283081, + "p99": 150.91200172901154 + }, + "isolatedSum": { + "p50": 149.31200444698334, + "p90": 154.84799444675446, + "p95": 159.64800119400024, + "p99": 186.68799847364426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.20000022649765, + "p90": 90.97599983215332, + "p95": 92.86399930715561, + "p99": 106.11200332641602 + }, + "combine": { + "p50": 93.21600198745728, + "p90": 97.37599641084671, + "p95": 102.94400155544281, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 160.863995552063, + "p90": 165.47200083732605, + "p95": 167.00799763202667, + "p99": 185.15199422836304 + }, + "isolatedSum": { + "p50": 176.41600221395493, + "p90": 188.35199624300003, + "p95": 195.80800086259842, + "p99": 221.95200622081757 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.55999732017517, + "p90": 97.85600006580353, + "p95": 101.40799731016159, + "p99": 109.47199910879135 + }, + "combine": { + "p50": 116.12799763679504, + "p90": 117.72800236940384, + "p95": 119.07199770212173, + "p99": 140.60799777507782 + }, + "roundtrip": { + "p50": 196.8960016965866, + "p90": 200.95999538898468, + "p95": 202.4639993906021, + "p99": 218.20800006389618 + }, + "isolatedSum": { + "p50": 210.68799495697021, + "p90": 215.58400243520737, + "p95": 220.47999501228333, + "p99": 250.07999688386917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0fc33058", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_f08976e2", + "comparisonKey": "9edd6f2d1d4ae2cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:57.076377+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, 
+ "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.8560009598732, + "p90": 59.967998415231705, + "p95": 61.3120011985302, + "p99": 70.14399766921997 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 78.72000336647034, + "p95": 79.77599650621414, + "p99": 89.9839997291565 + }, + "roundtrip": { + "p50": 109.43999886512756, + "p90": 116.35199934244156, + "p95": 118.40000003576279, + "p99": 134.65599715709686 + }, + "isolatedSum": { + "p50": 125.76000392436981, + "p90": 138.68800178170204, + "p95": 141.08799770474434, + "p99": 160.12799739837646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
57.920001447200775, + "p90": 60.47999858856201, + "p95": 62.94400244951248, + "p99": 76.76800340414047 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 69.95200365781784, + "p95": 71.45600020885468, + "p99": 82.0159986615181 + }, + "roundtrip": { + "p50": 108.44799876213074, + "p90": 111.51999980211258, + "p95": 114.17599767446518, + "p99": 127.96799838542938 + }, + "isolatedSum": { + "p50": 125.88800489902496, + "p90": 130.43200224637985, + "p95": 134.40000265836716, + "p99": 158.78400206565857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.88800120353699, + "p90": 59.99999865889549, + "p95": 62.431998550891876, + "p99": 83.00799876451492 + }, + "combine": { + "p50": 69.05599683523178, + "p90": 79.52000200748444, + "p95": 91.00800007581711, + "p99": 103.64799946546555 + }, + "roundtrip": { + "p50": 111.48799955844879, + "p90": 123.3920007944107, + "p95": 127.77599692344666, + "p99": 135.45599579811096 + }, + "isolatedSum": { + "p50": 126.94399803876877, + "p90": 139.52000066637993, + "p95": 153.43999862670898, + "p99": 186.65599822998047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.7680007815361, + "p90": 63.74400109052658, + "p95": 66.6240006685257, + "p99": 80.38400113582611 + }, + "combine": { + "p50": 69.7920024394989, + "p90": 79.23199981451035, + "p95": 79.48800176382065, + "p99": 80.86399734020233 + }, + "roundtrip": { + "p50": 122.27199971675873, + "p90": 131.99999928474426, + "p95": 137.11999356746674, + "p99": 160.7999950647354 
+ }, + "isolatedSum": { + "p50": 130.560003221035, + "p90": 142.97600090503693, + "p95": 146.11200243234634, + "p99": 161.24799847602844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 66.3679987192154, + "p90": 73.88799637556076, + "p95": 76.35200023651123, + "p99": 88.06400001049042 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 79.68000322580338, + "p95": 80.60800284147263, + "p99": 153.21600437164307 + }, + "roundtrip": { + "p50": 121.85599654912949, + "p90": 125.40799379348755, + "p95": 127.83999741077423, + "p99": 151.5520066022873 + }, + "isolatedSum": { + "p50": 145.12000232934952, + "p90": 153.56799960136414, + "p95": 156.96000307798386, + "p99": 241.28000438213348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.11199742555618, + "p90": 73.7600028514862, + "p95": 75.80800354480743, + "p99": 82.46400207281113 + }, + "combine": { + "p50": 79.55200225114822, + "p90": 81.02399855852127, + "p95": 82.68799632787704, + "p99": 103.87200117111206 + }, + "roundtrip": { + "p50": 134.8160058259964, + "p90": 137.88799941539764, + "p95": 139.3599957227707, + "p99": 149.1840034723282 + }, + "isolatedSum": { + "p50": 149.6639996767044, + "p90": 154.78400141000748, + "p95": 158.49599987268448, + "p99": 186.3360032439232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.55200290679932, + "p90": 91.07200056314468, + "p95": 93.40800344944, + "p99": 151.7760008573532 + }, + "combine": { + "p50": 92.92799979448318, + "p90": 94.97600048780441, + "p95": 102.46399790048599, + "p99": 116.06399714946747 + }, + "roundtrip": { + "p50": 161.5999937057495, + "p90": 166.6560024023056, + "p95": 168.83200407028198, + "p99": 212.64000236988068 + }, + "isolatedSum": { + "p50": 176.4800027012825, + "p90": 186.0480010509491, + "p95": 195.872001349926, + "p99": 267.8399980068207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.68799829483032, + "p90": 96.54399752616882, + "p95": 99.20000284910202, + "p99": 108.03200304508209 + }, + "combine": { + "p50": 116.38399958610535, + "p90": 127.26399302482605, + "p95": 128.67200374603271, + "p99": 144.16000247001648 + }, + "roundtrip": { + "p50": 197.1839964389801, + "p90": 201.24800503253937, + "p95": 202.4960070848465, + "p99": 217.31199324131012 + }, + "isolatedSum": { + "p50": 211.07199788093567, + "p90": 223.80799055099487, + "p95": 227.87200659513474, + "p99": 252.19200551509857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4e6ecdf9", + "identity": "b300|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_f08976e2", + "comparisonKey": "ce598376d36e7b7e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:06.988123+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.81600081920624, + "p90": 73.11999797821045, + "p95": 77.31200009584427, + "p99": 112.86400258541107 + }, + "combine": { + "p50": 67.391999065876, + "p90": 84.3840017914772, + "p95": 91.61599725484848, + "p99": 
104.00000214576721 + }, + "roundtrip": { + "p50": 113.15199732780457, + "p90": 121.08799815177917, + "p95": 128.12800705432892, + "p99": 301.6960024833679 + }, + "isolatedSum": { + "p50": 126.20799988508224, + "p90": 157.50399976968765, + "p95": 168.92799735069275, + "p99": 216.86400473117828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.687999844551086, + "p90": 61.28000095486641, + "p95": 64.25599753856659, + "p99": 88.32000195980072 + }, + "combine": { + "p50": 67.84000247716904, + "p90": 69.7920024394989, + "p95": 70.8480030298233, + "p99": 83.67999643087387 + }, + "roundtrip": { + "p50": 109.21599715948105, + "p90": 112.28799819946289, + "p95": 116.5120005607605, + "p99": 164.15999829769135 + }, + "isolatedSum": { + "p50": 126.52800232172012, + "p90": 131.0720033943653, + "p95": 135.1040005683899, + "p99": 171.9999983906746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.26400050520897, + "p90": 67.10399687290192, + "p95": 70.592001080513, + "p99": 93.59999746084213 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 70.52800059318542, + "p95": 77.79199630022049, + "p99": 80.60800284147263 + }, + "roundtrip": { + "p50": 112.15999722480774, + "p90": 119.39200013875961, + "p95": 123.83999675512314, + "p99": 307.5200021266937 + }, + "isolatedSum": { + "p50": 127.9360018670559, + "p90": 137.63199746608734, + "p95": 148.3839973807335, + "p99": 174.20800030231476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + 
"combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.35200133919716, + "p90": 63.29599767923355, + "p95": 66.84800237417221, + "p99": 123.32800030708313 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 72.28799909353256, + "p95": 78.62400263547897, + "p99": 82.46400207281113 + }, + "roundtrip": { + "p50": 125.21600723266602, + "p90": 128.25599312782288, + "p95": 129.05600666999817, + "p99": 150.81599354743958 + }, + "isolatedSum": { + "p50": 129.05600294470787, + "p90": 135.5839967727661, + "p95": 145.47200500965118, + "p99": 205.79200237989426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.6240000128746, + "p90": 68.86400282382965, + "p95": 70.81600278615952, + "p99": 81.15199953317642 + }, + "combine": { + "p50": 77.88799703121185, + "p90": 79.58400249481201, + "p95": 79.99999821186066, + "p99": 94.52799707651138 + }, + "roundtrip": { + "p50": 122.40000069141388, + "p90": 125.59999525547028, + "p95": 128.54400277137756, + "p99": 178.0800074338913 + }, + "isolatedSum": { + "p50": 140.51199704408646, + "p90": 148.44800531864166, + "p95": 150.81600099802017, + "p99": 175.6799966096878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.97599655389786, + "p90": 73.11999797821045, + "p95": 77.2479996085167, + "p99": 88.48000317811966 + }, + "combine": { + 
"p50": 79.68000322580338, + "p90": 82.04799890518188, + "p95": 83.74399691820145, + "p99": 94.55999732017517 + }, + "roundtrip": { + "p50": 136.00000739097595, + "p90": 148.83199334144592, + "p95": 159.84000265598297, + "p99": 193.40799748897552 + }, + "isolatedSum": { + "p50": 150.65599977970123, + "p90": 155.16799688339233, + "p95": 160.99199652671814, + "p99": 183.04000049829483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.35200220346451, + "p90": 91.10400080680847, + "p95": 92.86399930715561, + "p99": 114.46399986743927 + }, + "combine": { + "p50": 93.24800223112106, + "p90": 102.46399790048599, + "p95": 103.20000350475311, + "p99": 120.19199877977371 + }, + "roundtrip": { + "p50": 163.32800686359406, + "p90": 169.08800601959229, + "p95": 173.0560064315796, + "p99": 256.7040026187897 + }, + "isolatedSum": { + "p50": 181.60000443458557, + "p90": 193.56799870729446, + "p95": 196.06400281190872, + "p99": 234.65599864721298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.71199864149094, + "p90": 99.61599856615067, + "p95": 102.68799960613251, + "p99": 171.1679995059967 + }, + "combine": { + "p50": 116.31999909877777, + "p90": 128.4479945898056, + "p95": 131.3599944114685, + "p99": 155.10399639606476 + }, + "roundtrip": { + "p50": 199.10399615764618, + "p90": 202.65600085258484, + "p95": 203.80799472332, + "p99": 225.40800273418427 + }, + "isolatedSum": { + "p50": 212.0319977402687, + "p90": 228.06399315595627, + "p95": 
234.047994017601, + "p99": 326.27199590206146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e8b41ef", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b300_26bc4356", + "comparisonKey": "f7fb907517e85b4c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:35.953464+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.92800134420395, + "p90": 58.52799862623215, + "p95": 59.967998415231705, + "p99": 73.5040009021759 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 70.592001080513, + "p95": 77.82399654388428, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 107.19999670982361, + "p90": 110.07999628782272, + "p95": 112.41599917411804, + "p99": 129.2479932308197 + }, + "isolatedSum": { + "p50": 125.31200051307678, + "p90": 129.11999970674515, + "p95": 137.79199495911598, + "p99": 153.31199765205383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.472001761198044, + "p90": 58.9120015501976, + "p95": 60.67200005054474, + "p99": 74.5920017361641 + }, + "combine": { + "p50": 68.86400282382965, + "p90": 78.015998005867, + "p95": 78.68800312280655, + "p99": 79.45600152015686 + }, + "roundtrip": { + "p50": 116.31999909877777, + "p90": 121.2799996137619, + "p95": 123.99999797344208, + "p99": 129.02399897575378 + }, + "isolatedSum": { + "p50": 126.3360045850277, + "p90": 136.9279995560646, + "p95": 139.3600031733513, + "p99": 154.04800325632095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.72799998521805, + "p90": 59.90400165319443, + "p95": 61.85600161552429, + "p99": 77.53600180149078 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 79.26400005817413, + "p95": 79.68000322580338, + "p99": 83.42400193214417 + }, + "roundtrip": { + "p50": 124.25599992275238, + "p90": 126.49600207805634, + "p95": 127.29600071907043, + "p99": 140.99200069904327 + }, + "isolatedSum": { + "p50": 127.74399667978287, + "p90": 139.16800171136856, + "p95": 141.53600484132767, + "p99": 160.96000373363495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.51999872922897, + "p90": 61.72800064086914, + "p95": 63.80800157785416, + "p99": 109.6000000834465 + }, + "combine": { + "p50": 78.87999713420868, + "p90": 79.55200225114822, + "p95": 79.80799674987793, + "p99": 84.22400057315826 + }, + "roundtrip": { + "p50": 119.52000111341476, + "p90": 122.56000190973282, + "p95": 125.21600723266602, + "p99": 147.32800424098969 + }, + "isolatedSum": { + "p50": 138.39999586343765, + "p90": 141.28000289201736, + "p95": 143.6159983277321, + "p99": 193.82400065660477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 69.18399780988693, + "p90": 75.48800110816956, + "p95": 76.51200145483017, + "p99": 80.76799660921097 + }, + "combine": { + "p50": 79.23199981451035, + "p90": 79.77599650621414, + "p95": 80.76799660921097, + "p99": 87.42400258779526 + }, + "roundtrip": { + "p50": 121.60000205039978, + "p90": 
124.06399846076965, + "p95": 127.29600071907043, + "p99": 146.97599411010742 + }, + "isolatedSum": { + "p50": 148.41599762439728, + "p90": 155.2639976143837, + "p95": 157.27999806404114, + "p99": 168.19199919700623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.20799815654755, + "p90": 71.9359964132309, + "p95": 73.31199944019318, + "p99": 82.5280025601387 + }, + "combine": { + "p50": 80.79999685287476, + "p90": 83.00799876451492, + "p95": 90.2400016784668, + "p99": 92.79999881982803 + }, + "roundtrip": { + "p50": 134.5279961824417, + "p90": 141.95199310779572, + "p95": 142.97600090503693, + "p99": 151.07199549674988 + }, + "isolatedSum": { + "p50": 151.0079950094223, + "p90": 154.94399517774582, + "p95": 163.55200111865997, + "p99": 175.32800137996674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.08799970149994, + "p90": 87.39200234413147, + "p95": 89.15200084447861, + "p99": 135.26399433612823 + }, + "combine": { + "p50": 94.62399780750275, + "p90": 103.87200117111206, + "p95": 104.09600287675858, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 168.96000504493713, + "p90": 171.51999473571777, + "p95": 174.55999553203583, + "p99": 194.59199905395508 + }, + "isolatedSum": { + "p50": 179.71199750900269, + "p90": 191.26400351524353, + "p95": 193.24800372123718, + "p99": 243.3919906616211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.6880002617836, + "p90": 109.21599715948105, + "p95": 112.2559979557991, + "p99": 132.51200318336487 + }, + "combine": { + "p50": 131.55199587345123, + "p90": 140.57600498199463, + "p95": 140.9280002117157, + "p99": 141.92000031471252 + }, + "roundtrip": { + "p50": 229.34399545192719, + "p90": 236.51200532913208, + "p95": 238.3359968662262, + "p99": 259.64799523353577 + }, + "isolatedSum": { + "p50": 238.23999613523483, + "p90": 249.79200214147568, + "p95": 253.1839981675148, + "p99": 274.4320034980774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a6d22598", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_36af80c9", + "comparisonKey": "92091df8a2cc97c9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:48.023508+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.752000987529755, + "p90": 65.60000032186508, + "p95": 68.89600306749344, + "p99": 143.8719928264618 + }, + "combine": { + "p50": 54.84800040721893, + "p90": 56.8000003695488, + "p95": 58.27200040221214, + "p99": 140.54399728775024 + }, + "roundtrip": { + "p50": 95.74399888515472, + "p90": 101.15200281143188, + "p95": 103.55199873447418, + "p99": 200.80000162124634 + }, + "isolatedSum": { + "p50": 117.60000139474869, + "p90": 122.40000069141388, + "p95": 127.16800346970558, + "p99": 284.41599011421204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.39200147986412, + "p90": 63.07200342416763, + "p95": 69.21599805355072, + 
"p99": 171.48800194263458 + }, + "combine": { + "p50": 57.82400071620941, + "p90": 66.3679987192154, + "p95": 67.10399687290192, + "p99": 94.65599805116653 + }, + "roundtrip": { + "p50": 108.06400328874588, + "p90": 115.00799655914307, + "p95": 117.98399686813354, + "p99": 206.40000700950623 + }, + "isolatedSum": { + "p50": 117.21600219607353, + "p90": 129.44000214338303, + "p95": 136.31999492645264, + "p99": 266.1439999938011 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.65600156784058, + "p90": 77.2159993648529, + "p95": 80.6720033288002, + "p99": 372.96000123023987 + }, + "combine": { + "p50": 67.03999638557434, + "p90": 68.1919977068901, + "p95": 77.95199751853943, + "p99": 119.19999867677689 + }, + "roundtrip": { + "p50": 122.65600264072418, + "p90": 126.11199915409088, + "p95": 129.40800189971924, + "p99": 186.62400543689728 + }, + "isolatedSum": { + "p50": 137.69599795341492, + "p90": 145.407997071743, + "p95": 158.62400084733963, + "p99": 492.15999990701675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 70.23999840021133, + "p90": 72.22399860620499, + "p95": 74.87999647855759, + "p99": 110.3999987244606 + }, + "combine": { + "p50": 69.15199756622314, + "p90": 73.7600028514862, + "p95": 78.40000092983246, + "p99": 140.47999680042267 + }, + "roundtrip": { + "p50": 122.23999947309494, + "p90": 131.3599944114685, + "p95": 139.64800536632538, + "p99": 234.72000658512115 + }, + "isolatedSum": { + "p50": 139.39199596643448, + "p90": 145.9840014576912, + 
"p95": 153.27999740839005, + "p99": 250.87999552488327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-edb37559", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b300_81013c18", + "comparisonKey": "80b558551b9bdc3e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:40.142874+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + 
"backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.559998869895935, + "p90": 60.736000537872314, + "p95": 64.35199826955795, + "p99": 84.51200276613235 + }, + "combine": { + "p50": 56.384000927209854, + "p90": 58.27200040221214, + "p95": 65.47199934720993, + "p99": 67.00800359249115 + }, + "roundtrip": { + "p50": 96.73599898815155, + "p90": 99.10400211811066, + "p95": 102.27199643850327, + "p99": 116.86400324106216 + }, + "isolatedSum": { + "p50": 114.94399979710579, + "p90": 119.00800094008446, + "p95": 129.82399761676788, + "p99": 151.5200063586235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.33600088953972, + "p90": 61.15199998021126, + "p95": 64.83200192451477, + "p99": 120.25599926710129 + }, + "combine": { + "p50": 57.5999990105629, + "p90": 66.84800237417221, + "p95": 67.10399687290192, + "p99": 70.43199986219406 + }, + "roundtrip": { + "p50": 105.6319996714592, + "p90": 112.89600282907486, + "p95": 114.23999816179276, + "p99": 180.9920072555542 + }, + "isolatedSum": { + "p50": 115.93599990010262, + "p90": 128.00000235438347, + "p95": 131.9359987974167, + "p99": 190.68799912929535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.94400179386139, + "p90": 61.43999844789505, + "p95": 63.48799914121628, + "p99": 83.13599973917007 + }, + "combine": { + "p50": 66.0799965262413, + "p90": 67.23199784755707, + "p95": 67.35999882221222, + "p99": 78.17599922418594 + }, + "roundtrip": { + "p50": 112.89600282907486, + "p90": 115.74400216341019, + "p95": 116.5120005607605, + "p99": 138.94400000572205 + }, + "isolatedSum": { + "p50": 125.02399832010269, + "p90": 128.67199629545212, + "p95": 130.8479979634285, + "p99": 161.31199896335602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.47999858856201, + "p90": 63.45599889755249, + "p95": 67.61600077152252, + "p99": 95.23200243711472 + }, + "combine": { + "p50": 66.880002617836, + "p90": 67.391999065876, + "p95": 67.84000247716904, + "p99": 71.87200337648392 + }, + "roundtrip": { + "p50": 108.64000022411346, + "p90": 111.42399907112122, + "p95": 113.69600147008896, + "p99": 136.25599443912506 + }, + "isolatedSum": { + "p50": 127.36000120639801, + "p90": 130.8479979634285, + "p95": 135.45600324869156, + "p99": 167.10400581359863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.535999178886414, + "p90": 64.86400216817856, + "p95": 67.90400296449661, + "p99": 97.43999689817429 + }, + "combine": { + "p50": 66.94400310516357, + "p90": 67.48799979686737, + "p95": 68.54400038719177, + "p99": 71.80800288915634 + }, + "roundtrip": { + "p50": 109.24799740314484, + "p90": 
112.12799698114395, + "p95": 116.03199690580368, + "p99": 134.65599715709686 + }, + "isolatedSum": { + "p50": 128.48000228405, + "p90": 132.35200196504593, + "p95": 136.44800335168839, + "p99": 169.24799978733063 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 62.07999959588051, + "p90": 64.54399973154068, + "p95": 67.52000004053116, + "p99": 98.52799773216248 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 67.9360032081604, + "p95": 68.4799998998642, + "p99": 78.3040001988411 + }, + "roundtrip": { + "p50": 110.36799848079681, + "p90": 113.43999952077866, + "p95": 117.18399822711945, + "p99": 148.19200336933136 + }, + "isolatedSum": { + "p50": 129.18399646878242, + "p90": 132.48000293970108, + "p95": 135.99999994039536, + "p99": 176.83199793100357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 77.27999985218048, + "p90": 79.96799796819687, + "p95": 80.83199709653854, + "p99": 90.71999788284302 + }, + "combine": { + "p50": 69.15199756622314, + "p90": 78.40000092983246, + "p95": 79.0719985961914, + "p99": 129.31199371814728 + }, + "roundtrip": { + "p50": 126.0479986667633, + "p90": 134.11200046539307, + "p95": 139.16799426078796, + "p99": 181.72800540924072 + }, + "isolatedSum": { + "p50": 146.43199741840363, + "p90": 158.36799889802933, + "p95": 159.90399569272995, + "p99": 220.0319916009903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.39200168848038, + "p90": 90.20800143480301, + "p95": 91.26400202512741, + "p99": 105.76000064611435 + }, + "combine": { + "p50": 90.46400338411331, + "p90": 91.80799871683121, + "p95": 92.19200164079666, + "p99": 114.97599631547928 + }, + "roundtrip": { + "p50": 146.88000082969666, + "p90": 150.14399588108063, + "p95": 152.51199901103973, + "p99": 170.84799706935883 + }, + "isolatedSum": { + "p50": 173.8560050725937, + "p90": 182.01600015163422, + "p95": 183.45600366592407, + "p99": 220.73599696159363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-85bf2248", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b300_9804a17c", + "comparisonKey": "1976411feabf1971", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:15.799769+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.34400078654289, + "p90": 59.61599946022034, + "p95": 63.00800293684006, + "p99": 124.95999783277512 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 78.8159966468811, + "p95": 81.66400343179703, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 113.50400000810623, + "p90": 127.83999741077423, + "p95": 138.59200477600098, + "p99": 172.19200730323792 + }, + "isolatedSum": { + "p50": 124.4799979031086, + "p90": 138.43199610710144, + "p95": 144.67200636863708, + "p99": 228.16000133752823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.08799883723259, + "p90": 59.07199904322624, + "p95": 62.272001057863235, + "p99": 92.79999881982803 + }, 
+ "combine": { + "p50": 67.29599833488464, + "p90": 68.96000355482101, + "p95": 70.39999961853027, + "p99": 95.87199985980988 + }, + "roundtrip": { + "p50": 107.4879989027977, + "p90": 110.33599823713303, + "p95": 113.82400244474411, + "p99": 144.19199526309967 + }, + "isolatedSum": { + "p50": 124.38399717211723, + "p90": 128.03200259804726, + "p95": 132.6720006763935, + "p99": 188.6719986796379 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.559998869895935, + "p90": 61.11999973654747, + "p95": 65.5359998345375, + "p99": 92.00000017881393 + }, + "combine": { + "p50": 68.31999868154526, + "p90": 70.46400010585785, + "p95": 77.88799703121185, + "p99": 91.16800129413605 + }, + "roundtrip": { + "p50": 111.58400028944016, + "p90": 119.00799721479416, + "p95": 122.94399738311768, + "p99": 138.97599279880524 + }, + "isolatedSum": { + "p50": 126.87999755144119, + "p90": 131.58399984240532, + "p95": 143.42399686574936, + "p99": 183.16800147294998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.58399921655655, + "p90": 62.52799928188324, + "p95": 65.2799978852272, + "p99": 137.53600418567657 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 78.5600021481514, + "p95": 79.45600152015686, + "p99": 92.12800115346909 + }, + "roundtrip": { + "p50": 123.32800030708313, + "p90": 126.94400548934937, + "p95": 129.02399897575378, + "p99": 175.6799966096878 + }, + "isolatedSum": { + "p50": 128.35200130939484, + "p90": 141.08800143003464, + "p95": 
144.73599940538406, + "p99": 229.66400533914566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 65.72800129652023, + "p90": 73.79200309515, + "p95": 76.9599974155426, + "p99": 92.51199662685394 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 79.6160027384758, + "p95": 80.12799918651581, + "p99": 95.83999961614609 + }, + "roundtrip": { + "p50": 120.89599668979645, + "p90": 139.3280029296875, + "p95": 156.8319946527481, + "p99": 219.00799870491028 + }, + "isolatedSum": { + "p50": 136.44800335168839, + "p90": 153.4080058336258, + "p95": 157.0879966020584, + "p99": 188.35199624300003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 69.50400024652481, + "p90": 72.22399860620499, + "p95": 76.28799974918365, + "p99": 93.1520015001297 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 81.08799904584885, + "p95": 82.68799632787704, + "p99": 103.35999727249146 + }, + "roundtrip": { + "p50": 134.36800241470337, + "p90": 138.2720023393631, + "p95": 140.99200069904327, + "p99": 178.30400168895721 + }, + "isolatedSum": { + "p50": 149.02400225400925, + "p90": 153.31199765205383, + "p95": 158.9759960770607, + "p99": 196.51199877262115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
87.74399757385254, + "p90": 91.10400080680847, + "p95": 93.21600198745728, + "p99": 142.04800128936768 + }, + "combine": { + "p50": 93.28000247478485, + "p90": 115.68000167608261, + "p95": 116.60800129175186, + "p99": 203.77600193023682 + }, + "roundtrip": { + "p50": 160.73599457740784, + "p90": 166.59200191497803, + "p95": 184.4159960746765, + "p99": 217.92000532150269 + }, + "isolatedSum": { + "p50": 181.0240000486374, + "p90": 206.78400248289108, + "p95": 209.82400327920914, + "p99": 345.8240032196045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.40799796581268, + "p90": 107.04000294208527, + "p95": 108.41599851846695, + "p99": 147.5840061903 + }, + "combine": { + "p50": 128.54400277137756, + "p90": 130.0799995660782, + "p95": 131.84000551700592, + "p99": 152.51199901103973 + }, + "roundtrip": { + "p50": 217.56799519062042, + "p90": 225.95199942588806, + "p95": 228.19200158119202, + "p99": 245.44000625610352 + }, + "isolatedSum": { + "p50": 233.95200073719025, + "p90": 237.12000250816345, + "p95": 240.25600403547287, + "p99": 300.0960052013397 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60c3b8b9", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b300_54cb99d2", + "comparisonKey": "61d6063317c035bc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:00.174300+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.82400071620941, + "p90": 59.51999872922897, + "p95": 61.15199998021126, + "p99": 65.2799978852272 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 67.9360032081604, + "p95": 
68.51200014352798, + "p99": 72.83200323581696 + }, + "roundtrip": { + "p50": 108.19199681282043, + "p90": 115.10399729013443, + "p95": 116.03199690580368, + "p99": 123.23199957609177 + }, + "isolatedSum": { + "p50": 124.92799758911133, + "p90": 127.45600193738937, + "p95": 129.66400012373924, + "p99": 138.11200112104416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.43200162053108, + "p90": 60.92799827456474, + "p95": 63.61600011587143, + "p99": 83.93599838018417 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 69.88800317049026, + "p95": 70.81600278615952, + "p99": 90.84799885749817 + }, + "roundtrip": { + "p50": 109.11999642848969, + "p90": 111.77600175142288, + "p95": 114.3999993801117, + "p99": 125.2799928188324 + }, + "isolatedSum": { + "p50": 126.40000507235527, + "p90": 130.816001445055, + "p95": 134.43200290203094, + "p99": 174.78399723768234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.720000088214874, + "p90": 60.54399907588959, + "p95": 62.6240000128746, + "p99": 82.36800134181976 + }, + "combine": { + "p50": 68.1919977068901, + "p90": 70.23999840021133, + "p95": 70.91200351715088, + "p99": 79.3600007891655 + }, + "roundtrip": { + "p50": 118.40000003576279, + "p90": 132.09599256515503, + "p95": 136.00000739097595, + "p99": 146.68799936771393 + }, + "isolatedSum": { + "p50": 126.91199779510498, + "p90": 130.78399747610092, + "p95": 133.53600353002548, + "p99": 161.72800213098526 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.35200133919716, + "p90": 62.30400130152702, + "p95": 63.45599889755249, + "p99": 71.03999704122543 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 78.23999971151352, + "p95": 78.97599786520004, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 122.3360002040863, + "p90": 127.58399546146393, + "p95": 128.48000228405, + "p99": 132.76800513267517 + }, + "isolatedSum": { + "p50": 129.05600294470787, + "p90": 140.54400101304054, + "p95": 142.43199676275253, + "p99": 150.78400075435638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.78400123119354, + "p90": 69.95200365781784, + "p95": 72.41600006818771, + "p99": 78.87999713420868 + }, + "combine": { + "p50": 70.39999961853027, + "p90": 79.42400127649307, + "p95": 79.77599650621414, + "p99": 82.68799632787704 + }, + "roundtrip": { + "p50": 121.8239963054657, + "p90": 127.29600071907043, + "p95": 128.76799702644348, + "p99": 140.19200205802917 + }, + "isolatedSum": { + "p50": 133.18400084972382, + "p90": 149.3760049343109, + "p95": 152.19199657440186, + "p99": 161.56799346208572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.87200337648392, + "p90": 82.24000036716461, + "p95": 88.41600269079208, + "p99": 
102.52799838781357 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 80.92799782752991, + "p95": 81.63200318813324, + "p99": 84.35200154781342 + }, + "roundtrip": { + "p50": 134.71999764442444, + "p90": 138.2399946451187, + "p95": 139.1039937734604, + "p99": 154.2080044746399 + }, + "isolatedSum": { + "p50": 151.39200538396835, + "p90": 163.16799819469452, + "p95": 170.04800587892532, + "p99": 186.87999993562698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.55200290679932, + "p90": 90.68799763917923, + "p95": 91.71199798583984, + "p99": 98.36799651384354 + }, + "combine": { + "p50": 92.47999638319016, + "p90": 95.04000097513199, + "p95": 102.39999741315842, + "p99": 105.12000322341919 + }, + "roundtrip": { + "p50": 162.78399527072906, + "p90": 167.35999286174774, + "p95": 168.57600212097168, + "p99": 191.23199582099915 + }, + "isolatedSum": { + "p50": 176.03199928998947, + "p90": 185.72799861431122, + "p95": 194.11199539899826, + "p99": 203.48799973726273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.04000097513199, + "p90": 99.58399832248688, + "p95": 101.21600329875946, + "p99": 111.455999314785 + }, + "combine": { + "p50": 116.35199934244156, + "p90": 118.14399808645248, + "p95": 119.26399916410446, + "p99": 127.77599692344666 + }, + "roundtrip": { + "p50": 195.8400011062622, + "p90": 202.33599841594696, + "p95": 204.352006316185, + "p99": 224.35200214385986 + }, + "isolatedSum": { + "p50": 211.39200031757355, + 
"p90": 217.72799640893936, + "p95": 220.48000246286392, + "p99": 239.23199623823166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c944b42e", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b300_e3448cc0", + "comparisonKey": "fd846e97e8041d0e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:29.176115+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 
1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 59.647999703884125, + "p90": 64.12799656391144, + "p95": 70.78400254249573, + "p99": 137.05599308013916 + }, + "combine": { + "p50": 67.26399809122086, + "p90": 68.41599941253662, + "p95": 69.08799707889557, + "p99": 71.23199850320816 + }, + "roundtrip": { + "p50": 107.96800255775452, + "p90": 114.07999694347382, + "p95": 116.19199812412262, + "p99": 169.95200514793396 + }, + "isolatedSum": { + "p50": 126.91199779510498, + "p90": 132.54399597644806, + "p95": 139.8719996213913, + "p99": 208.28799158334732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 59.99999865889549, + "p90": 67.61600077152252, + "p95": 73.27999919652939, + "p99": 159.71200168132782 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 71.29599899053574, + "p95": 78.5600021481514, + "p99": 142.752006649971 + }, + "roundtrip": { + "p50": 111.00800335407257, + "p90": 125.59999525547028, + "p95": 135.3600025177002, + "p99": 185.66399812698364 + }, + "isolatedSum": { + "p50": 128.7040002644062, + "p90": 138.91199976205826, + "p95": 151.8400013446808, + "p99": 302.4640083312988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 60.22400036454201, + "p90": 64.57599997520447, + "p95": 69.43999975919724, + "p99": 106.27199709415436 + }, + "combine": { + "p50": 69.47200000286102, + "p90": 77.88799703121185, + "p95": 78.87999713420868, + "p99": 83.67999643087387 + }, + "roundtrip": { + "p50": 111.77600175142288, + "p90": 134.0160071849823, + "p95": 144.03200149536133, + "p99": 167.55199432373047 + }, + "isolatedSum": { + "p50": 129.69600036740303, + "p90": 142.46399700641632, + "p95": 148.31999689340591, + "p99": 189.95199352502823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.85600161552429, + "p90": 71.35999947786331, + "p95": 79.03999835252762, + "p99": 143.8719928264618 + }, + "combine": { + "p50": 70.0799971818924, + "p90": 79.16799932718277, + "p95": 80.06399869918823, + "p99": 141.82400703430176 + }, + "roundtrip": { + "p50": 125.98399817943573, + "p90": 140.76800644397736, + "p95": 150.7200002670288, + "p99": 203.5199999809265 + }, + "isolatedSum": { + "p50": 131.9359987974167, + "p90": 150.52799880504608, + "p95": 159.10399705171585, + "p99": 285.69599986076355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 63.231997191905975, + "p90": 86.01599931716919, + "p95": 93.02400052547455, + "p99": 175.80799758434296 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 80.03199845552444, + "p95": 80.76799660921097, + 
"p99": 96.63999825716019 + }, + "roundtrip": { + "p50": 123.16799908876419, + "p90": 129.15199995040894, + "p95": 131.3599944114685, + "p99": 159.93599593639374 + }, + "isolatedSum": { + "p50": 141.1839947104454, + "p90": 166.04799777269363, + "p95": 173.79199713468552, + "p99": 272.44799584150314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 72.38399982452393, + "p90": 79.55200225114822, + "p95": 83.93599838018417, + "p99": 169.855996966362 + }, + "combine": { + "p50": 79.6160027384758, + "p90": 81.31200075149536, + "p95": 82.62400329113007, + "p99": 131.23199343681335 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 146.01600170135498, + "p95": 151.19999647140503, + "p99": 225.600004196167 + }, + "isolatedSum": { + "p50": 152.00000256299973, + "p90": 160.86400300264359, + "p95": 166.56000167131424, + "p99": 301.08799040317535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 90.20800143480301, + "p90": 93.75999867916107, + "p95": 97.37599641084671, + "p99": 168.06399822235107 + }, + "combine": { + "p50": 92.92799979448318, + "p90": 95.51999717950821, + "p95": 103.45599800348282, + "p99": 142.39999651908875 + }, + "roundtrip": { + "p50": 163.83999586105347, + "p90": 173.12000691890717, + "p95": 182.97599256038666, + "p99": 208.80000293254852 + }, + "isolatedSum": { + "p50": 183.1360012292862, + "p90": 189.27999585866928, + "p95": 200.83199441432953, + "p99": 310.4639947414398 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.57599776983261, + "p90": 110.944002866745, + "p95": 123.61600250005722, + "p99": 155.008003115654 + }, + "combine": { + "p50": 116.57600104808807, + "p90": 118.75200271606445, + "p95": 120.28799951076508, + "p99": 167.26399958133698 + }, + "roundtrip": { + "p50": 199.20000433921814, + "p90": 203.39199900627136, + "p95": 205.53599298000336, + "p99": 240.4160052537918 + }, + "isolatedSum": { + "p50": 213.15199881792068, + "p90": 229.69600558280945, + "p95": 243.9040020108223, + "p99": 322.27200269699097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-86ba6860", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_fa8ff6dd", + "comparisonKey": "327f8b044d0cf971", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:50.995465+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.472001761198044, + "p90": 61.055999249219894, + "p95": 64.57599997520447, + "p99": 121.34400010108948 + }, + "combine": { + "p50": 66.880002617836, + "p90": 67.87200272083282, + "p95": 69.37599927186966, + "p99": 116.64000153541565 + }, + "roundtrip": { + "p50": 108.06400328874588, + "p90": 113.43999952077866, + "p95": 114.656001329422, + "p99": 131.6159963607788 + }, + "isolatedSum": { + "p50": 124.35200437903404, + "p90": 128.92800197005272, + "p95": 133.95199924707413, + "p99": 237.98400163650513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
57.66399949789047, + "p90": 61.72800064086914, + "p95": 66.78400188684464, + "p99": 123.64800274372101 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 69.023996591568, + "p95": 71.74400240182877, + "p99": 142.33599603176117 + }, + "roundtrip": { + "p50": 108.60799998044968, + "p90": 116.28799885511398, + "p95": 121.24799937009811, + "p99": 205.02400398254395 + }, + "isolatedSum": { + "p50": 124.79999661445618, + "p90": 130.75199723243713, + "p95": 138.5280042886734, + "p99": 265.9839987754822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.17599967122078, + "p90": 61.76000088453293, + "p95": 67.74400174617767, + "p99": 106.01600259542465 + }, + "combine": { + "p50": 67.391999065876, + "p90": 68.76800209283829, + "p95": 70.75200229883194, + "p99": 129.40800189971924 + }, + "roundtrip": { + "p50": 108.47999900579453, + "p90": 112.15999722480774, + "p95": 117.27999895811081, + "p99": 178.39999496936798 + }, + "isolatedSum": { + "p50": 125.56799873709679, + "p90": 130.52800297737122, + "p95": 138.4960040450096, + "p99": 235.4240044951439 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 59.776000678539276, + "p90": 66.43199920654297, + "p95": 73.98399710655212, + "p99": 182.23999440670013 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 78.07999849319458, + "p95": 78.49600166082382, + "p99": 116.86400324106216 + }, + "roundtrip": { + "p50": 119.77600306272507, + "p90": 126.46399438381195, + "p95": 129.92000579833984, + "p99": 189.63199853897095 + 
}, + "isolatedSum": { + "p50": 128.76799702644348, + "p90": 144.51199769973755, + "p95": 152.47999876737595, + "p99": 299.1039976477623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.3120011985302, + "p90": 69.31199878454208, + "p95": 75.55200159549713, + "p99": 149.79200065135956 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 77.504001557827, + "p95": 78.72000336647034, + "p99": 104.5759990811348 + }, + "roundtrip": { + "p50": 124.35200065374374, + "p90": 127.83999741077423, + "p95": 130.91200590133667, + "p99": 185.34399569034576 + }, + "isolatedSum": { + "p50": 129.85600158572197, + "p90": 146.81600034236908, + "p95": 154.27200496196747, + "p99": 254.36799973249435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.93599706888199, + "p90": 78.8159966468811, + "p95": 79.83999699354172, + "p99": 136.3839954137802 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 80.22399991750717, + "p95": 81.05599880218506, + "p99": 140.47999680042267 + }, + "roundtrip": { + "p50": 129.69599664211273, + "p90": 136.31999492645264, + "p95": 138.91200721263885, + "p99": 211.67999505996704 + }, + "isolatedSum": { + "p50": 155.10399639606476, + "p90": 159.03999656438828, + "p95": 160.89599579572678, + "p99": 276.8639922142029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.6720033288002, + "p90": 87.23200112581253, + "p95": 88.67199718952179, + "p99": 169.88800466060638 + }, + "combine": { + "p50": 91.71199798583984, + "p90": 93.08800101280212, + "p95": 94.7519987821579, + "p99": 145.63199877738953 + }, + "roundtrip": { + "p50": 160.7999950647354, + "p90": 165.0560051202774, + "p95": 166.04800522327423, + "p99": 188.03200125694275 + }, + "isolatedSum": { + "p50": 172.38400131464005, + "p90": 180.32000213861465, + "p95": 183.4239959716797, + "p99": 315.5200034379959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.42399775981903, + "p90": 106.65600001811981, + "p95": 111.1999973654747, + "p99": 166.55999422073364 + }, + "combine": { + "p50": 127.6479959487915, + "p90": 129.05600666999817, + "p95": 130.20800054073334, + "p99": 142.43200421333313 + }, + "roundtrip": { + "p50": 211.0079973936081, + "p90": 216.12800657749176, + "p95": 218.01599860191345, + "p99": 315.744012594223 + }, + "isolatedSum": { + "p50": 231.07199370861053, + "p90": 235.71200668811798, + "p95": 241.40799790620804, + "p99": 308.9919984340668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-64c7043b", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b300_63dcd15f", + "comparisonKey": "9114b40d794cd5e4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:03.221305+00:00", + "status": 
"valid", + "publicationStatus": "diagnostic", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 63.551999628543854, + "p90": 66.14399701356888, + "p95": 68.83200258016586, + "p99": 79.93599772453308 + }, + "combine": { + "p50": 56.51199817657471, + "p90": 65.63200056552887, + "p95": 
66.84800237417221, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 96.41599655151367, + "p90": 103.93600165843964, + "p95": 108.60799998044968, + "p99": 155.87200224399567 + }, + "isolatedSum": { + "p50": 120.06399780511856, + "p90": 131.77599757909775, + "p95": 135.68000495433807, + "p99": 171.55199497938156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.18399956822395, + "p90": 63.64800035953522, + "p95": 66.20799750089645, + "p99": 86.30400151014328 + }, + "combine": { + "p50": 56.2559999525547, + "p90": 65.37599861621857, + "p95": 66.52799993753433, + "p99": 103.26399654150009 + }, + "roundtrip": { + "p50": 97.15200215578079, + "p90": 103.13600301742554, + "p95": 105.0880029797554, + "p99": 120.51200121641159 + }, + "isolatedSum": { + "p50": 113.43999952077866, + "p90": 129.02399897575378, + "p95": 132.7359974384308, + "p99": 189.56799805164337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 57.50399827957153, + "p90": 61.983998864889145, + "p95": 65.63200056552887, + "p99": 79.45600152015686 + }, + "combine": { + "p50": 56.63999915122986, + "p90": 66.17599725723267, + "p95": 66.68800115585327, + "p99": 70.39999961853027 + }, + "roundtrip": { + "p50": 104.63999956846237, + "p90": 117.24799871444702, + "p95": 123.83999675512314, + "p99": 144.83200013637543 + }, + "isolatedSum": { + "p50": 114.14399743080139, + "p90": 128.1599961221218, + "p95": 132.32000172138214, + "p99": 149.85600113868713 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 58.27200040221214, + "p90": 62.01599910855293, + "p95": 65.47199934720993, + "p99": 84.70399677753448 + }, + "combine": { + "p50": 66.17599725723267, + "p90": 67.19999760389328, + "p95": 67.52000004053116, + "p99": 78.27199995517731 + }, + "roundtrip": { + "p50": 110.88000237941742, + "p90": 116.99199676513672, + "p95": 118.75200271606445, + "p99": 834.559977054596 + }, + "isolatedSum": { + "p50": 124.44799765944481, + "p90": 129.2159967124462, + "p95": 132.9919993877411, + "p99": 162.9759967327118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 59.67999994754791, + "p90": 62.752000987529755, + "p95": 67.29599833488464, + "p99": 109.82400178909302 + }, + "combine": { + "p50": 66.91200286149979, + "p90": 67.52000004053116, + "p95": 68.4799998998642, + "p99": 77.79199630022049 + }, + "roundtrip": { + "p50": 107.77600109577179, + "p90": 113.34399878978729, + "p95": 118.40000003576279, + "p99": 167.26399958133698 + }, + "isolatedSum": { + "p50": 126.5920028090477, + "p90": 130.2720010280609, + "p95": 135.77599823474884, + "p99": 187.6159980893135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 72.31999933719635, + "p90": 79.13599908351898, + "p95": 80.51200211048126, + "p99": 
102.52799838781357 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 70.56000083684921, + "p95": 77.95199751853943, + "p99": 91.71199798583984 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 126.71999633312225, + "p95": 132.1599930524826, + "p99": 148.80000054836273 + }, + "isolatedSum": { + "p50": 140.60799777507782, + "p90": 149.6959999203682, + "p95": 158.4639996290207, + "p99": 194.2399963736534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.38400113582611, + "p90": 83.90399813652039, + "p95": 88.76799792051315, + "p99": 182.97599256038666 + }, + "combine": { + "p50": 86.81599795818329, + "p90": 93.79199892282486, + "p95": 105.12000322341919, + "p99": 154.36799824237823 + }, + "roundtrip": { + "p50": 148.00000190734863, + "p90": 155.39200603961945, + "p95": 157.4079990386963, + "p99": 168.60799491405487 + }, + "isolatedSum": { + "p50": 167.1999990940094, + "p90": 177.69599705934525, + "p95": 193.88800114393234, + "p99": 337.3439908027649 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.08000111579895, + "p90": 96.96000069379807, + "p95": 99.39199686050415, + "p99": 118.8800036907196 + }, + "combine": { + "p50": 116.89600348472595, + "p90": 123.55200201272964, + "p95": 127.96799838542938, + "p99": 189.5039975643158 + }, + "roundtrip": { + "p50": 195.23200392723083, + "p90": 202.55999267101288, + "p95": 205.47200739383698, + "p99": 226.01599991321564 + }, + "isolatedSum": { + "p50": 210.9760046005249, + "p90": 
220.5120027065277, + "p95": 227.35999524593353, + "p99": 308.3840012550354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d1f38c81", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b300_aaea337f", + "comparisonKey": "9cfc2b5dde54fd96", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:48.054527+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 
7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 60.47999858856201, + "p90": 69.11999732255936, + "p95": 72.86400347948074, + "p99": 151.2639969587326 + }, + "combine": { + "p50": 67.00800359249115, + "p90": 68.15999746322632, + "p95": 70.43199986219406, + "p99": 154.88000214099884 + }, + "roundtrip": { + "p50": 111.42399907112122, + "p90": 117.79200285673141, + "p95": 119.26399916410446, + "p99": 174.78400468826294 + }, + "isolatedSum": { + "p50": 127.48800218105316, + "p90": 137.27999478578568, + "p95": 143.2960033416748, + "p99": 306.14399909973145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 59.808000922203064, + "p90": 63.29599767923355, + "p95": 66.23999774456024, + "p99": 115.10399729013443 + }, + "combine": { + "p50": 67.391999065876, + "p90": 69.24799829721451, + "p95": 71.55200093984604, + "p99": 103.4879982471466 + }, + "roundtrip": { + "p50": 111.07199639081955, + "p90": 129.2800009250641, + "p95": 142.39999651908875, + "p99": 217.82399713993073 + }, + "isolatedSum": { + "p50": 127.19999998807907, + "p90": 132.54399597644806, + "p95": 137.79199868440628, + "p99": 218.59199553728104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.93599817156792, + "p90": 62.81600147485733, + "p95": 65.66400080919266, + "p99": 105.79200088977814 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 70.72000205516815, + "p95": 78.20799946784973, + "p99": 94.46399658918381 + }, + "roundtrip": { + "p50": 119.19999867677689, + "p90": 127.80800461769104, + "p95": 130.5599957704544, + "p99": 178.847998380661 + }, + "isolatedSum": { + "p50": 128.60799953341484, + "p90": 133.53600353002548, + "p95": 143.8720002770424, + "p99": 200.25599747896194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.983998864889145, + "p90": 67.71200150251389, + "p95": 73.72800260782242, + "p99": 182.72000551223755 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 82.36800134181976, + "p95": 91.13600105047226, + "p99": 140.1599943637848 + }, + "roundtrip": { + "p50": 124.06399846076965, + "p90": 128.92800569534302, + "p95": 131.1040073633194, + "p99": 199.26400482654572 + }, + "isolatedSum": { + "p50": 131.99999555945396, + "p90": 150.08000284433365, + "p95": 164.86400365829468, + "p99": 322.87999987602234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 63.19999694824219, + "p90": 70.65600156784058, + "p95": 74.72000271081924, + "p99": 133.53599607944489 + }, + "combine": { + "p50": 78.78399640321732, + "p90": 82.43200182914734, + "p95": 91.16800129413605, + 
"p99": 129.2479932308197 + }, + "roundtrip": { + "p50": 123.07199835777283, + "p90": 128.35200130939484, + "p95": 132.6719969511032, + "p99": 189.60000574588776 + }, + "isolatedSum": { + "p50": 141.9839933514595, + "p90": 153.08800339698792, + "p95": 165.8880040049553, + "p99": 262.7839893102646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.87200337648392, + "p90": 78.27199995517731, + "p95": 80.1599994301796, + "p99": 100.89600086212158 + }, + "combine": { + "p50": 79.39200103282928, + "p90": 81.24800026416779, + "p95": 83.13599973917007, + "p99": 132.79999792575836 + }, + "roundtrip": { + "p50": 135.48800349235535, + "p90": 139.13600146770477, + "p95": 141.66399836540222, + "p99": 193.37600469589233 + }, + "isolatedSum": { + "p50": 151.2640044093132, + "p90": 159.5200002193451, + "p95": 163.29599916934967, + "p99": 233.69599878787994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.24800157546997, + "p90": 93.98400038480759, + "p95": 98.55999797582626, + "p99": 183.58400464057922 + }, + "combine": { + "p50": 93.28000247478485, + "p90": 102.55999863147736, + "p95": 103.58399897813797, + "p99": 216.63999557495117 + }, + "roundtrip": { + "p50": 163.61600160598755, + "p90": 167.87199676036835, + "p95": 170.33599317073822, + "p99": 245.92000246047974 + }, + "isolatedSum": { + "p50": 182.52800405025482, + "p90": 196.54399901628494, + "p95": 202.14399695396423, + "p99": 400.2240002155304 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.54399752616882, + "p90": 100.89600086212158, + "p95": 102.68799960613251, + "p99": 131.58400356769562 + }, + "combine": { + "p50": 116.2559986114502, + "p90": 117.69600212574005, + "p95": 119.29599940776825, + "p99": 140.3840035200119 + }, + "roundtrip": { + "p50": 198.4959989786148, + "p90": 201.79200172424316, + "p95": 203.80799472332, + "p99": 228.86399924755096 + }, + "isolatedSum": { + "p50": 212.79999613761902, + "p90": 218.59200298786163, + "p95": 221.98399901390076, + "p99": 271.9680070877075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-08196702", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b300_2784b9e2", + "comparisonKey": "31d18fcf874ee38e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:59.156178+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + 
"unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.34400078654289, + "p90": 59.039998799562454, + "p95": 61.535999178886414, + "p99": 75.07199794054031 + }, + "combine": { + "p50": 67.16799736022949, + "p90": 68.4799998998642, + "p95": 69.43999975919724, + "p99": 79.29600030183792 + }, + "roundtrip": { + "p50": 107.90400207042694, + "p90": 114.75200206041336, + "p95": 116.06399714946747, + "p99": 127.32799351215363 + }, + "isolatedSum": { + "p50": 124.51199814677238, + "p90": 127.51999869942665, + "p95": 130.97599893808365, + "p99": 154.36799824237823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
57.53599852323532, + "p90": 59.74400043487549, + "p95": 63.93600255250931, + "p99": 123.48800152540207 + }, + "combine": { + "p50": 67.23199784755707, + "p90": 68.7360018491745, + "p95": 69.95200365781784, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 107.51999914646149, + "p90": 110.78400164842606, + "p95": 113.56800049543381, + "p99": 126.08000636100769 + }, + "isolatedSum": { + "p50": 124.76799637079239, + "p90": 128.48000228405, + "p95": 133.88800621032715, + "p99": 203.90400290489197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.14399942755699, + "p90": 60.38400158286095, + "p95": 63.26399743556976, + "p99": 106.49599879980087 + }, + "combine": { + "p50": 68.35199892520905, + "p90": 70.23999840021133, + "p95": 77.72800326347351, + "p99": 84.927998483181 + }, + "roundtrip": { + "p50": 111.00800335407257, + "p90": 125.791996717453, + "p95": 134.24000144004822, + "p99": 170.56000232696533 + }, + "isolatedSum": { + "p50": 126.49599835276604, + "p90": 130.62399998307228, + "p95": 140.99200069904327, + "p99": 191.42399728298187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.35200133919716, + "p90": 62.65600025653839, + "p95": 66.04799628257751, + "p99": 84.25600081682205 + }, + "combine": { + "p50": 68.83200258016586, + "p90": 78.72000336647034, + "p95": 79.13599908351898, + "p99": 92.8959995508194 + }, + "roundtrip": { + "p50": 124.54400211572647, + "p90": 128.7360042333603, + "p95": 131.32800161838531, + "p99": 233.8239997625351 + }, + 
"isolatedSum": { + "p50": 129.18400391936302, + "p90": 141.37600362300873, + "p95": 145.1839953660965, + "p99": 177.15200036764145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 67.29599833488464, + "p90": 74.46400076150894, + "p95": 75.6480023264885, + "p99": 78.84799689054489 + }, + "combine": { + "p50": 68.80000233650208, + "p90": 78.87999713420868, + "p95": 79.42400127649307, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 121.21599912643433, + "p90": 126.88000500202179, + "p95": 128.57599556446075, + "p99": 143.8080072402954 + }, + "isolatedSum": { + "p50": 136.09600067138672, + "p90": 153.34399789571762, + "p95": 155.07200360298157, + "p99": 160.0639969110489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.10399752855301, + "p90": 77.40800082683563, + "p95": 78.17599922418594, + "p99": 112.64000087976456 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 80.89599758386612, + "p95": 82.68799632787704, + "p99": 91.0400003194809 + }, + "roundtrip": { + "p50": 135.74400544166565, + "p90": 139.5840048789978, + "p95": 141.4400041103363, + "p99": 176.60799622535706 + }, + "isolatedSum": { + "p50": 150.62399953603745, + "p90": 158.30399841070175, + "p95": 160.863995552063, + "p99": 203.68000119924545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.05600011348724, + "p90": 92.12800115346909, + "p95": 94.43199634552002, + "p99": 148.44800531864166 + }, + "combine": { + "p50": 92.6399976015091, + "p90": 95.39200365543365, + "p95": 103.58399897813797, + "p99": 139.93600010871887 + }, + "roundtrip": { + "p50": 160.38399934768677, + "p90": 165.56799411773682, + "p95": 167.67999529838562, + "p99": 205.76000213623047 + }, + "isolatedSum": { + "p50": 181.69599771499634, + "p90": 187.52000480890274, + "p95": 198.015995323658, + "p99": 288.38400542736053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.63999956846237, + "p90": 107.39199817180634, + "p95": 110.1439967751503, + "p99": 153.31199765205383 + }, + "combine": { + "p50": 128.22400033473969, + "p90": 129.05600666999817, + "p95": 130.0799995660782, + "p99": 142.0159935951233 + }, + "roundtrip": { + "p50": 209.24800634384155, + "p90": 216.19200706481934, + "p95": 218.9760059118271, + "p99": 231.64799809455872 + }, + "isolatedSum": { + "p50": 232.86399990320206, + "p90": 236.4480048418045, + "p95": 240.22399634122849, + "p99": 295.3279912471771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-602b26ee", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b300_cd6abfac", + "comparisonKey": "f2d36423a1c653c6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:31.178125+00:00", + "status": 
"valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.88000130653381, + "p90": 60.736000537872314, + "p95": 63.64800035953522, + "p99": 74.46400076150894 + }, + "combine": { + "p50": 
67.16799736022949, + "p90": 68.51200014352798, + "p95": 69.7920024394989, + "p99": 91.74399822950363 + }, + "roundtrip": { + "p50": 109.11999642848969, + "p90": 116.2559986114502, + "p95": 117.66400188207626, + "p99": 132.54399597644806 + }, + "isolatedSum": { + "p50": 126.0479986667633, + "p90": 129.2480006814003, + "p95": 133.44000279903412, + "p99": 166.20799899101257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.848001062870026, + "p90": 60.5119988322258, + "p95": 62.463998794555664, + "p99": 70.04799693822861 + }, + "combine": { + "p50": 68.44799965620041, + "p90": 70.17599791288376, + "p95": 72.06399738788605, + "p99": 81.60000294446945 + }, + "roundtrip": { + "p50": 109.50399935245514, + "p90": 112.5119999051094, + "p95": 116.19199812412262, + "p99": 167.71200299263 + }, + "isolatedSum": { + "p50": 127.29600071907043, + "p90": 130.68799674510956, + "p95": 134.5279961824417, + "p99": 151.64799988269806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.487998485565186, + "p90": 61.02399900555611, + "p95": 62.6240000128746, + "p99": 70.91200351715088 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 70.43199986219406, + "p95": 77.2159993648529, + "p99": 80.25600016117096 + }, + "roundtrip": { + "p50": 116.60800129175186, + "p90": 119.4240003824234, + "p95": 123.80799651145935, + "p99": 140.3840035200119 + }, + "isolatedSum": { + "p50": 128.25600057840347, + "p90": 131.45599886775017, + "p95": 139.8399993777275, + "p99": 151.16800367832184 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.535999178886414, + "p90": 63.80800157785416, + "p95": 65.11999666690826, + "p99": 73.47200065851212 + }, + "combine": { + "p50": 69.88800317049026, + "p90": 79.13599908351898, + "p95": 79.45600152015686, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 125.5359947681427, + "p90": 143.96800100803375, + "p95": 161.3759994506836, + "p99": 184.57600474357605 + }, + "isolatedSum": { + "p50": 131.42400234937668, + "p90": 142.94400066137314, + "p95": 144.57599818706512, + "p99": 163.64800184965134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 63.00800293684006, + "p90": 68.1919977068901, + "p95": 71.03999704122543, + "p99": 85.40800213813782 + }, + "combine": { + "p50": 78.87999713420868, + "p90": 79.64800298213959, + "p95": 79.8719972372055, + "p99": 83.61600339412689 + }, + "roundtrip": { + "p50": 122.43200093507767, + "p90": 126.49600207805634, + "p95": 130.23999333381653, + "p99": 146.08000218868256 + }, + "isolatedSum": { + "p50": 141.88800007104874, + "p90": 147.8400006890297, + "p95": 150.91199427843094, + "p99": 169.0240055322647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.19999825954437, + "p90": 74.0479975938797, + "p95": 
77.2159993648529, + "p99": 95.77599912881851 + }, + "combine": { + "p50": 80.06399869918823, + "p90": 82.07999914884567, + "p95": 83.0719992518425, + "p99": 94.52799707651138 + }, + "roundtrip": { + "p50": 135.16800105571747, + "p90": 138.94400000572205, + "p95": 139.96799290180206, + "p99": 150.2079963684082 + }, + "isolatedSum": { + "p50": 151.2639969587326, + "p90": 156.12799674272537, + "p95": 160.2879986166954, + "p99": 190.3039962053299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.44800293445587, + "p90": 91.80799871683121, + "p95": 93.40800344944, + "p99": 130.20800054073334 + }, + "combine": { + "p50": 93.21600198745728, + "p90": 95.87199985980988, + "p95": 102.49599814414978, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 163.26400637626648, + "p90": 167.1680063009262, + "p95": 168.5439944267273, + "p99": 186.94399297237396 + }, + "isolatedSum": { + "p50": 181.66400492191315, + "p90": 187.67999857664108, + "p95": 195.90400159358978, + "p99": 234.68799889087677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.74399888515472, + "p90": 97.98400104045868, + "p95": 101.1200025677681, + "p99": 111.7120012640953 + }, + "combine": { + "p50": 116.2559986114502, + "p90": 117.53600090742111, + "p95": 118.40000003576279, + "p99": 129.88799810409546 + }, + "roundtrip": { + "p50": 199.10399615764618, + "p90": 202.14399695396423, + "p95": 203.48800718784332, + "p99": 216.67200326919556 + }, + "isolatedSum": { + "p50": 
211.99999749660492, + "p90": 215.5200019478798, + "p95": 219.52000260353088, + "p99": 241.59999936819077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c55289e6", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_d1101c29", + "comparisonKey": "a7658b5c685cfc9b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:23.317230+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.56799876689911, + "p90": 60.99199876189232, + "p95": 66.0799965262413, + "p99": 118.75200271606445 + }, + "combine": { + "p50": 66.94400310516357, + "p90": 67.61600077152252, + "p95": 68.60800087451935, + "p99": 79.8719972372055 + }, + "roundtrip": { + "p50": 108.70400071144104, + "p90": 114.04799669981003, + "p95": 115.80800265073776, + "p99": 162.75200247764587 + }, + "isolatedSum": { + "p50": 124.51200187206268, + "p90": 128.60799953341484, + "p95": 134.68799740076065, + "p99": 198.62399995326996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.95200169086456, + "p90": 62.94400244951248, + "p95": 68.1919977068901, + "p99": 135.26399433612823 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 69.85600292682648, + "p95": 71.77600264549255, + "p99": 120.92799693346024 + }, + "roundtrip": { + "p50": 111.77600175142288, + "p90": 115.9679964184761, + "p95": 118.84800344705582, + "p99": 162.84799575805664 + }, + "isolatedSum": { + "p50": 125.5360022187233, + "p90": 132.80000537633896, + "p95": 139.96800035238266, + "p99": 256.19199126958847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.01599845290184, + "p90": 60.32000109553337, + "p95": 61.5679994225502, + "p99": 74.14399832487106 + }, + "combine": { + "p50": 67.19999760389328, + "p90": 68.86400282382965, + "p95": 70.27199864387512, + "p99": 103.58399897813797 + }, + "roundtrip": { + "p50": 108.15999656915665, + "p90": 113.69600147008896, + "p95": 120.2239990234375, + "p99": 160.70400178432465 + }, + "isolatedSum": { + "p50": 125.21599605679512, + "p90": 129.18400391936302, + "p95": 131.83999806642532, + "p99": 177.72799730300903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.7680007815361, + "p90": 68.67200136184692, + "p95": 72.54400104284286, + "p99": 132.35199451446533 + }, + "combine": { + "p50": 69.2799985408783, + "p90": 78.04799824953079, + "p95": 78.52800190448761, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 122.11199849843979, + "p90": 127.74400413036346, + "p95": 132.89600610733032, + "p99": 199.20000433921814 + }, + "isolatedSum": { + "p50": 130.0479993224144, + "p90": 146.71999961137772, + "p95": 151.07200294733047, + "p99": 235.55199801921844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.3120011985302, + "p90": 67.61600077152252, + "p95": 69.40799951553345, + "p99": 87.61599659919739 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 78.14399898052216, + "p95": 79.0719985961914, + 
"p99": 116.03199690580368 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 127.58399546146393, + "p95": 128.4160017967224, + "p99": 163.16799819469452 + }, + "isolatedSum": { + "p50": 130.39999827742577, + "p90": 145.75999975204468, + "p95": 148.47999811172485, + "p99": 203.64799350500107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 76.19199901819229, + "p90": 79.0719985961914, + "p95": 80.06399869918823, + "p99": 88.22400122880936 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 91.0400003194809, + "p95": 92.28800237178802, + "p99": 134.07999277114868 + }, + "roundtrip": { + "p50": 129.63199615478516, + "p90": 136.4160031080246, + "p95": 141.79199934005737, + "p99": 193.1840032339096 + }, + "isolatedSum": { + "p50": 155.4879993200302, + "p90": 170.1119989156723, + "p95": 172.35200107097626, + "p99": 222.30399399995804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 81.24800026416779, + "p90": 88.0960002541542, + "p95": 89.56799656152725, + "p99": 133.27999413013458 + }, + "combine": { + "p50": 91.77599847316742, + "p90": 93.34400296211243, + "p95": 94.84799951314926, + "p99": 139.23199474811554 + }, + "roundtrip": { + "p50": 161.43999993801117, + "p90": 165.12000560760498, + "p95": 167.00799763202667, + "p99": 211.32799983024597 + }, + "isolatedSum": { + "p50": 173.0239987373352, + "p90": 181.44000321626663, + "p95": 184.4159960746765, + "p99": 272.5119888782501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.67999970912933, + "p90": 107.16799646615982, + "p95": 109.0560033917427, + "p99": 129.40800189971924 + }, + "combine": { + "p50": 127.61600315570831, + "p90": 128.76799702644348, + "p95": 129.12000715732574, + "p99": 164.06400501728058 + }, + "roundtrip": { + "p50": 210.65600216388702, + "p90": 215.58399498462677, + "p95": 217.72800385951996, + "p99": 258.11201333999634 + }, + "isolatedSum": { + "p50": 231.29600286483765, + "p90": 235.9359934926033, + "p95": 238.17601054906845, + "p99": 293.4720069169998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4664a5e", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_22b1cbe5", + "comparisonKey": "dd3bcc1940aff320", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:55.032908+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": 
"zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.33600088953972, + "p90": 61.40799820423126, + "p95": 64.92800265550613, + "p99": 132.38400220870972 + }, + "combine": { + "p50": 66.94400310516357, + "p90": 67.58400052785873, + "p95": 69.023996591568, + "p99": 128.03199887275696 + }, + "roundtrip": { + "p50": 109.0880036354065, + "p90": 116.06399714946747, + "p95": 117.18399822711945, + "p99": 135.6160044670105 + }, + "isolatedSum": { + "p50": 125.28000399470329, + "p90": 128.99199873209, + "p95": 133.95199924707413, + "p99": 260.4160010814667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.43200162053108, + "p90": 61.88800185918808, + "p95": 68.35199892520905, + "p99": 142.81600713729858 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 72.48000055551529, + "p95": 79.39200103282928, + "p99": 128.12800705432892 + }, + "roundtrip": { + "p50": 110.88000237941742, + "p90": 136.83199882507324, + "p95": 143.39199662208557, + "p99": 198.14400374889374 + }, + "isolatedSum": { + "p50": 126.8480010330677, + "p90": 134.36800241470337, + "p95": 147.74399995803833, + "p99": 270.9440141916275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 60.256000608205795, + "p90": 87.16800063848495, + "p95": 96.44799679517746, + "p99": 143.13599467277527 + }, + "combine": { + "p50": 68.80000233650208, + "p90": 78.65600287914276, + "p95": 79.03999835252762, + "p99": 93.88799965381622 + }, + "roundtrip": { + "p50": 110.97600311040878, + "p90": 118.9119964838028, + "p95": 122.75200337171555, + "p99": 168.99199783802032 + }, + "isolatedSum": { + "p50": 129.05600294470787, + "p90": 165.82400351762772, + "p95": 175.48799514770508, + "p99": 237.0239943265915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.40799820423126, + "p90": 65.92000275850296, + "p95": 68.44799965620041, + "p99": 166.24000668525696 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 79.13599908351898, + "p95": 79.71200346946716, + "p99": 103.67999970912933 + }, + "roundtrip": { + "p50": 124.92799758911133, + "p90": 
128.57599556446075, + "p95": 130.43199479579926, + "p99": 204.0960043668747 + }, + "isolatedSum": { + "p50": 131.26400113105774, + "p90": 145.05600184202194, + "p95": 148.16000312566757, + "p99": 269.9200063943863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.52799928188324, + "p90": 77.7600035071373, + "p95": 85.9839990735054, + "p99": 131.84000551700592 + }, + "combine": { + "p50": 78.78399640321732, + "p90": 79.6160027384758, + "p95": 80.19199967384338, + "p99": 102.88000106811523 + }, + "roundtrip": { + "p50": 122.04799801111221, + "p90": 134.0160071849823, + "p95": 139.71200585365295, + "p99": 185.72799861431122 + }, + "isolatedSum": { + "p50": 141.31199568510056, + "p90": 157.3760062456131, + "p95": 166.17599874734879, + "p99": 234.72000658512115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.39999961853027, + "p90": 76.12799853086472, + "p95": 76.86399668455124, + "p99": 82.11199939250946 + }, + "combine": { + "p50": 79.64800298213959, + "p90": 91.64799749851227, + "p95": 94.36800330877304, + "p99": 108.47999900579453 + }, + "roundtrip": { + "p50": 136.1279934644699, + "p90": 143.19999516010284, + "p95": 147.87200093269348, + "p99": 166.52800142765045 + }, + "isolatedSum": { + "p50": 150.04800260066986, + "p90": 167.77599602937698, + "p95": 171.23199999332428, + "p99": 190.59199839830399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + 
"recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 90.01599997282028, + "p90": 95.23200243711472, + "p95": 101.6319990158081, + "p99": 161.24799847602844 + }, + "combine": { + "p50": 93.9520001411438, + "p90": 104.12800312042236, + "p95": 116.19199812412262, + "p99": 166.55999422073364 + }, + "roundtrip": { + "p50": 163.80800306797028, + "p90": 170.1440066099167, + "p95": 174.3360012769699, + "p99": 200.3519982099533 + }, + "isolatedSum": { + "p50": 183.96800011396408, + "p90": 199.36000555753708, + "p95": 217.82399713993073, + "p99": 327.8079926967621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.03200107812881, + "p90": 104.80000078678131, + "p95": 108.60799998044968, + "p99": 140.79999923706055 + }, + "combine": { + "p50": 116.31999909877777, + "p90": 117.95199662446976, + "p95": 119.71200257539749, + "p99": 170.49600183963776 + }, + "roundtrip": { + "p50": 195.48800587654114, + "p90": 201.75999402999878, + "p95": 208.76799523830414, + "p99": 264.0640139579773 + }, + "isolatedSum": { + "p50": 212.35200017690659, + "p90": 222.75199741125107, + "p95": 228.32000255584717, + "p99": 311.2960010766983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-047e089a", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_65aea461", + "comparisonKey": 
"4bdcadc90e3f4e91", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:47.395941+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 58.687999844551086, + "p90": 
60.92799827456474, + "p95": 63.00800293684006, + "p99": 80.64000308513641 + }, + "combine": { + "p50": 66.84800237417221, + "p90": 67.48799979686737, + "p95": 68.38399916887283, + "p99": 102.68799960613251 + }, + "roundtrip": { + "p50": 109.6000000834465, + "p90": 116.03199690580368, + "p95": 116.80000275373459, + "p99": 132.51200318336487 + }, + "isolatedSum": { + "p50": 125.5360022187233, + "p90": 128.4159980714321, + "p95": 131.3920021057129, + "p99": 183.32800269126892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 58.88000130653381, + "p90": 74.68800246715546, + "p95": 80.1599994301796, + "p99": 124.25599992275238 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 79.03999835252762, + "p95": 79.32800054550171, + "p99": 104.00000214576721 + }, + "roundtrip": { + "p50": 109.69600081443787, + "p90": 116.95999652147293, + "p95": 120.41600048542023, + "p99": 162.9759967327118 + }, + "isolatedSum": { + "p50": 127.87199765443802, + "p90": 153.72800081968307, + "p95": 159.4879999756813, + "p99": 228.2560020685196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 59.007998555898666, + "p90": 61.344001442193985, + "p95": 63.10400366783142, + "p99": 76.35200023651123 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 70.52800059318542, + "p95": 77.72800326347351, + "p99": 91.26400202512741 + }, + "roundtrip": { + "p50": 111.51999980211258, + "p90": 119.07199770212173, + "p95": 121.8239963054657, + "p99": 146.04799449443817 + }, + "isolatedSum": { + "p50": 
127.51999869942665, + "p90": 131.8720020353794, + "p95": 140.83200693130493, + "p99": 167.61600226163864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.99199876189232, + "p90": 63.74400109052658, + "p95": 65.85600227117538, + "p99": 82.20800012350082 + }, + "combine": { + "p50": 69.15199756622314, + "p90": 79.00799810886383, + "p95": 79.42400127649307, + "p99": 104.38399761915207 + }, + "roundtrip": { + "p50": 123.96799772977829, + "p90": 128.4479945898056, + "p95": 129.63199615478516, + "p99": 139.96799290180206 + }, + "isolatedSum": { + "p50": 130.14399632811546, + "p90": 142.7519991993904, + "p95": 145.28000354766846, + "p99": 186.5919977426529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.49599903821945, + "p90": 68.60800087451935, + "p95": 70.88000327348709, + "p99": 87.8399983048439 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 79.55200225114822, + "p95": 79.77599650621414, + "p99": 84.35200154781342 + }, + "roundtrip": { + "p50": 121.63200229406357, + "p90": 126.39999389648438, + "p95": 128.38399410247803, + "p99": 153.28000485897064 + }, + "isolatedSum": { + "p50": 141.2160024046898, + "p90": 148.16000312566757, + "p95": 150.65599977970123, + "p99": 172.19199985265732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
32, + "globalTokens": 256, + "dispatch": { + "p50": 71.16799801588058, + "p90": 77.2479996085167, + "p95": 78.59200239181519, + "p99": 87.20000088214874 + }, + "combine": { + "p50": 79.55200225114822, + "p90": 81.4720019698143, + "p95": 82.71999657154083, + "p99": 114.88000303506851 + }, + "roundtrip": { + "p50": 135.3279948234558, + "p90": 139.1039937734604, + "p95": 141.50400459766388, + "p99": 172.86400496959686 + }, + "isolatedSum": { + "p50": 150.7200002670288, + "p90": 158.720001578331, + "p95": 161.31199896335602, + "p99": 202.08000391721725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 89.82399851083755, + "p90": 92.86399930715561, + "p95": 94.17600184679031, + "p99": 144.16000247001648 + }, + "combine": { + "p50": 93.02400052547455, + "p90": 95.2640026807785, + "p95": 102.68799960613251, + "p99": 116.35199934244156 + }, + "roundtrip": { + "p50": 163.07200491428375, + "p90": 167.35999286174774, + "p95": 168.99199783802032, + "p99": 192.83199310302734 + }, + "isolatedSum": { + "p50": 182.8479990363121, + "p90": 188.1280019879341, + "p95": 196.86400145292282, + "p99": 260.51200181245804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.96800059080124, + "p90": 101.21600329875946, + "p95": 104.5759990811348, + "p99": 154.81600165367126 + }, + "combine": { + "p50": 116.28799885511398, + "p90": 117.85600334405899, + "p95": 118.75200271606445, + "p99": 129.31199371814728 + }, + "roundtrip": { + "p50": 195.51999866962433, + "p90": 
200.9280025959015, + "p95": 203.77600193023682, + "p99": 224.16000068187714 + }, + "isolatedSum": { + "p50": 212.25599944591522, + "p90": 219.07200664281845, + "p95": 223.32800179719925, + "p99": 284.12799537181854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e5246404", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_6de9f46e", + "comparisonKey": "543cfb81d6e0f5bd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:45.696664+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": 
"ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 50.944000482559204, + "p90": 59.61599946022034, + "p95": 62.49599903821945, + "p99": 93.98400038480759 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 70.0799971818924, + "p95": 71.99999690055847, + "p99": 115.99999666213989 + }, + "roundtrip": { + "p50": 101.50399804115295, + "p90": 123.52000176906586, + "p95": 132.9279989004135, + "p99": 156.15999698638916 + }, + "isolatedSum": { + "p50": 118.52800101041794, + "p90": 129.69599664211273, + "p95": 134.49599593877792, + "p99": 209.98399704694748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 51.7439991235733, + "p90": 62.68800050020218, + "p95": 66.72000139951706, + "p99": 120.67200243473053 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 69.95200365781784, + "p95": 70.81600278615952, + "p99": 79.00799810886383 + }, + "roundtrip": { + "p50": 101.9200012087822, + "p90": 104.19200360774994, + "p95": 107.26399719715118, + "p99": 125.56800246238708 + }, + "isolatedSum": { + "p50": 119.64800208806992, + "p90": 132.64000415802002, + "p95": 137.53600418567657, + "p99": 199.68000054359436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + 
"combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 52.2879995405674, + "p90": 57.69599974155426, + "p95": 60.32000109553337, + "p99": 109.53599959611893 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 82.94399827718735, + "p95": 91.23200178146362, + "p99": 131.45600259304047 + }, + "roundtrip": { + "p50": 108.03200304508209, + "p90": 120.80000340938568, + "p95": 124.51200187206268, + "p99": 185.56800484657288 + }, + "isolatedSum": { + "p50": 121.05600163340569, + "p90": 140.6399980187416, + "p95": 151.552002876997, + "p99": 240.9920021891594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 53.98400127887726, + "p90": 56.832000613212585, + "p95": 60.896001756191254, + "p99": 78.46400141716003 + }, + "combine": { + "p50": 69.5360004901886, + "p90": 79.45600152015686, + "p95": 81.28000050783157, + "p99": 131.9040060043335 + }, + "roundtrip": { + "p50": 114.3999993801117, + "p90": 120.92799693346024, + "p95": 123.03999811410904, + "p99": 174.6560037136078 + }, + "isolatedSum": { + "p50": 123.52000176906586, + "p90": 136.28800213336945, + "p95": 142.17600226402283, + "p99": 210.36800742149353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.69600039720535, + "p90": 70.01599669456482, + "p95": 71.1359977722168, + "p99": 154.78399395942688 + }, + "combine": { + "p50": 
78.23999971151352, + "p90": 79.58400249481201, + "p95": 80.1599994301796, + "p99": 90.55999666452408 + }, + "roundtrip": { + "p50": 114.01599645614624, + "p90": 117.18399822711945, + "p95": 121.31199985742569, + "p99": 156.92800283432007 + }, + "isolatedSum": { + "p50": 139.93600010871887, + "p90": 149.59999918937683, + "p95": 151.2959972023964, + "p99": 245.34399062395096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 63.680000603199005, + "p90": 69.72800195217133, + "p95": 75.1039981842041, + "p99": 174.30399358272552 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 80.92799782752991, + "p95": 82.30400085449219, + "p99": 92.83199906349182 + }, + "roundtrip": { + "p50": 126.71999633312225, + "p90": 131.23199343681335, + "p95": 133.18400084972382, + "p99": 182.20800161361694 + }, + "isolatedSum": { + "p50": 143.16800236701965, + "p90": 150.65599977970123, + "p95": 157.4079990386963, + "p99": 267.13599264621735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 77.34400033950806, + "p90": 84.19200032949448, + "p95": 86.04799956083298, + "p99": 144.51199769973755 + }, + "combine": { + "p50": 92.8959995508194, + "p90": 95.29600292444229, + "p95": 103.10400277376175, + "p99": 141.34399592876434 + }, + "roundtrip": { + "p50": 153.85599434375763, + "p90": 157.9200029373169, + "p95": 160.41600704193115, + "p99": 192.60799884796143 + }, + "isolatedSum": { + "p50": 170.23999989032745, + "p90": 179.48800325393677, + "p95": 189.15200233459473, + "p99": 
285.8559936285019 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.99999952316284, + "p90": 92.8959995508194, + "p95": 94.4959968328476, + "p99": 143.51999759674072 + }, + "combine": { + "p50": 116.2559986114502, + "p90": 117.95199662446976, + "p95": 119.9679970741272, + "p99": 166.59200191497803 + }, + "roundtrip": { + "p50": 190.46400487422943, + "p90": 195.74399292469025, + "p95": 199.10399615764618, + "p99": 242.5280064344406 + }, + "isolatedSum": { + "p50": 204.25599813461304, + "p90": 210.84799617528915, + "p95": 214.4639939069748, + "p99": 310.11199951171875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eac05882", + "identity": "b300|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "9acfb29ae403b686", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:18.781068+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.41599917411804, + "p90": 116.60800129175186, + "p95": 119.58400160074234, + "p99": 194.815993309021 + }, + "combine": { + "p50": 52.352000027894974, + "p90": 54.368000477552414, + "p95": 55.135998874902725, + "p99": 65.21599739789963 + }, + "roundtrip": { + "p50": 153.98399531841278, + "p90": 159.10400450229645, + "p95": 163.10399770736694, + "p99": 219.13599967956543 + }, + "isolatedSum": { + "p50": 164.76799920201302, + "p90": 170.97600176930428, + "p95": 174.72000047564507, + "p99": 260.0319907069206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + 
"dispatch": { + "p50": 113.79200220108032, + "p90": 117.53600090742111, + "p95": 119.6800023317337, + "p99": 140.6719982624054 + }, + "combine": { + "p50": 54.016001522541046, + "p90": 55.64799904823303, + "p95": 56.12799897789955, + "p99": 57.66399949789047 + }, + "roundtrip": { + "p50": 157.47199952602386, + "p90": 162.52799332141876, + "p95": 166.4000004529953, + "p99": 204.79999482631683 + }, + "isolatedSum": { + "p50": 167.80800372362137, + "p90": 173.18399995565414, + "p95": 175.80800130963326, + "p99": 198.33599776029587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 113.53600025177002, + "p90": 119.71200257539749, + "p95": 125.791996717453, + "p99": 303.8400113582611 + }, + "combine": { + "p50": 54.84800040721893, + "p90": 56.8000003695488, + "p95": 57.82400071620941, + "p99": 83.10399949550629 + }, + "roundtrip": { + "p50": 159.19999778270721, + "p90": 164.70399498939514, + "p95": 176.28799378871918, + "p99": 295.00800371170044 + }, + "isolatedSum": { + "p50": 168.38400065898895, + "p90": 176.5120029449463, + "p95": 183.61599743366241, + "p99": 386.9440108537674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 116.28799885511398, + "p90": 121.63200229406357, + "p95": 125.44000148773193, + "p99": 232.54400491714478 + }, + "combine": { + "p50": 57.69599974155426, + "p90": 59.74400043487549, + "p95": 60.38400158286095, + "p99": 65.8240020275116 + }, + "roundtrip": { + "p50": 164.06400501728058, + "p90": 171.87200486660004, + "p95": 183.32800269126892, + 
"p99": 339.35999870300293 + }, + "isolatedSum": { + "p50": 173.98399859666824, + "p90": 181.37600272893906, + "p95": 185.82400307059288, + "p99": 298.3680069446564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 116.5120005607605, + "p90": 120.7680031657219, + "p95": 124.32000041007996, + "p99": 192.60799884796143 + }, + "combine": { + "p50": 58.400001376867294, + "p90": 60.32000109553337, + "p95": 61.02399900555611, + "p99": 79.16799932718277 + }, + "roundtrip": { + "p50": 164.86400365829468, + "p90": 169.0559983253479, + "p95": 172.0000058412552, + "p99": 194.87999379634857 + }, + "isolatedSum": { + "p50": 174.9120019376278, + "p90": 181.08800426125526, + "p95": 185.34399941563606, + "p99": 271.7759981751442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 119.93599683046341, + "p90": 124.1919994354248, + "p95": 126.68800354003906, + "p99": 200.22399723529816 + }, + "combine": { + "p50": 61.824001371860504, + "p90": 63.80800157785416, + "p95": 64.28799778223038, + "p99": 68.54400038719177 + }, + "roundtrip": { + "p50": 171.07200622558594, + "p90": 175.99999904632568, + "p95": 179.32799458503723, + "p99": 225.055992603302 + }, + "isolatedSum": { + "p50": 181.7599982023239, + "p90": 188.00000101327896, + "p95": 190.97600132226944, + "p99": 268.76799762248993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 129.40800189971924, + "p90": 133.5040032863617, + "p95": 135.77599823474884, + "p99": 205.63200116157532 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 75.07199794054031, + "p95": 76.57600194215775, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 194.94399428367615, + "p90": 200.6399929523468, + "p95": 210.14399826526642, + "p99": 245.82399427890778 + }, + "isolatedSum": { + "p50": 202.36799865961075, + "p90": 208.576001226902, + "p95": 212.35200017690659, + "p99": 293.2159975171089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 144.9279934167862, + "p90": 150.04800260066986, + "p95": 154.55999970436096, + "p99": 230.27199506759644 + }, + "combine": { + "p50": 91.42400324344635, + "p90": 93.9520001411438, + "p95": 95.04000097513199, + "p99": 113.11999708414078 + }, + "roundtrip": { + "p50": 228.28799486160278, + "p90": 233.15200209617615, + "p95": 236.83199286460876, + "p99": 257.4079930782318 + }, + "isolatedSum": { + "p50": 236.35199666023254, + "p90": 244.00000274181366, + "p95": 249.60000067949295, + "p99": 343.3919921517372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b99081d1", + "identity": "b300|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "054a0cc209fe6408", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:11:40.932294+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.73600161075592, + "p90": 116.5120005607605, + "p95": 118.65600198507309, + "p99": 128.06400656700134 + }, + "combine": { + "p50": 
54.91200089454651, + "p90": 56.96000158786774, + "p95": 58.17599967122078, + "p99": 76.19199901819229 + }, + "roundtrip": { + "p50": 156.76799416542053, + "p90": 162.1759980916977, + "p95": 167.00799763202667, + "p99": 210.65600216388702 + }, + "isolatedSum": { + "p50": 167.64800250530243, + "p90": 173.47200214862823, + "p95": 176.83200165629387, + "p99": 204.25600558519363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 113.72800171375275, + "p90": 119.23199892044067, + "p95": 122.68800288438797, + "p99": 211.16800606250763 + }, + "combine": { + "p50": 56.832000613212585, + "p90": 58.720000088214874, + "p95": 59.87200140953064, + "p99": 67.84000247716904 + }, + "roundtrip": { + "p50": 159.96800363063812, + "p90": 181.92000687122345, + "p95": 196.60800695419312, + "p99": 255.51998615264893 + }, + "isolatedSum": { + "p50": 170.56000232696533, + "p90": 177.95199900865555, + "p95": 182.5600042939186, + "p99": 279.00800853967667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.62400108575821, + "p90": 118.6240017414093, + "p95": 120.51200121641159, + "p99": 175.135999917984 + }, + "combine": { + "p50": 58.59199911355972, + "p90": 60.47999858856201, + "p95": 61.983998864889145, + "p99": 66.46399945020676 + }, + "roundtrip": { + "p50": 163.90399634838104, + "p90": 167.9999977350235, + "p95": 171.1360067129135, + "p99": 213.79199624061584 + }, + "isolatedSum": { + "p50": 173.21600019931793, + "p90": 179.1040003299713, + "p95": 182.49600008130074, + "p99": 
241.59999936819077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 116.19199812412262, + "p90": 120.51200121641159, + "p95": 122.30399996042252, + "p99": 132.25600123405457 + }, + "combine": { + "p50": 60.92799827456474, + "p90": 63.35999816656113, + "p95": 64.09599632024765, + "p99": 70.0799971818924 + }, + "roundtrip": { + "p50": 167.77600347995758, + "p90": 172.41600155830383, + "p95": 174.52800273895264, + "p99": 196.51199877262115 + }, + "isolatedSum": { + "p50": 177.11999639868736, + "p90": 183.87199938297272, + "p95": 186.39999628067017, + "p99": 202.33599841594696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 116.89600348472595, + "p90": 121.66400253772736, + "p95": 125.76000392436981, + "p99": 231.1359941959381 + }, + "combine": { + "p50": 61.792001128196716, + "p90": 63.61600011587143, + "p95": 64.25599753856659, + "p99": 74.65600222349167 + }, + "roundtrip": { + "p50": 168.2240068912506, + "p90": 173.43999445438385, + "p95": 177.0240068435669, + "p99": 282.6879918575287 + }, + "isolatedSum": { + "p50": 178.68800461292267, + "p90": 185.28000265359879, + "p95": 190.0160014629364, + "p99": 305.7919964194298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.20799922943115, + "p90": 
126.65599584579468, + "p95": 129.2160004377365, + "p99": 206.62400126457214 + }, + "combine": { + "p50": 64.99200314283371, + "p90": 67.74400174617767, + "p95": 68.25599819421768, + "p99": 74.78400319814682 + }, + "roundtrip": { + "p50": 177.40799486637115, + "p90": 182.0479929447174, + "p95": 184.4799965620041, + "p99": 218.9760059118271 + }, + "isolatedSum": { + "p50": 187.20000237226486, + "p90": 194.39999759197235, + "p95": 197.4719986319542, + "p99": 281.40800446271896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 132.9600065946579, + "p90": 137.88799941539764, + "p95": 140.28799533843994, + "p99": 168.44800114631653 + }, + "combine": { + "p50": 78.8159966468811, + "p90": 81.216000020504, + "p95": 83.23200047016144, + "p99": 102.94400155544281 + }, + "roundtrip": { + "p50": 203.80799472332, + "p90": 208.639994263649, + "p95": 211.42399311065674, + "p99": 247.6159930229187 + }, + "isolatedSum": { + "p50": 211.776003241539, + "p90": 219.10399943590164, + "p95": 223.51999580860138, + "p99": 271.39200270175934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 152.99199521541595, + "p90": 156.54399991035461, + "p95": 158.33599865436554, + "p99": 179.1040003299713 + }, + "combine": { + "p50": 98.11200201511383, + "p90": 100.76799988746643, + "p95": 101.24800354242325, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 244.3840056657791, + "p90": 249.7600018978119, + "p95": 254.07999753952026, + "p99": 306.0159981250763 + }, + 
"isolatedSum": { + "p50": 251.10399723052979, + "p90": 257.31199979782104, + "p95": 259.5840021967888, + "p99": 283.26400369405746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-617e9cda", + "identity": "b300|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "f959262a95be60d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:10.544128+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.42399972677231, + "p90": 120.64000219106674, + "p95": 125.2799928188324, + "p99": 238.52799832820892 + }, + "combine": { + "p50": 58.78400057554245, + "p90": 60.99199876189232, + "p95": 62.17600032687187, + "p99": 71.68000191450119 + }, + "roundtrip": { + "p50": 163.455992937088, + "p90": 170.23999989032745, + "p95": 176.38400197029114, + "p99": 274.04800057411194 + }, + "isolatedSum": { + "p50": 174.20800030231476, + "p90": 181.63200095295906, + "p95": 187.45599314570427, + "p99": 310.2080002427101 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.37599968910217, + "p90": 126.62400305271149, + "p95": 134.11200046539307, + "p99": 210.36800742149353 + }, + "combine": { + "p50": 60.70400029420853, + "p90": 62.33600154519081, + "p95": 63.680000603199005, + "p99": 68.15999746322632 + }, + "roundtrip": { + "p50": 167.7439957857132, + "p90": 203.71200144290924, + "p95": 232.92799293994904, + "p99": 305.34398555755615 + }, + "isolatedSum": { + "p50": 178.0799999833107, + "p90": 188.9600045979023, + "p95": 197.79200106859207, + "p99": 278.52800488471985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 117.34399944543839, + "p90": 121.31199985742569, + "p95": 123.74400347471237, + "p99": 143.64799857139587 + }, + "combine": { + "p50": 62.463998794555664, + "p90": 64.28799778223038, + "p95": 65.31199812889099, + "p99": 70.52800059318542 + }, + "roundtrip": { + "p50": 169.79199647903442, + "p90": 174.3679940700531, + "p95": 176.86399817466736, + "p99": 198.7520009279251 + }, + "isolatedSum": { + "p50": 179.80799823999405, + "p90": 185.59999763965607, + "p95": 189.05600160360336, + "p99": 214.1759991645813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 119.29599940776825, + "p90": 123.77600371837616, + "p95": 126.52799487113953, + "p99": 287.26398944854736 + }, + "combine": { + "p50": 64.25599753856659, + "p90": 66.23999774456024, + "p95": 67.32799857854843, + "p99": 78.33600044250488 + }, + "roundtrip": { + "p50": 175.9359985589981, + "p90": 181.5039962530136, + "p95": 187.29600310325623, + "p99": 264.16000723838806 + }, + "isolatedSum": { + "p50": 183.55199694633484, + "p90": 190.0160014629364, + "p95": 193.85599344968796, + "p99": 365.59998989105225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 120.64000219106674, + "p90": 135.51999628543854, + "p95": 152.96000242233276, + "p99": 249.15200471878052 + }, + "combine": { + "p50": 66.0799965262413, + "p90": 72.64000177383423, + "p95": 74.40000027418137, + "p99": 
84.41600203514099 + }, + "roundtrip": { + "p50": 177.5359958410263, + "p90": 216.09599888324738, + "p95": 221.37600183486938, + "p99": 262.36799359321594 + }, + "isolatedSum": { + "p50": 186.71999871730804, + "p90": 208.15999805927277, + "p95": 227.36000269651413, + "p99": 333.5680067539215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.67200309038162, + "p90": 131.23199343681335, + "p95": 137.56799697875977, + "p99": 301.7280101776123 + }, + "combine": { + "p50": 69.98399645090103, + "p90": 73.02399724721909, + "p95": 76.7040029168129, + "p99": 87.00799942016602 + }, + "roundtrip": { + "p50": 185.85599958896637, + "p90": 193.02399456501007, + "p95": 204.51200008392334, + "p99": 300.7360100746155 + }, + "isolatedSum": { + "p50": 194.65599954128265, + "p90": 204.25599068403244, + "p95": 214.27199989557266, + "p99": 388.7360095977783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.1839940547943, + "p90": 141.37600362300873, + "p95": 142.97600090503693, + "p99": 216.86400473117828 + }, + "combine": { + "p50": 84.25600081682205, + "p90": 86.5280032157898, + "p95": 88.70399743318558, + "p99": 112.09599673748016 + }, + "roundtrip": { + "p50": 213.79199624061584, + "p90": 218.46400201320648, + "p95": 222.1439927816391, + "p99": 277.1199941635132 + }, + "isolatedSum": { + "p50": 221.43999487161636, + "p90": 227.90400683879852, + "p95": 231.6799983382225, + "p99": 328.96000146865845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, 
+ "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 157.95199573040009, + "p90": 162.91199624538422, + "p95": 166.36799275875092, + "p99": 236.00000143051147 + }, + "combine": { + "p50": 103.93600165843964, + "p90": 106.9440022110939, + "p95": 107.4879989027977, + "p99": 116.19199812412262 + }, + "roundtrip": { + "p50": 255.5519938468933, + "p90": 261.9200050830841, + "p95": 275.2639949321747, + "p99": 412.8960072994232 + }, + "isolatedSum": { + "p50": 261.8879973888397, + "p90": 269.8559984564781, + "p95": 273.8559916615486, + "p99": 352.1919995546341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f68963ef", + "identity": "b300|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_0fa25a65", + "comparisonKey": "129599aea007081a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:09.339035+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 56.63999915122986, + "p90": 58.848001062870026, + "p95": 59.647999703884125, + "p99": 71.77600264549255 + }, + "combine": { + "p50": 62.72000074386597, + "p90": 64.92800265550613, + "p95": 66.20799750089645, + "p99": 69.95200365781784 + }, + "roundtrip": { + "p50": 122.01599776744843, + "p90": 124.67200309038162, + "p95": 128.12800705432892, + "p99": 147.74399995803833 + }, + "isolatedSum": { + "p50": 119.35999989509583, + "p90": 123.77600371837616, + "p95": 125.85599720478058, + "p99": 141.7280063033104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.5999990105629, + "p90": 60.38400158286095, + 
"p95": 62.07999959588051, + "p99": 81.69600367546082 + }, + "combine": { + "p50": 64.60800021886826, + "p90": 66.72000139951706, + "p95": 68.15999746322632, + "p99": 76.4160007238388 + }, + "roundtrip": { + "p50": 125.05599856376648, + "p90": 127.23200023174286, + "p95": 128.12800705432892, + "p99": 137.08800077438354 + }, + "isolatedSum": { + "p50": 122.20799922943115, + "p90": 127.104002982378, + "p95": 130.23999705910683, + "p99": 158.11200439929962 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 58.01599845290184, + "p90": 60.7680007815361, + "p95": 63.07200342416763, + "p99": 73.11999797821045 + }, + "combine": { + "p50": 66.6240006685257, + "p90": 68.60800087451935, + "p95": 69.2799985408783, + "p99": 73.88799637556076 + }, + "roundtrip": { + "p50": 126.8479973077774, + "p90": 129.18399274349213, + "p95": 131.77600502967834, + "p99": 143.90400052070618 + }, + "isolatedSum": { + "p50": 124.63999912142754, + "p90": 129.37600165605545, + "p95": 132.35200196504593, + "p99": 147.0079943537712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 60.095999389886856, + "p90": 62.68800050020218, + "p95": 64.86400216817856, + "p99": 86.5280032157898 + }, + "combine": { + "p50": 68.64000111818314, + "p90": 70.72000205516815, + "p95": 71.9040036201477, + "p99": 83.55200290679932 + }, + "roundtrip": { + "p50": 130.65600395202637, + "p90": 133.95200669765472, + "p95": 137.60000467300415, + "p99": 152.3520052433014 + }, + "isolatedSum": { + "p50": 128.73600050807, + 
"p90": 133.40800255537033, + "p95": 136.76800578832626, + "p99": 170.0800061225891 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 61.47199869155884, + "p90": 64.06400352716446, + "p95": 65.5359998345375, + "p99": 74.75200295448303 + }, + "combine": { + "p50": 70.0799971818924, + "p90": 71.23199850320816, + "p95": 73.15199822187424, + "p99": 94.46399658918381 + }, + "roundtrip": { + "p50": 133.2480013370514, + "p90": 135.903999209404, + "p95": 137.05599308013916, + "p99": 148.3519971370697 + }, + "isolatedSum": { + "p50": 131.55199587345123, + "p90": 135.29600203037262, + "p95": 138.68799805641174, + "p99": 169.21599954366684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 65.8240020275116, + "p90": 73.91999661922455, + "p95": 85.31200140714645, + "p99": 196.28800451755524 + }, + "combine": { + "p50": 74.30399954319, + "p90": 76.19199901819229, + "p95": 76.89599692821503, + "p99": 83.10399949550629 + }, + "roundtrip": { + "p50": 145.79200744628906, + "p90": 148.95999431610107, + "p95": 151.58399939537048, + "p99": 170.3999936580658 + }, + "isolatedSum": { + "p50": 140.1280015707016, + "p90": 150.11199563741684, + "p95": 162.20799833536148, + "p99": 279.3920040130615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + 
"dispatch": { + "p50": 75.00799745321274, + "p90": 83.5840031504631, + "p95": 87.07199990749359, + "p99": 94.17600184679031 + }, + "combine": { + "p50": 89.4400030374527, + "p90": 91.36000275611877, + "p95": 92.51199662685394, + "p99": 98.68799895048141 + }, + "roundtrip": { + "p50": 174.78400468826294, + "p90": 193.53599846363068, + "p95": 208.0959975719452, + "p99": 425.28000473976135 + }, + "isolatedSum": { + "p50": 164.44800049066544, + "p90": 174.94400590658188, + "p95": 179.58399653434753, + "p99": 192.86400079727173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.25600081682205, + "p90": 87.5839963555336, + "p95": 90.81599861383438, + "p99": 168.41599345207214 + }, + "combine": { + "p50": 110.30399799346924, + "p90": 113.18399757146835, + "p95": 115.39199948310852, + "p99": 130.65600395202637 + }, + "roundtrip": { + "p50": 219.9680060148239, + "p90": 223.36000204086304, + "p95": 226.1119931936264, + "p99": 239.55200612545013 + }, + "isolatedSum": { + "p50": 194.5599988102913, + "p90": 200.76799392700195, + "p95": 206.2079980969429, + "p99": 299.0719974040985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-442829bb", + "identity": "b300|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "8726247a6d2e6892", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:35.764258+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.99200356006622, + "p90": 117.69600212574005, + "p95": 122.36800044775009, + "p99": 145.1520025730133 + }, + "combine": { + "p50": 63.58399987220764, + "p90": 65.05600363016129, + "p95": 66.14399701356888, + "p99": 73.85600358247757 + }, + "roundtrip": { + "p50": 
167.7439957857132, + "p90": 196.57599925994873, + "p95": 234.047994017601, + "p99": 356.8960130214691 + }, + "isolatedSum": { + "p50": 176.57600343227386, + "p90": 182.75200575590134, + "p95": 188.51199746131897, + "p99": 219.00800615549088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.17599767446518, + "p90": 117.5680011510849, + "p95": 119.74400281906128, + "p99": 127.71199643611908 + }, + "combine": { + "p50": 65.0240033864975, + "p90": 67.03999638557434, + "p95": 68.12799721956253, + "p99": 88.51200342178345 + }, + "roundtrip": { + "p50": 169.0240055322647, + "p90": 172.38399386405945, + "p95": 175.07199943065643, + "p99": 196.76800072193146 + }, + "isolatedSum": { + "p50": 179.20000106096268, + "p90": 184.60799753665924, + "p95": 187.8720000386238, + "p99": 216.22399985790253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.97599631547928, + "p90": 118.59200149774551, + "p95": 121.50400131940842, + "p99": 133.15199315547943 + }, + "combine": { + "p50": 66.72000139951706, + "p90": 68.67200136184692, + "p95": 69.66400146484375, + "p99": 74.75200295448303 + }, + "roundtrip": { + "p50": 172.41600155830383, + "p90": 177.08800733089447, + "p95": 181.536003947258, + "p99": 213.15200626850128 + }, + "isolatedSum": { + "p50": 181.69599771499634, + "p90": 187.26400285959244, + "p95": 191.16800278425217, + "p99": 207.90399610996246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + 
"recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 117.72800236940384, + "p90": 122.56000190973282, + "p95": 126.30400061607361, + "p99": 197.79199361801147 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 70.46400010585785, + "p95": 71.03999704122543, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 175.55199563503265, + "p90": 178.97599935531616, + "p95": 181.15200102329254, + "p99": 195.13599574565887 + }, + "isolatedSum": { + "p50": 186.20800226926804, + "p90": 193.02400201559067, + "p95": 197.34399765729904, + "p99": 284.7039923071861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 118.72000247240067, + "p90": 123.03999811410904, + "p95": 125.05599856376648, + "p99": 137.34400272369385 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 71.99999690055847, + "p95": 72.54400104284286, + "p99": 78.97599786520004 + }, + "roundtrip": { + "p50": 179.83999848365784, + "p90": 187.3600035905838, + "p95": 195.45599818229675, + "p99": 253.1839907169342 + }, + "isolatedSum": { + "p50": 188.89600038528442, + "p90": 195.0399950146675, + "p95": 197.59999960660934, + "p99": 216.3200005888939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.86399710178375, + "p90": 128.4479945898056, + "p95": 130.23999333381653, + "p99": 147.20000326633453 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 76.38400048017502, 
+ "p95": 77.05599814653397, + "p99": 99.90400075912476 + }, + "roundtrip": { + "p50": 190.2720034122467, + "p90": 194.4960057735443, + "p95": 197.1520036458969, + "p99": 220.09600698947906 + }, + "isolatedSum": { + "p50": 199.00799542665482, + "p90": 204.83199506998062, + "p95": 207.2959914803505, + "p99": 247.1040040254593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 139.3599957227707, + "p90": 143.64799857139587, + "p95": 147.42399752140045, + "p99": 166.78400337696075 + }, + "combine": { + "p50": 89.37600255012512, + "p90": 91.32800251245499, + "p95": 92.57599711418152, + "p99": 100.03200173377991 + }, + "roundtrip": { + "p50": 219.64800357818604, + "p90": 231.9680005311966, + "p95": 255.36000728607178, + "p99": 393.40800046920776 + }, + "isolatedSum": { + "p50": 228.7359982728958, + "p90": 234.97600108385086, + "p95": 239.99999463558197, + "p99": 266.81600511074066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.43999993801117, + "p90": 165.6000018119812, + "p95": 167.84000396728516, + "p99": 183.20000171661377 + }, + "combine": { + "p50": 110.43199896812439, + "p90": 112.99200356006622, + "p95": 113.66400122642517, + "p99": 117.76000261306763 + }, + "roundtrip": { + "p50": 265.6640112400055, + "p90": 270.84800601005554, + "p95": 273.6000120639801, + "p99": 298.3680069446564 + }, + "isolatedSum": { + "p50": 271.87199890613556, + "p90": 278.5920053720474, + "p95": 281.5040051937103, + "p99": 300.9600043296814 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7e88815e", + "identity": "b300|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "a77b8f9fc0bdba72", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:51.810817+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 114.68800157308578, + "p90": 121.56800180673599, + "p95": 124.9919980764389, + "p99": 307.2960078716278 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 62.72000074386597, + "p95": 64.38399851322174, + "p99": 93.40800344944 + }, + "roundtrip": { + "p50": 166.30400717258453, + "p90": 174.01599884033203, + "p95": 178.14399302005768, + "p99": 245.40799856185913 + }, + "isolatedSum": { + "p50": 175.36000162363052, + "p90": 184.28800255060196, + "p95": 189.37599658966064, + "p99": 400.7040113210678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.00799655914307, + "p90": 157.6319932937622, + "p95": 183.87199938297272, + "p99": 217.53600239753723 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 66.01600348949432, + "p95": 66.72000139951706, + "p99": 71.1359977722168 + }, + "roundtrip": { + "p50": 171.9679981470108, + "p90": 203.23200523853302, + "p95": 232.35200345516205, + "p99": 287.9999876022339 + }, + "isolatedSum": { + "p50": 178.847998380661, + "p90": 223.64799678325653, + "p95": 250.59200078248978, + "p99": 288.672000169754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 116.38399958610535, + "p90": 
122.14399874210358, + "p95": 124.28800016641617, + "p99": 180.86400628089905 + }, + "combine": { + "p50": 65.11999666690826, + "p90": 67.1359971165657, + "p95": 68.38399916887283, + "p99": 79.52000200748444 + }, + "roundtrip": { + "p50": 175.64800381660461, + "p90": 200.44800639152527, + "p95": 225.8560061454773, + "p99": 306.11199140548706 + }, + "isolatedSum": { + "p50": 181.5039962530136, + "p90": 189.27999585866928, + "p95": 192.671999335289, + "p99": 260.3840082883835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 117.91999638080597, + "p90": 122.68800288438797, + "p95": 126.08000636100769, + "p99": 178.46399545669556 + }, + "combine": { + "p50": 68.03199648857117, + "p90": 69.92000341415405, + "p95": 70.62400132417679, + "p99": 78.43200117349625 + }, + "roundtrip": { + "p50": 177.21599340438843, + "p90": 202.43200659751892, + "p95": 212.16000616550446, + "p99": 315.744012594223 + }, + "isolatedSum": { + "p50": 185.95199286937714, + "p90": 192.60800629854202, + "p95": 196.70400768518448, + "p99": 256.8959966301918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 118.46400052309036, + "p90": 124.79999661445618, + "p95": 129.15199995040894, + "p99": 144.6080058813095 + }, + "combine": { + "p50": 69.72800195217133, + "p90": 71.52000069618225, + "p95": 72.76800274848938, + "p99": 97.43999689817429 + }, + "roundtrip": { + "p50": 178.39999496936798, + "p90": 184.92799997329712, + "p95": 188.86399269104004, + "p99": 258.7200105190277 + }, + "isolatedSum": { 
+ "p50": 188.1920024752617, + "p90": 196.31999731063843, + "p95": 201.92000269889832, + "p99": 242.0480027794838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 125.56800246238708, + "p90": 138.94400000572205, + "p95": 150.14399588108063, + "p99": 252.76800990104675 + }, + "combine": { + "p50": 74.52800124883652, + "p90": 76.51200145483017, + "p95": 77.08799839019775, + "p99": 81.88799768686295 + }, + "roundtrip": { + "p50": 191.26400351524353, + "p90": 200.00000298023224, + "p95": 212.41599321365356, + "p99": 326.880007982254 + }, + "isolatedSum": { + "p50": 200.0960037112236, + "p90": 215.45600146055222, + "p95": 227.23199427127838, + "p99": 334.6560075879097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.79200613498688, + "p90": 153.1199961900711, + "p95": 161.43999993801117, + "p99": 196.96000218391418 + }, + "combine": { + "p50": 90.46400338411331, + "p90": 92.67199784517288, + "p95": 93.59999746084213, + "p99": 111.13599687814713 + }, + "roundtrip": { + "p50": 220.57600319385529, + "p90": 228.44800353050232, + "p95": 237.18400299549103, + "p99": 490.62401056289673 + }, + "isolatedSum": { + "p50": 228.2560095191002, + "p90": 245.791994035244, + "p95": 255.0399973988533, + "p99": 308.0959990620613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 164.5440012216568, + "p90": 170.49600183963776, + "p95": 174.27200078964233, + "p99": 206.59199357032776 + }, + "combine": { + "p50": 109.53599959611893, + "p90": 111.93600296974182, + "p95": 113.50400000810623, + "p99": 124.57600235939026 + }, + "roundtrip": { + "p50": 268.2879865169525, + "p90": 280.19198775291443, + "p95": 303.9360046386719, + "p99": 574.783980846405 + }, + "isolatedSum": { + "p50": 274.0800008177757, + "p90": 282.4320048093796, + "p95": 287.77600079774857, + "p99": 331.167995929718 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aa5d4fcf", + "identity": "b300|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_b62311b1", + "comparisonKey": "4ca9ce86c1566f47", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:22.299734+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 51.072001457214355, + "p90": 53.408000618219376, + "p95": 55.64799904823303, + "p99": 132.64000415802002 + }, + "combine": { + "p50": 62.49599903821945, + "p90": 64.54399973154068, + "p95": 65.85600227117538, + "p99": 72.73600250482559 + }, + "roundtrip": { + "p50": 115.26399850845337, + "p90": 117.85600334405899, + "p95": 119.71200257539749, + "p99": 141.53599739074707 + }, + "isolatedSum": { + "p50": 113.56800049543381, + "p90": 117.95200034976006, + "p95": 121.50400131940842, + "p99": 205.3760066628456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 52.25599929690361, + "p90": 54.88000065088272, + "p95": 59.51999872922897, + "p99": 161.3759994506836 + }, + "combine": { + "p50": 64.86400216817856, + "p90": 66.97600334882736, + "p95": 68.31999868154526, + "p99": 
74.68800246715546 + }, + "roundtrip": { + "p50": 118.27199906110764, + "p90": 120.86399644613266, + "p95": 124.95999783277512, + "p99": 166.143998503685 + }, + "isolatedSum": { + "p50": 117.12000146508217, + "p90": 121.85600399971008, + "p95": 127.83999741077423, + "p99": 236.06400191783905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 52.639998495578766, + "p90": 54.9440011382103, + "p95": 57.0559985935688, + "p99": 84.09599959850311 + }, + "combine": { + "p50": 66.20799750089645, + "p90": 68.44799965620041, + "p95": 70.11199742555618, + "p99": 116.09599739313126 + }, + "roundtrip": { + "p50": 120.15999853610992, + "p90": 122.36800044775009, + "p95": 125.34399330615997, + "p99": 174.52800273895264 + }, + "isolatedSum": { + "p50": 118.84799599647522, + "p90": 123.3920007944107, + "p95": 127.16799601912498, + "p99": 200.19199699163437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 54.23999950289726, + "p90": 56.51199817657471, + "p95": 57.34400078654289, + "p99": 79.3600007891655 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 70.52800059318542, + "p95": 71.9359964132309, + "p99": 131.80799782276154 + }, + "roundtrip": { + "p50": 124.83199685811996, + "p90": 127.51999497413635, + "p95": 130.52800297737122, + "p99": 186.52799725532532 + }, + "isolatedSum": { + "p50": 122.75199964642525, + "p90": 127.03999876976013, + "p95": 129.2799971997738, + "p99": 211.16799861192703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + 
"combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 55.39200082421303, + "p90": 57.82400071620941, + "p95": 59.67999994754791, + "p99": 80.4160013794899 + }, + "combine": { + "p50": 69.98399645090103, + "p90": 71.68000191450119, + "p95": 73.02399724721909, + "p99": 126.94400548934937 + }, + "roundtrip": { + "p50": 126.94400548934937, + "p90": 131.23199343681335, + "p95": 136.28800213336945, + "p99": 186.52799725532532 + }, + "isolatedSum": { + "p50": 125.37599727511406, + "p90": 129.5040026307106, + "p95": 132.703997194767, + "p99": 207.36000686883926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 58.30400064587593, + "p90": 60.83200126886368, + "p95": 63.32799792289734, + "p99": 118.59200149774551 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 76.76800340414047, + "p95": 78.87999713420868, + "p99": 134.24000144004822 + }, + "roundtrip": { + "p50": 138.7840062379837, + "p90": 142.14399456977844, + "p95": 149.85600113868713, + "p99": 202.78400182724 + }, + "isolatedSum": { + "p50": 132.54399970173836, + "p90": 137.60000467300415, + "p95": 142.20799505710602, + "p99": 252.83200293779373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 67.55200028419495, + "p90": 70.14399766921997, + "p95": 71.99999690055847, + "p99": 133.63200426101685 + }, + "combine": { + 
"p50": 89.34400230646133, + "p90": 91.5519967675209, + "p95": 92.73599833250046, + "p99": 100.92800110578537 + }, + "roundtrip": { + "p50": 168.09600591659546, + "p90": 170.6240028142929, + "p95": 173.95199835300446, + "p99": 208.3519995212555 + }, + "isolatedSum": { + "p50": 156.89600259065628, + "p90": 161.69599443674088, + "p95": 164.73599523305893, + "p99": 234.56000536680222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 78.33600044250488, + "p90": 80.92799782752991, + "p95": 82.07999914884567, + "p99": 99.2640033364296 + }, + "combine": { + "p50": 110.07999628782272, + "p90": 113.08799684047699, + "p95": 115.13599753379822, + "p99": 134.65599715709686 + }, + "roundtrip": { + "p50": 213.76000344753265, + "p90": 218.36799383163452, + "p95": 222.78399765491486, + "p99": 298.0799973011017 + }, + "isolatedSum": { + "p50": 188.4159967303276, + "p90": 194.0159946680069, + "p95": 197.2159966826439, + "p99": 233.92000049352646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-564301f5", + "identity": "b300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_4328f415", + "comparisonKey": "519898589b2390eb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:40.719813+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.45600217580795, + "p90": 86.46400272846222, + "p95": 89.9839997291565, + "p99": 104.41599786281586 + }, + "combine": { + "p50": 92.92799979448318, + "p90": 101.75999999046326, + "p95": 102.08000242710114, + "p99": 105.82400113344193 + }, + "roundtrip": { + "p50": 166.4319932460785, + "p90": 174.5920032262802, + "p95": 175.58400332927704, + "p99": 200.19200444221497 + }, + "isolatedSum": { + "p50": 176.38400197029114, + "p90": 
188.22400271892548, + "p95": 192.06400215625763, + "p99": 210.23999899625778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 124.57600235939026, + "p90": 127.51999497413635, + "p95": 128.60800325870514, + "p99": 146.7839926481247 + }, + "combine": { + "p50": 129.37599420547485, + "p90": 138.65600526332855, + "p95": 139.39200341701508, + "p99": 164.2879992723465 + }, + "roundtrip": { + "p50": 234.27200317382812, + "p90": 239.9359941482544, + "p95": 241.31199717521667, + "p99": 266.975998878479 + }, + "isolatedSum": { + "p50": 253.9519965648651, + "p90": 266.1760002374649, + "p95": 268.0000066757202, + "p99": 311.0719919204712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 173.95199835300446, + "p90": 176.7680048942566, + "p95": 178.68800461292267, + "p99": 188.6720061302185 + }, + "combine": { + "p50": 191.9039934873581, + "p90": 200.95999538898468, + "p95": 202.14399695396423, + "p99": 231.1359941959381 + }, + "roundtrip": { + "p50": 345.8879888057709, + "p90": 351.0400056838989, + "p95": 353.63200306892395, + "p99": 385.6320083141327 + }, + "isolatedSum": { + "p50": 365.85599184036255, + "p90": 377.7280002832413, + "p95": 380.8320015668869, + "p99": 419.8080003261566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
1024, + "globalTokens": 8192, + "dispatch": { + "p50": 290.46401381492615, + "p90": 296.1600124835968, + "p95": 301.7599880695343, + "p99": 316.32000207901 + }, + "combine": { + "p50": 389.5359933376312, + "p90": 398.71999621391296, + "p95": 401.12000703811646, + "p99": 414.14400935173035 + }, + "roundtrip": { + "p50": 596.7040061950684, + "p90": 604.8319935798645, + "p95": 609.9839806556702, + "p99": 625.0560283660889 + }, + "isolatedSum": { + "p50": 680.0000071525574, + "p90": 694.8800086975098, + "p95": 702.8799951076508, + "p99": 730.4640114307404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 521.120011806488, + "p90": 525.5360007286072, + "p95": 531.9679975509644, + "p99": 577.2799849510193 + }, + "combine": { + "p50": 755.3279995918274, + "p90": 766.3999795913696, + "p95": 767.1679854393005, + "p99": 790.9119725227356 + }, + "roundtrip": { + "p50": 1256.0319900512695, + "p90": 1266.7200565338135, + "p95": 1275.5839824676514, + "p99": 1359.7439527511597 + }, + "isolatedSum": { + "p50": 1276.4480113983154, + "p90": 1291.9359803199768, + "p95": 1299.135982990265, + "p99": 1368.1919574737549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 979.7760248184204, + "p90": 990.1760220527649, + "p95": 994.4319725036621, + "p99": 1019.4879770278931 + }, + "combine": { + "p50": 1442.4959421157837, + "p90": 1455.0399780273438, + "p95": 1466.528058052063, + "p99": 1932.6399564743042 + }, + "roundtrip": { + "p50": 
2390.2080059051514, + "p90": 2410.720109939575, + "p95": 2428.5120964050293, + "p99": 2653.1200408935547 + }, + "isolatedSum": { + "p50": 2422.271966934204, + "p90": 2445.2160000801086, + "p95": 2460.960030555725, + "p99": 2952.1279335021973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1136e36b", + "identity": "b300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_4328f415", + "comparisonKey": "a3f0888421c5cbde", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:30.244616+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.92799979448318, + "p90": 100.54399818181992, + "p95": 101.98400169610977, + "p99": 145.75999975204468 + }, + "combine": { + "p50": 103.55199873447418, + "p90": 104.76800054311752, + "p95": 106.49599879980087, + "p99": 155.008003115654 + }, + "roundtrip": { + "p50": 176.1920005083084, + "p90": 183.03999304771423, + "p95": 184.86399948596954, + "p99": 199.5519995689392 + }, + "isolatedSum": { + "p50": 196.47999852895737, + "p90": 205.31199872493744, + "p95": 208.48000049591064, + "p99": 300.76800286769867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 119.45600062608719, + "p90": 126.52799487113953, + "p95": 130.94399869441986, + "p99": 155.32800555229187 + }, + "combine": { + "p50": 140.70400595664978, + "p90": 143.19999516010284, + "p95": 144.54400539398193, + "p99": 154.9759954214096 + }, + "roundtrip": { + "p50": 246.7840015888214, + "p90": 251.64800882339478, + "p95": 255.77598810195923, + "p99": 298.94399642944336 + }, + "isolatedSum": { + "p50": 260.16000658273697, + "p90": 269.72799003124237, + "p95": 275.4880040884018, + "p99": 310.3040009737015 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 186.81600689888, + "p90": 190.3039962053299, + "p95": 191.6159987449646, + "p99": 206.36799931526184 + }, + "combine": { + "p50": 216.63999557495117, + "p90": 226.01599991321564, + "p95": 226.59200429916382, + "p99": 238.62400650978088 + }, + "roundtrip": { + "p50": 371.8079924583435, + "p90": 379.040002822876, + "p95": 382.9120099544525, + "p99": 404.6719968318939 + }, + "isolatedSum": { + "p50": 403.4560024738312, + "p90": 416.31999611854553, + "p95": 418.2080030441284, + "p99": 444.9920058250427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 298.46400022506714, + "p90": 307.23199248313904, + "p95": 309.2159926891327, + "p99": 320.576012134552 + }, + "combine": { + "p50": 436.5119934082031, + "p90": 446.399986743927, + "p95": 447.2000002861023, + "p99": 508.2240104675293 + }, + "roundtrip": { + "p50": 698.3360052108765, + "p90": 705.7600021362305, + "p95": 709.8879814147949, + "p99": 749.0559816360474 + }, + "isolatedSum": { + "p50": 734.9759936332703, + "p90": 753.631979227066, + "p95": 756.415992975235, + "p99": 828.8000226020813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 546.8800067901611, + "p90": 555.8720231056213, + 
"p95": 561.3759756088257, + "p99": 629.6640038490295 + }, + "combine": { + "p50": 779.0399789810181, + "p90": 790.4000282287598, + "p95": 791.5199995040894, + "p99": 826.2400031089783 + }, + "roundtrip": { + "p50": 1310.528039932251, + "p90": 1320.9919929504395, + "p95": 1325.119972229004, + "p99": 1424.9919652938843 + }, + "isolatedSum": { + "p50": 1325.9199857711792, + "p90": 1346.272051334381, + "p95": 1352.895975112915, + "p99": 1455.9040069580078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1033.3759784698486, + "p90": 1042.6239967346191, + "p95": 1048.5119819641113, + "p99": 1096.9280004501343 + }, + "combine": { + "p50": 1478.592038154602, + "p90": 1490.239977836609, + "p95": 1492.6719665527344, + "p99": 1654.1759967803955 + }, + "roundtrip": { + "p50": 2482.912063598633, + "p90": 2496.0319995880127, + "p95": 2503.7760734558105, + "p99": 2825.0880241394043 + }, + "isolatedSum": { + "p50": 2511.9680166244507, + "p90": 2532.863974571228, + "p95": 2541.1839485168457, + "p99": 2751.10399723053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-84eaeebd", + "identity": "b300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_4328f415", + "comparisonKey": "eb1e0cbb3bd4dae7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:21.053270+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", 
+ "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.96000134944916, + "p90": 103.67999970912933, + "p95": 104.92800176143646, + "p99": 116.83200299739838 + }, + "combine": { + "p50": 105.15200346708298, + "p90": 114.49600011110306, + "p95": 114.88000303506851, + "p99": 119.07199770212173 + }, + "roundtrip": { + "p50": 185.56800484657288, + "p90": 190.59200584888458, + "p95": 
191.77600741386414, + "p99": 209.79200303554535 + }, + "isolatedSum": { + "p50": 206.11200481653214, + "p90": 218.1759998202324, + "p95": 219.80800479650497, + "p99": 235.9040006995201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 130.8159977197647, + "p90": 137.95199990272522, + "p95": 139.23199474811554, + "p99": 147.96799421310425 + }, + "combine": { + "p50": 143.45599710941315, + "p90": 152.12799608707428, + "p95": 152.67199277877808, + "p99": 164.09599781036377 + }, + "roundtrip": { + "p50": 259.5199942588806, + "p90": 263.93601298332214, + "p95": 265.9839987754822, + "p99": 284.1919958591461 + }, + "isolatedSum": { + "p50": 274.27199482917786, + "p90": 290.0799959897995, + "p95": 291.9039875268936, + "p99": 312.063992023468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 198.11199605464935, + "p90": 201.63199305534363, + "p95": 202.68799364566803, + "p99": 212.6079946756363 + }, + "combine": { + "p50": 241.37599766254425, + "p90": 250.40000677108765, + "p95": 250.94398856163025, + "p99": 289.4720137119293 + }, + "roundtrip": { + "p50": 409.15200114250183, + "p90": 417.34400391578674, + "p95": 421.2479889392853, + "p99": 428.76800894737244 + }, + "isolatedSum": { + "p50": 439.4879937171936, + "p90": 452.0319998264313, + "p95": 453.6319822072983, + "p99": 502.0800083875656 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + 
"recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 316.96000695228577, + "p90": 321.9200074672699, + "p95": 327.13600993156433, + "p99": 339.1999900341034 + }, + "combine": { + "p50": 445.79198956489563, + "p90": 447.2320079803467, + "p95": 450.20800828933716, + "p99": 494.27199363708496 + }, + "roundtrip": { + "p50": 742.4319982528687, + "p90": 749.7599720954895, + "p95": 752.2240281105042, + "p99": 770.4319953918457 + }, + "isolatedSum": { + "p50": 762.7519965171814, + "p90": 769.1520154476166, + "p95": 777.3440182209015, + "p99": 833.4719836711884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 568.0959820747375, + "p90": 571.9680190086365, + "p95": 574.4959712028503, + "p99": 609.3760132789612 + }, + "combine": { + "p50": 802.1759986877441, + "p90": 804.0000200271606, + "p95": 806.4000010490417, + "p99": 842.6240086555481 + }, + "roundtrip": { + "p50": 1345.311999320984, + "p90": 1354.464054107666, + "p95": 1357.9519987106323, + "p99": 1370.0799942016602 + }, + "isolatedSum": { + "p50": 1370.2719807624817, + "p90": 1375.9680390357971, + "p95": 1380.895972251892, + "p99": 1452.0000219345093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1056.3199520111084, + "p90": 1065.4720067977905, + "p95": 1067.9999589920044, + "p99": 1126.8160343170166 + }, + "combine": { + "p50": 
1503.2960176467896, + "p90": 1515.1679515838623, + "p95": 1515.9039497375488, + "p99": 1577.6000022888184 + }, + "roundtrip": { + "p50": 2541.408061981201, + "p90": 2552.9279708862305, + "p95": 2557.919979095459, + "p99": 2725.087881088257 + }, + "isolatedSum": { + "p50": 2559.615969657898, + "p90": 2580.639958381653, + "p95": 2583.903908729553, + "p99": 2704.416036605835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-768532b8", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_822258bd", + "comparisonKey": "00d7e1e908693678", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:28.377326+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + 
}, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.2640026807785, + "p90": 103.93600165843964, + "p95": 107.13600367307663, + "p99": 158.9760035276413 + }, + "combine": { + "p50": 115.42399972677231, + "p90": 116.48000031709671, + "p95": 117.44000017642975, + "p99": 131.67999684810638 + }, + "roundtrip": { + "p50": 194.87999379634857, + "p90": 200.19200444221497, + "p95": 202.2400051355362, + "p99": 220.60799598693848 + }, + "isolatedSum": { + "p50": 210.6880024075508, + "p90": 220.41600197553635, + "p95": 224.57600384950638, + "p99": 290.6560003757477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.60800457000732, + "p90": 140.06400108337402, + "p95": 141.50400459766388, + "p99": 155.61600029468536 + }, + "combine": { + "p50": 155.64799308776855, + "p90": 164.32000696659088, + "p95": 165.27999937534332, + "p99": 190.46400487422943 + }, + "roundtrip": { + "p50": 272.92799949645996, + "p90": 279.55201268196106, + "p95": 283.07199478149414, + "p99": 313.4399950504303 + }, + "isolatedSum": { + "p50": 
292.2559976577759, + "p90": 304.3840080499649, + "p95": 306.7840039730072, + "p99": 346.0800051689148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.50400567054749, + "p90": 200.15999674797058, + "p95": 203.5199999809265, + "p99": 220.38400173187256 + }, + "combine": { + "p50": 266.2400007247925, + "p90": 274.6559977531433, + "p95": 275.4879891872406, + "p99": 278.9439857006073 + }, + "roundtrip": { + "p50": 445.6639885902405, + "p90": 452.38399505615234, + "p95": 459.52001214027405, + "p99": 474.07999634742737 + }, + "isolatedSum": { + "p50": 459.74400639533997, + "p90": 474.8159945011139, + "p95": 479.0079891681671, + "p99": 499.32798743247986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.07199454307556, + "p90": 330.6240141391754, + "p95": 333.0560028553009, + "p99": 381.76000118255615 + }, + "combine": { + "p50": 458.9119851589203, + "p90": 462.3680114746094, + "p95": 470.68798542022705, + "p99": 482.59198665618896 + }, + "roundtrip": { + "p50": 764.7039890289307, + "p90": 772.5759744644165, + "p95": 774.7520208358765, + "p99": 812.6400113105774 + }, + "isolatedSum": { + "p50": 785.9839797019958, + "p90": 792.9920256137848, + "p95": 803.743988275528, + "p99": 864.3519878387451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 576.6080021858215, + "p90": 583.0079913139343, + "p95": 585.2159857749939, + "p99": 599.4560122489929 + }, + "combine": { + "p50": 817.2799944877625, + "p90": 828.0640244483948, + "p95": 829.695999622345, + "p99": 912.992000579834 + }, + "roundtrip": { + "p50": 1376.3200044631958, + "p90": 1384.9279880523682, + "p95": 1390.463948249817, + "p99": 1435.0080490112305 + }, + "isolatedSum": { + "p50": 1393.887996673584, + "p90": 1411.072015762329, + "p95": 1414.9119853973389, + "p99": 1512.448012828827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1070.3359842300415, + "p90": 1078.879952430725, + "p95": 1083.1040143966675, + "p99": 1126.911997795105 + }, + "combine": { + "p50": 1530.6240320205688, + "p90": 1541.3119792938232, + "p95": 1552.8000593185425, + "p99": 1805.0559759140015 + }, + "roundtrip": { + "p50": 2585.5040550231934, + "p90": 2600.7039546966553, + "p95": 2616.096019744873, + "p99": 3067.199945449829 + }, + "isolatedSum": { + "p50": 2600.9600162506104, + "p90": 2620.1919317245483, + "p95": 2635.90407371521, + "p99": 2931.9679737091064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-972f2e9c", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_4328f415", + "comparisonKey": "857dead233b989f7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:14.975012+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.16800194978714, + "p90": 102.62399911880493, + "p95": 104.92800176143646, + "p99": 155.2640050649643 + }, + "combine": { + "p50": 115.55200070142746, + "p90": 116.48000031709671, + "p95": 
117.37599968910217, + "p99": 139.80799913406372 + }, + "roundtrip": { + "p50": 196.73599302768707, + "p90": 200.76799392700195, + "p95": 202.4639993906021, + "p99": 229.44000363349915 + }, + "isolatedSum": { + "p50": 210.7200026512146, + "p90": 219.10399943590164, + "p95": 222.30400145053864, + "p99": 295.072004199028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.1839940547943, + "p90": 139.74399864673615, + "p95": 141.31200313568115, + "p99": 156.92800283432007 + }, + "combine": { + "p50": 155.74400126934052, + "p90": 164.5440012216568, + "p95": 166.97600483894348, + "p99": 200.51200687885284 + }, + "roundtrip": { + "p50": 273.47201108932495, + "p90": 280.12800216674805, + "p95": 283.9359939098358, + "p99": 310.2079927921295 + }, + "isolatedSum": { + "p50": 292.9279953241348, + "p90": 304.28799986839294, + "p95": 308.28800797462463, + "p99": 357.4400097131729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.82399320602417, + "p90": 200.22399723529816, + "p95": 202.72000133991241, + "p99": 243.83999407291412 + }, + "combine": { + "p50": 266.55998826026917, + "p90": 275.55200457572937, + "p95": 278.6239981651306, + "p99": 299.45600032806396 + }, + "roundtrip": { + "p50": 445.6000030040741, + "p90": 454.94401454925537, + "p95": 464.32000398635864, + "p99": 561.3759756088257 + }, + "isolatedSum": { + "p50": 460.38398146629333, + "p90": 475.7760018110275, + "p95": 481.34399950504303, + "p99": 543.2959944009781 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.8079926967621, + "p90": 339.1680121421814, + "p95": 351.29600763320923, + "p99": 444.2239999771118 + }, + "combine": { + "p50": 458.9439928531647, + "p90": 462.8159999847412, + "p95": 471.807986497879, + "p99": 520.255982875824 + }, + "roundtrip": { + "p50": 764.4799947738647, + "p90": 774.3679881095886, + "p95": 783.2000255584717, + "p99": 859.4239950180054 + }, + "isolatedSum": { + "p50": 786.7519855499268, + "p90": 801.9840121269226, + "p95": 823.1039941310883, + "p99": 964.4799828529358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 576.8640041351318, + "p90": 583.7119817733765, + "p95": 586.3680243492126, + "p99": 632.2240233421326 + }, + "combine": { + "p50": 816.7999982833862, + "p90": 827.9039859771729, + "p95": 831.0719728469849, + "p99": 844.4799780845642 + }, + "roundtrip": { + "p50": 1374.8159408569336, + "p90": 1386.0479593276978, + "p95": 1394.11199092865, + "p99": 1466.4640426635742 + }, + "isolatedSum": { + "p50": 1393.664002418518, + "p90": 1411.6159677505493, + "p95": 1417.4399971961975, + "p99": 1476.7040014266968 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1071.552038192749, + 
"p90": 1086.143970489502, + "p95": 1101.1199951171875, + "p99": 1231.711983680725 + }, + "combine": { + "p50": 1530.6880474090576, + "p90": 1552.4159669876099, + "p95": 1565.0559663772583, + "p99": 1761.9199752807617 + }, + "roundtrip": { + "p50": 2585.8240127563477, + "p90": 2606.271982192993, + "p95": 2631.3281059265137, + "p99": 2733.5360050201416 + }, + "isolatedSum": { + "p50": 2602.2400856018066, + "p90": 2638.559937477112, + "p95": 2666.175961494446, + "p99": 2993.631958961487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3fcdab07", + "identity": "b300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_4328f415", + "comparisonKey": "5364460ef0054762", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:55.025902+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.77599912881851, + "p90": 102.49599814414978, + "p95": 106.55999928712845, + "p99": 149.4079977273941 + }, + "combine": { + "p50": 115.42399972677231, + "p90": 116.44800007343292, + "p95": 117.0559972524643, + "p99": 139.23199474811554 + }, + "roundtrip": { + "p50": 197.31199741363525, + "p90": 202.4960070848465, + "p95": 204.41600680351257, + "p99": 233.15200209617615 + }, + "isolatedSum": { + "p50": 211.19999885559082, + "p90": 218.9439982175827, + "p95": 223.61599653959274, + "p99": 288.63999247550964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.6720050573349, + "p90": 141.56800508499146, + "p95": 145.47200500965118, + "p99": 237.92000114917755 + }, + "combine": { + "p50": 154.65599298477173, + "p90": 164.19200599193573, + "p95": 164.57599401474, + "p99": 176.256000995636 + }, + "roundtrip": { + "p50": 273.44000339508057, + "p90": 
280.2239954471588, + "p95": 284.0319871902466, + "p99": 348.83201122283936 + }, + "isolatedSum": { + "p50": 291.3279980421066, + "p90": 305.7600110769272, + "p95": 310.0479990243912, + "p99": 414.17600214481354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.7599927186966, + "p90": 201.56799256801605, + "p95": 204.6079933643341, + "p99": 267.61600375175476 + }, + "combine": { + "p50": 266.400009393692, + "p90": 275.61599016189575, + "p95": 278.30401062965393, + "p99": 372.3520040512085 + }, + "roundtrip": { + "p50": 443.07199120521545, + "p90": 453.0239999294281, + "p95": 462.8159999847412, + "p99": 529.4399857521057 + }, + "isolatedSum": { + "p50": 460.1600021123886, + "p90": 477.1839827299118, + "p95": 482.91200399398804, + "p99": 639.9680078029633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.32799649238586, + "p90": 333.98398756980896, + "p95": 343.9039885997772, + "p99": 420.6080138683319 + }, + "combine": { + "p50": 459.6799910068512, + "p90": 471.2640047073364, + "p95": 473.2480049133301, + "p99": 547.1680164337158 + }, + "roundtrip": { + "p50": 768.4159874916077, + "p90": 776.7040133476257, + "p95": 785.0559949874878, + "p99": 830.9440016746521 + }, + "isolatedSum": { + "p50": 787.0079874992371, + "p90": 805.2479922771454, + "p95": 817.1519935131073, + "p99": 967.7760303020477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 
5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 569.3439841270447, + "p90": 574.9120116233826, + "p95": 581.9200277328491, + "p99": 633.9520215988159 + }, + "combine": { + "p50": 814.8159980773926, + "p90": 819.0400004386902, + "p95": 828.0640244483948, + "p99": 887.55202293396 + }, + "roundtrip": { + "p50": 1361.9199991226196, + "p90": 1373.2800483703613, + "p95": 1379.1999816894531, + "p99": 1444.543957710266 + }, + "isolatedSum": { + "p50": 1384.1599822044373, + "p90": 1393.9520120620728, + "p95": 1409.984052181244, + "p99": 1521.5040445327759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1064.8319721221924, + "p90": 1070.847988128662, + "p95": 1082.3040008544922, + "p99": 1155.8719873428345 + }, + "combine": { + "p50": 1516.0959959030151, + "p90": 1529.0240049362183, + "p95": 1553.3759593963623, + "p99": 1638.592004776001 + }, + "roundtrip": { + "p50": 2564.0320777893066, + "p90": 2578.04799079895, + "p95": 2605.3760051727295, + "p99": 3108.2561016082764 + }, + "isolatedSum": { + "p50": 2580.9279680252075, + "p90": 2599.8719930648804, + "p95": 2635.6799602508545, + "p99": 2794.4639921188354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e38c75bb", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": 
"b300_e8841c1f", + "comparisonKey": "8d7998734b5940ad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:16.926104+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.65600001811981, + "p90": 
109.69600081443787, + "p95": 111.64800077676773, + "p99": 122.079998254776 + }, + "combine": { + "p50": 130.46400249004364, + "p90": 139.8719996213913, + "p95": 140.28799533843994, + "p99": 153.85599434375763 + }, + "roundtrip": { + "p50": 229.37600314617157, + "p90": 235.9039932489395, + "p95": 237.05600202083588, + "p99": 257.1839988231659 + }, + "isolatedSum": { + "p50": 237.12000250816345, + "p90": 249.56800043582916, + "p95": 251.93599611520767, + "p99": 275.93599259853363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.76000237464905, + "p90": 164.09599781036377, + "p95": 165.6319946050644, + "p99": 186.0480010509491 + }, + "combine": { + "p50": 201.9519954919815, + "p90": 204.67199385166168, + "p95": 212.16000616550446, + "p99": 215.16799926757812 + }, + "roundtrip": { + "p50": 335.2639973163605, + "p90": 340.60800075531006, + "p95": 344.2879915237427, + "p99": 368.9599931240082 + }, + "isolatedSum": { + "p50": 363.71199786663055, + "p90": 368.76799166202545, + "p95": 377.79200077056885, + "p99": 401.2160003185272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 234.1119945049286, + "p90": 241.72799289226532, + "p95": 244.09599602222443, + "p99": 274.59201216697693 + }, + "combine": { + "p50": 338.8479948043823, + "p90": 348.5119938850403, + "p95": 350.5600094795227, + "p99": 368.80001425743103 + }, + "roundtrip": { + "p50": 554.8800230026245, + "p90": 567.4560070037842, + "p95": 577.023983001709, + "p99": 607.2319746017456 + }, + 
"isolatedSum": { + "p50": 572.9599893093109, + "p90": 590.2399867773056, + "p95": 594.6560055017471, + "p99": 643.392026424408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 408.4160029888153, + "p90": 416.0960018634796, + "p95": 417.7919924259186, + "p99": 448.63998889923096 + }, + "combine": { + "p50": 595.3279733657837, + "p90": 605.8560013771057, + "p95": 610.1120114326477, + "p99": 632.7360272407532 + }, + "roundtrip": { + "p50": 986.9120121002197, + "p90": 998.1759786605835, + "p95": 1007.3599815368652, + "p99": 1049.66402053833 + }, + "isolatedSum": { + "p50": 1003.743976354599, + "p90": 1021.9520032405853, + "p95": 1027.9040038585663, + "p99": 1081.3760161399841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 757.4080228805542, + "p90": 768.5760259628296, + "p95": 775.2959728240967, + "p99": 896.7040181159973 + }, + "combine": { + "p50": 1112.6400232315063, + "p90": 1126.207947731018, + "p95": 1135.4880332946777, + "p99": 1171.2000370025635 + }, + "roundtrip": { + "p50": 1856.9600582122803, + "p90": 1872.7999925613403, + "p95": 1884.6720457077026, + "p99": 1935.0719451904297 + }, + "isolatedSum": { + "p50": 1870.0480461120605, + "p90": 1894.7839736938477, + "p95": 1910.7840061187744, + "p99": 2067.904055118561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1458.6880207061768, + "p90": 1478.71994972229, + "p95": 1487.9039525985718, + "p99": 1568.7040090560913 + }, + "combine": { + "p50": 2142.8160667419434, + "p90": 2156.543970108032, + "p95": 2190.3998851776123, + "p99": 2240.7679557800293 + }, + "roundtrip": { + "p50": 3586.8799686431885, + "p90": 3613.600015640259, + "p95": 3638.5281085968018, + "p99": 3919.872045516968 + }, + "isolatedSum": { + "p50": 3601.50408744812, + "p90": 3635.2639198303223, + "p95": 3678.303837776184, + "p99": 3809.4719648361206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7514355f", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_3b4b5c66", + "comparisonKey": "0b202690013ae316", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:00.541092+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.88800317049026, + "p90": 72.22399860620499, + "p95": 74.5600014925003, + "p99": 90.81599861383438 + }, + "combine": { + "p50": 67.9360032081604, + "p90": 69.43999975919724, + "p95": 69.7920024394989, + "p99": 83.99999886751175 + }, + "roundtrip": { + "p50": 121.11999839544296, + "p90": 127.80800461769104, + "p95": 129.31199371814728, + "p99": 143.8719928264618 + }, + "isolatedSum": { + "p50": 137.82400637865067, + "p90": 141.66399836540222, + "p95": 144.3520039319992, + "p99": 174.81599748134613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 94.24000233411789, + "p90": 99.58399832248688, + "p95": 101.02400183677673, + "p99": 115.58400094509125 + }, + "combine": { + "p50": 
115.55200070142746, + "p90": 116.44800007343292, + "p95": 116.99199676513672, + "p99": 131.1040073633194 + }, + "roundtrip": { + "p50": 194.43200528621674, + "p90": 199.71199333667755, + "p95": 200.6720006465912, + "p99": 214.65599536895752 + }, + "isolatedSum": { + "p50": 209.79200303554535, + "p90": 216.0319983959198, + "p95": 218.01599860191345, + "p99": 246.68800830841064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 198.4959989786148, + "p90": 200.8640021085739, + "p95": 202.36800611019135, + "p99": 249.6960014104843 + }, + "combine": { + "p50": 248.79999458789825, + "p90": 250.4960000514984, + "p95": 251.0719895362854, + "p99": 263.13599944114685 + }, + "roundtrip": { + "p50": 430.6879937648773, + "p90": 435.263991355896, + "p95": 438.59198689460754, + "p99": 455.1360011100769 + }, + "isolatedSum": { + "p50": 447.29599356651306, + "p90": 451.3600021600723, + "p95": 453.43999564647675, + "p99": 512.8320008516312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e4836fa", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b300_3dc9ebbf", + "comparisonKey": "5538d086f188f970", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:28.851948+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.24000036716461, + "p90": 90.04800021648407, + "p95": 91.64799749851227, + "p99": 147.8399932384491 + }, + "combine": { + "p50": 82.0159986615181, + "p90": 91.00800007581711, + "p95": 92.25600212812424, + "p99": 115.03999680280685 + }, + "roundtrip": { + "p50": 146.43199741840363, + "p90": 150.62400698661804, + "p95": 154.55999970436096, + "p99": 199.2959976196289 + }, + "isolatedSum": { + 
"p50": 164.2559990286827, + "p90": 181.05600029230118, + "p95": 183.9039996266365, + "p99": 262.87999004125595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.57599776983261, + "p90": 99.58399832248688, + "p95": 105.18400371074677, + "p99": 168.5439944267273 + }, + "combine": { + "p50": 104.99200224876404, + "p90": 113.66400122642517, + "p95": 115.10399729013443, + "p99": 165.92000424861908 + }, + "roundtrip": { + "p50": 184.51200425624847, + "p90": 192.47999787330627, + "p95": 196.16000354290009, + "p99": 227.29599475860596 + }, + "isolatedSum": { + "p50": 201.56800001859665, + "p90": 213.24799954891205, + "p95": 220.2880010008812, + "p99": 334.4639986753464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 137.31199502944946, + "p90": 141.4400041103363, + "p95": 143.45599710941315, + "p99": 199.23199713230133 + }, + "combine": { + "p50": 143.327996134758, + "p90": 152.319997549057, + "p95": 152.76800096035004, + "p99": 176.7359972000122 + }, + "roundtrip": { + "p50": 260.51199436187744, + "p90": 267.8079903125763, + "p95": 271.61601185798645, + "p99": 319.0079927444458 + }, + "isolatedSum": { + "p50": 280.63999116420746, + "p90": 293.7600016593933, + "p95": 296.2239980697632, + "p99": 375.96799433231354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 199.20000433921814, + "p90": 206.7839950323105, + "p95": 208.8640034198761, + "p99": 247.00799584388733 + }, + "combine": { + "p50": 262.719988822937, + "p90": 264.51200246810913, + "p95": 266.6879892349243, + "p99": 325.408011674881 + }, + "roundtrip": { + "p50": 435.39199233055115, + "p90": 443.1680142879486, + "p95": 449.0880072116852, + "p99": 565.9840106964111 + }, + "isolatedSum": { + "p50": 461.91999316215515, + "p90": 471.2959975004196, + "p95": 475.5519926548004, + "p99": 572.4160075187683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 347.9680120944977, + "p90": 355.9679985046387, + "p95": 370.62400579452515, + "p99": 435.2959990501404 + }, + "combine": { + "p50": 460.35200357437134, + "p90": 471.3920056819916, + "p95": 483.39200019836426, + "p99": 574.7519731521606 + }, + "roundtrip": { + "p50": 785.9839797019958, + "p90": 793.4079766273499, + "p95": 796.4800000190735, + "p99": 869.9520230293274 + }, + "isolatedSum": { + "p50": 808.320015668869, + "p90": 827.3600041866302, + "p95": 854.0160059928894, + "p99": 1010.047972202301 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 649.7600078582764, + "p90": 661.3759994506836, + "p95": 664.8960113525391, + "p99": 690.9120082855225 + }, + "combine": { + "p50": 828.3839821815491, + "p90": 839.5839929580688, + "p95": 843.3600068092346, + "p99": 975.4559993743896 + }, + "roundtrip": { + "p50": 1455.3279876708984, + "p90": 
1468.4159755706787, + "p95": 1480.672001838684, + "p99": 1532.6080322265625 + }, + "isolatedSum": { + "p50": 1478.1439900398254, + "p90": 1500.9599924087524, + "p95": 1508.2560181617737, + "p99": 1666.368007659912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-58715273", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b300_9b10df89", + "comparisonKey": "755b7e752537dac5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:47.196572+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.47999835014343, + "p90": 107.93600231409073, + "p95": 112.73600161075592, + "p99": 177.95200645923615 + }, + "combine": { + "p50": 127.74400413036346, + "p90": 130.17599284648895, + "p95": 138.43199610710144, + "p99": 180.1919937133789 + }, + "roundtrip": { + "p50": 217.0879989862442, + "p90": 224.2240011692047, + "p95": 226.81599855422974, + "p99": 262.65600323677063 + }, + "isolatedSum": { + "p50": 232.2240024805069, + "p90": 238.11199516057968, + "p95": 251.16799771785736, + "p99": 358.14400017261505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 142.46399700641632, + "p90": 145.37599682807922, + "p95": 149.53599870204926, + "p99": 178.65599691867828 + }, + "combine": { + "p50": 188.03200125694275, + "p90": 190.0479942560196, + "p95": 191.13600254058838, + "p99": 202.14399695396423 + }, + "roundtrip": { + "p50": 312.0959997177124, + "p90": 319.6159899234772, + "p95": 323.10399413108826, + "p99": 336.95998787879944 + }, + "isolatedSum": { + "p50": 330.49599826335907, + "p90": 335.4239910840988, + "p95": 340.67200124263763, + "p99": 380.7999938726425 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 217.72800385951996, + "p90": 223.10400009155273, + "p95": 227.9359996318817, + "p99": 252.19199061393738 + }, + "combine": { + "p50": 336.35199069976807, + "p90": 337.40800619125366, + "p95": 338.3359909057617, + "p99": 350.6239950656891 + }, + "roundtrip": { + "p50": 530.4319858551025, + "p90": 538.4640097618103, + "p95": 542.9440140724182, + "p99": 586.080014705658 + }, + "isolatedSum": { + "p50": 554.079994559288, + "p90": 560.5120062828064, + "p95": 566.2719905376434, + "p99": 602.8159856796265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 368.99200081825256, + "p90": 377.75999307632446, + "p95": 380.47999143600464, + "p99": 419.6479916572571 + }, + "combine": { + "p50": 580.4479718208313, + "p90": 582.144021987915, + "p95": 583.6799740791321, + "p99": 622.7520108222961 + }, + "roundtrip": { + "p50": 940.0960206985474, + "p90": 946.1119771003723, + "p95": 960.5759978294373, + "p99": 1016.1279439926147 + }, + "isolatedSum": { + "p50": 949.4399726390839, + "p90": 959.9040150642395, + "p95": 964.1599655151367, + "p99": 1042.4000024795532 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 690.3679966926575, + "p90": 
696.1600184440613, + "p95": 707.3280215263367, + "p99": 742.2720193862915 + }, + "combine": { + "p50": 1085.6640338897705, + "p90": 1088.8320207595825, + "p95": 1098.3680486679077, + "p99": 1186.4960193634033 + }, + "roundtrip": { + "p50": 1755.0079822540283, + "p90": 1765.3759717941284, + "p95": 1778.4960269927979, + "p99": 2051.2959957122803 + }, + "isolatedSum": { + "p50": 1776.032030582428, + "p90": 1784.9920392036438, + "p95": 1805.6960701942444, + "p99": 1928.7680387496948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1325.3120183944702, + "p90": 1339.8720026016235, + "p95": 1353.6959886550903, + "p99": 1402.3040533065796 + }, + "combine": { + "p50": 2080.9600353240967, + "p90": 2083.712100982666, + "p95": 2106.9440841674805, + "p99": 2488.192081451416 + }, + "roundtrip": { + "p50": 3388.159990310669, + "p90": 3405.247926712036, + "p95": 3426.8479347229004, + "p99": 3662.3361110687256 + }, + "isolatedSum": { + "p50": 3406.272053718567, + "p90": 3423.5841035842896, + "p95": 3460.640072822571, + "p99": 3890.4961347579956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02cc4182", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b300_1a43c4c5", + "comparisonKey": "4aef435c6117b9c5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:45.850922+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.551997423172, + "p90": 98.33600372076035, + "p95": 100.19200295209885, + "p99": 110.72000116109848 + }, + "combine": { + "p50": 115.52000045776367, + "p90": 116.99199676513672, + "p95": 
118.14399808645248, + "p99": 128.12800705432892 + }, + "roundtrip": { + "p50": 194.7840005159378, + "p90": 200.06400346755981, + "p95": 203.77600193023682, + "p99": 233.2800030708313 + }, + "isolatedSum": { + "p50": 211.07199788093567, + "p90": 215.32800048589706, + "p95": 218.33600103855133, + "p99": 238.8480082154274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.7359974384308, + "p90": 139.90400731563568, + "p95": 140.73599874973297, + "p99": 176.03200674057007 + }, + "combine": { + "p50": 155.5200070142746, + "p90": 164.51199352741241, + "p95": 165.02399742603302, + "p99": 200.51200687885284 + }, + "roundtrip": { + "p50": 273.9520072937012, + "p90": 280.8319926261902, + "p95": 284.2240035533905, + "p99": 306.5600097179413 + }, + "isolatedSum": { + "p50": 288.2560044527054, + "p90": 304.4160008430481, + "p95": 305.759996175766, + "p99": 376.5440136194229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.5279985666275, + "p90": 201.47199928760529, + "p95": 203.71200144290924, + "p99": 230.14399409294128 + }, + "combine": { + "p50": 265.8880054950714, + "p90": 274.78399872779846, + "p95": 275.2000093460083, + "p99": 290.75199365615845 + }, + "roundtrip": { + "p50": 441.6640102863312, + "p90": 449.5680034160614, + "p95": 454.912006855011, + "p99": 484.47999358177185 + }, + "isolatedSum": { + "p50": 460.4160040616989, + "p90": 476.25599801540375, + "p95": 478.91201078891754, + "p99": 520.8959877490997 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.61600613594055, + "p90": 331.03999495506287, + "p95": 333.0880105495453, + "p99": 383.39200615882874 + }, + "combine": { + "p50": 463.03999423980713, + "p90": 471.5839922428131, + "p95": 475.16798973083496, + "p99": 524.3840217590332 + }, + "roundtrip": { + "p50": 771.776020526886, + "p90": 779.3279886245728, + "p95": 786.2399816513062, + "p99": 801.3439774513245 + }, + "isolatedSum": { + "p50": 790.6560003757477, + "p90": 802.623987197876, + "p95": 808.2560002803802, + "p99": 907.7760279178619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.8479881286621, + "p90": 579.8400044441223, + "p95": 584.4799876213074, + "p99": 654.2720198631287 + }, + "combine": { + "p50": 815.455973148346, + "p90": 827.5840282440186, + "p95": 830.3679823875427, + "p99": 878.2079815864563 + }, + "roundtrip": { + "p50": 1369.920015335083, + "p90": 1381.8880319595337, + "p95": 1392.3200368881226, + "p99": 1443.4560537338257 + }, + "isolatedSum": { + "p50": 1386.303961277008, + "p90": 1407.4240326881409, + "p95": 1414.84797000885, + "p99": 1532.480001449585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1065.343976020813, + "p90": 1072.543978691101, + "p95": 1081.6320180892944, + "p99": 1172.6399660110474 + }, + "combine": { + "p50": 1527.5839567184448, + "p90": 1541.2800312042236, + "p95": 1553.3440113067627, + "p99": 3285.0239276885986 + }, + "roundtrip": { + "p50": 2570.6560611724854, + "p90": 2593.600034713745, + "p95": 2606.0800552368164, + "p99": 2862.4320030212402 + }, + "isolatedSum": { + "p50": 2592.927932739258, + "p90": 2613.8240098953247, + "p95": 2634.976029396057, + "p99": 4457.663893699646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ffbb8f33", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b300_35838e41", + "comparisonKey": "78bf5e9428ebaf0d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:40.319614+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + 
"configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.00000083446503, + "p90": 100.99200159311295, + "p95": 102.36799716949463, + "p99": 107.90400207042694 + }, + "combine": { + "p50": 115.77600240707397, + "p90": 117.98399686813354, + "p95": 120.15999853610992, + "p99": 142.33599603176117 + }, + "roundtrip": { + "p50": 195.23200392723083, + "p90": 201.47199928760529, + "p95": 202.4960070848465, + "p99": 209.72800254821777 + }, + "isolatedSum": { + "p50": 211.776003241539, + "p90": 218.9759984612465, + "p95": 222.52799570560455, + "p99": 250.2399981021881 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.85600662231445, + "p90": 140.70400595664978, + "p95": 142.81600713729858, + "p99": 154.7199934720993 + }, + "combine": { + "p50": 155.10399639606476, + "p90": 164.38399255275726, + "p95": 164.86400365829468, 
+ "p99": 176.35199427604675 + }, + "roundtrip": { + "p50": 273.0239927768707, + "p90": 279.776006937027, + "p95": 283.9680016040802, + "p99": 306.17600679397583 + }, + "isolatedSum": { + "p50": 292.9600030183792, + "p90": 305.08799850940704, + "p95": 307.68001079559326, + "p99": 331.07198774814606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 196.03200256824493, + "p90": 204.51200008392334, + "p95": 205.9520035982132, + "p99": 220.86399793624878 + }, + "combine": { + "p50": 274.1760015487671, + "p90": 276.1920094490051, + "p95": 277.69601345062256, + "p99": 290.6560003757477 + }, + "roundtrip": { + "p50": 442.01600551605225, + "p90": 449.47201013565063, + "p95": 451.9360065460205, + "p99": 471.5839922428131 + }, + "isolatedSum": { + "p50": 470.208004117012, + "p90": 480.70400953292847, + "p95": 483.64801704883575, + "p99": 511.51999831199646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 328.2240033149719, + "p90": 332.12798833847046, + "p95": 341.2800133228302, + "p99": 435.16799807548523 + }, + "combine": { + "p50": 460.7999920845032, + "p90": 471.0719883441925, + "p95": 473.9840030670166, + "p99": 495.2960014343262 + }, + "roundtrip": { + "p50": 768.8320279121399, + "p90": 777.8880000114441, + "p95": 785.3440046310425, + "p99": 867.1360015869141 + }, + "isolatedSum": { + "p50": 789.0239953994751, + "p90": 803.199976682663, + "p95": 815.2640163898468, + "p99": 930.4639995098114 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 569.7280168533325, + "p90": 574.1119980812073, + "p95": 581.6320180892944, + "p99": 659.2959761619568 + }, + "combine": { + "p50": 814.7839903831482, + "p90": 818.8160061836243, + "p95": 828.3839821815491, + "p99": 876.5119910240173 + }, + "roundtrip": { + "p50": 1362.0799779891968, + "p90": 1373.5359907150269, + "p95": 1381.216049194336, + "p99": 1405.8560132980347 + }, + "isolatedSum": { + "p50": 1384.5120072364807, + "p90": 1392.9280042648315, + "p95": 1410.0160002708435, + "p99": 1535.8079671859741 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1064.6400451660156, + "p90": 1071.071982383728, + "p95": 1077.9199600219727, + "p99": 1131.6479444503784 + }, + "combine": { + "p50": 1517.4399614334106, + "p90": 1529.1520357131958, + "p95": 1540.0960445404053, + "p99": 1568.4800148010254 + }, + "roundtrip": { + "p50": 2568.511962890625, + "p90": 2583.5840702056885, + "p95": 2592.0639038085938, + "p99": 2614.2399311065674 + }, + "isolatedSum": { + "p50": 2582.0800065994263, + "p90": 2600.224018096924, + "p95": 2618.016004562378, + "p99": 2700.127959251404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db1cf5ac", + "identity": 
"b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_a17fe97c", + "comparisonKey": "2bf3f89e645f682d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:55.820271+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": 
[ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.29599678516388, + "p90": 106.6880002617836, + "p95": 110.91200262308121, + "p99": 115.26399850845337 + }, + "combine": { + "p50": 126.88000500202179, + "p90": 127.96799838542938, + "p95": 128.4160017967224, + "p99": 130.72000443935394 + }, + "roundtrip": { + "p50": 210.55999398231506, + "p90": 215.13600647449493, + "p95": 216.51199460029602, + "p99": 224.5119959115982 + }, + "isolatedSum": { + "p50": 230.17600178718567, + "p90": 234.65599864721298, + "p95": 239.32800441980362, + "p99": 245.9840029478073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.68800485134125, + "p90": 140.1280015707016, + "p95": 141.7279988527298, + "p99": 164.44799304008484 + }, + "combine": { + "p50": 177.12000012397766, + "p90": 180.00000715255737, + "p95": 187.55200505256653, + "p99": 206.84799551963806 + }, + "roundtrip": { + "p50": 298.7839877605438, + "p90": 304.51199412345886, + "p95": 307.8719973564148, + "p99": 336.95998787879944 + }, + "isolatedSum": { + "p50": 311.8080049753189, + "p90": 320.128008723259, + "p95": 329.2800039052963, + "p99": 371.2959885597229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.65599405765533, + "p90": 212.67199516296387, + "p95": 213.85599672794342, + "p99": 233.50399732589722 + }, + "combine": { + "p50": 326.1440098285675, + "p90": 335.87199449539185, + "p95": 336.38399839401245, + "p99": 348.32000732421875 + }, + 
"roundtrip": { + "p50": 509.0559720993042, + "p90": 515.2959823608398, + "p95": 518.6880230903625, + "p99": 535.1679921150208 + }, + "isolatedSum": { + "p50": 532.8000038862228, + "p90": 548.5439896583557, + "p95": 550.2399951219559, + "p99": 581.824004650116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 349.69601035118103, + "p90": 353.95199060440063, + "p95": 355.1360070705414, + "p99": 376.6399919986725 + }, + "combine": { + "p50": 585.1200222969055, + "p90": 594.048023223877, + "p95": 594.4960117340088, + "p99": 610.2719902992249 + }, + "roundtrip": { + "p50": 913.6319756507874, + "p90": 921.3759899139404, + "p95": 928.5759925842285, + "p99": 1004.5759677886963 + }, + "isolatedSum": { + "p50": 934.8160326480865, + "p90": 948.0000138282776, + "p95": 949.6320188045502, + "p99": 986.9119822978973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 648.576021194458, + "p90": 659.1039896011353, + "p95": 670.9439754486084, + "p99": 1028.9920568466187 + }, + "combine": { + "p50": 1063.904047012329, + "p90": 1073.7919807434082, + "p95": 1086.9120359420776, + "p99": 1209.6960544586182 + }, + "roundtrip": { + "p50": 1699.936032295227, + "p90": 1709.280014038086, + "p95": 1715.775966644287, + "p99": 1849.6960401535034 + }, + "isolatedSum": { + "p50": 1712.480068206787, + "p90": 1732.8959703445435, + "p95": 1757.856011390686, + "p99": 2238.688111305237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + 
"combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1266.7839527130127, + "p90": 1278.0799865722656, + "p95": 1281.8559408187866, + "p99": 1311.5839958190918 + }, + "combine": { + "p50": 2044.640064239502, + "p90": 2054.879903793335, + "p95": 2068.25590133667, + "p99": 2143.264055252075 + }, + "roundtrip": { + "p50": 3303.48801612854, + "p90": 3318.4640407562256, + "p95": 3328.416109085083, + "p99": 3583.456039428711 + }, + "isolatedSum": { + "p50": 3311.4240169525146, + "p90": 3332.9598903656006, + "p95": 3350.1118421554565, + "p99": 3454.848051071167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-66dafeee", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b300_a31a9c8e", + "comparisonKey": "0c17b62c056da1e8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:51.927926+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.19200164079666, + "p90": 94.65599805116653, + "p95": 96.25600278377533, + "p99": 110.97600311040878 + }, + "combine": { + "p50": 117.18399822711945, + "p90": 126.62400305271149, + "p95": 127.3919939994812, + "p99": 191.20000302791595 + }, + "roundtrip": { + "p50": 196.57599925994873, + "p90": 204.6079933643341, + "p95": 206.81600272655487, + "p99": 245.69599330425262 + }, + "isolatedSum": { + "p50": 209.3759998679161, + "p90": 221.28000110387802, + "p95": 223.64799678325653, + "p99": 302.17600613832474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
127.07200646400452, + "p90": 130.3039938211441, + "p95": 134.91199910640717, + "p99": 150.91200172901154 + }, + "combine": { + "p50": 175.4239946603775, + "p90": 177.05599963665009, + "p95": 177.37600207328796, + "p99": 190.49599766731262 + }, + "roundtrip": { + "p50": 283.58399868011475, + "p90": 288.7679934501648, + "p95": 290.8479869365692, + "p99": 304.80000376701355 + }, + "isolatedSum": { + "p50": 302.496001124382, + "p90": 307.3599934577942, + "p95": 312.28800117969513, + "p99": 341.40799939632416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 180.28800189495087, + "p90": 188.31999599933624, + "p95": 190.3039962053299, + "p99": 214.01600539684296 + }, + "combine": { + "p50": 311.5839958190918, + "p90": 313.6320114135742, + "p95": 322.81601428985596, + "p99": 337.18401193618774 + }, + "roundtrip": { + "p50": 481.7279875278473, + "p90": 488.22399973869324, + "p95": 493.8240051269531, + "p99": 551.4240264892578 + }, + "isolatedSum": { + "p50": 491.87199771404266, + "p90": 501.95200741291046, + "p95": 513.1200104951859, + "p99": 551.2000173330307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 310.40000915527344, + "p90": 318.6880052089691, + "p95": 328.6080062389374, + "p99": 338.7199938297272 + }, + "combine": { + "p50": 584.2239856719971, + "p90": 594.1759943962097, + "p95": 594.8160290718079, + "p99": 633.3760023117065 + }, + "roundtrip": { + "p50": 888.9279961585999, + "p90": 894.976019859314, + "p95": 899.2000222206116, 
+ "p99": 1010.4000568389893 + }, + "isolatedSum": { + "p50": 894.6239948272705, + "p90": 912.8639996051788, + "p95": 923.4240353107452, + "p99": 972.0959961414337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 580.7999968528748, + "p90": 592.2560095787048, + "p95": 598.1119871139526, + "p99": 618.4639930725098 + }, + "combine": { + "p50": 1109.1519594192505, + "p90": 1114.3039464950562, + "p95": 1137.760043144226, + "p99": 1319.5840120315552 + }, + "roundtrip": { + "p50": 1617.535948753357, + "p90": 1629.8880577087402, + "p95": 1639.0719413757324, + "p99": 1816.3199424743652 + }, + "isolatedSum": { + "p50": 1689.9519562721252, + "p90": 1706.559956073761, + "p95": 1735.8720302581787, + "p99": 1938.048005104065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1134.2400312423706, + "p90": 1146.2719440460205, + "p95": 1153.3440351486206, + "p99": 1224.9599695205688 + }, + "combine": { + "p50": 2069.888114929199, + "p90": 2081.376075744629, + "p95": 2130.431890487671, + "p99": 2415.7440662384033 + }, + "roundtrip": { + "p50": 3149.280071258545, + "p90": 3173.856019973755, + "p95": 3216.223955154419, + "p99": 3654.7839641571045 + }, + "isolatedSum": { + "p50": 3204.12814617157, + "p90": 3227.6480197906494, + "p95": 3283.7759256362915, + "p99": 3640.704035758972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 
32768, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d8db6921", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b300_714eb7c0", + "comparisonKey": "7ae31e320dbdb831", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:52.666735+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + 
"run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.41599655151367, + "p90": 100.19200295209885, + "p95": 103.32799702882767, + "p99": 159.55199301242828 + }, + "combine": { + "p50": 115.74400216341019, + "p90": 117.08799749612808, + "p95": 118.46400052309036, + "p99": 140.54399728775024 + }, + "roundtrip": { + "p50": 195.16800343990326, + "p90": 201.4079988002777, + "p95": 205.05599677562714, + "p99": 236.67199909687042 + }, + "isolatedSum": { + "p50": 212.15999871492386, + "p90": 217.28000044822693, + "p95": 221.79199755191803, + "p99": 300.0959903001785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.15200126171112, + "p90": 141.05600118637085, + "p95": 143.93599331378937, + "p99": 158.75199437141418 + }, + "combine": { + "p50": 155.61600029468536, + "p90": 164.57599401474, + "p95": 165.3759926557541, + "p99": 186.36800348758698 + }, + "roundtrip": { + "p50": 273.3440101146698, + "p90": 279.9359858036041, + "p95": 283.9359939098358, + "p99": 300.79999566078186 + }, + "isolatedSum": { + "p50": 292.7680015563965, + "p90": 305.63199520111084, + "p95": 309.31198596954346, + "p99": 345.11999785900116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.30400431156158, + "p90": 
200.70399343967438, + "p95": 204.73599433898926, + "p99": 272.6080119609833 + }, + "combine": { + "p50": 274.944007396698, + "p90": 278.1760096549988, + "p95": 285.66399216651917, + "p99": 324.5759904384613 + }, + "roundtrip": { + "p50": 448.15999269485474, + "p90": 454.71999049186707, + "p95": 463.80800008773804, + "p99": 521.888017654419 + }, + "isolatedSum": { + "p50": 469.2480117082596, + "p90": 478.88000309467316, + "p95": 490.3999865055084, + "p99": 597.1840023994446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 328.031986951828, + "p90": 332.92800188064575, + "p95": 339.9040102958679, + "p99": 408.70401263237 + }, + "combine": { + "p50": 470.7520008087158, + "p90": 474.5280146598816, + "p95": 483.0079972743988, + "p99": 508.60798358917236 + }, + "roundtrip": { + "p50": 775.3919959068298, + "p90": 786.2719893455505, + "p95": 788.4799838066101, + "p99": 837.9520177841187 + }, + "isolatedSum": { + "p50": 798.7839877605438, + "p90": 807.4560165405273, + "p95": 822.9120075702667, + "p99": 917.3119962215424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 583.6799740791321, + "p90": 594.5919752120972, + "p95": 618.8799738883972, + "p99": 775.6800055503845 + }, + "combine": { + "p50": 839.0719890594482, + "p90": 841.920018196106, + "p95": 852.6719808578491, + "p99": 913.3440256118774 + }, + "roundtrip": { + "p50": 1398.368000984192, + "p90": 1408.6400270462036, + "p95": 1421.9199419021606, + "p99": 1563.0079507827759 + 
}, + "isolatedSum": { + "p50": 1422.7519631385803, + "p90": 1436.5119934082031, + "p95": 1471.5519547462463, + "p99": 1689.024031162262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1089.8239612579346, + "p90": 1098.3999967575073, + "p95": 1106.9120168685913, + "p99": 1163.0079746246338 + }, + "combine": { + "p50": 1576.7680406570435, + "p90": 1580.7360410690308, + "p95": 1590.432047843933, + "p99": 1713.1839990615845 + }, + "roundtrip": { + "p50": 2643.7759399414062, + "p90": 2656.320095062256, + "p95": 2666.6879653930664, + "p99": 2890.4318809509277 + }, + "isolatedSum": { + "p50": 2666.592001914978, + "p90": 2679.136037826538, + "p95": 2697.3440647125244, + "p99": 2876.1919736862183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-58204192", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b300_0209cdf1", + "comparisonKey": "c4bbb3e486d89a96", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:40.461104+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 
· deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.8320010304451, + "p90": 107.64800012111664, + "p95": 109.11999642848969, + "p99": 138.7840062379837 + }, + "combine": { + "p50": 127.36000120639801, + "p90": 128.4479945898056, + "p95": 129.37599420547485, + "p99": 139.3599957227707 + }, + "roundtrip": { + "p50": 208.19200575351715, + "p90": 216.22399985790253, + "p95": 218.46400201320648, + "p99": 235.45600473880768 + }, + "isolatedSum": { + "p50": 232.1920022368431, + "p90": 236.09599471092224, + "p95": 238.49599063396454, + "p99": 278.1440019607544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, 
+ "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 141.9840008020401, + "p90": 144.70399916172028, + "p95": 147.67999947071075, + "p99": 167.10400581359863 + }, + "combine": { + "p50": 178.27199399471283, + "p90": 188.28800320625305, + "p95": 188.83199989795685, + "p99": 200.9280025959015 + }, + "roundtrip": { + "p50": 307.48799443244934, + "p90": 314.7520124912262, + "p95": 316.6399896144867, + "p99": 338.27200531959534 + }, + "isolatedSum": { + "p50": 320.25599479675293, + "p90": 332.9920023679733, + "p95": 336.5119993686676, + "p99": 368.0320084095001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 215.7440036535263, + "p90": 223.29600155353546, + "p95": 226.46400332450867, + "p99": 266.62400364875793 + }, + "combine": { + "p50": 327.61600613594055, + "p90": 336.2239897251129, + "p95": 337.0879888534546, + "p99": 397.40800857543945 + }, + "roundtrip": { + "p50": 523.9999890327454, + "p90": 532.7360033988953, + "p95": 534.8479747772217, + "p99": 557.8879714012146 + }, + "isolatedSum": { + "p50": 543.3600097894669, + "p90": 559.5199912786484, + "p95": 563.5519921779633, + "p99": 664.0320122241974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 357.5040102005005, + "p90": 367.23199486732483, + "p95": 379.96798753738403, + "p99": 423.20001125335693 + }, + 
"combine": { + "p50": 570.1119899749756, + "p90": 580.9280276298523, + "p95": 582.2719931602478, + "p99": 643.9679861068726 + }, + "roundtrip": { + "p50": 923.2959747314453, + "p90": 931.7759871482849, + "p95": 939.520001411438, + "p99": 979.9360036849976 + }, + "isolatedSum": { + "p50": 927.6160001754761, + "p90": 948.1600224971771, + "p95": 962.2399806976318, + "p99": 1067.1679973602295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 665.4080152511597, + "p90": 672.7359890937805, + "p95": 690.8159852027893, + "p99": 767.8719758987427 + }, + "combine": { + "p50": 1049.1199493408203, + "p90": 1060.6720447540283, + "p95": 1062.656044960022, + "p99": 1249.0559816360474 + }, + "roundtrip": { + "p50": 1694.7200298309326, + "p90": 1704.7040462493896, + "p95": 1719.8400497436523, + "p99": 1830.1119804382324 + }, + "isolatedSum": { + "p50": 1714.52796459198, + "p90": 1733.4080338478088, + "p95": 1753.4720301628113, + "p99": 2016.92795753479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1290.2400493621826, + "p90": 1304.479956626892, + "p95": 1313.5679960250854, + "p99": 1377.72798538208 + }, + "combine": { + "p50": 2019.1359519958496, + "p90": 2023.5199928283691, + "p95": 2034.3999862670898, + "p99": 2142.911911010742 + }, + "roundtrip": { + "p50": 3303.8079738616943, + "p90": 3318.1440830230713, + "p95": 3331.360101699829, + "p99": 3400.0320434570312 + }, + "isolatedSum": { + "p50": 3309.376001358032, + "p90": 
3327.9999494552612, + "p95": 3347.9679822921753, + "p99": 3520.6398963928223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9b09f1d2", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b300_d24e13bd", + "comparisonKey": "6ee0c4630fcbe50e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:10.381815+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 
2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.67999839782715, + "p90": 99.07200187444687, + "p95": 101.50399804115295, + "p99": 135.3600025177002 + }, + "combine": { + "p50": 115.68000167608261, + "p90": 117.76000261306763, + "p95": 120.06399780511856, + "p99": 151.296004652977 + }, + "roundtrip": { + "p50": 194.0159946680069, + "p90": 198.7839937210083, + "p95": 202.7519941329956, + "p99": 225.40800273418427 + }, + "isolatedSum": { + "p50": 211.36000007390976, + "p90": 216.8320044875145, + "p95": 221.56799584627151, + "p99": 286.6560071706772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.85599851608276, + "p90": 140.4159963130951, + "p95": 143.5520052909851, + "p99": 189.63199853897095 + }, + "combine": { + "p50": 156.76799416542053, + "p90": 164.76799547672272, + "p95": 165.92000424861908, + "p99": 168.41599345207214 + }, + "roundtrip": { + "p50": 274.2080092430115, + "p90": 281.0240089893341, + "p95": 284.5439910888672, + "p99": 302.4959862232208 + }, + "isolatedSum": { + "p50": 290.6239926815033, + "p90": 305.1839917898178, + "p95": 309.4720095396042, + "p99": 358.0479919910431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, 
+ "recvTokensMax": 1393, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.14399564266205, + "p90": 199.61600005626678, + "p95": 204.76800203323364, + "p99": 266.55998826026917 + }, + "combine": { + "p50": 265.1520073413849, + "p90": 274.4959890842438, + "p95": 275.29600262641907, + "p99": 298.68799448013306 + }, + "roundtrip": { + "p50": 443.0080056190491, + "p90": 448.5119879245758, + "p95": 450.75199007987976, + "p99": 469.760000705719 + }, + "isolatedSum": { + "p50": 459.29600298404694, + "p90": 474.11198914051056, + "p95": 480.0640046596527, + "p99": 565.2479827404022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 328.3520042896271, + "p90": 331.2639892101288, + "p95": 332.5760066509247, + "p99": 346.97601199150085 + }, + "combine": { + "p50": 461.3119959831238, + "p90": 471.42401337623596, + "p95": 472.7039933204651, + "p99": 499.83999133110046 + }, + "roundtrip": { + "p50": 771.4880108833313, + "p90": 780.3199887275696, + "p95": 788.1600260734558, + "p99": 934.8480105400085 + }, + "isolatedSum": { + "p50": 789.6640002727509, + "p90": 802.6880025863647, + "p95": 805.2799999713898, + "p99": 846.8160033226013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.5599784851074, + "p90": 580.2559852600098, + "p95": 587.5200033187866, + "p99": 617.7279949188232 + }, + "combine": { + "p50": 816.7679905891418, + 
"p90": 828.1919956207275, + "p95": 838.9120101928711, + "p99": 888.8319730758667 + }, + "roundtrip": { + "p50": 1373.3439445495605, + "p90": 1384.3519687652588, + "p95": 1394.3359851837158, + "p99": 1521.2479829788208 + }, + "isolatedSum": { + "p50": 1387.3279690742493, + "p90": 1408.4479808807373, + "p95": 1426.4320135116577, + "p99": 1506.55996799469 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1070.4959630966187, + "p90": 1082.3040008544922, + "p95": 1096.2560176849365, + "p99": 1183.3280324935913 + }, + "combine": { + "p50": 1531.615972518921, + "p90": 1546.3680028915405, + "p95": 1558.9439868927002, + "p99": 1665.4080152511597 + }, + "roundtrip": { + "p50": 2585.6640338897705, + "p90": 2600.99196434021, + "p95": 2609.8239421844482, + "p99": 2674.175977706909 + }, + "isolatedSum": { + "p50": 2602.1119356155396, + "p90": 2628.6720037460327, + "p95": 2655.2000045776367, + "p99": 2848.736047744751 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1c9b2e8", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_57af4dde", + "comparisonKey": "72375bad994cdecf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:28.238165+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.12800312042236, + "p90": 124.57600235939026, + "p95": 130.43199479579926, + "p99": 187.26399540901184 + }, + "combine": { + "p50": 127.07200646400452, + "p90": 128.1599998474121, + "p95": 128.48000228405, + "p99": 142.56000518798828 + }, + "roundtrip": { + "p50": 209.88799631595612, + "p90": 214.84799683094025, + "p95": 217.69599616527557, + "p99": 240.48000574111938 + 
}, + "isolatedSum": { + "p50": 231.20000958442688, + "p90": 252.73600220680237, + "p95": 258.91199707984924, + "p99": 329.8240005970001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 136.9280070066452, + "p90": 151.71200037002563, + "p95": 160.12799739837646, + "p99": 219.90400552749634 + }, + "combine": { + "p50": 177.279993891716, + "p90": 180.51199615001678, + "p95": 188.31999599933624, + "p99": 201.6959935426712 + }, + "roundtrip": { + "p50": 298.40001463890076, + "p90": 304.86398935317993, + "p95": 308.51200222969055, + "p99": 355.55198788642883 + }, + "isolatedSum": { + "p50": 314.2080008983612, + "p90": 332.2239965200424, + "p95": 348.4479933977127, + "p99": 421.59999907016754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.05599677562714, + "p90": 214.27200734615326, + "p95": 220.99199891090393, + "p99": 257.4079930782318 + }, + "combine": { + "p50": 326.1120021343231, + "p90": 335.61599254608154, + "p95": 336.2880051136017, + "p99": 360.79999804496765 + }, + "roundtrip": { + "p50": 510.1439952850342, + "p90": 516.864001750946, + "p95": 520.2879905700684, + "p99": 543.4560179710388 + }, + "isolatedSum": { + "p50": 531.1679989099503, + "p90": 549.8879998922348, + "p95": 557.2800040245056, + "p99": 618.2079911231995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 350.40000081062317, + "p90": 354.6240031719208, + "p95": 357.60000348091125, + "p99": 399.29598569869995 + }, + "combine": { + "p50": 584.8320126533508, + "p90": 594.0160155296326, + "p95": 594.5919752120972, + "p99": 669.2799925804138 + }, + "roundtrip": { + "p50": 913.1199717521667, + "p90": 920.6719994544983, + "p95": 928.4800291061401, + "p99": 1000.991940498352 + }, + "isolatedSum": { + "p50": 935.232013463974, + "p90": 948.6400187015533, + "p95": 952.1919786930084, + "p99": 1068.5759782791138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 647.7439999580383, + "p90": 659.8719954490662, + "p95": 671.1999773979187, + "p99": 779.583990573883 + }, + "combine": { + "p50": 1063.904047012329, + "p90": 1073.6000537872314, + "p95": 1086.0799551010132, + "p99": 1161.1839532852173 + }, + "roundtrip": { + "p50": 1698.9760398864746, + "p90": 1708.5440158843994, + "p95": 1715.8399820327759, + "p99": 1924.1280555725098 + }, + "isolatedSum": { + "p50": 1711.6480469703674, + "p90": 1733.4720492362976, + "p95": 1757.2799324989319, + "p99": 1940.7679438591003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1265.8239603042603, + "p90": 1278.1120538711548, + "p95": 1286.5279912948608, + "p99": 1324.9280452728271 + }, + "combine": { + "p50": 2044.8319911956787, + "p90": 2056.191921234131, + "p95": 
2070.3680515289307, + "p99": 2168.735980987549 + }, + "roundtrip": { + "p50": 3302.687883377075, + "p90": 3324.415922164917, + "p95": 3360.447883605957, + "p99": 3500.8959770202637 + }, + "isolatedSum": { + "p50": 3310.655951499939, + "p90": 3334.3039751052856, + "p95": 3356.8960428237915, + "p99": 3493.664026260376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2cd4cfa2", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_389b1c50", + "comparisonKey": "4396b06c77bd5ed3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:58.887790+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": 
false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.77599912881851, + "p90": 101.34399682283401, + "p95": 106.23999685049057, + "p99": 160.3199988603592 + }, + "combine": { + "p50": 115.7120019197464, + "p90": 117.72800236940384, + "p95": 120.44800072908401, + "p99": 156.47999942302704 + }, + "roundtrip": { + "p50": 194.94399428367615, + "p90": 205.63200116157532, + "p95": 213.1199985742569, + "p99": 230.68800568580627 + }, + "isolatedSum": { + "p50": 211.4880010485649, + "p90": 219.07199919223785, + "p95": 226.68799757957458, + "p99": 316.79999828338623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.85600662231445, + "p90": 140.73599874973297, + "p95": 142.4960047006607, + "p99": 155.35999834537506 + }, + "combine": { + "p50": 155.20000457763672, + "p90": 164.38399255275726, + "p95": 165.50399363040924, + "p99": 180.2240014076233 + }, + "roundtrip": { + "p50": 273.98398518562317, + "p90": 280.70399165153503, + "p95": 286.52799129486084, + "p99": 307.42400884628296 + }, 
+ "isolatedSum": { + "p50": 293.0560111999512, + "p90": 305.11999130249023, + "p95": 307.99999833106995, + "p99": 335.58399975299835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.04800236225128, + "p90": 201.1519968509674, + "p95": 206.33600652217865, + "p99": 253.1839907169342 + }, + "combine": { + "p50": 265.9839987754822, + "p90": 274.9119997024536, + "p95": 275.9360074996948, + "p99": 349.95201230049133 + }, + "roundtrip": { + "p50": 446.0799992084503, + "p90": 455.80801367759705, + "p95": 468.2239890098572, + "p99": 596.9600081443787 + }, + "isolatedSum": { + "p50": 460.03200113773346, + "p90": 476.063996553421, + "p95": 482.2720140218735, + "p99": 603.1360030174255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 323.0400085449219, + "p90": 330.30399680137634, + "p95": 332.41599798202515, + "p99": 374.30399656295776 + }, + "combine": { + "p50": 457.43998885154724, + "p90": 460.54399013519287, + "p95": 471.1039960384369, + "p99": 544.8319911956787 + }, + "roundtrip": { + "p50": 761.5039944648743, + "p90": 773.1519937515259, + "p95": 785.3760123252869, + "p99": 892.1599984169006 + }, + "isolatedSum": { + "p50": 780.4799973964691, + "p90": 790.8479869365692, + "p95": 803.519994020462, + "p99": 919.1359877586365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 573.0239748954773, + "p90": 582.5279951095581, + "p95": 587.0720148086548, + "p99": 641.4080262184143 + }, + "combine": { + "p50": 830.8159708976746, + "p90": 840.2559757232666, + "p95": 841.4720296859741, + "p99": 876.8960237503052 + }, + "roundtrip": { + "p50": 1390.336036682129, + "p90": 1398.9440202713013, + "p95": 1406.1119556427002, + "p99": 1486.8799448013306 + }, + "isolatedSum": { + "p50": 1403.8399457931519, + "p90": 1422.7839708328247, + "p95": 1428.544044494629, + "p99": 1518.3040499687195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1066.5919780731201, + "p90": 1082.5599431991577, + "p95": 1110.8160018920898, + "p99": 1285.9840393066406 + }, + "combine": { + "p50": 1539.680004119873, + "p90": 1545.151948928833, + "p95": 1553.920030593872, + "p99": 1790.2079820632935 + }, + "roundtrip": { + "p50": 2586.1120223999023, + "p90": 2601.088047027588, + "p95": 2609.4400882720947, + "p99": 2812.671899795532 + }, + "isolatedSum": { + "p50": 2606.271982192993, + "p90": 2627.7118921279907, + "p95": 2664.736032485962, + "p99": 3076.192021369934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c8fe7204", + "identity": "b300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_b6ab71d2", + "comparisonKey": "af4f4064fbb794a5", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T09:24:22.454526+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.87199985980988, + "p90": 101.02400183677673, + "p95": 105.56799918413162, + "p99": 
162.59199380874634 + }, + "combine": { + "p50": 115.61600118875504, + "p90": 117.3119992017746, + "p95": 118.68800222873688, + "p99": 131.6159963607788 + }, + "roundtrip": { + "p50": 194.11200284957886, + "p90": 200.8640021085739, + "p95": 203.67999374866486, + "p99": 225.3119945526123 + }, + "isolatedSum": { + "p50": 211.4880010485649, + "p90": 218.33600103855133, + "p95": 224.2560014128685, + "p99": 294.20799016952515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.92000710964203, + "p90": 142.0159935951233, + "p95": 147.35999703407288, + "p99": 230.335995554924 + }, + "combine": { + "p50": 155.16799688339233, + "p90": 164.19200599193573, + "p95": 164.57599401474, + "p99": 177.21599340438843 + }, + "roundtrip": { + "p50": 273.53599667549133, + "p90": 279.7119915485382, + "p95": 285.6000065803528, + "p99": 323.10399413108826 + }, + "isolatedSum": { + "p50": 293.08800399303436, + "p90": 306.207999587059, + "p95": 311.93599104881287, + "p99": 407.55198895931244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.82399320602417, + "p90": 206.33600652217865, + "p95": 218.27200055122375, + "p99": 281.15200996398926 + }, + "combine": { + "p50": 266.4639949798584, + "p90": 274.7519910335541, + "p95": 275.7120132446289, + "p99": 297.63200879096985 + }, + "roundtrip": { + "p50": 445.1200067996979, + "p90": 450.080007314682, + "p95": 457.40801095962524, + "p99": 521.9519734382629 + }, + "isolatedSum": { + "p50": 460.28798818588257, + 
"p90": 481.0879975557327, + "p95": 493.98401379585266, + "p99": 578.7840187549591 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 323.96799325942993, + "p90": 330.9119939804077, + "p95": 336.92800998687744, + "p99": 404.35200929641724 + }, + "combine": { + "p50": 457.88800716400146, + "p90": 459.83999967575073, + "p95": 462.7839922904968, + "p99": 499.90400671958923 + }, + "roundtrip": { + "p50": 761.7599964141846, + "p90": 774.1760015487671, + "p95": 783.3920121192932, + "p99": 944.8639750480652 + }, + "isolatedSum": { + "p50": 781.8560004234314, + "p90": 790.7519936561584, + "p95": 799.7120022773743, + "p99": 904.2560160160065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 575.6800174713135, + "p90": 584.9279761314392, + "p95": 599.776029586792, + "p99": 739.0400171279907 + }, + "combine": { + "p50": 830.3999900817871, + "p90": 840.4480218887329, + "p95": 842.7839875221252, + "p99": 934.3360066413879 + }, + "roundtrip": { + "p50": 1390.4000520706177, + "p90": 1399.0720510482788, + "p95": 1404.6399593353271, + "p99": 1450.7839679718018 + }, + "isolatedSum": { + "p50": 1406.0800075531006, + "p90": 1425.3759980201721, + "p95": 1442.5600171089172, + "p99": 1673.3760237693787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1065.8559799194336, + "p90": 1080.8320045471191, + "p95": 1103.4560203552246, + "p99": 1211.135983467102 + }, + "combine": { + "p50": 1539.7759675979614, + "p90": 1552.3200035095215, + "p95": 1556.3520193099976, + "p99": 1791.0399436950684 + }, + "roundtrip": { + "p50": 2587.552070617676, + "p90": 2615.2639389038086, + "p95": 2655.103921890259, + "p99": 3146.1119651794434 + }, + "isolatedSum": { + "p50": 2605.631947517395, + "p90": 2633.1520080566406, + "p95": 2659.808039665222, + "p99": 3002.1759271621704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26c4c560", + "identity": "b300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_213466e9", + "comparisonKey": "7a4b08a32ad20d12", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:21.501848+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { 
+ "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.0960002541542, + "p90": 93.9520001411438, + "p95": 95.77599912881851, + "p99": 154.4959992170334 + }, + "combine": { + "p50": 115.52000045776367, + "p90": 125.85599720478058, + "p95": 127.74400413036346, + "p99": 177.63200402259827 + }, + "roundtrip": { + "p50": 186.5919977426529, + "p90": 192.73599982261658, + "p95": 194.07999515533447, + "p99": 238.8480007648468 + }, + "isolatedSum": { + "p50": 203.61600071191788, + "p90": 219.80799734592438, + "p95": 223.52000325918198, + "p99": 332.12800323963165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 130.3360015153885, + "p90": 136.09600067138672, + "p95": 143.39199662208557, + "p99": 197.40800559520721 + }, + "combine": { + "p50": 155.5519998073578, + "p90": 
164.76799547672272, + "p95": 166.01599752902985, + "p99": 217.3759937286377 + }, + "roundtrip": { + "p50": 266.7520046234131, + "p90": 273.69600534439087, + "p95": 277.5680124759674, + "p99": 312.063992023468 + }, + "isolatedSum": { + "p50": 285.8880013227463, + "p90": 300.86399614810944, + "p95": 309.4079941511154, + "p99": 414.7839993238449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 187.23200261592865, + "p90": 195.00799477100372, + "p95": 199.16799664497375, + "p99": 259.13599133491516 + }, + "combine": { + "p50": 265.79201221466064, + "p90": 275.04000067710876, + "p95": 277.3439884185791, + "p99": 340.256005525589 + }, + "roundtrip": { + "p50": 438.944011926651, + "p90": 444.09599900245667, + "p95": 448.4800100326538, + "p99": 471.5839922428131 + }, + "isolatedSum": { + "p50": 453.0240148305893, + "p90": 470.0479954481125, + "p95": 476.51198506355286, + "p99": 599.3919968605042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 316.73601269721985, + "p90": 320.6399977207184, + "p95": 324.3519961833954, + "p99": 377.75999307632446 + }, + "combine": { + "p50": 458.9439928531647, + "p90": 463.29599618911743, + "p95": 471.0400104522705, + "p99": 531.9679975509644 + }, + "roundtrip": { + "p50": 756.0960054397583, + "p90": 763.1999850273132, + "p95": 766.9439911842346, + "p99": 830.4640054702759 + }, + "isolatedSum": { + "p50": 775.6800055503845, + "p90": 783.9359939098358, + "p95": 795.3920066356659, + "p99": 
909.7279906272888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 560.0000023841858, + "p90": 565.3120279312134, + "p95": 568.3199763298035, + "p99": 654.5600295066833 + }, + "combine": { + "p50": 817.0560002326965, + "p90": 827.9359936714172, + "p95": 829.2160034179688, + "p99": 841.2479758262634 + }, + "roundtrip": { + "p50": 1357.7280044555664, + "p90": 1366.7199611663818, + "p95": 1372.1280097961426, + "p99": 1381.8880319595337 + }, + "isolatedSum": { + "p50": 1377.0560026168823, + "p90": 1393.2480216026306, + "p95": 1397.5359797477722, + "p99": 1495.8080053329468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1036.352038383484, + "p90": 1047.4560260772705, + "p95": 1068.3200359344482, + "p99": 1139.2320394515991 + }, + "combine": { + "p50": 1530.4319858551025, + "p90": 1542.5599813461304, + "p95": 1554.4639825820923, + "p99": 1640.4160261154175 + }, + "roundtrip": { + "p50": 2550.4000186920166, + "p90": 2563.999891281128, + "p95": 2596.4159965515137, + "p99": 3043.488025665283 + }, + "isolatedSum": { + "p50": 2566.7840242385864, + "p90": 2590.016007423401, + "p95": 2622.7840185165405, + "p99": 2779.6480655670166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-01ff82b4", + "identity": 
"b300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_39fa33d8", + "comparisonKey": "e8be2e1676dd9fc7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:49.653113+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 144.6399986743927, + "p90": 152.48000621795654, + "p95": 159.58400070667267, + "p99": 233.21600258350372 + }, + "combine": { + "p50": 90.43200314044952, + "p90": 92.57599711418152, + "p95": 94.2080020904541, + "p99": 114.68800157308578 + }, + "roundtrip": { + "p50": 229.21599447727203, + "p90": 252.22399830818176, + "p95": 269.6959972381592, + "p99": 333.6000144481659 + }, + "isolatedSum": { + "p50": 235.07200181484222, + "p90": 245.05600333213806, + "p95": 253.79200279712677, + "p99": 347.9040041565895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.82400679588318, + "p90": 189.85599279403687, + "p95": 193.7599927186966, + "p99": 245.08799612522125 + }, + "combine": { + "p50": 125.44000148773193, + "p90": 128.9920061826706, + "p95": 130.49599528312683, + "p99": 142.94399321079254 + }, + "roundtrip": { + "p50": 306.36799335479736, + "p90": 319.4560110569, + "p95": 332.12798833847046, + "p99": 402.72000432014465 + }, + "isolatedSum": { + "p50": 311.2640082836151, + "p90": 318.84799897670746, + "p95": 324.2559880018234, + "p99": 388.0319893360138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 268.6080038547516, + "p90": 285.72800755500793, + "p95": 313.85600566864014, + "p99": 364.5760118961334 + }, + "combine": { + "p50": 188.960000872612, + "p90": 193.02399456501007, + "p95": 194.94399428367615, + "p99": 261.56800985336304 + }, + "roundtrip": { + "p50": 
448.7040042877197, + "p90": 455.1680088043213, + "p95": 460.9279930591583, + "p99": 523.0399966239929 + }, + "isolatedSum": { + "p50": 457.5680047273636, + "p90": 478.752002120018, + "p95": 508.7999999523163, + "p99": 626.1440217494965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 455.80801367759705, + "p90": 464.5119905471802, + "p95": 468.1279957294464, + "p99": 533.9199900627136 + }, + "combine": { + "p50": 390.1439905166626, + "p90": 399.3600010871887, + "p95": 403.1679928302765, + "p99": 417.2160029411316 + }, + "roundtrip": { + "p50": 838.7519717216492, + "p90": 847.8400111198425, + "p95": 854.0480136871338, + "p99": 875.3920197486877 + }, + "isolatedSum": { + "p50": 845.9520041942596, + "p90": 863.8719916343689, + "p95": 871.2959885597229, + "p99": 951.1359930038452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 826.3999819755554, + "p90": 838.1760120391846, + "p95": 842.7839875221252, + "p99": 901.2799859046936 + }, + "combine": { + "p50": 751.2000203132629, + "p90": 755.9040188789368, + "p95": 758.8800191879272, + "p99": 768.9599990844727 + }, + "roundtrip": { + "p50": 1557.31201171875, + "p90": 1570.688009262085, + "p95": 1577.7599811553955, + "p99": 2053.8558959960938 + }, + "isolatedSum": { + "p50": 1577.6000022888184, + "p90": 1594.0800309181213, + "p95": 1601.6640067100525, + "p99": 1670.2399849891663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, 
+ "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1579.4240236282349, + "p90": 1596.2239503860474, + "p95": 1603.7119626998901, + "p99": 1785.248041152954 + }, + "combine": { + "p50": 1437.2800588607788, + "p90": 1446.079969406128, + "p95": 1450.943946838379, + "p99": 1533.247947692871 + }, + "roundtrip": { + "p50": 2989.5360469818115, + "p90": 3004.35209274292, + "p95": 3011.615991592407, + "p99": 3088.4480476379395 + }, + "isolatedSum": { + "p50": 3016.7040824890137, + "p90": 3042.3039197921753, + "p95": 3054.655909538269, + "p99": 3318.495988845825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56a1dfa0", + "identity": "b300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_39fa33d8", + "comparisonKey": "c6108f71c4b61f0e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:36.423235+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + 
"kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 153.31199765205383, + "p90": 158.9760035276413, + "p95": 163.80800306797028, + "p99": 226.81599855422974 + }, + "combine": { + "p50": 97.75999933481216, + "p90": 100.63999891281128, + "p95": 102.36799716949463, + "p99": 112.41599917411804 + }, + "roundtrip": { + "p50": 243.45600605010986, + "p90": 248.83200228214264, + "p95": 253.9840042591095, + "p99": 302.3039996623993 + }, + "isolatedSum": { + "p50": 251.071996986866, + "p90": 259.6160024404526, + "p95": 266.1760002374649, + "p99": 339.2319977283478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 200.51200687885284, + "p90": 205.53599298000336, + "p95": 
207.71199464797974, + "p99": 262.81601190567017 + }, + "combine": { + "p50": 135.26399433612823, + "p90": 139.8719996213913, + "p95": 141.82400703430176, + "p99": 160.16000509262085 + }, + "roundtrip": { + "p50": 331.712007522583, + "p90": 337.40800619125366, + "p95": 340.4160141944885, + "p99": 407.29600191116333 + }, + "isolatedSum": { + "p50": 335.7760012149811, + "p90": 345.40799260139465, + "p95": 349.5360016822815, + "p99": 422.976016998291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 303.8080036640167, + "p90": 308.4160089492798, + "p95": 312.032014131546, + "p99": 360.22400856018066 + }, + "combine": { + "p50": 206.7199945449829, + "p90": 211.42399311065674, + "p95": 214.56000208854675, + "p99": 233.37599635124207 + }, + "roundtrip": { + "p50": 525.3440141677856, + "p90": 533.3120226860046, + "p95": 537.1519923210144, + "p99": 580.3840160369873 + }, + "isolatedSum": { + "p50": 510.52799820899963, + "p90": 519.8400020599365, + "p95": 526.5920162200928, + "p99": 593.6000049114227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 526.2719988822937, + "p90": 532.6399803161621, + "p95": 535.8399748802185, + "p99": 587.9359841346741 + }, + "combine": { + "p50": 434.6559941768646, + "p90": 439.64800238609314, + "p95": 442.111998796463, + "p99": 491.456001996994 + }, + "roundtrip": { + "p50": 940.671980381012, + "p90": 949.6319890022278, + "p95": 953.3439874649048, + "p99": 1048.8959550857544 + }, + "isolatedSum": { + 
"p50": 960.9279930591583, + "p90": 972.2879827022552, + "p95": 977.9519736766815, + "p99": 1079.391986131668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 963.1040096282959, + "p90": 972.7680087089539, + "p95": 983.7120175361633, + "p99": 1274.0479707717896 + }, + "combine": { + "p50": 778.11199426651, + "p90": 784.7999930381775, + "p95": 789.6000146865845, + "p99": 855.4239869117737 + }, + "roundtrip": { + "p50": 1727.6480197906494, + "p90": 1740.0319576263428, + "p95": 1749.2480278015137, + "p99": 1902.5599956512451 + }, + "isolatedSum": { + "p50": 1741.216003894806, + "p90": 1757.5680017471313, + "p95": 1773.3120322227478, + "p99": 2129.4719576835632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1866.2400245666504, + "p90": 1879.90403175354, + "p95": 1886.687994003296, + "p99": 1965.2479887008667 + }, + "combine": { + "p50": 1472.7040529251099, + "p90": 1482.3039770126343, + "p95": 1487.7760410308838, + "p99": 1667.072057723999 + }, + "roundtrip": { + "p50": 3320.575952529907, + "p90": 3336.1599445343018, + "p95": 3347.1999168395996, + "p99": 3562.175989151001 + }, + "isolatedSum": { + "p50": 3338.9440774917603, + "p90": 3362.2080087661743, + "p95": 3374.4640350341797, + "p99": 3632.3200464248657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-08787880", + "identity": "b300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_39fa33d8", + "comparisonKey": "48ee27023d65f532", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:27.374271+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": 
"2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 157.3760062456131, + "p90": 161.79199516773224, + "p95": 164.57599401474, + "p99": 316.79999828338623 + }, + "combine": { + "p50": 102.75200009346008, + "p90": 105.02400249242783, + "p95": 106.65600001811981, + "p99": 118.97599697113037 + }, + "roundtrip": { + "p50": 253.24800610542297, + "p90": 258.6880028247833, + "p95": 264.92801308631897, + "p99": 304.9600124359131 + }, + "isolatedSum": { + "p50": 260.1280063390732, + "p90": 266.81599766016006, + "p95": 271.2319940328598, + "p99": 435.7759952545166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 211.64800226688385, + "p90": 222.81600534915924, + "p95": 231.9359928369522, + "p99": 268.8960134983063 + }, + "combine": { + "p50": 143.0400013923645, + "p90": 147.23199605941772, + "p95": 149.98400211334229, + "p99": 164.12800550460815 + }, + "roundtrip": { + "p50": 354.97599840164185, + "p90": 363.5199964046478, + "p95": 372.6719915866852, + "p99": 392.7359879016876 + }, + "isolatedSum": { + "p50": 354.68800365924835, + "p90": 370.04800140857697, + "p95": 381.9199949502945, + "p99": 433.02401900291443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 334.3360126018524, + "p90": 338.1440043449402, + "p95": 340.09599685668945, + "p99": 356.25600814819336 + }, + "combine": { + "p50": 238.27199637889862, + "p90": 
242.91199445724487, + "p95": 244.80000138282776, + "p99": 255.13601303100586 + }, + "roundtrip": { + "p50": 577.6000022888184, + "p90": 583.9999914169312, + "p95": 589.7600054740906, + "p99": 686.9440078735352 + }, + "isolatedSum": { + "p50": 572.608008980751, + "p90": 581.0559988021851, + "p95": 584.8959982395172, + "p99": 611.3920211791992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 583.8720202445984, + "p90": 589.6639823913574, + "p95": 591.808021068573, + "p99": 604.9280166625977 + }, + "combine": { + "p50": 439.87199664115906, + "p90": 445.4079866409302, + "p95": 448.63998889923096, + "p99": 533.3759784698486 + }, + "roundtrip": { + "p50": 1008.1280469894409, + "p90": 1018.0799961090088, + "p95": 1024.2559909820557, + "p99": 1039.6480560302734 + }, + "isolatedSum": { + "p50": 1023.7440168857574, + "p90": 1035.0719690322876, + "p95": 1040.448009967804, + "p99": 1138.3039951324463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1080.448031425476, + "p90": 1089.7279977798462, + "p95": 1098.3680486679077, + "p99": 1186.0159635543823 + }, + "combine": { + "p50": 795.7119941711426, + "p90": 802.4640083312988, + "p95": 804.8319816589355, + "p99": 816.9919848442078 + }, + "roundtrip": { + "p50": 1859.935998916626, + "p90": 1869.7279691696167, + "p95": 1874.4640350341797, + "p99": 1925.2480268478394 + }, + "isolatedSum": { + "p50": 1876.1600255966187, + "p90": 1892.192006111145, + "p95": 1903.2000303268433, + "p99": 
2003.00794839859 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2078.8800716400146, + "p90": 2090.6240940093994, + "p95": 2100.735902786255, + "p99": 2233.247995376587 + }, + "combine": { + "p50": 1498.7839460372925, + "p90": 1508.0000162124634, + "p95": 1515.0079727172852, + "p99": 1577.50403881073 + }, + "roundtrip": { + "p50": 3562.335968017578, + "p90": 3573.728084564209, + "p95": 3584.2878818511963, + "p99": 3939.9681091308594 + }, + "isolatedSum": { + "p50": 3577.664017677307, + "p90": 3598.624110221863, + "p95": 3615.74387550354, + "p99": 3810.752034187317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7fa3db5f", + "identity": "b300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_89708fb0", + "comparisonKey": "526e476360de7f5e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:10.448139+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + 
"routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.35200154781342, + "p90": 87.26400136947632, + "p95": 89.9839997291565, + "p99": 164.0319973230362 + }, + "combine": { + "p50": 109.76000130176544, + "p90": 118.72000247240067, + "p95": 123.32800030708313, + "p99": 186.43200397491455 + }, + "roundtrip": { + "p50": 219.4560021162033, + "p90": 223.29600155353546, + "p95": 226.49599611759186, + "p99": 286.72000765800476 + }, + "isolatedSum": { + "p50": 194.11200284957886, + "p90": 205.98400384187698, + "p95": 213.31200003623962, + "p99": 350.46400129795074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 118.49600076675415, + "p90": 121.95199728012085, + "p95": 125.11999905109406, + "p99": 150.30400454998016 + }, + "combine": { + "p50": 157.02399611473083, + "p90": 161.8880033493042, + "p95": 163.80800306797028, + "p99": 182.3360025882721 + }, + "roundtrip": { + "p50": 326.2079954147339, + "p90": 332.09601044654846, + "p95": 343.1040048599243, + "p99": 439.10399079322815 + }, + "isolatedSum": { + "p50": 275.519996881485, + "p90": 283.84000062942505, + "p95": 288.92800211906433, + "p99": 332.64000713825226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 179.58399653434753, + "p90": 189.91999328136444, + "p95": 198.46400618553162, + "p99": 258.04799795150757 + }, + "combine": { + "p50": 268.3520019054413, + "p90": 273.44000339508057, + "p95": 277.15200185775757, + "p99": 327.4880051612854 + }, + "roundtrip": { + "p50": 553.056001663208, + "p90": 559.552013874054, + "p95": 563.9039874076843, + "p99": 606.1760187149048 + }, + "isolatedSum": { + "p50": 447.9359984397888, + "p90": 463.359996676445, + "p95": 475.6160080432892, + "p99": 585.536003112793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 299.77598786354065, + "p90": 306.335985660553, + "p95": 314.2719864845276, + "p99": 377.27999687194824 + }, + "combine": { + "p50": 454.367995262146, + "p90": 460.1280093193054, + "p95": 466.65599942207336, + "p99": 522.0159888267517 + }, + "roundtrip": { + "p50": 
979.1039824485779, + "p90": 989.4400238990784, + "p95": 997.5680112838745, + "p99": 1073.4080076217651 + }, + "isolatedSum": { + "p50": 754.1439831256866, + "p90": 766.4639949798584, + "p95": 780.927985906601, + "p99": 899.2959856987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 542.0799851417542, + "p90": 548.7040281295776, + "p95": 554.0159940719604, + "p99": 596.5759754180908 + }, + "combine": { + "p50": 816.6400194168091, + "p90": 824.4479894638062, + "p95": 828.5120129585266, + "p99": 856.607973575592 + }, + "roundtrip": { + "p50": 1817.952036857605, + "p90": 1832.800030708313, + "p95": 1850.43203830719, + "p99": 1928.3839464187622 + }, + "isolatedSum": { + "p50": 1358.7200045585632, + "p90": 1373.1520175933838, + "p95": 1382.528007030487, + "p99": 1453.1839489936829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1018.7519788742065, + "p90": 1029.4400453567505, + "p95": 1040.7999753952026, + "p99": 1073.472023010254 + }, + "combine": { + "p50": 1528.7359952926636, + "p90": 1542.4000024795532, + "p95": 1556.2880039215088, + "p99": 1726.1439561843872 + }, + "roundtrip": { + "p50": 3476.0639667510986, + "p90": 3499.903917312622, + "p95": 3514.240026473999, + "p99": 3815.200090408325 + }, + "isolatedSum": { + "p50": 2547.48797416687, + "p90": 2571.8400478363037, + "p95": 2597.0879793167114, + "p99": 2799.615979194641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 
2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f052acdf", + "identity": "b300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_39fa33d8", + "comparisonKey": "2de67e659afde6a9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:03.088140+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.96799683570862, + "p90": 169.76000368595123, + "p95": 173.15199971199036, + "p99": 188.1600022315979 + }, + "combine": { + "p50": 108.57599973678589, + "p90": 110.84800213575363, + "p95": 111.90400272607803, + "p99": 124.44800138473511 + }, + "roundtrip": { + "p50": 266.6879892349243, + "p90": 271.5519964694977, + "p95": 274.944007396698, + "p99": 299.1360127925873 + }, + "isolatedSum": { + "p50": 272.5439965724945, + "p90": 280.60800582170486, + "p95": 285.0560024380684, + "p99": 312.608003616333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 220.60799598693848, + "p90": 224.83199834823608, + "p95": 228.15999388694763, + "p99": 242.33600497245789 + }, + "combine": { + "p50": 153.85599434375763, + "p90": 157.82399475574493, + "p95": 159.45599973201752, + "p99": 166.62399470806122 + }, + "roundtrip": { + "p50": 374.84800815582275, + "p90": 380.7680010795593, + "p95": 385.4080140590668, + "p99": 413.6640131473541 + }, + "isolatedSum": { + "p50": 374.4639903306961, + "p90": 382.655993103981, + "p95": 387.61599361896515, + "p99": 408.9599996805191 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 362.4959886074066, + "p90": 367.48799681663513, + "p95": 
369.85599994659424, + "p99": 392.38399267196655 + }, + "combine": { + "p50": 264.70398902893066, + "p90": 268.5120105743408, + "p95": 270.30399441719055, + "p99": 286.655992269516 + }, + "roundtrip": { + "p50": 615.1999831199646, + "p90": 620.7039952278137, + "p95": 624.1599917411804, + "p99": 646.7840075492859 + }, + "isolatedSum": { + "p50": 627.1999776363373, + "p90": 636.000007390976, + "p95": 640.1599943637848, + "p99": 679.0399849414825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 630.016028881073, + "p90": 635.1680159568787, + "p95": 637.4719738960266, + "p99": 652.1599888801575 + }, + "combine": { + "p50": 452.57601141929626, + "p90": 457.40801095962524, + "p95": 460.25601029396057, + "p99": 469.6640074253082 + }, + "roundtrip": { + "p50": 1068.2560205459595, + "p90": 1077.407956123352, + "p95": 1086.8159532546997, + "p99": 1287.9359722137451 + }, + "isolatedSum": { + "p50": 1082.5920403003693, + "p90": 1092.576026916504, + "p95": 1097.7279841899872, + "p99": 1121.8239963054657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1178.015947341919, + "p90": 1186.1759424209595, + "p95": 1190.3040409088135, + "p99": 1216.3519859313965 + }, + "combine": { + "p50": 817.0560002326965, + "p90": 824.9599933624268, + "p95": 829.5040130615234, + "p99": 858.847975730896 + }, + "roundtrip": { + "p50": 1975.3919839859009, + "p90": 1985.3119850158691, + "p95": 1991.5839433670044, + "p99": 2007.9679489135742 + }, + "isolatedSum": { 
+ "p50": 1995.0719475746155, + "p90": 2011.1359357833862, + "p95": 2019.808053970337, + "p99": 2075.1999616622925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2263.5838985443115, + "p90": 2273.792028427124, + "p95": 2280.224084854126, + "p99": 2347.3598957061768 + }, + "combine": { + "p50": 1528.4160375595093, + "p90": 1537.343978881836, + "p95": 1544.1279411315918, + "p99": 1593.3120250701904 + }, + "roundtrip": { + "p50": 3780.319929122925, + "p90": 3792.2561168670654, + "p95": 3799.9680042266846, + "p99": 3876.2240409851074 + }, + "isolatedSum": { + "p50": 3791.999936103821, + "p90": 3811.13600730896, + "p95": 3824.352025985718, + "p99": 3940.671920776367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-340a5944", + "identity": "b300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_39fa33d8", + "comparisonKey": "7a25a977d7951037", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:01.373011+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8", + "model": "Kimi-K2", + 
"shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.39200735092163, + "p90": 167.58400201797485, + "p95": 170.46399414539337, + "p99": 183.96799266338348 + }, + "combine": { + "p50": 108.67200046777725, + "p90": 110.84800213575363, + "p95": 111.84000223875046, + "p99": 120.7680031657219 + }, + "roundtrip": { + "p50": 266.30398631095886, + "p90": 271.36000990867615, + "p95": 273.79199862480164, + "p99": 294.75200176239014 + }, + "isolatedSum": { + "p50": 272.0640078186989, + "p90": 278.4320041537285, + "p95": 282.30399638414383, + "p99": 304.7359958291054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 221.44000232219696, + "p90": 226.97600722312927, + "p95": 235.80799996852875, + "p99": 310.4639947414398 + }, + "combine": { + "p50": 152.5759994983673, + "p90": 156.19200468063354, + "p95": 157.82399475574493, + "p99": 172.09599912166595 + }, + "roundtrip": { + "p50": 375.4560053348541, + "p90": 380.2880048751831, + "p95": 384.3519985675812, + "p99": 458.624005317688 + }, + "isolatedSum": { + "p50": 374.01600182056427, + "p90": 383.1680119037628, + "p95": 393.6319947242737, + "p99": 482.5599938631058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 358.88001322746277, + "p90": 363.20000886917114, + "p95": 367.96799302101135, + "p99": 441.0879909992218 + }, + "combine": { + "p50": 266.400009393692, + "p90": 271.39198780059814, + "p95": 273.47201108932495, + "p99": 287.200003862381 + }, + "roundtrip": { + "p50": 613.7279868125916, + "p90": 622.1439838409424, + "p95": 631.6800117492676, + "p99": 749.3759989738464 + }, + "isolatedSum": { + "p50": 625.2800226211548, + "p90": 634.5919966697693, + "p95": 641.4400041103363, + "p99": 728.2879948616028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 628.8639903068542, + "p90": 634.6880197525024, + "p95": 643.2960033416748, + "p99": 702.1440267562866 + }, + "combine": { + "p50": 455.9679925441742, + "p90": 462.14398741722107, + 
"p95": 466.623991727829, + "p99": 476.51201486587524 + }, + "roundtrip": { + "p50": 1069.9520111083984, + "p90": 1078.4640312194824, + "p95": 1084.7359895706177, + "p99": 1136.0960006713867 + }, + "isolatedSum": { + "p50": 1084.8319828510284, + "p90": 1096.8320071697235, + "p95": 1109.9199950695038, + "p99": 1178.6560416221619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1168.7359809875488, + "p90": 1175.4239797592163, + "p95": 1183.519959449768, + "p99": 1205.183982849121 + }, + "combine": { + "p50": 810.8479976654053, + "p90": 818.2719945907593, + "p95": 821.6639757156372, + "p99": 863.9039993286133 + }, + "roundtrip": { + "p50": 1967.5519466400146, + "p90": 1977.6639938354492, + "p95": 1983.1360578536987, + "p99": 2105.3121089935303 + }, + "isolatedSum": { + "p50": 1979.583978652954, + "p90": 1993.6959743499756, + "p95": 2005.1839351654053, + "p99": 2069.0879821777344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2256.927967071533, + "p90": 2267.712116241455, + "p95": 2274.303913116455, + "p99": 2341.248035430908 + }, + "combine": { + "p50": 1512.1920108795166, + "p90": 1521.3760137557983, + "p95": 1525.7279872894287, + "p99": 1593.6000347137451 + }, + "roundtrip": { + "p50": 3757.535934448242, + "p90": 3770.8799839019775, + "p95": 3776.7040729522705, + "p99": 3841.18390083313 + }, + "isolatedSum": { + "p50": 3769.11997795105, + "p90": 3789.0881299972534, + "p95": 3800.031900405884, + "p99": 
3934.8480701446533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a3fa90eb", + "identity": "b300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_88c2290c", + "comparisonKey": "5077860301e458d7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:04.907207+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_01", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.40800082683563, + "p90": 80.22399991750717, + "p95": 81.4720019698143, + "p99": 101.82400047779083 + }, + "combine": { + "p50": 108.60799998044968, + "p90": 111.68000102043152, + "p95": 115.55200070142746, + "p99": 136.7039978504181 + }, + "roundtrip": { + "p50": 212.51200139522552, + "p90": 215.93600511550903, + "p95": 219.7760045528412, + "p99": 233.7920069694519 + }, + "isolatedSum": { + "p50": 186.0160008072853, + "p90": 191.9040009379387, + "p95": 197.02400267124176, + "p99": 238.52799832820892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 111.26399785280228, + "p90": 115.03999680280685, + "p95": 117.91999638080597, + "p99": 135.77599823474884 + }, + "combine": { + "p50": 156.8319946527481, + "p90": 161.18399798870087, + "p95": 163.07200491428375, + "p99": 218.6560034751892 + }, + "roundtrip": { + "p50": 320.032000541687, + "p90": 324.73599910736084, + "p95": 329.5679986476898, + "p99": 350.71998834609985 + }, + "isolatedSum": { + "p50": 268.0959925055504, + "p90": 276.2239947915077, + "p95": 280.9920012950897, + "p99": 354.43200170993805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 172.0000058412552, + "p90": 176.9919991493225, + "p95": 178.5919964313507, + "p99": 194.36800479888916 + }, + "combine": { + "p50": 268.15998554229736, + "p90": 272.96000719070435, + "p95": 279.35999631881714, + "p99": 343.55199337005615 + }, + "roundtrip": { + "p50": 545.7919836044312, + "p90": 551.7759919166565, + "p95": 555.3920269012451, + "p99": 606.6240072250366 + }, + "isolatedSum": { + "p50": 440.15999138355255, + "p90": 449.95200634002686, + "p95": 457.95199275016785, + "p99": 537.9199981689453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 289.44000601768494, + "p90": 295.1680123806, + "p95": 299.4239926338196, + "p99": 403.3919870853424 + }, + "combine": { + "p50": 454.5600116252899, + "p90": 459.83999967575073, + "p95": 465.34401178359985, + "p99": 516.7359709739685 + }, + "roundtrip": { + "p50": 968.1919813156128, + "p90": 977.7920246124268, + "p95": 984.7679734230042, + "p99": 1053.6320209503174 + }, + "isolatedSum": { + "p50": 744.0000176429749, + "p90": 755.0080120563507, + "p95": 764.7680044174194, + "p99": 920.1279580593109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.8400101661682, + "p90": 529.8879742622375, + "p95": 533.7920188903809, + "p99": 546.3359951972961 + }, + "combine": { + "p50": 816.6720271110535, + "p90": 824.1919875144958, + "p95": 828.1919956207275, + "p99": 853.1519770622253 + }, + "roundtrip": { + 
"p50": 1800.0320196151733, + "p90": 1810.4000091552734, + "p95": 1818.4640407562256, + "p99": 1983.1039905548096 + }, + "isolatedSum": { + "p50": 1340.5120372772217, + "p90": 1354.0799617767334, + "p95": 1361.9840145111084, + "p99": 1399.4879722595215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.1279983520508, + "p90": 992.2879934310913, + "p95": 996.8960285186768, + "p99": 1061.311960220337 + }, + "combine": { + "p50": 1527.9680490493774, + "p90": 1540.38405418396, + "p95": 1547.935962677002, + "p99": 1599.8400449752808 + }, + "roundtrip": { + "p50": 3441.6959285736084, + "p90": 3462.3360633850098, + "p95": 3496.5438842773438, + "p99": 4703.743934631348 + }, + "isolatedSum": { + "p50": 2512.096047401428, + "p90": 2532.6720476150513, + "p95": 2544.8319911956787, + "p99": 2661.1520051956177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0258d354", + "identity": "b300|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_f08976e2", + "comparisonKey": "77ee2f10324804b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:08.776039+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.91199803352356, + "p90": 85.1840004324913, + "p95": 86.43200248479843, + "p99": 93.79199892282486 + }, + "combine": { + "p50": 93.37600320577621, + "p90": 102.62399911880493, + "p95": 103.35999727249146, + "p99": 105.50399869680405 + }, + "roundtrip": { + "p50": 165.98400473594666, + "p90": 173.66400361061096, + "p95": 175.58400332927704, + "p99": 182.01600015163422 + }, + "isolatedSum": { + "p50": 176.28800123929977, + "p90": 187.80799955129623, + "p95": 189.7919997572899, + "p99": 199.2959976196289 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 125.72799623012543, + "p90": 128.1919926404953, + "p95": 129.2800009250641, + "p99": 138.20800185203552 + }, + "combine": { + "p50": 129.66400384902954, + "p90": 132.51200318336487, + "p95": 139.8400068283081, + "p99": 142.30400323867798 + }, + "roundtrip": { + "p50": 236.25600337982178, + "p90": 241.34400486946106, + "p95": 243.0720031261444, + "p99": 260.22401452064514 + }, + "isolatedSum": { + "p50": 255.39200007915497, + "p90": 260.70399582386017, + "p95": 269.1200077533722, + "p99": 280.5120050907135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 173.7920045852661, + "p90": 177.18400061130524, + "p95": 178.27199399471283, + "p99": 188.76799941062927 + }, + "combine": { + "p50": 192.03199446201324, + "p90": 201.6959935426712, + "p95": 201.9840031862259, + "p99": 204.03200387954712 + }, + "roundtrip": { + "p50": 348.9600121974945, + "p90": 354.71999645233154, + "p95": 359.5519959926605, + "p99": 376.6080141067505 + }, + "isolatedSum": { + "p50": 365.82399904727936, + "p90": 378.87999415397644, + "p95": 380.2559971809387, + "p99": 392.8000032901764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 287.7439856529236, + "p90": 
293.1840121746063, + "p95": 297.40801453590393, + "p99": 311.0400140285492 + }, + "combine": { + "p50": 386.30399107933044, + "p90": 397.95199036598206, + "p95": 398.6560106277466, + "p99": 402.75201201438904 + }, + "roundtrip": { + "p50": 599.9360084533691, + "p90": 611.9040250778198, + "p95": 615.9359812736511, + "p99": 629.0559768676758 + }, + "isolatedSum": { + "p50": 674.047976732254, + "p90": 691.1360025405884, + "p95": 696.0640251636505, + "p99": 713.7920260429382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.967981338501, + "p90": 533.6959958076477, + "p95": 535.103976726532, + "p99": 546.6880202293396 + }, + "combine": { + "p50": 765.4399871826172, + "p90": 767.3919796943665, + "p95": 768.9279913902283, + "p99": 780.2240252494812 + }, + "roundtrip": { + "p50": 1265.7599449157715, + "p90": 1274.623990058899, + "p95": 1280.511975288391, + "p99": 1453.6319971084595 + }, + "isolatedSum": { + "p50": 1289.4079685211182, + "p90": 1301.0879755020142, + "p95": 1304.0319681167603, + "p99": 1326.9120454788208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 986.847996711731, + "p90": 994.7839975357056, + "p95": 1000.5439519882202, + "p99": 1030.2720069885254 + }, + "combine": { + "p50": 1443.519949913025, + "p90": 1455.4879665374756, + "p95": 1457.4400186538696, + "p99": 1518.720030784607 + }, + "roundtrip": { + "p50": 2404.639959335327, + "p90": 2415.1999950408936, + "p95": 2419.872045516968, + "p99": 
2488.640069961548 + }, + "isolatedSum": { + "p50": 2430.367946624756, + "p90": 2450.271964073181, + "p95": 2457.98397064209, + "p99": 2548.9920377731323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0e4fd97a", + "identity": "b300|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_f08976e2", + "comparisonKey": "3e3c37b3d7a864c0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:37.790410+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.07200121879578, + "p90": 103.80800068378448, + "p95": 107.87200182676315, + "p99": 121.66400253772736 + }, + "combine": { + "p50": 103.90400141477585, + "p90": 105.53599894046783, + "p95": 107.93600231409073, + "p99": 128.22400033473969 + }, + "roundtrip": { + "p50": 177.44000256061554, + "p90": 184.4159960746765, + "p95": 186.71999871730804, + "p99": 199.20000433921814 + }, + "isolatedSum": { + "p50": 198.97600263357162, + "p90": 209.34399962425232, + "p95": 215.80800414085388, + "p99": 249.88800287246704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 119.48800086975098, + "p90": 125.34399330615997, + "p95": 128.38399410247803, + "p99": 142.30400323867798 + }, + "combine": { + "p50": 140.73599874973297, + "p90": 141.95199310779572, + "p95": 143.93599331378937, + "p99": 156.8640023469925 + }, + "roundtrip": { + "p50": 248.19199740886688, + "p90": 252.0959973335266, + "p95": 253.50400805473328, + "p99": 269.4399952888489 + }, + "isolatedSum": { + "p50": 260.22399961948395, + "p90": 267.2959864139557, + "p95": 272.3199874162674, + "p99": 299.16800558567047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 
111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 184.64000523090363, + "p90": 190.36799669265747, + "p95": 194.11200284957886, + "p99": 203.10400426387787 + }, + "combine": { + "p50": 205.88800311088562, + "p90": 214.65599536895752, + "p95": 215.10399878025055, + "p99": 229.76000607013702 + }, + "roundtrip": { + "p50": 374.11201000213623, + "p90": 380.3519904613495, + "p95": 384.44799184799194, + "p99": 405.63198924064636 + }, + "isolatedSum": { + "p50": 390.52800834178925, + "p90": 405.023992061615, + "p95": 409.2160016298294, + "p99": 432.8640103340149 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.1279911994934, + "p90": 313.05599212646484, + "p95": 315.10400772094727, + "p99": 322.7840065956116 + }, + "combine": { + "p50": 435.7439875602722, + "p90": 446.8800127506256, + "p95": 447.7120041847229, + "p99": 470.91200947761536 + }, + "roundtrip": { + "p50": 709.8559737205505, + "p90": 719.2639708518982, + "p95": 723.2959866523743, + "p99": 735.1999878883362 + }, + "isolatedSum": { + "p50": 739.8719787597656, + "p90": 759.9360048770905, + "p95": 762.8160119056702, + "p99": 793.6960160732269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 548.8319993019104, + "p90": 557.6639771461487, + "p95": 563.9680027961731, + "p99": 628.5439729690552 + 
}, + "combine": { + "p50": 779.3279886245728, + "p90": 790.6879782676697, + "p95": 792.2880053520203, + "p99": 855.8080196380615 + }, + "roundtrip": { + "p50": 1309.8560571670532, + "p90": 1320.7679986953735, + "p95": 1327.5519609451294, + "p99": 1383.3279609680176 + }, + "isolatedSum": { + "p50": 1328.1599879264832, + "p90": 1348.3519554138184, + "p95": 1356.2560081481934, + "p99": 1484.3519926071167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1036.9919538497925, + "p90": 1043.5199737548828, + "p95": 1046.5279817581177, + "p99": 1074.5279788970947 + }, + "combine": { + "p50": 1479.1040420532227, + "p90": 1482.4960231781006, + "p95": 1491.3920164108276, + "p99": 1495.967984199524 + }, + "roundtrip": { + "p50": 2485.599994659424, + "p90": 2497.3440170288086, + "p95": 2504.35209274292, + "p99": 2569.4079399108887 + }, + "isolatedSum": { + "p50": 2516.095995903015, + "p90": 2526.0159969329834, + "p95": 2537.9199981689453, + "p99": 2570.4959630966187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ec67f4fc", + "identity": "b300|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_f08976e2", + "comparisonKey": "bd48fa7d956d9fb6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:04.431864+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.60799866914749, + "p90": 103.32799702882767, + "p95": 104.47999835014343, + "p99": 114.81600254774094 + }, + "combine": { + "p50": 106.01600259542465, + "p90": 115.39199948310852, + "p95": 115.80800265073776, + "p99": 118.07999759912491 + }, + "roundtrip": { + "p50": 186.17600202560425, + "p90": 190.72000682353973, + "p95": 192.06400215625763, + "p99": 
209.47200059890747 + }, + "isolatedSum": { + "p50": 206.62400126457214, + "p90": 218.7199965119362, + "p95": 220.2880010008812, + "p99": 232.89600014686584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 129.85600531101227, + "p90": 137.1839940547943, + "p95": 139.23199474811554, + "p99": 152.25599706172943 + }, + "combine": { + "p50": 145.08800208568573, + "p90": 153.31199765205383, + "p95": 155.39200603961945, + "p99": 226.49599611759186 + }, + "roundtrip": { + "p50": 261.59998774528503, + "p90": 267.10399985313416, + "p95": 278.3359885215759, + "p99": 301.85601115226746 + }, + "isolatedSum": { + "p50": 274.944007396698, + "p90": 290.49599170684814, + "p95": 294.624000787735, + "p99": 378.7519931793213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 198.43199849128723, + "p90": 201.56799256801605, + "p95": 203.64800095558167, + "p99": 223.68000447750092 + }, + "combine": { + "p50": 241.69600009918213, + "p90": 251.10399723052979, + "p95": 251.55198574066162, + "p99": 263.13599944114685 + }, + "roundtrip": { + "p50": 408.9919924736023, + "p90": 414.88000750541687, + "p95": 416.3520038127899, + "p99": 439.5520091056824 + }, + "isolatedSum": { + "p50": 440.12799859046936, + "p90": 452.67198979854584, + "p95": 455.1999866962433, + "p99": 486.81600391864777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.00000643730164, + "p90": 319.5520043373108, + "p95": 321.3120102882385, + "p99": 330.1439881324768 + }, + "combine": { + "p50": 445.98400592803955, + "p90": 447.80799746513367, + "p95": 449.2799937725067, + "p99": 460.1919949054718 + }, + "roundtrip": { + "p50": 745.3759908676147, + "p90": 751.4240145683289, + "p95": 754.6240091323853, + "p99": 783.6160063743591 + }, + "isolatedSum": { + "p50": 757.9840123653412, + "p90": 767.3600018024445, + "p95": 770.5920040607452, + "p99": 790.3359830379486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 568.2880282402039, + "p90": 572.767972946167, + "p95": 580.6080102920532, + "p99": 684.8959922790527 + }, + "combine": { + "p50": 802.7200102806091, + "p90": 805.791974067688, + "p95": 815.936028957367, + "p99": 963.1680250167847 + }, + "roundtrip": { + "p50": 1347.4880456924438, + "p90": 1357.632040977478, + "p95": 1365.5999898910522, + "p99": 1652.3840427398682 + }, + "isolatedSum": { + "p50": 1371.008038520813, + "p90": 1378.559947013855, + "p95": 1396.5440392494202, + "p99": 1648.0640172958374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1056.5760135650635, + "p90": 1065.7919645309448, + "p95": 1068.127989768982, + "p99": 1119.1680431365967 + }, + "combine": { + "p50": 1504.1600465774536, + "p90": 
1515.9679651260376, + "p95": 1518.3039903640747, + "p99": 1676.2880086898804 + }, + "roundtrip": { + "p50": 2542.6559448242188, + "p90": 2553.6959171295166, + "p95": 2557.5358867645264, + "p99": 2595.7119464874268 + }, + "isolatedSum": { + "p50": 2560.736060142517, + "p90": 2581.7599296569824, + "p95": 2586.4319801330566, + "p99": 2795.456051826477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aa7fce39", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_27a5238e", + "comparisonKey": "0484fdcbaa6c315c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:13.910275+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { 
+ "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.99200159311295, + "p90": 140.47999680042267, + "p95": 143.00799369812012, + "p99": 153.3759981393814 + }, + "combine": { + "p50": 140.57600498199463, + "p90": 164.35199975967407, + "p95": 165.0560051202774, + "p99": 166.84800386428833 + }, + "roundtrip": { + "p50": 199.93600249290466, + "p90": 240.35200476646423, + "p95": 244.60799992084503, + "p99": 252.28801369667053 + }, + "isolatedSum": { + "p50": 241.56800657510757, + "p90": 304.83199656009674, + "p95": 308.0639988183975, + "p99": 320.22400200366974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 138.65600526332855, + "p90": 177.88800597190857, + "p95": 181.2800019979477, + "p99": 194.11200284957886 + }, + "combine": { + "p50": 164.22399878501892, + "p90": 193.50400567054749, + "p95": 202.39999890327454, + "p99": 213.59999477863312 + }, + "roundtrip": { + "p50": 275.9999930858612, + "p90": 310.8479976654053, + "p95": 315.10400772094727, + "p99": 324.7680068016052 + }, + "isolatedSum": { + "p50": 
302.8800040483475, + "p90": 371.39201164245605, + "p95": 383.68000090122223, + "p99": 407.711997628212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.2079974412918, + "p90": 239.74399268627167, + "p95": 243.71199309825897, + "p99": 260.5440020561218 + }, + "combine": { + "p50": 275.519996881485, + "p90": 303.42400074005127, + "p95": 312.608003616333, + "p99": 315.64798951148987 + }, + "roundtrip": { + "p50": 462.336003780365, + "p90": 479.0079891681671, + "p95": 484.3519926071167, + "p99": 503.7440061569214 + }, + "isolatedSum": { + "p50": 477.7279943227768, + "p90": 543.1679934263229, + "p95": 556.319996714592, + "p99": 576.1919915676117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.7760148048401, + "p90": 364.80000615119934, + "p95": 370.49600481987, + "p99": 382.1119964122772 + }, + "combine": { + "p50": 460.4800045490265, + "p90": 497.0239996910095, + "p95": 500.86402893066406, + "p99": 509.40799713134766 + }, + "roundtrip": { + "p50": 770.8799839019775, + "p90": 803.7440180778503, + "p95": 810.7200264930725, + "p99": 875.1999735832214 + }, + "isolatedSum": { + "p50": 788.2560193538666, + "p90": 861.8240058422089, + "p95": 871.3600337505341, + "p99": 891.5199935436249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 581.3440084457397, + "p90": 614.6240234375, + "p95": 625.3439784049988, + "p99": 671.7759966850281 + }, + "combine": { + "p50": 828.000009059906, + "p90": 864.3199801445007, + "p95": 868.1920170783997, + "p99": 959.0399861335754 + }, + "roundtrip": { + "p50": 1386.6560459136963, + "p90": 1420.3519821166992, + "p95": 1426.4320135116577, + "p99": 1483.7759733200073 + }, + "isolatedSum": { + "p50": 1409.3440175056458, + "p90": 1478.9440035820007, + "p95": 1493.5359954833984, + "p99": 1630.8159828186035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1078.4640312194824, + "p90": 1113.152027130127, + "p95": 1123.9999532699585, + "p99": 1281.440019607544 + }, + "combine": { + "p50": 1540.8639907836914, + "p90": 1565.9199953079224, + "p95": 1579.0079832077026, + "p99": 1688.7680292129517 + }, + "roundtrip": { + "p50": 2591.4559364318848, + "p90": 2614.367961883545, + "p95": 2621.72794342041, + "p99": 2891.711950302124 + }, + "isolatedSum": { + "p50": 2619.328022003174, + "p90": 2679.0720224380493, + "p95": 2703.007936477661, + "p99": 2970.2080488204956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4fced565", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_f08976e2", + "comparisonKey": "1c988d2a613e11c6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:36.239594+00:00", + "status": 
"valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.29600292444229, + "p90": 99.35999661684036, + "p95": 103.2319962978363, + "p99": 114.94400352239609 + }, + "combine": { + "p50": 116.22399836778641, + "p90": 117.63200163841248, + "p95": 
119.77600306272507, + "p99": 139.42399621009827 + }, + "roundtrip": { + "p50": 197.9839950799942, + "p90": 202.14399695396423, + "p95": 204.25599813461304, + "p99": 225.3440022468567 + }, + "isolatedSum": { + "p50": 211.5200012922287, + "p90": 216.99199825525284, + "p95": 223.00799936056137, + "p99": 254.36799973249435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.66400516033173, + "p90": 140.19200205802917, + "p95": 142.11200177669525, + "p99": 157.0879966020584 + }, + "combine": { + "p50": 156.8319946527481, + "p90": 165.40800034999847, + "p95": 166.84800386428833, + "p99": 178.0800074338913 + }, + "roundtrip": { + "p50": 275.13599395751953, + "p90": 284.41599011421204, + "p95": 294.75200176239014, + "p99": 329.72800731658936 + }, + "isolatedSum": { + "p50": 294.49599981307983, + "p90": 305.60000240802765, + "p95": 308.9600056409836, + "p99": 335.1680040359497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.62400674819946, + "p90": 201.82399451732635, + "p95": 204.352006316185, + "p99": 224.83199834823608 + }, + "combine": { + "p50": 274.30400252342224, + "p90": 276.41600370407104, + "p95": 277.44001150131226, + "p99": 339.4879996776581 + }, + "roundtrip": { + "p50": 441.6640102863312, + "p90": 448.89599084854126, + "p95": 451.9360065460205, + "p99": 528.1919836997986 + }, + "isolatedSum": { + "p50": 468.9280092716217, + "p90": 478.2399982213974, + "p95": 481.79201781749725, + "p99": 564.3199980258942 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 326.7520070075989, + "p90": 330.01598715782166, + "p95": 331.39199018478394, + "p99": 351.6159951686859 + }, + "combine": { + "p50": 459.9039852619171, + "p90": 470.94398736953735, + "p95": 472.57599234580994, + "p99": 524.2879986763 + }, + "roundtrip": { + "p50": 765.4719948768616, + "p90": 773.311972618103, + "p95": 775.0719785690308, + "p99": 805.4400086402893 + }, + "isolatedSum": { + "p50": 786.655992269516, + "p90": 800.959974527359, + "p95": 803.9679825305939, + "p99": 875.903993844986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 577.1520137786865, + "p90": 585.9519839286804, + "p95": 607.4240207672119, + "p99": 805.6319952011108 + }, + "combine": { + "p50": 819.6160197257996, + "p90": 830.3999900817871, + "p95": 840.3199911117554, + "p99": 1299.8080253601074 + }, + "roundtrip": { + "p50": 1378.8800239562988, + "p90": 1388.5760307312012, + "p95": 1396.1280584335327, + "p99": 1514.4319534301758 + }, + "isolatedSum": { + "p50": 1396.768033504486, + "p90": 1416.3519740104675, + "p95": 1447.7440118789673, + "p99": 2105.4400205612183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1071.679949760437, 
+ "p90": 1079.200029373169, + "p95": 1083.8719606399536, + "p99": 1171.1679697036743 + }, + "combine": { + "p50": 1530.6880474090576, + "p90": 1541.7920351028442, + "p95": 1544.1279411315918, + "p99": 1603.9999723434448 + }, + "roundtrip": { + "p50": 2586.0800743103027, + "p90": 2597.503900527954, + "p95": 2603.16801071167, + "p99": 2641.2479877471924 + }, + "isolatedSum": { + "p50": 2602.3679971694946, + "p90": 2620.992064476013, + "p95": 2627.9999017715454, + "p99": 2775.167942047119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-59429596", + "identity": "b300|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_f08976e2", + "comparisonKey": "8d64896a45f8a0d5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:46.467173+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.09600156545639, + "p90": 99.39199686050415, + "p95": 101.82400047779083, + "p99": 108.44799876213074 + }, + "combine": { + "p50": 116.19199812412262, + "p90": 117.24799871444702, + "p95": 118.27199906110764, + "p99": 120.67200243473053 + }, + "roundtrip": { + "p50": 198.7839937210083, + "p90": 202.11200416088104, + "p95": 203.5199999809265, + "p99": 230.04800081253052 + }, + "isolatedSum": { + "p50": 212.287999689579, + "p90": 216.63999557495117, + "p95": 220.09599953889847, + "p99": 229.12000119686127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.15200126171112, + "p90": 141.37600362300873, + "p95": 144.31999623775482, + "p99": 198.59200716018677 + }, + "combine": { + "p50": 155.008003115654, + "p90": 164.92800414562225, + "p95": 165.3439998626709, + "p99": 178.56000363826752 + }, + "roundtrip": { + "p50": 274.2080092430115, + 
"p90": 279.2640030384064, + "p95": 280.89600801467896, + "p99": 323.10399413108826 + }, + "isolatedSum": { + "p50": 292.1600043773651, + "p90": 306.304007768631, + "p95": 309.6639961004257, + "p99": 377.1520107984543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.62400674819946, + "p90": 202.2079974412918, + "p95": 205.31199872493744, + "p99": 254.8159956932068 + }, + "combine": { + "p50": 275.2319872379303, + "p90": 276.92800760269165, + "p95": 279.04000878334045, + "p99": 311.93599104881287 + }, + "roundtrip": { + "p50": 441.6640102863312, + "p90": 450.52799582481384, + "p95": 459.0719938278198, + "p99": 603.9360165596008 + }, + "isolatedSum": { + "p50": 469.85599398612976, + "p90": 479.13600504398346, + "p95": 484.3520075082779, + "p99": 566.7519867420197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 328.12801003456116, + "p90": 332.19200372695923, + "p95": 334.01599526405334, + "p99": 356.25600814819336 + }, + "combine": { + "p50": 460.4159891605377, + "p90": 470.8479940891266, + "p95": 472.06398844718933, + "p99": 496.19200825691223 + }, + "roundtrip": { + "p50": 764.6399736404419, + "p90": 772.8319764137268, + "p95": 776.1600017547607, + "p99": 823.1679797172546 + }, + "isolatedSum": { + "p50": 788.5439991950989, + "p90": 803.0399978160858, + "p95": 806.0799837112427, + "p99": 852.4480164051056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + 
"fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 568.7999725341797, + "p90": 573.632001876831, + "p95": 595.6159830093384, + "p99": 659.0080261230469 + }, + "combine": { + "p50": 815.9679770469666, + "p90": 827.9359936714172, + "p95": 840.3840065002441, + "p99": 892.0959830284119 + }, + "roundtrip": { + "p50": 1364.3519878387451, + "p90": 1375.9360313415527, + "p95": 1386.9760036468506, + "p99": 1438.4000301361084 + }, + "isolatedSum": { + "p50": 1384.7679495811462, + "p90": 1401.5679955482483, + "p95": 1435.9999895095825, + "p99": 1551.1040091514587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1064.9280548095703, + "p90": 1070.6239938735962, + "p95": 1081.2159776687622, + "p99": 1170.0160503387451 + }, + "combine": { + "p50": 1517.2159671783447, + "p90": 1529.312014579773, + "p95": 1532.2240591049194, + "p99": 1545.2799797058105 + }, + "roundtrip": { + "p50": 2566.0479068756104, + "p90": 2579.7760486602783, + "p95": 2588.927984237671, + "p99": 2604.1600704193115 + }, + "isolatedSum": { + "p50": 2582.144021987915, + "p90": 2599.936008453369, + "p95": 2613.4400367736816, + "p99": 2715.2960300445557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ce9bf8b8", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + 
"colorKey": "b300_26bc4356", + "comparisonKey": "dec053d253f54369", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:17.243272+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.64800012111664, 
+ "p90": 109.8880022764206, + "p95": 112.03200370073318, + "p99": 122.5920021533966 + }, + "combine": { + "p50": 131.80799782276154, + "p90": 140.73599874973297, + "p95": 141.40799641609192, + "p99": 154.27200496196747 + }, + "roundtrip": { + "p50": 229.312002658844, + "p90": 236.1920028924942, + "p95": 237.8239929676056, + "p99": 250.40000677108765 + }, + "isolatedSum": { + "p50": 239.45599794387817, + "p90": 250.62400102615356, + "p95": 253.4400001168251, + "p99": 276.8640071153641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 153.50399911403656, + "p90": 160.76800227165222, + "p95": 162.62400150299072, + "p99": 182.6239973306656 + }, + "combine": { + "p50": 202.84800231456757, + "p90": 206.33600652217865, + "p95": 213.53599429130554, + "p99": 238.71999979019165 + }, + "roundtrip": { + "p50": 334.49599146842957, + "p90": 339.6480083465576, + "p95": 341.5359854698181, + "p99": 359.48801040649414 + }, + "isolatedSum": { + "p50": 356.3520014286041, + "p90": 367.1040087938309, + "p95": 376.15999579429626, + "p99": 421.34399712085724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.28800296783447, + "p90": 237.7600073814392, + "p95": 241.82400107383728, + "p99": 263.5839879512787 + }, + "combine": { + "p50": 347.4879860877991, + "p90": 349.88799691200256, + "p95": 351.74399614334106, + "p99": 386.1440122127533 + }, + "roundtrip": { + "p50": 544.1280007362366, + "p90": 552.8960227966309, + "p95": 558.0800175666809, + "p99": 575.6480097770691 + }, 
+ "isolatedSum": { + "p50": 579.7759890556335, + "p90": 587.6480042934418, + "p95": 593.5679972171783, + "p99": 649.728000164032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 382.52800703048706, + "p90": 393.8240110874176, + "p95": 401.2160003185272, + "p99": 440.44798612594604 + }, + "combine": { + "p50": 596.0320234298706, + "p90": 606.4640283584595, + "p95": 609.5359921455383, + "p99": 630.7839751243591 + }, + "roundtrip": { + "p50": 962.4320268630981, + "p90": 980.9920191764832, + "p95": 989.2160296440125, + "p99": 1046.015977859497 + }, + "isolatedSum": { + "p50": 978.5600304603577, + "p90": 1000.2880394458771, + "p95": 1010.7519924640656, + "p99": 1071.2319612503052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 711.1999988555908, + "p90": 729.6000123023987, + "p95": 748.3199834823608, + "p99": 1096.4800119400024 + }, + "combine": { + "p50": 1114.848017692566, + "p90": 1127.1040439605713, + "p95": 1136.8319988250732, + "p99": 1283.29598903656 + }, + "roundtrip": { + "p50": 1811.2319707870483, + "p90": 1844.2879915237427, + "p95": 1851.199984550476, + "p99": 1930.6559562683105 + }, + "isolatedSum": { + "p50": 1826.0480165481567, + "p90": 1856.70405626297, + "p95": 1885.151982307434, + "p99": 2379.7760009765625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1344.9280261993408, + "p90": 1363.8720512390137, + "p95": 1428.4160137176514, + "p99": 1450.943946838379 + }, + "combine": { + "p50": 2144.0000534057617, + "p90": 2157.8879356384277, + "p95": 2168.735980987549, + "p99": 2316.2879943847656 + }, + "roundtrip": { + "p50": 3476.0639667510986, + "p90": 3531.5520763397217, + "p95": 3569.279909133911, + "p99": 3907.9999923706055 + }, + "isolatedSum": { + "p50": 3488.9280796051025, + "p90": 3521.7599868774414, + "p95": 3597.1519947052, + "p99": 3767.2319412231445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5de5480e", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_36af80c9", + "comparisonKey": "04265e6daa28f0bd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:19.243822+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": 
"none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.98399645090103, + "p90": 72.31999933719635, + "p95": 74.36800003051758, + "p99": 106.78400099277496 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 70.75200229883194, + "p95": 78.91199737787247, + "p99": 127.58399546146393 + }, + "roundtrip": { + "p50": 121.85599654912949, + "p90": 126.94400548934937, + "p95": 129.7920048236847, + "p99": 181.7599982023239 + }, + "isolatedSum": { + "p50": 138.71999830007553, + "p90": 143.0720016360283, + "p95": 153.27999740839005, + "p99": 234.3679964542389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 94.27200257778168, + "p90": 98.2080027461052, + "p95": 103.90400141477585, + "p99": 167.10400581359863 + }, + "combine": { + "p50": 116.5120005607605, 
+ "p90": 118.68800222873688, + "p95": 140.35199582576752, + "p99": 182.17599391937256 + }, + "roundtrip": { + "p50": 195.5520063638687, + "p90": 199.3280053138733, + "p95": 202.36800611019135, + "p99": 223.83999824523926 + }, + "isolatedSum": { + "p50": 210.78400313854218, + "p90": 216.89600497484207, + "p95": 244.25599724054337, + "p99": 349.2799997329712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 191.48799777030945, + "p90": 197.2160041332245, + "p95": 199.23199713230133, + "p99": 243.48799884319305 + }, + "combine": { + "p50": 251.77600979804993, + "p90": 254.5279860496521, + "p95": 262.84798979759216, + "p99": 312.4479949474335 + }, + "roundtrip": { + "p50": 441.76000356674194, + "p90": 446.399986743927, + "p95": 448.5760033130646, + "p99": 468.9280092716217 + }, + "isolatedSum": { + "p50": 443.2640075683594, + "p90": 451.7439901828766, + "p95": 462.0799869298935, + "p99": 555.9359937906265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a60b343", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b300_81013c18", + "comparisonKey": "adec1d5f7cfe23e9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:15.971643+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.17599987983704, + "p90": 88.51200342178345, + "p95": 89.9839997291565, + "p99": 101.6319990158081 + }, + "combine": { + "p50": 90.55999666452408, + "p90": 91.87199920415878, + "p95": 92.32000261545181, + "p99": 94.68799829483032 + }, + "roundtrip": { + "p50": 147.23199605941772, + "p90": 154.6880006790161, + "p95": 158.75199437141418, + "p99": 198.55999946594238 + }, + "isolatedSum": { + "p50": 
172.73599654436111, + "p90": 180.38400262594223, + "p95": 182.3040023446083, + "p99": 196.31999731063843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.03200107812881, + "p90": 98.24000298976898, + "p95": 99.93600100278854, + "p99": 112.0000034570694 + }, + "combine": { + "p50": 105.92000186443329, + "p90": 114.88000303506851, + "p95": 115.4559999704361, + "p99": 118.14399808645248 + }, + "roundtrip": { + "p50": 185.7600063085556, + "p90": 198.17599654197693, + "p95": 205.72799444198608, + "p99": 226.623997092247 + }, + "isolatedSum": { + "p50": 201.9520029425621, + "p90": 213.1200060248375, + "p95": 215.39200097322464, + "p99": 230.14400154352188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 135.51999628543854, + "p90": 141.4719969034195, + "p95": 146.01600170135498, + "p99": 156.0640037059784 + }, + "combine": { + "p50": 144.3839967250824, + "p90": 153.24799716472626, + "p95": 154.14400398731232, + "p99": 165.02399742603302 + }, + "roundtrip": { + "p50": 259.99999046325684, + "p90": 265.53601026535034, + "p95": 269.1839933395386, + "p99": 342.52798557281494 + }, + "isolatedSum": { + "p50": 279.90399301052094, + "p90": 294.71999406814575, + "p95": 300.1600056886673, + "p99": 321.0880011320114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
1024, + "globalTokens": 8192, + "dispatch": { + "p50": 198.43199849128723, + "p90": 206.14400506019592, + "p95": 209.3760073184967, + "p99": 224.09600019454956 + }, + "combine": { + "p50": 263.16800713539124, + "p90": 264.3199861049652, + "p95": 264.8639976978302, + "p99": 362.43200302124023 + }, + "roundtrip": { + "p50": 438.27199935913086, + "p90": 447.2320079803467, + "p95": 454.97599244117737, + "p99": 471.6480076313019 + }, + "isolatedSum": { + "p50": 461.60000562667847, + "p90": 470.46399116516113, + "p95": 474.2400050163269, + "p99": 586.5280032157898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 347.4879860877991, + "p90": 356.83199763298035, + "p95": 362.39999532699585, + "p99": 371.4880049228668 + }, + "combine": { + "p50": 460.640013217926, + "p90": 463.99998664855957, + "p95": 472.25600481033325, + "p99": 521.2159752845764 + }, + "roundtrip": { + "p50": 792.03200340271, + "p90": 801.4079928398132, + "p95": 808.0959916114807, + "p99": 912.0320081710815 + }, + "isolatedSum": { + "p50": 808.1279993057251, + "p90": 820.8319842815399, + "p95": 834.6560001373291, + "p99": 892.7039802074432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 649.9199867248535, + "p90": 659.9360108375549, + "p95": 663.1680130958557, + "p99": 735.264003276825 + }, + "combine": { + "p50": 829.7600150108337, + "p90": 840.831995010376, + "p95": 843.999981880188, + "p99": 914.0480160713196 + }, + "roundtrip": { + "p50": 1459.2959880828857, + "p90": 
1470.7520008087158, + "p95": 1479.904055595398, + "p99": 1561.568021774292 + }, + "isolatedSum": { + "p50": 1479.6800017356873, + "p90": 1500.768005847931, + "p95": 1507.1679949760437, + "p99": 1649.3120193481445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e6f285c", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b300_9804a17c", + "comparisonKey": "33a0abb2d76c9808", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:57.002644+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.99200224876404, + "p90": 107.13600367307663, + "p95": 109.79200154542923, + "p99": 121.34400010108948 + }, + "combine": { + "p50": 128.76799702644348, + "p90": 130.46400249004364, + "p95": 132.35199451446533, + "p99": 158.36800634860992 + }, + "roundtrip": { + "p50": 218.9439982175827, + "p90": 225.79200565814972, + "p95": 227.35999524593353, + "p99": 242.5599992275238 + }, + "isolatedSum": { + "p50": 233.75999927520752, + "p90": 237.60000616312027, + "p95": 242.14399605989456, + "p99": 279.7120064496994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.10400187969208, + "p90": 145.50399780273438, + "p95": 146.84799313545227, + "p99": 155.07200360298157 + }, + "combine": { + "p50": 189.56799805164337, + "p90": 191.16799533367157, + "p95": 192.06400215625763, + "p99": 202.14399695396423 + }, + "roundtrip": { + "p50": 307.8399896621704, + "p90": 315.2959942817688, + "p95": 317.27999448776245, + "p99": 335.29600501060486 + }, + "isolatedSum": { + "p50": 332.67199993133545, + "p90": 336.67199313640594, + "p95": 338.9119952917099, + "p99": 357.2160005569458 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 217.95199811458588, + "p90": 223.55200350284576, + "p95": 226.68799757957458, + "p99": 242.65600740909576 + }, + "combine": { + "p50": 336.9919955730438, + "p90": 338.3359909057617, + "p95": 339.58399295806885, + "p99": 361.82400584220886 + }, + "roundtrip": { + "p50": 531.5520167350769, + "p90": 537.9840135574341, + "p95": 540.3519868850708, + "p99": 555.7119846343994 + }, + "isolatedSum": { + "p50": 554.9439936876297, + "p90": 561.8879944086075, + "p95": 566.2719905376434, + "p99": 604.4800132513046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 368.8960075378418, + "p90": 376.73598527908325, + "p95": 378.4320056438446, + "p99": 392.12799072265625 + }, + "combine": { + "p50": 582.0800065994263, + "p90": 583.7119817733765, + "p95": 584.1599702835083, + "p99": 618.5280084609985 + }, + "roundtrip": { + "p50": 940.7359957695007, + "p90": 945.248007774353, + "p95": 947.3599791526794, + "p99": 968.0960178375244 + }, + "isolatedSum": { + "p50": 950.9760141372681, + "p90": 960.4479670524597, + "p95": 962.5919759273529, + "p99": 1010.6559991836548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 686.784029006958, + 
"p90": 695.2319741249084, + "p95": 697.1200108528137, + "p99": 780.9280157089233 + }, + "combine": { + "p50": 1087.2639417648315, + "p90": 1090.5920267105103, + "p95": 1098.9439487457275, + "p99": 1126.2400150299072 + }, + "roundtrip": { + "p50": 1756.5439939498901, + "p90": 1765.7920122146606, + "p95": 1774.4319438934326, + "p99": 2025.439977645874 + }, + "isolatedSum": { + "p50": 1774.0479707717896, + "p90": 1785.8240008354187, + "p95": 1796.0639595985413, + "p99": 1907.1680307388306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1324.8319625854492, + "p90": 1335.3279829025269, + "p95": 1338.271975517273, + "p99": 1360.9600067138672 + }, + "combine": { + "p50": 2082.1120738983154, + "p90": 2084.9599838256836, + "p95": 2094.6879386901855, + "p99": 2204.2880058288574 + }, + "roundtrip": { + "p50": 3391.455888748169, + "p90": 3402.9440879821777, + "p95": 3416.703939437866, + "p99": 3469.8879718780518 + }, + "isolatedSum": { + "p50": 3406.9440364837646, + "p90": 3420.2879667282104, + "p95": 3432.9599142074585, + "p99": 3565.2480125427246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b8815dd3", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b300_54cb99d2", + "comparisonKey": "82ca703eba4f81d6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:39.917096+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.8639999628067, + "p90": 103.13600301742554, + "p95": 104.8320010304451, + "p99": 122.04799801111221 + }, + "combine": { + "p50": 116.31999909877777, + "p90": 
118.20799857378006, + "p95": 119.9679970741272, + "p99": 139.42399621009827 + }, + "roundtrip": { + "p50": 197.91999459266663, + "p90": 203.74399423599243, + "p95": 208.76799523830414, + "p99": 246.07999622821808 + }, + "isolatedSum": { + "p50": 213.18399906158447, + "p90": 221.3440015912056, + "p95": 224.7999981045723, + "p99": 261.4719942212105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.34399461746216, + "p90": 139.615997672081, + "p95": 140.6400054693222, + "p99": 150.27199685573578 + }, + "combine": { + "p50": 156.15999698638916, + "p90": 165.12000560760498, + "p95": 165.6319946050644, + "p99": 180.54400384426117 + }, + "roundtrip": { + "p50": 274.6880054473877, + "p90": 280.5120050907135, + "p95": 282.55999088287354, + "p99": 303.2959997653961 + }, + "isolatedSum": { + "p50": 289.5039916038513, + "p90": 304.736003279686, + "p95": 306.2720000743866, + "p99": 330.81600069999695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 195.10400295257568, + "p90": 206.40000700950623, + "p95": 216.0319983959198, + "p99": 256.0960054397583 + }, + "combine": { + "p50": 267.1999931335449, + "p90": 275.84001421928406, + "p95": 276.6079902648926, + "p99": 371.4880049228668 + }, + "roundtrip": { + "p50": 440.6079947948456, + "p90": 451.200008392334, + "p95": 458.0160081386566, + "p99": 526.7519950866699 + }, + "isolatedSum": { + "p50": 462.3039960861206, + "p90": 482.2400212287903, + "p95": 492.6399886608124, + "p99": 627.5840103626251 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.90398597717285, + "p90": 331.4560055732727, + "p95": 334.46401357650757, + "p99": 442.8800046443939 + }, + "combine": { + "p50": 464.28799629211426, + "p90": 472.79998660087585, + "p95": 474.9760031700134, + "p99": 545.9520220756531 + }, + "roundtrip": { + "p50": 773.0879783630371, + "p90": 780.2879810333252, + "p95": 785.2799892425537, + "p99": 859.8399758338928 + }, + "isolatedSum": { + "p50": 792.1919822692871, + "p90": 804.2559921741486, + "p95": 809.440016746521, + "p99": 988.832026720047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 569.6319937705994, + "p90": 579.200029373169, + "p95": 586.5920186042786, + "p99": 634.0159773826599 + }, + "combine": { + "p50": 816.32000207901, + "p90": 828.2880187034607, + "p95": 832.319974899292, + "p99": 1332.3520421981812 + }, + "roundtrip": { + "p50": 1369.088053703308, + "p90": 1380.4479837417603, + "p95": 1387.8400325775146, + "p99": 1520.19202709198 + }, + "isolatedSum": { + "p50": 1385.9519958496094, + "p90": 1407.4880480766296, + "p95": 1418.9119935035706, + "p99": 1966.368019580841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1065.1520490646362, + "p90": 1071.5839862823486, + "p95": 1079.4559717178345, + "p99": 1224.5759963989258 + }, + "combine": { + "p50": 1521.9520330429077, + "p90": 1532.960057258606, + "p95": 1542.1119928359985, + "p99": 1639.840006828308 + }, + "roundtrip": { + "p50": 2569.6001052856445, + "p90": 2584.4480991363525, + "p95": 2594.815969467163, + "p99": 2643.1679725646973 + }, + "isolatedSum": { + "p50": 2587.104082107544, + "p90": 2604.5440435409546, + "p95": 2621.567964553833, + "p99": 2864.416003227234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b1f8e56c", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b300_e3448cc0", + "comparisonKey": "95d22967ea7a1911", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:08.639404+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 
0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.58399766683578, + "p90": 100.70399940013885, + "p95": 101.95200145244598, + "p99": 109.18399691581726 + }, + "combine": { + "p50": 116.19199812412262, + "p90": 117.5680011510849, + "p95": 118.46400052309036, + "p99": 127.42400169372559 + }, + "roundtrip": { + "p50": 197.1520036458969, + "p90": 204.70400154590607, + "p95": 210.24000644683838, + "p99": 225.3119945526123 + }, + "isolatedSum": { + "p50": 211.7759957909584, + "p90": 218.27200055122375, + "p95": 220.41600197553635, + "p99": 236.60799860954285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.63200426101685, + "p90": 140.25600254535675, + "p95": 141.37600362300873, + "p99": 150.751993060112 + }, + "combine": { + "p50": 155.16799688339233, + "p90": 164.73600268363953, + "p95": 
165.43999314308167, + "p99": 177.5359958410263 + }, + "roundtrip": { + "p50": 273.8879919052124, + "p90": 280.0000011920929, + "p95": 283.26401114463806, + "p99": 295.52000761032104 + }, + "isolatedSum": { + "p50": 288.8000011444092, + "p90": 304.9920052289963, + "p95": 306.8159967660904, + "p99": 328.2879889011383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 195.0719952583313, + "p90": 202.72000133991241, + "p95": 206.36799931526184, + "p99": 220.15999257564545 + }, + "combine": { + "p50": 263.8719975948334, + "p90": 266.6560113430023, + "p95": 269.0559923648834, + "p99": 277.44001150131226 + }, + "roundtrip": { + "p50": 445.1200067996979, + "p90": 450.78399777412415, + "p95": 452.5440037250519, + "p99": 467.3280119895935 + }, + "isolatedSum": { + "p50": 458.9439928531647, + "p90": 469.37601268291473, + "p95": 475.42399168014526, + "p99": 497.6000040769577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 326.7199993133545, + "p90": 331.5519988536835, + "p95": 340.4799997806549, + "p99": 382.56001472473145 + }, + "combine": { + "p50": 461.40798926353455, + "p90": 471.52000665664673, + "p95": 472.1919894218445, + "p99": 483.7760031223297 + }, + "roundtrip": { + "p50": 766.431987285614, + "p90": 774.1760015487671, + "p95": 776.6720056533813, + "p99": 789.3120050430298 + }, + "isolatedSum": { + "p50": 788.127988576889, + "p90": 803.0720055103302, + "p95": 812.6719892024994, + "p99": 866.3360178470612 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 567.520022392273, + "p90": 573.0239748954773, + "p95": 576.6720175743103, + "p99": 638.5279893875122 + }, + "combine": { + "p50": 815.7439827919006, + "p90": 819.8400139808655, + "p95": 828.0959725379944, + "p99": 840.6080007553101 + }, + "roundtrip": { + "p50": 1363.1680011749268, + "p90": 1373.792052268982, + "p95": 1380.128026008606, + "p99": 1463.1999731063843 + }, + "isolatedSum": { + "p50": 1383.2640051841736, + "p90": 1392.8639888763428, + "p95": 1404.7679901123047, + "p99": 1479.1359901428223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1064.7679567337036, + "p90": 1072.383999824524, + "p95": 1079.6799659729004, + "p99": 1124.2560148239136 + }, + "combine": { + "p50": 1517.4720287322998, + "p90": 1528.9280414581299, + "p95": 1532.3200225830078, + "p99": 1593.3120250701904 + }, + "roundtrip": { + "p50": 2571.3601112365723, + "p90": 2581.2160968780518, + "p95": 2588.927984237671, + "p99": 2701.6639709472656 + }, + "isolatedSum": { + "p50": 2582.2399854660034, + "p90": 2601.312041282654, + "p95": 2611.999988555908, + "p99": 2717.568039894104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16870303", + "identity": 
"b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_fa8ff6dd", + "comparisonKey": "d01e84fa9710eb81", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:31.595196+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + 
}, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.27199643850327, + "p90": 105.31199723482132, + "p95": 107.00800269842148, + "p99": 116.99199676513672 + }, + "combine": { + "p50": 126.68800354003906, + "p90": 128.57599556446075, + "p95": 128.76799702644348, + "p99": 131.99999928474426 + }, + "roundtrip": { + "p50": 210.87999641895294, + "p90": 216.09599888324738, + "p95": 217.47200191020966, + "p99": 228.83200645446777 + }, + "isolatedSum": { + "p50": 228.95999997854233, + "p90": 233.88799279928207, + "p95": 235.77599972486496, + "p99": 248.99199604988098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.5040032863617, + "p90": 139.80799913406372, + "p95": 141.4400041103363, + "p99": 153.4080058336258 + }, + "combine": { + "p50": 177.63200402259827, + "p90": 179.71199750900269, + "p95": 181.37599527835846, + "p99": 201.6959935426712 + }, + "roundtrip": { + "p50": 296.79998755455017, + "p90": 304.25599217414856, + "p95": 310.4639947414398, + "p99": 344.38401460647583 + }, + "isolatedSum": { + "p50": 311.13600730895996, + "p90": 319.5199966430664, + "p95": 322.81599938869476, + "p99": 355.103999376297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.8960030078888, + "p90": 212.99199759960175, + "p95": 214.52799439430237, + "p99": 233.18399488925934 + }, + "combine": { + "p50": 325.28001070022583, + "p90": 328.2560110092163, + "p95": 336.38399839401245, + "p99": 338.3359909057617 
+ }, + "roundtrip": { + "p50": 508.8000297546387, + "p90": 515.2000188827515, + "p95": 518.6240077018738, + "p99": 533.7920188903809 + }, + "isolatedSum": { + "p50": 530.1760137081146, + "p90": 541.248008608818, + "p95": 550.9119927883148, + "p99": 571.5199857950211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 344.92799639701843, + "p90": 351.80801153182983, + "p95": 353.5360097885132, + "p99": 392.8639888763428 + }, + "combine": { + "p50": 584.7039818763733, + "p90": 594.9119925498962, + "p95": 595.4560041427612, + "p99": 620.2560067176819 + }, + "roundtrip": { + "p50": 909.600019454956, + "p90": 916.4800047874451, + "p95": 920.1599955558777, + "p99": 970.3360199928284 + }, + "isolatedSum": { + "p50": 929.6319782733917, + "p90": 946.7200040817261, + "p95": 948.9920139312744, + "p99": 1013.1199955940247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 645.8240151405334, + "p90": 656.000018119812, + "p95": 661.2480282783508, + "p99": 696.287989616394 + }, + "combine": { + "p50": 1066.1439895629883, + "p90": 1075.32799243927, + "p95": 1087.0720148086548, + "p99": 1185.7600212097168 + }, + "roundtrip": { + "p50": 1699.552059173584, + "p90": 1708.8639736175537, + "p95": 1716.8320417404175, + "p99": 1894.4319486618042 + }, + "isolatedSum": { + "p50": 1711.9680047035217, + "p90": 1731.328010559082, + "p95": 1748.3200430870056, + "p99": 1882.0480108261108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, 
+ "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1261.504054069519, + "p90": 1271.1679935455322, + "p95": 1274.6880054473877, + "p99": 1287.1999740600586 + }, + "combine": { + "p50": 2045.6960201263428, + "p90": 2055.488109588623, + "p95": 2057.6319694519043, + "p99": 2156.1601161956787 + }, + "roundtrip": { + "p50": 3295.8080768585205, + "p90": 3307.584047317505, + "p95": 3314.9120807647705, + "p99": 3393.02396774292 + }, + "isolatedSum": { + "p50": 3307.200074195862, + "p90": 3326.6561031341553, + "p95": 3332.319974899292, + "p99": 3443.3600902557373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c8fe197d", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b300_63dcd15f", + "comparisonKey": "17af197dccef8f68", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:44.004681+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + 
"unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.63199770450592, + "p90": 95.45599669218063, + "p95": 96.6079980134964, + "p99": 104.60799932479858 + }, + "combine": { + "p50": 116.83200299739838, + "p90": 119.77600306272507, + "p95": 127.3919939994812, + "p99": 128.67200374603271 + }, + "roundtrip": { + "p50": 195.68000733852386, + "p90": 199.8720020055771, + "p95": 203.23200523853302, + "p99": 213.6639952659607 + }, + "isolatedSum": { + "p50": 210.4640007019043, + "p90": 215.2319997549057, + "p95": 223.9999920129776, + "p99": 233.2800030708313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + 
"dispatch": { + "p50": 128.35200130939484, + "p90": 131.74399733543396, + "p95": 133.59999656677246, + "p99": 143.00799369812012 + }, + "combine": { + "p50": 167.87199676036835, + "p90": 177.279993891716, + "p95": 177.69600450992584, + "p99": 188.83199989795685 + }, + "roundtrip": { + "p50": 283.4239900112152, + "p90": 289.18400406837463, + "p95": 290.68800806999207, + "p99": 302.7839958667755 + }, + "isolatedSum": { + "p50": 296.2239980697632, + "p90": 309.02399122714996, + "p95": 311.2960010766983, + "p99": 331.83999359607697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 186.3040030002594, + "p90": 189.85599279403687, + "p95": 191.13600254058838, + "p99": 202.81599462032318 + }, + "combine": { + "p50": 305.5360019207001, + "p90": 312.9279911518097, + "p95": 314.2400085926056, + "p99": 337.5680148601532 + }, + "roundtrip": { + "p50": 480.6399941444397, + "p90": 486.55998706817627, + "p95": 487.93599009513855, + "p99": 497.44001030921936 + }, + "isolatedSum": { + "p50": 491.8400049209595, + "p90": 502.78398394584656, + "p95": 505.37601113319397, + "p99": 540.3840094804764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 311.2959861755371, + "p90": 320.41600346565247, + "p95": 329.27998900413513, + "p99": 335.87199449539185 + }, + "combine": { + "p50": 583.4239721298218, + "p90": 586.9439840316772, + "p95": 594.9119925498962, + "p99": 611.3920211791992 + }, + "roundtrip": { + "p50": 888.3200287818909, + "p90": 893.6319947242737, 
+ "p95": 898.3359932899475, + "p99": 971.3600277900696 + }, + "isolatedSum": { + "p50": 894.7199583053589, + "p90": 907.3599874973297, + "p95": 924.1919815540314, + "p99": 947.2640156745911 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 574.400007724762, + "p90": 586.4319801330566, + "p95": 602.1119952201843, + "p99": 632.9600214958191 + }, + "combine": { + "p50": 1101.3760566711426, + "p90": 1111.680030822754, + "p95": 1125.3759860992432, + "p99": 1676.2559413909912 + }, + "roundtrip": { + "p50": 1616.8639659881592, + "p90": 1628.5120248794556, + "p95": 1640.9920454025269, + "p99": 2025.1519680023193 + }, + "isolatedSum": { + "p50": 1675.7760643959045, + "p90": 1698.1120109558105, + "p95": 1727.4879813194275, + "p99": 2309.2159628868103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1123.6159801483154, + "p90": 1136.8319988250732, + "p95": 1143.455982208252, + "p99": 1192.031979560852 + }, + "combine": { + "p50": 2070.2719688415527, + "p90": 2082.0159912109375, + "p95": 2106.1758995056152, + "p99": 2265.0880813598633 + }, + "roundtrip": { + "p50": 3137.727975845337, + "p90": 3150.2718925476074, + "p95": 3158.6239337921143, + "p99": 3240.031957626343 + }, + "isolatedSum": { + "p50": 3193.887948989868, + "p90": 3218.8479900360107, + "p95": 3249.631881713867, + "p99": 3457.1200609207153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 
1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8aaee1d8", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b300_aaea337f", + "comparisonKey": "9248ac03e59d06c6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:31:27.770864+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.19200229644775, + "p90": 98.84800016880035, + "p95": 102.75200009346008, + "p99": 113.24799805879593 + }, + "combine": { + "p50": 116.80000275373459, + "p90": 117.72800236940384, + "p95": 118.6240017414093, + "p99": 121.50400131940842 + }, + "roundtrip": { + "p50": 198.30399751663208, + "p90": 203.71200144290924, + "p95": 204.96000349521637, + "p99": 226.4000028371811 + }, + "isolatedSum": { + "p50": 212.99200505018234, + "p90": 216.5760025382042, + "p95": 221.37600183486938, + "p99": 234.75199937820435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 138.87999951839447, + "p90": 141.4400041103363, + "p95": 142.43200421333313, + "p99": 149.50400590896606 + }, + "combine": { + "p50": 156.47999942302704, + "p90": 165.3759926557541, + "p95": 166.36799275875092, + "p99": 177.5680035352707 + }, + "roundtrip": { + "p50": 274.9119997024536, + "p90": 280.92798590660095, + "p95": 283.7119996547699, + "p99": 300.83200335502625 + }, + "isolatedSum": { + "p50": 295.3599989414215, + "p90": 306.8159967660904, + "p95": 308.79999697208405, + "p99": 327.07200944423676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { 
+ "p50": 194.59199905395508, + "p90": 200.6399929523468, + "p95": 202.27199792861938, + "p99": 213.21600675582886 + }, + "combine": { + "p50": 275.39199590682983, + "p90": 277.69601345062256, + "p95": 279.90400791168213, + "p99": 301.7280101776123 + }, + "roundtrip": { + "p50": 445.76001167297363, + "p90": 451.7439901828766, + "p95": 456.0000002384186, + "p99": 469.85599398612976 + }, + "isolatedSum": { + "p50": 469.9839949607849, + "p90": 478.33600640296936, + "p95": 482.1760058403015, + "p99": 514.9440169334412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 327.87200808525085, + "p90": 330.6879997253418, + "p95": 331.58400654792786, + "p99": 345.0239896774292 + }, + "combine": { + "p50": 471.42401337623596, + "p90": 473.1839895248413, + "p95": 475.23200511932373, + "p99": 484.99199748039246 + }, + "roundtrip": { + "p50": 779.4560194015503, + "p90": 787.7439856529236, + "p95": 790.9119725227356, + "p99": 810.1760149002075 + }, + "isolatedSum": { + "p50": 799.2960214614868, + "p90": 803.8719892501831, + "p95": 806.8160116672516, + "p99": 830.0159871578217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 582.5920104980469, + "p90": 587.2319936752319, + "p95": 594.5280194282532, + "p99": 657.8879952430725 + }, + "combine": { + "p50": 840.1600122451782, + "p90": 842.9120182991028, + "p95": 852.2239923477173, + "p99": 916.9279932975769 + }, + "roundtrip": { + "p50": 1399.2960453033447, + "p90": 1408.7680578231812, + "p95": 
1415.168046951294, + "p99": 1601.4080047607422 + }, + "isolatedSum": { + "p50": 1422.752022743225, + "p90": 1430.1440119743347, + "p95": 1446.7520117759705, + "p99": 1574.8159885406494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1089.5999670028687, + "p90": 1095.0080156326294, + "p95": 1102.1440029144287, + "p99": 1124.832034111023 + }, + "combine": { + "p50": 1578.0800580978394, + "p90": 1589.5999670028687, + "p95": 1590.5280113220215, + "p99": 1627.4240016937256 + }, + "roundtrip": { + "p50": 2648.8959789276123, + "p90": 2659.8401069641113, + "p95": 2665.7919883728027, + "p99": 2722.2719192504883 + }, + "isolatedSum": { + "p50": 2667.680025100708, + "p90": 2684.607982635498, + "p95": 2692.67201423645, + "p99": 2752.2560358047485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7d72e377", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b300_2784b9e2", + "comparisonKey": "1647488540344927", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:12.003590+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.99200224876404, + "p90": 106.88000172376633, + "p95": 109.0560033917427, + "p99": 114.656001329422 + }, + "combine": { + "p50": 128.28800082206726, + "p90": 129.60000336170197, + "p95": 130.0799995660782, + "p99": 133.91999900341034 + }, + "roundtrip": { + "p50": 209.34399962425232, + "p90": 214.91199731826782, + "p95": 217.21599996089935, + "p99": 237.88799345493317 + }, + "isolatedSum": { + "p50": 233.2800030708313, + "p90": 236.4800050854683, + "p95": 239.1360029578209, + "p99": 248.57600033283234 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 142.43200421333313, + "p90": 147.61599898338318, + "p95": 150.2079963684082, + "p99": 162.36799955368042 + }, + "combine": { + "p50": 178.0479997396469, + "p90": 181.11999332904816, + "p95": 188.63999843597412, + "p99": 192.60799884796143 + }, + "roundtrip": { + "p50": 309.05601382255554, + "p90": 314.8159980773926, + "p95": 317.21600890159607, + "p99": 359.96800661087036 + }, + "isolatedSum": { + "p50": 320.48000395298004, + "p90": 328.73599231243134, + "p95": 338.8479948043823, + "p99": 354.97599840164185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 216.06400609016418, + "p90": 224.0000069141388, + "p95": 225.66400468349457, + "p99": 233.8559925556183 + }, + "combine": { + "p50": 336.2239897251129, + "p90": 337.8239870071411, + "p95": 339.61600065231323, + "p99": 423.8080084323883 + }, + "roundtrip": { + "p50": 523.0399966239929, + "p90": 530.1759839057922, + "p95": 533.5680246353149, + "p99": 547.1680164337158 + }, + "isolatedSum": { + "p50": 552.2879958152771, + "p90": 561.8239939212799, + "p95": 565.2800053358078, + "p99": 657.6640009880066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 357.2480082511902, + "p90": 365.63199758529663, + 
"p95": 367.39200353622437, + "p99": 375.328004360199 + }, + "combine": { + "p50": 571.7120170593262, + "p90": 582.1120142936707, + "p95": 583.0720067024231, + "p99": 594.6559906005859 + }, + "roundtrip": { + "p50": 923.583984375, + "p90": 932.3520064353943, + "p95": 935.0079894065857, + "p99": 1081.2480449676514 + }, + "isolatedSum": { + "p50": 928.9600253105164, + "p90": 947.7440118789673, + "p95": 950.4640102386475, + "p99": 969.9839949607849 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 662.4320149421692, + "p90": 669.7919964790344, + "p95": 671.6160178184509, + "p99": 697.4719762802124 + }, + "combine": { + "p50": 1050.75204372406, + "p90": 1062.1440410614014, + "p95": 1077.6959657669067, + "p99": 1295.199990272522 + }, + "roundtrip": { + "p50": 1693.5360431671143, + "p90": 1703.07195186615, + "p95": 1707.200050354004, + "p99": 2120.4159259796143 + }, + "isolatedSum": { + "p50": 1713.1840586662292, + "p90": 1731.9360375404358, + "p95": 1749.3119835853577, + "p99": 1992.6719665527344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1285.3120565414429, + "p90": 1296.3839769363403, + "p95": 1301.0879755020142, + "p99": 1343.2639837265015 + }, + "combine": { + "p50": 2020.0960636138916, + "p90": 2024.224042892456, + "p95": 2032.5119495391846, + "p99": 2100.224018096924 + }, + "roundtrip": { + "p50": 3293.7920093536377, + "p90": 3306.976079940796, + "p95": 3317.6639080047607, + "p99": 3360.383987426758 + }, + 
"isolatedSum": { + "p50": 3305.4081201553345, + "p90": 3320.6080198287964, + "p95": 3333.5999250411987, + "p99": 3443.4880018234253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d76c9a3d", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b300_cd6abfac", + "comparisonKey": "aba5826c9c5535ad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:51.318537+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.93600034713745, + "p90": 100.99200159311295, + "p95": 102.4319976568222, + "p99": 120.35199999809265 + }, + "combine": { + "p50": 116.57600104808807, + "p90": 118.9119964838028, + "p95": 120.12799829244614, + "p99": 129.60000336170197 + }, + "roundtrip": { + "p50": 197.28000462055206, + "p90": 201.24800503253937, + "p95": 202.62399315834045, + "p99": 215.2319997549057 + }, + "isolatedSum": { + "p50": 212.51200139522552, + "p90": 219.90399807691574, + "p95": 222.55999594926834, + "p99": 249.95200335979462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.15199315547943, + "p90": 139.8400068283081, + "p95": 141.02399349212646, + "p99": 148.8640010356903 + }, + "combine": { + "p50": 155.45600652694702, + "p90": 164.8000031709671, + "p95": 165.3439998626709, + "p99": 183.3920031785965 + }, + "roundtrip": { + "p50": 275.519996881485, + "p90": 281.8880081176758, + "p95": 283.80799293518066, + "p99": 292.959988117218 + }, + "isolatedSum": { + "p50": 288.60799968242645, + "p90": 304.6400099992752, + "p95": 306.36799335479736, + "p99": 332.2560042142868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.33599710464478, + "p90": 198.68800044059753, + "p95": 200.22399723529816, + "p99": 207.2959989309311 + }, + "combine": { + "p50": 265.6320035457611, + "p90": 275.2000093460083, + "p95": 275.58401226997375, + "p99": 290.367990732193 + }, + "roundtrip": { + "p50": 439.7119879722595, + "p90": 446.5920031070709, + "p95": 450.81600546836853, + "p99": 475.0080108642578 + }, + "isolatedSum": { + "p50": 459.9680006504059, + "p90": 473.88800978660583, + "p95": 475.8080095052719, + "p99": 497.6639896631241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 326.911985874176, + "p90": 329.79199290275574, + "p95": 331.2320113182068, + "p99": 352.9280126094818 + }, + "combine": { + "p50": 460.4479968547821, + "p90": 471.6480076313019, + "p95": 472.3840057849884, + "p99": 486.88000440597534 + }, + "roundtrip": { + "p50": 769.1199779510498, + "p90": 777.5999903678894, + "p95": 783.9999794960022, + "p99": 842.3359990119934 + }, + "isolatedSum": { + "p50": 787.3599827289581, + "p90": 801.4400005340576, + "p95": 803.6160171031952, + "p99": 839.8080170154572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 573.4080076217651, + "p90": 582.3040008544922, + "p95": 585.1200222969055, + 
"p99": 675.5840182304382 + }, + "combine": { + "p50": 817.3120021820068, + "p90": 828.6719918251038, + "p95": 830.2720189094543, + "p99": 852.7359962463379 + }, + "roundtrip": { + "p50": 1373.4079599380493, + "p90": 1384.7999572753906, + "p95": 1395.1679468154907, + "p99": 1553.663969039917 + }, + "isolatedSum": { + "p50": 1390.720009803772, + "p90": 1410.975992679596, + "p95": 1415.3920412063599, + "p99": 1528.3200144767761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1073.2159614562988, + "p90": 1079.7760486602783, + "p95": 1086.143970489502, + "p99": 1117.7599430084229 + }, + "combine": { + "p50": 1532.256007194519, + "p90": 1544.4480180740356, + "p95": 1557.4719905853271, + "p99": 1628.0640363693237 + }, + "roundtrip": { + "p50": 2588.0959033966064, + "p90": 2605.2799224853516, + "p95": 2617.3439025878906, + "p99": 2653.599977493286 + }, + "isolatedSum": { + "p50": 2605.471968650818, + "p90": 2624.224066734314, + "p95": 2643.615961074829, + "p99": 2745.8239793777466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb853e19", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_d1101c29", + "comparisonKey": "2d8f969566261aa1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:36.080336+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": 
"prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.71199995279312, + "p90": 107.32799768447876, + "p95": 110.46399921178818, + "p99": 139.42399621009827 + }, + "combine": { + "p50": 127.58399546146393, + "p90": 128.89599800109863, + "p95": 129.60000336170197, + "p99": 142.36800372600555 + }, + "roundtrip": { + "p50": 210.62399446964264, + 
"p90": 216.22399985790253, + "p95": 217.72800385951996, + "p99": 236.76800727844238 + }, + "isolatedSum": { + "p50": 231.29599541425705, + "p90": 236.2239956855774, + "p95": 240.06400257349014, + "p99": 281.7919999361038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.39999520778656, + "p90": 140.4159963130951, + "p95": 142.91200041770935, + "p99": 164.06400501728058 + }, + "combine": { + "p50": 177.69600450992584, + "p90": 179.83999848365784, + "p95": 180.95999956130981, + "p99": 189.82400000095367 + }, + "roundtrip": { + "p50": 295.9359884262085, + "p90": 301.4400005340576, + "p95": 303.5840094089508, + "p99": 316.895991563797 + }, + "isolatedSum": { + "p50": 312.0959997177124, + "p90": 320.25599479675293, + "p95": 323.87199997901917, + "p99": 353.88800501823425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.96000349521637, + "p90": 212.38400042057037, + "p95": 214.4639939069748, + "p99": 256.00001215934753 + }, + "combine": { + "p50": 325.3760039806366, + "p90": 336.16000413894653, + "p95": 337.0560109615326, + "p99": 374.59200620651245 + }, + "roundtrip": { + "p50": 509.11998748779297, + "p90": 517.1840190887451, + "p95": 521.5680003166199, + "p99": 561.7920160293579 + }, + "isolatedSum": { + "p50": 530.336007475853, + "p90": 548.5440045595169, + "p95": 551.5200048685074, + "p99": 630.59201836586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + 
"fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 345.0559973716736, + "p90": 352.7680039405823, + "p95": 354.43198680877686, + "p99": 390.0800049304962 + }, + "combine": { + "p50": 584.8640203475952, + "p90": 594.8799848556519, + "p95": 595.6799983978271, + "p99": 620.2239990234375 + }, + "roundtrip": { + "p50": 909.9519848823547, + "p90": 918.175995349884, + "p95": 923.8399863243103, + "p99": 987.392008304596 + }, + "isolatedSum": { + "p50": 929.9200177192688, + "p90": 947.6479887962341, + "p95": 950.111985206604, + "p99": 1010.3040039539337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 645.6000208854675, + "p90": 655.9039950370789, + "p95": 658.1119894981384, + "p99": 733.3440184593201 + }, + "combine": { + "p50": 1065.7600164413452, + "p90": 1074.720025062561, + "p95": 1077.023983001709, + "p99": 1161.2160205841064 + }, + "roundtrip": { + "p50": 1699.0079879760742, + "p90": 1706.7519426345825, + "p95": 1713.2799625396729, + "p99": 1818.4319734573364 + }, + "isolatedSum": { + "p50": 1711.3600373268127, + "p90": 1730.62402009964, + "p95": 1735.1359724998474, + "p99": 1894.5600390434265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1261.407971382141, + "p90": 1271.7119455337524, + "p95": 1275.8400440216064, + "p99": 1292.5759553909302 + }, + 
"combine": { + "p50": 2045.6960201263428, + "p90": 2057.1200847625732, + "p95": 2070.2080726623535, + "p99": 2242.9120540618896 + }, + "roundtrip": { + "p50": 3295.6480979919434, + "p90": 3310.7199668884277, + "p95": 3324.7361183166504, + "p99": 3618.016004562378 + }, + "isolatedSum": { + "p50": 3307.103991508484, + "p90": 3328.8320302963257, + "p95": 3346.04811668396, + "p99": 3535.48800945282 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-88b36201", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_22b1cbe5", + "comparisonKey": "5b2dcd7ec1667734", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:16.452126+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.16000205278397, + "p90": 139.615997672081, + "p95": 143.00799369812012, + "p99": 148.5760062932968 + }, + "combine": { + "p50": 118.17599833011627, + "p90": 157.151997089386, + "p95": 164.48000073432922, + "p99": 165.6000018119812 + }, + "roundtrip": { + "p50": 199.77599382400513, + "p90": 237.69600689411163, + "p95": 242.2720044851303, + "p99": 248.9279955625534 + }, + "isolatedSum": { + "p50": 214.33600038290024, + "p90": 296.767994761467, + "p95": 307.48799443244934, + "p99": 314.176008105278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.3040019273758, + "p90": 151.5520066022873, + "p95": 161.56800091266632, + "p99": 182.75199830532074 + }, + "combine": { + "p50": 155.74400126934052, + "p90": 164.63999450206757, + "p95": 165.0560051202774, + "p99": 186.43200397491455 + }, + "roundtrip": { + "p50": 275.61599016189575, + "p90": 
299.9039888381958, + "p95": 310.91201305389404, + "p99": 320.70401310920715 + }, + "isolatedSum": { + "p50": 290.0480031967163, + "p90": 316.19200110435486, + "p95": 326.6240060329437, + "p99": 369.1840022802353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.87999379634857, + "p90": 234.6239984035492, + "p95": 237.56800591945648, + "p99": 247.3279982805252 + }, + "combine": { + "p50": 274.9119997024536, + "p90": 312.5759959220886, + "p95": 315.45600295066833, + "p99": 336.2239897251129 + }, + "roundtrip": { + "p50": 439.9360120296478, + "p90": 475.96800327301025, + "p95": 480.9280037879944, + "p99": 492.92799830436707 + }, + "isolatedSum": { + "p50": 469.7919934988022, + "p90": 547.1999943256378, + "p95": 553.0240088701248, + "p99": 583.5519880056381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 331.2320113182068, + "p90": 363.77599835395813, + "p95": 369.05598640441895, + "p99": 377.1519958972931 + }, + "combine": { + "p50": 461.4720046520233, + "p90": 496.41600251197815, + "p95": 497.98399209976196, + "p99": 520.7039713859558 + }, + "roundtrip": { + "p50": 766.4639949798584, + "p90": 796.8000173568726, + "p95": 799.6479868888855, + "p99": 815.6160116195679 + }, + "isolatedSum": { + "p50": 792.7040159702301, + "p90": 860.1920008659363, + "p95": 867.0399785041809, + "p99": 897.8559672832489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 
5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 582.912027835846, + "p90": 606.7519783973694, + "p95": 613.6000156402588, + "p99": 627.6479959487915 + }, + "combine": { + "p50": 839.7759795188904, + "p90": 853.5040020942688, + "p95": 864.9920225143433, + "p99": 877.3120045661926 + }, + "roundtrip": { + "p50": 1396.4799642562866, + "p90": 1417.7279472351074, + "p95": 1422.4319458007812, + "p99": 1447.3919868469238 + }, + "isolatedSum": { + "p50": 1422.6880073547363, + "p90": 1460.2559804916382, + "p95": 1478.592038154602, + "p99": 1504.9600005149841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1065.951943397522, + "p90": 1102.0480394363403, + "p95": 1109.4720363616943, + "p99": 1122.9759454727173 + }, + "combine": { + "p50": 1541.4719581604004, + "p90": 1565.0240182876587, + "p95": 1576.3200521469116, + "p99": 1615.1360273361206 + }, + "roundtrip": { + "p50": 2592.672109603882, + "p90": 2620.800018310547, + "p95": 2630.4640769958496, + "p99": 2938.4639263153076 + }, + "isolatedSum": { + "p50": 2607.4239015579224, + "p90": 2667.072057723999, + "p95": 2685.792088508606, + "p99": 2738.111972808838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1748e2d", + "identity": "b300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": 
"b300_65aea461", + "comparisonKey": "05f40bf57fbaa5b8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:27.229272+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + 
"p50": 96.00000083446503, + "p90": 100.76799988746643, + "p95": 102.33599692583084, + "p99": 107.39199817180634 + }, + "combine": { + "p50": 116.70400202274323, + "p90": 117.98399686813354, + "p95": 119.48800086975098, + "p99": 132.4480026960373 + }, + "roundtrip": { + "p50": 199.42399859428406, + "p90": 202.94399559497833, + "p95": 204.28800582885742, + "p99": 209.6319943666458 + }, + "isolatedSum": { + "p50": 212.70400285720825, + "p90": 218.75199675559998, + "p95": 221.82399779558182, + "p99": 239.84000086784363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 135.0719928741455, + "p90": 140.32000303268433, + "p95": 141.184002161026, + "p99": 152.5759994983673 + }, + "combine": { + "p50": 156.00000321865082, + "p90": 164.76799547672272, + "p95": 165.3759926557541, + "p99": 176.41599476337433 + }, + "roundtrip": { + "p50": 275.07200837135315, + "p90": 280.3199887275696, + "p95": 282.20799565315247, + "p99": 297.85600304603577 + }, + "isolatedSum": { + "p50": 291.0719960927963, + "p90": 305.08799850940704, + "p95": 306.5599948167801, + "p99": 328.99199426174164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.33599710464478, + "p90": 200.03199577331543, + "p95": 202.2079974412918, + "p99": 212.76800334453583 + }, + "combine": { + "p50": 267.4880027770996, + "p90": 276.16000175476074, + "p95": 276.70401334762573, + "p99": 279.61599826812744 + }, + "roundtrip": { + "p50": 436.15999817848206, + "p90": 443.36000084877014, + "p95": 
448.2879936695099, + "p99": 460.1599872112274 + }, + "isolatedSum": { + "p50": 461.8239998817444, + "p90": 476.1919975280762, + "p95": 478.91201078891754, + "p99": 492.38400161266327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 322.6880133152008, + "p90": 330.3680121898651, + "p95": 331.743985414505, + "p99": 347.00798988342285 + }, + "combine": { + "p50": 459.9359929561615, + "p90": 463.3280038833618, + "p95": 471.48799896240234, + "p99": 484.3840003013611 + }, + "roundtrip": { + "p50": 761.6959810256958, + "p90": 767.7440047264099, + "p95": 772.0320224761963, + "p99": 791.0720109939575 + }, + "isolatedSum": { + "p50": 782.6240062713623, + "p90": 793.6960160732269, + "p95": 803.2319843769073, + "p99": 831.3919901847839 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 574.8159885406494, + "p90": 583.3920240402222, + "p95": 584.8320126533508, + "p99": 602.8800010681152 + }, + "combine": { + "p50": 832.7040076255798, + "p90": 841.69602394104, + "p95": 842.1440124511719, + "p99": 879.7439932823181 + }, + "roundtrip": { + "p50": 1393.1200504302979, + "p90": 1399.392008781433, + "p95": 1404.4159650802612, + "p99": 1462.9759788513184 + }, + "isolatedSum": { + "p50": 1407.5199961662292, + "p90": 1425.0880479812622, + "p95": 1426.9760251045227, + "p99": 1482.6239943504333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + 
"recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1065.0240182876587, + "p90": 1072.1280574798584, + "p95": 1077.855944633484, + "p99": 1118.2719469070435 + }, + "combine": { + "p50": 1541.0239696502686, + "p90": 1544.6079969406128, + "p95": 1552.8960227966309, + "p99": 1564.4160509109497 + }, + "roundtrip": { + "p50": 2590.7199382781982, + "p90": 2600.2559661865234, + "p95": 2605.4399013519287, + "p99": 2641.0560607910156 + }, + "isolatedSum": { + "p50": 2606.0479879379272, + "p90": 2616.736054420471, + "p95": 2630.7519674301147, + "p99": 2682.687997817993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-28219fa1", + "identity": "b300|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_6de9f46e", + "comparisonKey": "e670b46ea928a655", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:25.128303+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.19200098514557, + "p90": 93.34400296211243, + "p95": 95.93600034713745, + "p99": 131.74399733543396 + }, + "combine": { + "p50": 116.2559986114502, + "p90": 118.07999759912491, + "p95": 120.92799693346024, + "p99": 190.23999571800232 + }, + "roundtrip": { + "p50": 190.2720034122467, + "p90": 194.68800723552704, + "p95": 197.34400510787964, + "p99": 252.79998779296875 + }, + "isolatedSum": { + "p50": 204.44799959659576, + "p90": 211.42400056123734, + "p95": 216.8639972805977, + "p99": 321.9839930534363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 130.2720010280609, + "p90": 133.63200426101685, + 
"p95": 135.29600203037262, + "p99": 147.039994597435 + }, + "combine": { + "p50": 163.80800306797028, + "p90": 166.46400094032288, + "p95": 169.76000368595123, + "p99": 189.88800048828125 + }, + "roundtrip": { + "p50": 268.22400093078613, + "p90": 276.67200565338135, + "p95": 284.9920094013214, + "p99": 307.42400884628296 + }, + "isolatedSum": { + "p50": 294.0800040960312, + "p90": 300.0960052013397, + "p95": 305.05600571632385, + "p99": 336.92799508571625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 189.18399512767792, + "p90": 201.6959935426712, + "p95": 215.42400121688843, + "p99": 282.5919985771179 + }, + "combine": { + "p50": 267.93599128723145, + "p90": 276.5119969844818, + "p95": 278.75199913978577, + "p99": 336.9919955730438 + }, + "roundtrip": { + "p50": 435.13599038124084, + "p90": 441.9200122356415, + "p95": 445.50400972366333, + "p99": 472.54401445388794 + }, + "isolatedSum": { + "p50": 457.11998641490936, + "p90": 478.207990527153, + "p95": 494.1760003566742, + "p99": 619.5839941501617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 317.6960051059723, + "p90": 324.73599910736084, + "p95": 331.13598823547363, + "p99": 402.8480052947998 + }, + "combine": { + "p50": 460.06399393081665, + "p90": 470.71999311447144, + "p95": 472.00000286102295, + "p99": 484.44798588752747 + }, + "roundtrip": { + "p50": 754.8480033874512, + "p90": 763.9999985694885, + "p95": 772.3519802093506, + "p99": 840.3840065002441 + }, + 
"isolatedSum": { + "p50": 777.7599990367889, + "p90": 795.4559922218323, + "p95": 803.1359910964966, + "p99": 887.2959911823273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 561.3120198249817, + "p90": 571.3919997215271, + "p95": 587.1359705924988, + "p99": 699.3600130081177 + }, + "combine": { + "p50": 818.4639811515808, + "p90": 829.5680284500122, + "p95": 840.8640027046204, + "p99": 913.9519929885864 + }, + "roundtrip": { + "p50": 1361.0880374908447, + "p90": 1370.2399730682373, + "p95": 1379.5839548110962, + "p99": 1455.2960395812988 + }, + "isolatedSum": { + "p50": 1379.7760009765625, + "p90": 1400.9600281715393, + "p95": 1427.9999732971191, + "p99": 1613.312005996704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1039.9359464645386, + "p90": 1046.9119548797607, + "p95": 1057.088017463684, + "p99": 1261.3120079040527 + }, + "combine": { + "p50": 1531.1360359191895, + "p90": 1542.5599813461304, + "p95": 1553.2159805297852, + "p99": 1678.2079935073853 + }, + "roundtrip": { + "p50": 2555.039882659912, + "p90": 2571.5839862823486, + "p95": 2609.2159748077393, + "p99": 2777.951955795288 + }, + "isolatedSum": { + "p50": 2571.071982383728, + "p90": 2589.471936225891, + "p95": 2610.3039979934692, + "p99": 2939.520001411438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e8e8137", + "identity": "b300|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "f9ee2edbc3202be2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:58.680017+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + 
"createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 147.20000326633453, + "p90": 151.64799988269806, + "p95": 153.6960005760193, + "p99": 169.95200514793396 + }, + "combine": { + "p50": 91.67999774217606, + "p90": 94.43199634552002, + "p95": 95.13600170612335, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 231.07199370861053, + "p90": 235.48799753189087, + "p95": 237.92000114917755, + "p99": 254.59200143814087 + }, + "isolatedSum": { + "p50": 238.8800010085106, + "p90": 246.07999622821808, + "p95": 248.83200228214264, + "p99": 271.1680084466934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 189.98399376869202, + "p90": 194.4960057735443, + "p95": 199.39200580120087, + "p99": 255.16799092292786 + }, + "combine": { + "p50": 127.23200023174286, + "p90": 130.3039938211441, + "p95": 131.84000551700592, + "p99": 148.22399616241455 + }, + "roundtrip": { + "p50": 308.1279993057251, + "p90": 313.728004693985, + "p95": 317.8560137748718, + "p99": 350.3679931163788 + }, + "isolatedSum": { + "p50": 317.2159940004349, + "p90": 324.7999995946884, + "p95": 331.2320113182068, + "p99": 403.3919870853424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 270.143985748291, + "p90": 275.64799785614014, + "p95": 277.9200077056885, + "p99": 294.3359911441803 + }, + "combine": { + "p50": 191.00800156593323, + "p90": 
195.10400295257568, + "p95": 196.9279944896698, + "p99": 215.03999829292297 + }, + "roundtrip": { + "p50": 448.3200013637543, + "p90": 454.367995262146, + "p95": 457.72799849510193, + "p99": 556.8959712982178 + }, + "isolatedSum": { + "p50": 461.15198731422424, + "p90": 470.7520008087158, + "p95": 474.8480021953583, + "p99": 509.37598943710327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 459.03998613357544, + "p90": 465.05600214004517, + "p95": 467.77600049972534, + "p99": 526.8160104751587 + }, + "combine": { + "p50": 384.89601016044617, + "p90": 393.0880129337311, + "p95": 396.38400077819824, + "p99": 407.8400135040283 + }, + "roundtrip": { + "p50": 842.0799970626831, + "p90": 849.7599959373474, + "p95": 852.512001991272, + "p99": 983.9360117912292 + }, + "isolatedSum": { + "p50": 843.9359962940216, + "p90": 858.1440150737762, + "p95": 864.1600012779236, + "p99": 934.656023979187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 837.8239870071411, + "p90": 848.4159708023071, + "p95": 852.2560000419617, + "p99": 872.54399061203 + }, + "combine": { + "p50": 758.7839961051941, + "p90": 764.5440101623535, + "p95": 766.6559815406799, + "p99": 770.687997341156 + }, + "roundtrip": { + "p50": 1568.0320262908936, + "p90": 1581.279993057251, + "p95": 1588.3519649505615, + "p99": 1716.1279916763306 + }, + "isolatedSum": { + "p50": 1596.6079831123352, + "p90": 1612.9599809646606, + "p95": 1618.9119815826416, + "p99": 
1643.231987953186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1599.4240045547485, + "p90": 1612.1280193328857, + "p95": 1617.1519756317139, + "p99": 1857.9519987106323 + }, + "combine": { + "p50": 1440.7999515533447, + "p90": 1450.2400159835815, + "p95": 1454.591989517212, + "p99": 1547.4239587783813 + }, + "roundtrip": { + "p50": 3014.911890029907, + "p90": 3028.287887573242, + "p95": 3033.247947692871, + "p99": 3141.0560607910156 + }, + "isolatedSum": { + "p50": 3040.2239561080933, + "p90": 3062.3680353164673, + "p95": 3071.743965148926, + "p99": 3405.3759574890137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-87aa4117", + "identity": "b300|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "6b00c198c68a134d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:27.809832+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", 
+ "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.59200739860535, + "p90": 171.10399901866913, + "p95": 178.97599935531616, + "p99": 250.30401349067688 + }, + "combine": { + "p50": 98.4639972448349, + "p90": 100.99200159311295, + "p95": 102.78400033712387, + "p99": 113.82400244474411 + }, + "roundtrip": { + "p50": 244.76799368858337, + "p90": 249.95200335979462, + "p95": 252.9279887676239, + "p99": 287.6800000667572 + }, + "isolatedSum": { + "p50": 253.05600464344025, + "p90": 272.0960006117821, + "p95": 281.75999969244003, + "p99": 364.128015935421 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 201.56799256801605, + "p90": 206.59199357032776, + "p95": 209.85600352287292, + "p99": 229.72799837589264 + }, + "combine": { + "p50": 135.55200397968292, + "p90": 141.82400703430176, + "p95": 145.6640064716339, + "p99": 150.07999539375305 + }, + "roundtrip": { + "p50": 327.58399844169617, + "p90": 336.12799644470215, + "p95": 341.3119912147522, + "p99": 373.27998876571655 + }, + "isolatedSum": { + "p50": 337.119996547699, + "p90": 348.4160006046295, + "p95": 355.52000999450684, + "p99": 379.8079937696457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 306.65600299835205, + "p90": 311.2640082836151, + "p95": 313.56799602508545, + "p99": 338.01600337028503 + }, + "combine": { + "p50": 206.84799551963806, + "p90": 213.18399906158447, + "p95": 216.67200326919556, + "p99": 224.89599883556366 + }, + "roundtrip": { + "p50": 524.4479775428772, + "p90": 533.951997756958, + "p95": 545.7280278205872, + "p99": 597.1199870109558 + }, + "isolatedSum": { + "p50": 513.5039985179901, + "p90": 524.4480073451996, + "p95": 530.239999294281, + "p99": 562.9120022058487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 527.1999835968018, + "p90": 532.9599976539612, + "p95": 535.968005657196, + "p99": 564.8639798164368 + }, + "combine": { + "p50": 434.56000089645386, + "p90": 440.16000628471375, + "p95": 442.4319863319397, + "p99": 455.80801367759705 + }, + "roundtrip": { + 
"p50": 945.1519846916199, + "p90": 954.5919895172119, + "p95": 964.4160270690918, + "p99": 1028.607964515686 + }, + "isolatedSum": { + "p50": 961.7599844932556, + "p90": 973.1200039386749, + "p95": 978.3999919891357, + "p99": 1020.6719934940338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 967.2319889068604, + "p90": 977.5040149688721, + "p95": 985.4720234870911, + "p99": 1027.0400047302246 + }, + "combine": { + "p50": 778.2080173492432, + "p90": 784.6400141716003, + "p95": 787.8080010414124, + "p99": 811.2000226974487 + }, + "roundtrip": { + "p50": 1729.2799949645996, + "p90": 1740.7679557800293, + "p95": 1746.783971786499, + "p99": 1830.4320573806763 + }, + "isolatedSum": { + "p50": 1745.4400062561035, + "p90": 1762.1440291404724, + "p95": 1773.2800245285034, + "p99": 1838.2400274276733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1873.3439445495605, + "p90": 1887.0079517364502, + "p95": 1893.3119773864746, + "p99": 1914.0160083770752 + }, + "combine": { + "p50": 1473.7279415130615, + "p90": 1484.1279983520508, + "p95": 1490.3039932250977, + "p99": 1505.6320428848267 + }, + "roundtrip": { + "p50": 3330.0158977508545, + "p90": 3345.695972442627, + "p95": 3352.1599769592285, + "p99": 3364.3839359283447 + }, + "isolatedSum": { + "p50": 3347.071886062622, + "p90": 3371.135950088501, + "p95": 3383.6159706115723, + "p99": 3419.648051261902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + 
"combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-06144abf", + "identity": "b300|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "52380ab42deb47fc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:52.596832+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 157.75999426841736, + "p90": 162.9440039396286, + "p95": 165.3439998626709, + "p99": 173.2800006866455 + }, + "combine": { + "p50": 103.64799946546555, + "p90": 106.1440035700798, + "p95": 107.07200318574905, + "p99": 141.95199310779572 + }, + "roundtrip": { + "p50": 253.4720003604889, + "p90": 258.04799795150757, + "p95": 260.127991437912, + "p99": 268.8960134983063 + }, + "isolatedSum": { + "p50": 261.4079937338829, + "p90": 269.0880075097084, + "p95": 272.41600304841995, + "p99": 315.2319937944412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 211.39200031757355, + "p90": 215.58399498462677, + "p95": 217.40800142288208, + "p99": 232.2240024805069 + }, + "combine": { + "p50": 142.91200041770935, + "p90": 146.2399959564209, + "p95": 147.67999947071075, + "p99": 171.51999473571777 + }, + "roundtrip": { + "p50": 352.03200578689575, + "p90": 357.56799578666687, + "p95": 360.8640134334564, + "p99": 390.20800590515137 + }, + "isolatedSum": { + "p50": 354.3040007352829, + "p90": 361.82399094104767, + "p95": 365.08800089359283, + "p99": 403.74399721622467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
337.3120129108429, + "p90": 341.91998839378357, + "p95": 344.2240059375763, + "p99": 367.45598912239075 + }, + "combine": { + "p50": 235.87200045585632, + "p90": 245.88799476623535, + "p95": 248.6400008201599, + "p99": 258.36798548698425 + }, + "roundtrip": { + "p50": 578.719973564148, + "p90": 585.9839916229248, + "p95": 588.9599919319153, + "p99": 609.2159748077393 + }, + "isolatedSum": { + "p50": 573.1840133666992, + "p90": 587.8079831600189, + "p95": 592.8640067577362, + "p99": 625.823974609375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 584.2559933662415, + "p90": 589.9519920349121, + "p95": 593.5360193252563, + "p99": 615.6479716300964 + }, + "combine": { + "p50": 438.84798884391785, + "p90": 443.5200095176697, + "p95": 446.1120069026947, + "p99": 459.1360092163086 + }, + "roundtrip": { + "p50": 1007.0719718933105, + "p90": 1015.6160593032837, + "p95": 1022.3040580749512, + "p99": 1051.4559745788574 + }, + "isolatedSum": { + "p50": 1023.1039822101593, + "p90": 1033.4720015525818, + "p95": 1039.648026227951, + "p99": 1074.783980846405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1082.5920104980469, + "p90": 1090.6239748001099, + "p95": 1096.992015838623, + "p99": 1253.216028213501 + }, + "combine": { + "p50": 795.9679961204529, + "p90": 802.944004535675, + "p95": 806.1439990997314, + "p99": 827.135980129242 + }, + "roundtrip": { + "p50": 1862.9120588302612, + "p90": 1873.3439445495605, + "p95": 
1882.5600147247314, + "p99": 1908.352017402649 + }, + "isolatedSum": { + "p50": 1878.5600066184998, + "p90": 1893.567979335785, + "p95": 1903.1360149383545, + "p99": 2080.352008342743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2081.536054611206, + "p90": 2092.639923095703, + "p95": 2099.263906478882, + "p99": 2150.048017501831 + }, + "combine": { + "p50": 1499.519944190979, + "p90": 1509.376049041748, + "p95": 1520.3520059585571, + "p99": 1581.1840295791626 + }, + "roundtrip": { + "p50": 3564.255952835083, + "p90": 3579.5199871063232, + "p95": 3590.9440517425537, + "p99": 3672.1279621124268 + }, + "isolatedSum": { + "p50": 3581.055998802185, + "p90": 3602.015972137451, + "p95": 3619.615912437439, + "p99": 3731.2320470809937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d8b4f61c", + "identity": "b300|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_0fa25a65", + "comparisonKey": "60f7aa8254c9a366", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:50.101384+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + 
"epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 83.77599716186523, + "p90": 86.62399649620056, + "p95": 89.02399986982346, + "p99": 121.95199728012085 + }, + "combine": { + "p50": 110.55999994277954, + "p90": 113.21599781513214, + "p95": 114.27199840545654, + "p99": 121.47200107574463 + }, + "roundtrip": { + "p50": 219.80799734592438, + "p90": 223.07200729846954, + "p95": 225.5679965019226, + "p99": 249.95200335979462 + }, + "isolatedSum": { + "p50": 194.33599710464478, + "p90": 199.8399943113327, + "p95": 203.29599827528, + "p99": 243.42399835586548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + 
"combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 117.24799871444702, + "p90": 120.60800194740295, + "p95": 121.98399752378464, + "p99": 129.56799566745758 + }, + "combine": { + "p50": 156.25600516796112, + "p90": 162.08000481128693, + "p95": 166.81599617004395, + "p99": 190.5599981546402 + }, + "roundtrip": { + "p50": 326.55999064445496, + "p90": 330.6240141391754, + "p95": 332.96000957489014, + "p99": 345.34400701522827 + }, + "isolatedSum": { + "p50": 273.50400388240814, + "p90": 282.6880067586899, + "p95": 288.7999936938286, + "p99": 320.1279938220978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 180.7360053062439, + "p90": 185.47199666500092, + "p95": 188.7039989233017, + "p99": 206.33600652217865 + }, + "combine": { + "p50": 268.8960134983063, + "p90": 273.75999093055725, + "p95": 275.7120132446289, + "p99": 341.0879969596863 + }, + "roundtrip": { + "p50": 554.4319748878479, + "p90": 561.0560178756714, + "p95": 563.3919835090637, + "p99": 578.5279870033264 + }, + "isolatedSum": { + "p50": 449.63201880455017, + "p90": 459.23198759555817, + "p95": 464.4160121679306, + "p99": 547.4240034818649 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 302.2400140762329, + "p90": 307.13599920272827, + "p95": 308.73599648475647, + "p99": 
331.5199911594391 + }, + "combine": { + "p50": 456.54401183128357, + "p90": 462.0479941368103, + "p95": 464.9600088596344, + "p99": 507.80802965164185 + }, + "roundtrip": { + "p50": 977.3439764976501, + "p90": 984.3519926071167, + "p95": 988.1280064582825, + "p99": 1033.1519842147827 + }, + "isolatedSum": { + "p50": 758.7840259075165, + "p90": 769.1839933395386, + "p95": 773.6960053443909, + "p99": 839.3280208110809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 542.3039793968201, + "p90": 548.3520030975342, + "p95": 553.1839728355408, + "p99": 579.8720121383667 + }, + "combine": { + "p50": 818.9120292663574, + "p90": 825.8240222930908, + "p95": 830.2080035209656, + "p99": 856.5120100975037 + }, + "roundtrip": { + "p50": 1820.512056350708, + "p90": 1831.9040536880493, + "p95": 1837.1200561523438, + "p99": 1862.239956855774 + }, + "isolatedSum": { + "p50": 1361.2160086631775, + "p90": 1374.176025390625, + "p95": 1383.3919763565063, + "p99": 1436.3840222358704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1019.8080539703369, + "p90": 1027.5520086288452, + "p95": 1032.960057258606, + "p99": 1082.7200412750244 + }, + "combine": { + "p50": 1528.447985649109, + "p90": 1536.9600057601929, + "p95": 1543.3599948883057, + "p99": 1611.9680404663086 + }, + "roundtrip": { + "p50": 3480.1599979400635, + "p90": 3495.7120418548584, + "p95": 3504.319906234741, + "p99": 3576.927900314331 + }, + "isolatedSum": { + "p50": 
2548.256039619446, + "p90": 2564.512014389038, + "p95": 2576.3200521469116, + "p99": 2694.688081741333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-87627ba8", + "identity": "b300|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "4442e4f9468ed740", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:19.077975+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.59199380874634, + "p90": 169.24799978733063, + "p95": 176.256000995636, + "p99": 222.78399765491486 + }, + "combine": { + "p50": 110.59200018644333, + "p90": 113.15199732780457, + "p95": 114.04799669981003, + "p99": 119.23199892044067 + }, + "roundtrip": { + "p50": 265.9839987754822, + "p90": 276.41600370407104, + "p95": 308.1279993057251, + "p99": 354.3359935283661 + }, + "isolatedSum": { + "p50": 273.18399399518967, + "p90": 282.3999971151352, + "p95": 290.303997695446, + "p99": 342.01599657535553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 220.60799598693848, + "p90": 224.48000311851501, + "p95": 225.8560061454773, + "p99": 239.45599794387817 + }, + "combine": { + "p50": 154.36799824237823, + "p90": 157.79200196266174, + "p95": 159.0079963207245, + "p99": 168.70400309562683 + }, + "roundtrip": { + "p50": 372.76801466941833, + "p90": 377.6960074901581, + "p95": 380.511999130249, + "p99": 402.5599956512451 + }, + "isolatedSum": { + "p50": 374.9759942293167, + "p90": 382.27200508117676, + "p95": 384.8640024662018, + "p99": 408.160001039505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, 
+ "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 361.91999912261963, + "p90": 366.33598804473877, + "p95": 367.90400743484497, + "p99": 388.19199800491333 + }, + "combine": { + "p50": 267.36000180244446, + "p90": 272.67199754714966, + "p95": 274.7519910335541, + "p99": 285.6000065803528 + }, + "roundtrip": { + "p50": 616.5440082550049, + "p90": 623.6159801483154, + "p95": 627.776026725769, + "p99": 713.7920260429382 + }, + "isolatedSum": { + "p50": 629.2800009250641, + "p90": 639.0079855918884, + "p95": 642.655998468399, + "p99": 673.7920045852661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 630.5599808692932, + "p90": 635.8720064163208, + "p95": 640.3840184211731, + "p99": 817.6000118255615 + }, + "combine": { + "p50": 455.80801367759705, + "p90": 461.5679979324341, + "p95": 465.1840031147003, + "p99": 474.3039906024933 + }, + "roundtrip": { + "p50": 1068.735957145691, + "p90": 1075.9999752044678, + "p95": 1082.1119546890259, + "p99": 1104.9599647521973 + }, + "isolatedSum": { + "p50": 1086.3679945468903, + "p90": 1097.4400043487549, + "p95": 1105.5680215358734, + "p99": 1291.9040024280548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1179.744005203247, + "p90": 1187.4879598617554, + "p95": 1193.503975868225, + "p99": 1287.5200510025024 + }, + "combine": { + "p50": 818.3680176734924, + "p90": 
826.528012752533, + "p95": 832.5440287590027, + "p99": 914.1119718551636 + }, + "roundtrip": { + "p50": 1980.3839921951294, + "p90": 1992.8319454193115, + "p95": 1999.1999864578247, + "p99": 2083.359956741333 + }, + "isolatedSum": { + "p50": 1998.1120228767395, + "p90": 2014.0159726142883, + "p95": 2026.0480046272278, + "p99": 2201.632022857666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2265.984058380127, + "p90": 2276.0000228881836, + "p95": 2281.1200618743896, + "p99": 2299.936056137085 + }, + "combine": { + "p50": 1529.919981956482, + "p90": 1538.8799905776978, + "p95": 1546.7840433120728, + "p99": 1597.856044769287 + }, + "roundtrip": { + "p50": 3783.360004425049, + "p90": 3799.5519638061523, + "p95": 3808.896064758301, + "p99": 3984.4799041748047 + }, + "isolatedSum": { + "p50": 3795.904040336609, + "p90": 3814.8800134658813, + "p95": 3827.9041051864624, + "p99": 3897.792100906372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bcb1b947", + "identity": "b300|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_00e7a4ad", + "comparisonKey": "33c1cd1ed42a00cc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:35.350974+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 165.0879979133606, + "p90": 169.44000124931335, + "p95": 172.7679967880249, + "p99": 190.36799669265747 + }, + "combine": { + "p50": 109.8880022764206, + "p90": 112.28799819946289, + "p95": 114.20799791812897, + "p99": 137.37599551677704 + }, + "roundtrip": { + "p50": 267.16798543930054, + "p90": 273.1519937515259, + "p95": 278.52800488471985, + "p99": 318.33600997924805 + }, + "isolatedSum": { + "p50": 274.9760001897812, + 
"p90": 281.72799944877625, + "p95": 286.97599470615387, + "p99": 327.7439922094345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 221.5680032968521, + "p90": 226.43199563026428, + "p95": 231.29600286483765, + "p99": 336.544007062912 + }, + "combine": { + "p50": 155.83999454975128, + "p90": 160.25599837303162, + "p95": 164.35199975967407, + "p99": 193.63200664520264 + }, + "roundtrip": { + "p50": 375.0399947166443, + "p90": 387.58400082588196, + "p95": 414.2720103263855, + "p99": 487.5200092792511 + }, + "isolatedSum": { + "p50": 377.4079978466034, + "p90": 386.6879940032959, + "p95": 395.6480026245117, + "p99": 530.1760137081146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 359.5519959926605, + "p90": 364.1279935836792, + "p95": 367.42401123046875, + "p99": 400.5120098590851 + }, + "combine": { + "p50": 268.38400959968567, + "p90": 272.7360129356384, + "p95": 275.1680016517639, + "p99": 293.4719920158386 + }, + "roundtrip": { + "p50": 614.5600080490112, + "p90": 622.1439838409424, + "p95": 631.1360001564026, + "p99": 713.4720087051392 + }, + "isolatedSum": { + "p50": 627.9360055923462, + "p90": 636.8640065193176, + "p95": 642.5920128822327, + "p99": 693.9840018749237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 629.9840211868286, + "p90": 636.9280219078064, + "p95": 646.6559767723083, + "p99": 834.3999981880188 + }, + "combine": { + "p50": 456.03200793266296, + "p90": 463.48801255226135, + "p95": 467.74399280548096, + "p99": 514.4000053405762 + }, + "roundtrip": { + "p50": 1071.2000131607056, + "p90": 1080.2240371704102, + "p95": 1094.3360328674316, + "p99": 1151.5840291976929 + }, + "isolatedSum": { + "p50": 1086.0160291194916, + "p90": 1100.4160344600677, + "p95": 1114.3999695777893, + "p99": 1348.800003528595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1171.455979347229, + "p90": 1180.0320148468018, + "p95": 1190.4000043869019, + "p99": 1245.4400062561035 + }, + "combine": { + "p50": 811.680018901825, + "p90": 819.167971611023, + "p95": 823.5200047492981, + "p99": 842.6880240440369 + }, + "roundtrip": { + "p50": 1967.1679735183716, + "p90": 1979.3280363082886, + "p95": 1990.496039390564, + "p99": 2265.3119564056396 + }, + "isolatedSum": { + "p50": 1983.135998249054, + "p90": 1999.1999864578247, + "p95": 2013.9200091362, + "p99": 2088.1280303001404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2258.9759826660156, + "p90": 2269.376039505005, + "p95": 2279.360055923462, + "p99": 2351.0079383850098 + }, + "combine": { + "p50": 1515.0079727172852, + "p90": 1527.4560451507568, + "p95": 1538.3039712905884, + "p99": 1601.7919778823853 + }, + 
"roundtrip": { + "p50": 3763.5838985443115, + "p90": 3777.2159576416016, + "p95": 3785.9840393066406, + "p99": 3856.800079345703 + }, + "isolatedSum": { + "p50": 3773.983955383301, + "p90": 3796.8320846557617, + "p95": 3817.6640272140503, + "p99": 3952.799916267395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e4841fc", + "identity": "b300|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_b62311b1", + "comparisonKey": "8ddbf8a6192f5acd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:02.849298+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_05", + "sku": "b300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 
+ }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 78.27199995517731, + "p90": 81.44000172615051, + "p95": 84.19200032949448, + "p99": 119.07199770212173 + }, + "combine": { + "p50": 110.81600189208984, + "p90": 113.3119985461235, + "p95": 115.4559999704361, + "p99": 126.5919953584671 + }, + "roundtrip": { + "p50": 213.59999477863312, + "p90": 221.40799462795258, + "p95": 228.41599583625793, + "p99": 266.59199595451355 + }, + "isolatedSum": { + "p50": 189.08800184726715, + "p90": 194.75200027227402, + "p95": 199.64800029993057, + "p99": 245.66399306058884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 111.87200248241425, + "p90": 118.14399808645248, + "p95": 123.9359974861145, + "p99": 182.11199343204498 + }, + "combine": { + "p50": 156.09599649906158, + "p90": 160.44799983501434, + "p95": 162.36799955368042, + "p99": 210.36800742149353 + }, + "roundtrip": { + "p50": 320.3839957714081, + "p90": 324.95999336242676, + "p95": 328.3520042896271, + "p99": 362.36798763275146 + }, + "isolatedSum": { + "p50": 267.96799898147583, + "p90": 278.5919979214668, + "p95": 286.3039970397949, + "p99": 
392.4800008535385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 174.112007021904, + "p90": 179.1359931230545, + "p95": 181.21600151062012, + "p99": 223.13599288463593 + }, + "combine": { + "p50": 268.99200677871704, + "p90": 274.6559977531433, + "p95": 276.8320143222809, + "p99": 317.7599906921387 + }, + "roundtrip": { + "p50": 547.7120280265808, + "p90": 554.4000267982483, + "p95": 557.4399828910828, + "p99": 594.3999886512756 + }, + "isolatedSum": { + "p50": 443.10401380062103, + "p90": 453.7919908761978, + "p95": 458.048015832901, + "p99": 540.8959835767746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 291.80800914764404, + "p90": 297.8239953517914, + "p95": 299.74400997161865, + "p99": 342.75200963020325 + }, + "combine": { + "p50": 455.7119905948639, + "p90": 463.1040096282959, + "p95": 467.77600049972534, + "p99": 510.68800687789917 + }, + "roundtrip": { + "p50": 968.9919948577881, + "p90": 977.8239727020264, + "p95": 983.4880232810974, + "p99": 1025.9519815444946 + }, + "isolatedSum": { + "p50": 747.5199997425079, + "p90": 760.9280049800873, + "p95": 767.520010471344, + "p99": 853.4400165081024 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 
526.0480046272278, + "p90": 539.0080213546753, + "p95": 550.8480072021484, + "p99": 617.4719929695129 + }, + "combine": { + "p50": 818.4639811515808, + "p90": 827.8080224990845, + "p95": 839.8079872131348, + "p99": 923.3599901199341 + }, + "roundtrip": { + "p50": 1804.8959970474243, + "p90": 1818.1439638137817, + "p95": 1830.4320573806763, + "p99": 1996.0960149765015 + }, + "isolatedSum": { + "p50": 1344.5119857788086, + "p90": 1366.8160438537598, + "p95": 1390.6559944152832, + "p99": 1540.831983089447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 987.4560236930847, + "p90": 996.2239861488342, + "p95": 1004.5119524002075, + "p99": 1075.9040117263794 + }, + "combine": { + "p50": 1528.5760164260864, + "p90": 1539.3600463867188, + "p95": 1550.4640340805054, + "p99": 1649.7600078582764 + }, + "roundtrip": { + "p50": 3447.8399753570557, + "p90": 3462.496042251587, + "p95": 3472.0959663391113, + "p99": 3553.215980529785 + }, + "isolatedSum": { + "p50": 2516.032040119171, + "p90": 2535.584032535553, + "p95": 2554.975986480713, + "p99": 2725.6640195846558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-affa7dab", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_9d3b385d", + "comparisonKey": "6cb3f1841938f6d9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:13.139063+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 118.367999792099, + "p90": 121.24799937009811, + "p95": 122.17599898576736, + "p99": 125.63200294971466 + }, + "combine": { + "p50": 35.93600168824196, + "p90": 37.408001720905304, + "p95": 37.88800165057182, + "p99": 39.903998374938965 + }, + 
"roundtrip": { + "p50": 151.10400319099426, + "p90": 154.52800691127777, + "p95": 156.12800419330597, + "p99": 179.00800704956055 + }, + "isolatedSum": { + "p50": 154.30400148034096, + "p90": 158.65600109100342, + "p95": 160.0640006363392, + "p99": 165.53600132465363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 121.08799815177917, + "p90": 123.58400225639343, + "p95": 124.7360035777092, + "p99": 139.0399932861328 + }, + "combine": { + "p50": 36.639999598264694, + "p90": 38.84800150990486, + "p95": 39.61599990725517, + "p99": 53.0879981815815 + }, + "roundtrip": { + "p50": 155.74400126934052, + "p90": 159.39199924468994, + "p95": 167.61599481105804, + "p99": 175.26400089263916 + }, + "isolatedSum": { + "p50": 157.72799775004387, + "p90": 162.4320037662983, + "p95": 164.35200348496437, + "p99": 192.1279914677143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 123.29600006341934, + "p90": 126.20800733566284, + "p95": 127.51999497413635, + "p99": 143.68000626564026 + }, + "combine": { + "p50": 39.96799886226654, + "p90": 41.471999138593674, + "p95": 42.047999799251556, + "p99": 43.5199998319149 + }, + "roundtrip": { + "p50": 159.9999964237213, + "p90": 162.9759967327118, + "p95": 164.57599401474, + "p99": 185.72799861431122 + }, + "isolatedSum": { + "p50": 163.26399892568588, + "p90": 167.68000647425652, + "p95": 169.5679947733879, + "p99": 187.20000609755516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + 
"fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 130.3360015153885, + "p90": 132.9279989004135, + "p95": 134.71999764442444, + "p99": 144.96000111103058 + }, + "combine": { + "p50": 40.32000154256821, + "p90": 41.919998824596405, + "p95": 42.65600070357323, + "p99": 47.10400104522705 + }, + "roundtrip": { + "p50": 167.93599724769592, + "p90": 170.75200378894806, + "p95": 172.5119948387146, + "p99": 192.73599982261658 + }, + "isolatedSum": { + "p50": 170.6560030579567, + "p90": 174.84799772500992, + "p95": 177.37599834799767, + "p99": 192.06400215625763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 142.65599846839905, + "p90": 145.50399780273438, + "p95": 146.84799313545227, + "p99": 160.51200032234192 + }, + "combine": { + "p50": 42.7200011909008, + "p90": 44.60800066590309, + "p95": 45.24800181388855, + "p99": 47.90399968624115 + }, + "roundtrip": { + "p50": 184.1920018196106, + "p90": 187.26399540901184, + "p95": 189.40800428390503, + "p99": 202.43200659751892 + }, + "isolatedSum": { + "p50": 185.37599965929985, + "p90": 190.11199846863747, + "p95": 192.09599494934082, + "p99": 208.41600000858307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 162.9759967327118, + "p90": 165.92000424861908, + "p95": 167.39200055599213, + "p99": 185.82400679588318 + }, + "combine": { + "p50": 42.75200143456459, + 
"p90": 45.21600157022476, + "p95": 46.271998435258865, + "p99": 55.615998804569244 + }, + "roundtrip": { + "p50": 205.1520049571991, + "p90": 208.48000049591064, + "p95": 210.01599729061127, + "p99": 226.8799990415573 + }, + "isolatedSum": { + "p50": 205.72799816727638, + "p90": 211.13600581884384, + "p95": 213.663998991251, + "p99": 241.44000560045242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 208.92800390720367, + "p90": 212.09600567817688, + "p95": 214.81600403785706, + "p99": 229.47199642658234 + }, + "combine": { + "p50": 47.968000173568726, + "p90": 49.92000013589859, + "p95": 50.27199909090996, + "p99": 51.80799961090088 + }, + "roundtrip": { + "p50": 253.2159984111786, + "p90": 256.22400641441345, + "p95": 259.16799902915955, + "p99": 276.44801139831543 + }, + "isolatedSum": { + "p50": 256.8960040807724, + "p90": 262.01600581407547, + "p95": 265.088003128767, + "p99": 281.2799960374832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.99200439453125, + "p90": 220.47999501228333, + "p95": 234.6239984035492, + "p99": 258.7200105190277 + }, + "combine": { + "p50": 55.296000093221664, + "p90": 57.18399956822395, + "p95": 58.04799869656563, + "p99": 63.13599646091461 + }, + "roundtrip": { + "p50": 262.59198784828186, + "p90": 265.9200131893158, + "p95": 267.10399985313416, + "p99": 277.72799134254456 + }, + "isolatedSum": { + "p50": 264.2880044877529, + "p90": 277.6639945805073, + "p95": 292.6719971001148, + "p99": 321.8560069799423 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ff9a5602", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b300_9e0f093f", + "comparisonKey": "56159d8c53d7db6f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:10.575113+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.23199892044067, + "p90": 125.2799928188324, + "p95": 133.215993642807, + "p99": 144.83200013637543 + }, + "combine": { + "p50": 37.856001406908035, + "p90": 40.063999593257904, + "p95": 45.56800052523613, + "p99": 54.976001381874084 + }, + "roundtrip": { + "p50": 151.93599462509155, + "p90": 162.84799575805664, + "p95": 173.18400740623474, + "p99": 224.95999932289124 + }, + "isolatedSum": { + "p50": 157.0880003273487, + "p90": 165.3439924120903, + "p95": 178.78399416804314, + "p99": 199.8080015182495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 122.36800044775009, + "p90": 129.60000336170197, + "p95": 136.63999736309052, + "p99": 148.44800531864166 + }, + "combine": { + "p50": 37.9519984126091, + "p90": 39.903998374938965, + "p95": 42.91199892759323, + "p99": 49.92000013589859 + }, + "roundtrip": { + "p50": 154.52800691127777, + "p90": 159.2639982700348, + "p95": 165.27999937534332, + "p99": 180.06399273872375 + }, + "isolatedSum": { + "p50": 160.3199988603592, + "p90": 169.50400173664093, + "p95": 179.55199629068375, + "p99": 198.36800545454025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 125.72799623012543, + "p90": 135.26399433612823, + "p95": 140.70400595664978, + "p99": 151.8079936504364 + }, + "combine": { + "p50": 41.85599833726883, + "p90": 43.90399903059006, + "p95": 47.520000487565994, + "p99": 54.1439987719059 + }, + "roundtrip": { + "p50": 163.13600540161133, + "p90": 169.40799355506897, + "p95": 175.48799514770508, + "p99": 186.46399676799774 + }, + "isolatedSum": { + "p50": 167.58399456739426, + "p90": 179.1679933667183, + "p95": 188.22400644421577, + "p99": 205.9519924223423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 134.68800485134125, + "p90": 140.44800400733948, + "p95": 147.8080004453659, + "p99": 162.08000481128693 + }, + "combine": { + "p50": 42.75200143456459, + "p90": 44.51199993491173, + "p95": 48.70399832725525, + "p99": 53.50400134921074 + }, + "roundtrip": { + "p50": 174.01599884033203, + "p90": 180.7039976119995, + "p95": 186.65599822998047, + "p99": 203.0400037765503 + }, + "isolatedSum": { + "p50": 177.44000628590584, + "p90": 184.9600039422512, + "p95": 196.51199877262115, + "p99": 215.58400616049767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 151.74399316310883, + "p90": 157.8879952430725, + "p95": 170.1440066099167, + "p99": 178.46399545669556 + }, + "combine": { + "p50": 42.75200143456459, + "p90": 44.95999962091446, + "p95": 49.855999648571014, + "p99": 53.92000079154968 + }, + "roundtrip": { + "p50": 192.32000410556793, + "p90": 204.70400154590607, + "p95": 
212.3199999332428, + "p99": 241.88800156116486 + }, + "isolatedSum": { + "p50": 194.49599459767342, + "p90": 202.84799486398697, + "p95": 220.0000062584877, + "p99": 232.38399624824524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 183.58400464057922, + "p90": 195.39199769496918, + "p95": 201.31200551986694, + "p99": 212.16000616550446 + }, + "combine": { + "p50": 44.99199986457825, + "p90": 46.94399982690811, + "p95": 48.64000156521797, + "p99": 54.23999950289726 + }, + "roundtrip": { + "p50": 224.99200701713562, + "p90": 232.9919934272766, + "p95": 240.06399512290955, + "p99": 249.85599517822266 + }, + "isolatedSum": { + "p50": 228.57600450515747, + "p90": 242.3359975218773, + "p95": 249.95200708508492, + "p99": 266.4000056684017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 246.62399291992188, + "p90": 256.3839852809906, + "p95": 263.93601298332214, + "p99": 275.13599395751953 + }, + "combine": { + "p50": 51.13599821925163, + "p90": 53.95200103521347, + "p95": 57.95200169086456, + "p99": 61.184000223875046 + }, + "roundtrip": { + "p50": 293.2479977607727, + "p90": 304.7359883785248, + "p95": 314.4640028476715, + "p99": 330.1120102405548 + }, + "isolatedSum": { + "p50": 297.7599911391735, + "p90": 310.33598631620407, + "p95": 321.8880146741867, + "p99": 336.3199941813946 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 249.15200471878052, + "p90": 256.3520073890686, + "p95": 263.9999985694885, + "p99": 280.2239954471588 + }, + "combine": { + "p50": 65.95200300216675, + "p90": 68.12799721956253, + "p95": 71.52000069618225, + "p99": 89.91999924182892 + }, + "roundtrip": { + "p50": 310.7840120792389, + "p90": 335.9360098838806, + "p95": 341.5040075778961, + "p99": 443.0080056190491 + }, + "isolatedSum": { + "p50": 315.10400772094727, + "p90": 324.48000460863113, + "p95": 335.5199992656708, + "p99": 370.14399468898773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-131f8dab", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_bbf50286", + "comparisonKey": "01b5066fb5adc54d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:07.437833+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.35999923944473, + "p90": 118.65600198507309, + "p95": 120.35199999809265, + "p99": 127.6479959487915 + }, + "combine": { + "p50": 32.96000137925148, + "p90": 34.55999866127968, + "p95": 35.19999980926514, + "p99": 37.31200098991394 + }, + "roundtrip": { + "p50": 146.01600170135498, + "p90": 159.61599349975586, + "p95": 183.3599954843521, + "p99": 233.95200073719025 + }, + "isolatedSum": { + "p50": 148.3200006186962, + "p90": 153.21600064635277, + "p95": 155.5519998073578, + "p99": 164.95999693870544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 120.35199999809265, + "p90": 129.60000336170197, + "p95": 136.4479959011078, + "p99": 159.19999778270721 + }, + 
"combine": { + "p50": 35.74400022625923, + "p90": 37.34400123357773, + "p95": 37.9519984126091, + "p99": 49.56800118088722 + }, + "roundtrip": { + "p50": 153.1520038843155, + "p90": 156.51200711727142, + "p95": 158.91200304031372, + "p99": 187.48800456523895 + }, + "isolatedSum": { + "p50": 156.09600022435188, + "p90": 166.9440045952797, + "p95": 174.3999943137169, + "p99": 208.76799896359444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 128.86400520801544, + "p90": 133.2480013370514, + "p95": 136.09600067138672, + "p99": 173.34400117397308 + }, + "combine": { + "p50": 35.679999738931656, + "p90": 37.28000074625015, + "p95": 37.88800165057182, + "p99": 43.5199998319149 + }, + "roundtrip": { + "p50": 161.69600188732147, + "p90": 165.15199840068817, + "p95": 167.42399334907532, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 164.5440049469471, + "p90": 170.52800208330154, + "p95": 173.98400232195854, + "p99": 216.86400100588799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.8400068283081, + "p90": 142.97600090503693, + "p95": 145.02400159835815, + "p99": 157.60000050067902 + }, + "combine": { + "p50": 38.40000182390213, + "p90": 40.09599983692169, + "p95": 41.152000427246094, + "p99": 48.48000034689903 + }, + "roundtrip": { + "p50": 176.32000148296356, + "p90": 198.33600521087646, + "p95": 215.10399878025055, + "p99": 237.7920001745224 + }, + "isolatedSum": { + "p50": 178.24000865221024, + "p90": 183.07200074195862, + "p95": 186.17600202560425, + 
"p99": 206.08000084757805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63b28e96", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b300_4f12e45f", + "comparisonKey": "9d2afffa8de16e2b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:49.092285+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": 
"hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.50400066375732, + "p90": 121.31199985742569, + "p95": 123.3920007944107, + "p99": 238.27199637889862 + }, + "combine": { + "p50": 34.272000193595886, + "p90": 35.840000957250595, + "p95": 36.51199862360954, + "p99": 44.19200122356415 + }, + "roundtrip": { + "p50": 147.5200057029724, + "p90": 151.42400562763214, + "p95": 153.1520038843155, + "p99": 201.12000405788422 + }, + "isolatedSum": { + "p50": 151.7760008573532, + "p90": 157.15200081467628, + "p95": 159.90399941802025, + "p99": 282.46399760246277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 118.01599711179733, + "p90": 121.47200107574463, + "p95": 124.41600114107132, + "p99": 144.28800344467163 + }, + "combine": { + "p50": 34.78400036692619, + "p90": 36.73600032925606, + "p95": 37.47199848294258, + "p99": 44.415999203920364 + }, + "roundtrip": { + "p50": 147.77599275112152, + "p90": 151.2320041656494, + "p95": 152.41600573062897, + "p99": 176.256000995636 + }, + "isolatedSum": { + "p50": 152.79999747872353, + "p90": 158.2080014050007, + "p95": 161.8879996240139, + "p99": 188.704002648592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 118.81600320339203, + "p90": 122.01599776744843, + "p95": 123.74400347471237, + "p99": 135.29600203037262 + }, + "combine": { + "p50": 36.159999668598175, + "p90": 37.408001720905304, + "p95": 37.79200091958046, + "p99": 40.57599976658821 + }, + "roundtrip": { + "p50": 150.751993060112, + "p90": 155.61600029468536, + "p95": 159.10400450229645, + "p99": 214.11199867725372 + }, + "isolatedSum": { + "p50": 154.9760028719902, + "p90": 159.42399948835373, + "p95": 161.53600439429283, + "p99": 175.87200179696083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 120.86399644613266, + "p90": 123.99999797344208, + "p95": 126.24000012874603, + "p99": 144.896000623703 + }, + "combine": { + "p50": 36.3520011305809, + "p90": 37.567999213933945, + "p95": 38.55999931693077, + "p99": 43.20000112056732 + }, + "roundtrip": { + "p50": 152.92799472808838, + "p90": 156.67200088500977, + "p95": 159.9999964237213, + "p99": 199.5840072631836 + }, + "isolatedSum": { + "p50": 157.21599757671356, + "p90": 161.56799718737602, + "p95": 164.7999994456768, + "p99": 188.09600174427032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.13600695133209, + "p90": 130.65600395202637, + "p95": 133.44000279903412, + "p99": 191.9039934873581 + }, + "combine": { + "p50": 36.288000643253326, + "p90": 37.76000067591667, + "p95": 38.46399858593941, + "p99": 41.95199906826019 + }, + "roundtrip": { + "p50": 161.15200519561768, + "p90": 
165.18400609493256, + "p95": 167.90400445461273, + "p99": 182.94399976730347 + }, + "isolatedSum": { + "p50": 163.42400759458542, + "p90": 168.41600462794304, + "p95": 171.90400138497353, + "p99": 233.8559925556183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 137.2160017490387, + "p90": 140.32000303268433, + "p95": 141.79199934005737, + "p99": 159.07199680805206 + }, + "combine": { + "p50": 36.54399886727333, + "p90": 38.36800158023834, + "p95": 38.784001022577286, + "p99": 48.19199815392494 + }, + "roundtrip": { + "p50": 171.55200242996216, + "p90": 176.06399953365326, + "p95": 178.5919964313507, + "p99": 216.09599888324738 + }, + "isolatedSum": { + "p50": 173.76000061631203, + "p90": 178.68800461292267, + "p95": 180.57600036263466, + "p99": 207.263994961977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 153.34400534629822, + "p90": 156.25600516796112, + "p95": 158.07999670505524, + "p99": 168.44800114631653 + }, + "combine": { + "p50": 38.55999931693077, + "p90": 39.96799886226654, + "p95": 41.08799993991852, + "p99": 48.41599985957146 + }, + "roundtrip": { + "p50": 189.79200720787048, + "p90": 193.4719979763031, + "p95": 197.31199741363525, + "p99": 285.7919931411743 + }, + "isolatedSum": { + "p50": 191.904004663229, + "p90": 196.22400403022766, + "p95": 199.16799664497375, + "p99": 216.86400100588799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 152.8960019350052, + "p90": 156.6080003976822, + "p95": 158.78400206565857, + "p99": 178.52799594402313 + }, + "combine": { + "p50": 43.807998299598694, + "p90": 45.56800052523613, + "p95": 45.9199994802475, + "p99": 59.808000922203064 + }, + "roundtrip": { + "p50": 192.7040070295334, + "p90": 195.6160068511963, + "p95": 197.53600656986237, + "p99": 218.20800006389618 + }, + "isolatedSum": { + "p50": 196.70400023460388, + "p90": 202.17600092291832, + "p95": 204.70400154590607, + "p99": 238.3359968662262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9763ebe9", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b300_7febeea9", + "comparisonKey": "a5963fed677d016f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:53.252877+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.72800236940384, + "p90": 120.80000340938568, + "p95": 122.30399996042252, + "p99": 138.65600526332855 + }, + "combine": { + "p50": 35.679999738931656, + "p90": 37.31200098991394, + "p95": 38.46399858593941, + "p99": 65.43999910354614 + }, + "roundtrip": { + "p50": 149.3760049343109, + "p90": 152.8960019350052, + "p95": 154.84799444675446, + "p99": 174.52800273895264 + }, + "isolatedSum": { + "p50": 153.4080021083355, + "p90": 158.11200439929962, + "p95": 160.76799854636192, + "p99": 204.0960043668747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 119.4240003824234, + "p90": 123.3920007944107, + "p95": 127.07200646400452, + 
"p99": 146.84799313545227 + }, + "combine": { + "p50": 35.999998450279236, + "p90": 37.9519984126091, + "p95": 38.84800150990486, + "p99": 42.97599941492081 + }, + "roundtrip": { + "p50": 151.2639969587326, + "p90": 155.03999590873718, + "p95": 156.76799416542053, + "p99": 173.6000031232834 + }, + "isolatedSum": { + "p50": 155.42399883270264, + "p90": 161.3439992070198, + "p95": 165.92000797390938, + "p99": 189.82399255037308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.75200337171555, + "p90": 127.3919939994812, + "p95": 130.14400005340576, + "p99": 158.11200439929962 + }, + "combine": { + "p50": 39.5519994199276, + "p90": 41.05599969625473, + "p95": 41.56799986958504, + "p99": 43.10400038957596 + }, + "roundtrip": { + "p50": 158.1439971923828, + "p90": 162.4000072479248, + "p95": 164.63999450206757, + "p99": 188.80000710487366 + }, + "isolatedSum": { + "p50": 162.30400279164314, + "p90": 168.44799369573593, + "p95": 171.7119999229908, + "p99": 201.21600478887558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.31999361515045, + "p90": 131.1040073633194, + "p95": 132.1280002593994, + "p99": 145.82400023937225 + }, + "combine": { + "p50": 39.903998374938965, + "p90": 41.37599840760231, + "p95": 42.01599955558777, + "p99": 43.935999274253845 + }, + "roundtrip": { + "p50": 165.12000560760498, + "p90": 168.06399822235107, + "p95": 170.3680008649826, + "p99": 193.24800372123718 + }, + "isolatedSum": { + "p50": 168.22399199008942, + "p90": 
172.4800057709217, + "p95": 174.14399981498718, + "p99": 189.7599995136261 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 142.0159935951233, + "p90": 144.83200013637543, + "p95": 146.17599546909332, + "p99": 165.18400609493256 + }, + "combine": { + "p50": 42.62400045990944, + "p90": 45.60000076889992, + "p95": 46.560000628232956, + "p99": 63.00800293684006 + }, + "roundtrip": { + "p50": 183.77600610256195, + "p90": 187.9040002822876, + "p95": 195.2960044145584, + "p99": 276.4799892902374 + }, + "isolatedSum": { + "p50": 184.63999405503273, + "p90": 190.43200090527534, + "p95": 192.73599609732628, + "p99": 228.1920090317726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 163.2000058889389, + "p90": 166.24000668525696, + "p95": 167.9999977350235, + "p99": 181.63199722766876 + }, + "combine": { + "p50": 44.51199993491173, + "p90": 46.65600135922432, + "p95": 47.10400104522705, + "p99": 52.191998809576035 + }, + "roundtrip": { + "p50": 206.01600408554077, + "p90": 209.60000157356262, + "p95": 213.28000724315643, + "p99": 261.50399446487427 + }, + "isolatedSum": { + "p50": 207.71200582385063, + "p90": 212.89600804448128, + "p95": 215.10399878025055, + "p99": 233.8239960372448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 204.6079933643341, + "p90": 208.73600244522095, + "p95": 213.82400393486023, + "p99": 331.167995929718 + }, + "combine": { + "p50": 48.70399832725525, + "p90": 50.71999877691269, + "p95": 51.80799961090088, + "p99": 58.14399942755699 + }, + "roundtrip": { + "p50": 250.65600872039795, + "p90": 254.2400062084198, + "p95": 257.7280104160309, + "p99": 328.8959860801697 + }, + "isolatedSum": { + "p50": 253.31199169158936, + "p90": 259.45600122213364, + "p95": 265.6320035457611, + "p99": 389.311995357275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 205.4399996995926, + "p90": 209.98400449752808, + "p95": 214.9759978055954, + "p99": 257.2160065174103 + }, + "combine": { + "p50": 58.720000088214874, + "p90": 60.83200126886368, + "p95": 61.40799820423126, + "p99": 70.65600156784058 + }, + "roundtrip": { + "p50": 262.59198784828186, + "p90": 267.61600375175476, + "p95": 270.30399441719055, + "p99": 286.78399324417114 + }, + "isolatedSum": { + "p50": 264.15999978780746, + "p90": 270.81600576639175, + "p95": 276.38399600982666, + "p99": 327.87200808525085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da008212", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b300_9d04a365", + "comparisonKey": "2f8efab1b42f3bf4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:26.731937+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.03999745845795, + "p90": 122.8799968957901, + "p95": 126.0479986667633, + "p99": 143.93599331378937 + }, + "combine": 
{ + "p50": 36.448001861572266, + "p90": 37.91999816894531, + "p95": 38.84800150990486, + "p99": 61.37600168585777 + }, + "roundtrip": { + "p50": 150.751993060112, + "p90": 154.6880006790161, + "p95": 158.720001578331, + "p99": 248.51199984550476 + }, + "isolatedSum": { + "p50": 155.4879993200302, + "p90": 160.7999950647354, + "p95": 164.89600017666817, + "p99": 205.31199499964714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.70400267839432, + "p90": 123.99999797344208, + "p95": 124.9919980764389, + "p99": 132.9919993877411 + }, + "combine": { + "p50": 35.87200120091438, + "p90": 37.53599897027016, + "p95": 38.46399858593941, + "p99": 50.912000238895416 + }, + "roundtrip": { + "p50": 153.08800339698792, + "p90": 156.44800662994385, + "p95": 158.75199437141418, + "p99": 212.0639979839325 + }, + "isolatedSum": { + "p50": 156.5760038793087, + "p90": 161.53599694371223, + "p95": 163.4559966623783, + "p99": 183.9039996266365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.6240023970604, + "p90": 125.56800246238708, + "p95": 127.23200023174286, + "p99": 143.5839980840683 + }, + "combine": { + "p50": 39.51999917626381, + "p90": 41.4079986512661, + "p95": 42.14400053024292, + "p99": 60.35200133919716 + }, + "roundtrip": { + "p50": 158.75199437141418, + "p90": 162.01600432395935, + "p95": 165.02399742603302, + "p99": 189.95200097560883 + }, + "isolatedSum": { + "p50": 162.1440015733242, + "p90": 166.97600111365318, + "p95": 169.37600076198578, + "p99": 
203.93599942326546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.7039965391159, + "p90": 131.84000551700592, + "p95": 133.56800377368927, + "p99": 154.4640064239502 + }, + "combine": { + "p50": 39.48799893260002, + "p90": 41.31200164556503, + "p95": 41.728001087903976, + "p99": 45.31199857592583 + }, + "roundtrip": { + "p50": 165.8560037612915, + "p90": 169.11999881267548, + "p95": 174.04800653457642, + "p99": 257.5039863586426 + }, + "isolatedSum": { + "p50": 168.19199547171593, + "p90": 173.15200716257095, + "p95": 175.29600486159325, + "p99": 199.77600499987602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 139.8400068283081, + "p90": 142.59199798107147, + "p95": 144.03200149536133, + "p99": 165.3759926557541 + }, + "combine": { + "p50": 41.471999138593674, + "p90": 43.5199998319149, + "p95": 44.51199993491173, + "p99": 62.24000081419945 + }, + "roundtrip": { + "p50": 181.63199722766876, + "p90": 185.5040043592453, + "p95": 188.6720061302185, + "p99": 249.439999461174 + }, + "isolatedSum": { + "p50": 181.31200596690178, + "p90": 186.11199781298637, + "p95": 188.54400143027306, + "p99": 227.61599346995354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 162.78399527072906, + "p90": 
165.75999557971954, + "p95": 168.89600455760956, + "p99": 223.39199483394623 + }, + "combine": { + "p50": 41.85599833726883, + "p90": 43.68000105023384, + "p95": 44.256001710891724, + "p99": 60.896001756191254 + }, + "roundtrip": { + "p50": 203.71200144290924, + "p90": 207.07200467586517, + "p95": 208.639994263649, + "p99": 234.8800003528595 + }, + "isolatedSum": { + "p50": 204.6399936079979, + "p90": 209.43999662995338, + "p95": 213.15200626850128, + "p99": 284.2879965901375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 211.0079973936081, + "p90": 229.8240065574646, + "p95": 245.31200528144836, + "p99": 282.71999955177307 + }, + "combine": { + "p50": 48.35199937224388, + "p90": 51.10400170087814, + "p95": 52.22399905323982, + "p99": 72.9919970035553 + }, + "roundtrip": { + "p50": 254.33599948883057, + "p90": 258.30399990081787, + "p95": 261.4719867706299, + "p99": 314.4640028476715 + }, + "isolatedSum": { + "p50": 259.359996765852, + "p90": 280.92800825834274, + "p95": 297.5360043346882, + "p99": 355.71199655532837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.5759937763214, + "p90": 211.96800470352173, + "p95": 213.82400393486023, + "p99": 269.3440020084381 + }, + "combine": { + "p50": 56.671999394893646, + "p90": 58.52799862623215, + "p95": 58.94400179386139, + "p99": 60.47999858856201 + }, + "roundtrip": { + "p50": 260.44800877571106, + "p90": 263.96799087524414, + "p95": 266.1440074443817, + "p99": 293.2479977607727 + }, + 
"isolatedSum": { + "p50": 265.24799317121506, + "p90": 270.4960033297539, + "p95": 272.7680057287216, + "p99": 329.8240005970001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d116337", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b300_87cc08e1", + "comparisonKey": "40e29b5bdd967e07", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:51.926257+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.85600334405899, + "p90": 120.67200243473053, + "p95": 122.30399996042252, + "p99": 132.7359974384308 + }, + "combine": { + "p50": 35.232000052928925, + "p90": 36.67199984192848, + "p95": 37.18400001525879, + "p99": 38.68800029158592 + }, + "roundtrip": { + "p50": 147.5519984960556, + "p90": 151.8400013446808, + "p95": 155.03999590873718, + "p99": 244.09599602222443 + }, + "isolatedSum": { + "p50": 153.08800339698792, + "p90": 157.344002276659, + "p95": 159.4879999756813, + "p99": 171.4239977300167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 119.58400160074234, + "p90": 143.8400000333786, + "p95": 163.07200491428375, + "p99": 245.2480047941208 + }, + "combine": { + "p50": 36.41600161790848, + "p90": 40.44799879193306, + "p95": 41.471999138593674, + "p99": 43.5199998319149 + }, + "roundtrip": { + "p50": 150.68799257278442, + "p90": 153.98399531841278, + "p95": 155.93600273132324, + "p99": 172.83199727535248 + }, + "isolatedSum": { + "p50": 156.00000321865082, + "p90": 184.28799882531166, + "p95": 204.54400405287743, + "p99": 288.7680046260357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 
1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.68800288438797, + "p90": 140.60799777507782, + "p95": 156.70399367809296, + "p99": 192.89599359035492 + }, + "combine": { + "p50": 39.423998445272446, + "p90": 44.51199993491173, + "p95": 45.40799930691719, + "p99": 60.416001826524734 + }, + "roundtrip": { + "p50": 156.67200088500977, + "p90": 160.09600460529327, + "p95": 162.7199947834015, + "p99": 183.80799889564514 + }, + "isolatedSum": { + "p50": 162.11200132966042, + "p90": 185.11999770998955, + "p95": 202.11199298501015, + "p99": 253.31199541687965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.31999361515045, + "p90": 155.03999590873718, + "p95": 171.03999853134155, + "p99": 210.52800118923187 + }, + "combine": { + "p50": 40.12800008058548, + "p90": 42.49599948525429, + "p95": 43.487999588251114, + "p99": 45.75999826192856 + }, + "roundtrip": { + "p50": 163.39200735092163, + "p90": 169.76000368595123, + "p95": 219.29599344730377, + "p99": 301.7599880695343 + }, + "isolatedSum": { + "p50": 168.44799369573593, + "p90": 197.53599539399147, + "p95": 214.52799811959267, + "p99": 256.28799945116043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 138.20800185203552, + "p90": 148.0640023946762, + "p95": 154.7520011663437, + "p99": 167.23200678825378 + }, + "combine": { + "p50": 
40.832001715898514, + "p90": 53.888000547885895, + "p95": 56.223999708890915, + "p99": 67.1359971165657 + }, + "roundtrip": { + "p50": 176.60799622535706, + "p90": 181.15200102329254, + "p95": 183.48799645900726, + "p99": 202.01599597930908 + }, + "isolatedSum": { + "p50": 179.04000356793404, + "p90": 201.9520029425621, + "p95": 210.9760008752346, + "p99": 234.3680039048195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 162.1759980916977, + "p90": 168.92799735069275, + "p95": 172.0000058412552, + "p99": 180.03199994564056 + }, + "combine": { + "p50": 42.65600070357323, + "p90": 44.47999969124794, + "p95": 45.152001082897186, + "p99": 51.29599943757057 + }, + "roundtrip": { + "p50": 203.16800475120544, + "p90": 206.94400370121002, + "p95": 210.27199923992157, + "p99": 247.26399779319763 + }, + "isolatedSum": { + "p50": 204.83199879527092, + "p90": 213.4079970419407, + "p95": 217.15200692415237, + "p99": 231.32799938321114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 208.03199708461761, + "p90": 211.2639993429184, + "p95": 213.31200003623962, + "p99": 280.35199642181396 + }, + "combine": { + "p50": 48.86399954557419, + "p90": 53.727999329566956, + "p95": 54.75199967622757, + "p99": 65.21599739789963 + }, + "roundtrip": { + "p50": 250.68798661231995, + "p90": 254.27201390266418, + "p95": 256.3839852809906, + "p99": 289.72798585891724 + }, + "isolatedSum": { + "p50": 256.8959966301918, + "p90": 264.99199867248535, + "p95": 268.0639997124672, + 
"p99": 345.5679938197136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.95999670028687, + "p90": 212.19199895858765, + "p95": 213.50400149822235, + "p99": 230.9119999408722 + }, + "combine": { + "p50": 57.8560009598732, + "p90": 59.74400043487549, + "p95": 60.54399907588959, + "p99": 64.31999802589417 + }, + "roundtrip": { + "p50": 260.2880001068115, + "p90": 264.3199861049652, + "p95": 267.67998933792114, + "p99": 373.4399974346161 + }, + "isolatedSum": { + "p50": 266.81599766016006, + "p90": 271.93599939346313, + "p95": 274.04800057411194, + "p99": 295.23199796676636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7de45655", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_1b42769c", + "comparisonKey": "bf41f62bbdfc06fc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:58.457276+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + 
"routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 118.27199906110764, + "p90": 121.18399888277054, + "p95": 123.87199699878693, + "p99": 181.40800297260284 + }, + "combine": { + "p50": 35.48799827694893, + "p90": 37.34400123357773, + "p95": 37.63199970126152, + "p99": 52.2879995405674 + }, + "roundtrip": { + "p50": 147.61599898338318, + "p90": 150.87999403476715, + "p95": 152.63999998569489, + "p99": 247.67999351024628 + }, + "isolatedSum": { + "p50": 153.75999733805656, + "p90": 158.52800011634827, + "p95": 161.50399670004845, + "p99": 233.69600251317024 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.38400024175644, + "p90": 132.76800513267517, + "p95": 138.11199367046356, + "p99": 153.56799960136414 + }, + "combine": { + "p50": 36.06399893760681, + "p90": 37.53599897027016, + "p95": 38.27200084924698, + "p99": 43.776001781225204 + }, + "roundtrip": { + "p50": 152.6080071926117, + "p90": 170.23999989032745, + "p95": 192.671999335289, + "p99": 249.24799799919128 + }, + "isolatedSum": { + "p50": 156.44799917936325, + "p90": 170.30400410294533, + "p95": 176.38399451971054, + "p99": 197.34400138258934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.079998254776, + "p90": 132.83200562000275, + "p95": 138.0160003900528, + "p99": 149.31200444698334 + }, + "combine": { + "p50": 38.62399980425835, + "p90": 40.608000010252, + "p95": 41.34399816393852, + "p99": 42.24000126123428 + }, + "roundtrip": { + "p50": 157.3760062456131, + "p90": 162.20800578594208, + "p95": 164.95999693870544, + "p99": 189.28000330924988 + }, + "isolatedSum": { + "p50": 160.70399805903435, + "p90": 173.44000563025475, + "p95": 179.35999855399132, + "p99": 191.55200570821762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.2799928188324, + "p90": 128.92800569534302, + "p95": 132.64000415802002, + "p99": 223.64799678325653 + }, + "combine": { + "p50": 37.856001406908035, + "p90": 39.48799893260002, + "p95": 40.47999903559685, + "p99": 44.79999840259552 + }, + "roundtrip": { + "p50": 159.96800363063812, + "p90": 
163.13600540161133, + "p95": 164.92800414562225, + "p99": 188.48000466823578 + }, + "isolatedSum": { + "p50": 163.13599422574043, + "p90": 168.41600462794304, + "p95": 173.12000319361687, + "p99": 268.44799518585205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 135.903999209404, + "p90": 138.72000575065613, + "p95": 140.32000303268433, + "p99": 148.03199470043182 + }, + "combine": { + "p50": 39.51999917626381, + "p90": 41.728001087903976, + "p95": 42.847998440265656, + "p99": 48.06400090456009 + }, + "roundtrip": { + "p50": 173.50399494171143, + "p90": 177.21599340438843, + "p95": 179.967999458313, + "p99": 201.664000749588 + }, + "isolatedSum": { + "p50": 175.4239983856678, + "p90": 180.4480068385601, + "p95": 183.16800147294998, + "p99": 196.0959956049919 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 149.1519957780838, + "p90": 152.19199657440186, + "p95": 154.2080044746399, + "p99": 168.57600212097168 + }, + "combine": { + "p50": 41.50399938225746, + "p90": 43.55200007557869, + "p95": 44.35199871659279, + "p99": 60.92799827456474 + }, + "roundtrip": { + "p50": 189.2479956150055, + "p90": 192.9280012845993, + "p95": 194.65599954128265, + "p99": 223.7440049648285 + }, + "isolatedSum": { + "p50": 190.65599516034126, + "p90": 195.74399664998055, + "p95": 198.56000319123268, + "p99": 229.50400039553642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + 
"recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 176.03200674057007, + "p90": 179.26399409770966, + "p95": 181.95199966430664, + "p99": 202.43200659751892 + }, + "combine": { + "p50": 45.951999723911285, + "p90": 47.58400097489357, + "p95": 48.25599864125252, + "p99": 56.44800141453743 + }, + "roundtrip": { + "p50": 219.64800357818604, + "p90": 223.4559953212738, + "p95": 226.3679951429367, + "p99": 294.75200176239014 + }, + "isolatedSum": { + "p50": 221.98400646448135, + "p90": 226.84799507260323, + "p95": 230.20799830555916, + "p99": 258.88000801205635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 177.95200645923615, + "p90": 182.75199830532074, + "p95": 189.2160028219223, + "p99": 244.4480061531067 + }, + "combine": { + "p50": 59.007998555898666, + "p90": 60.575999319553375, + "p95": 61.503998935222626, + "p99": 74.5920017361641 + }, + "roundtrip": { + "p50": 232.67200589179993, + "p90": 237.40799725055695, + "p95": 239.00799453258514, + "p99": 260.6399953365326 + }, + "isolatedSum": { + "p50": 236.9600050151348, + "p90": 243.32799762487411, + "p95": 250.72000175714493, + "p99": 319.0400078892708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-653a0d0e", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b300_1c62942e", + "comparisonKey": 
"faf267a01e7a2894", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:55.476787+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.47200042009354, + "p90": 145.60000598430634, + "p95": 
163.87200355529785, + "p99": 204.70400154590607 + }, + "combine": { + "p50": 34.11199897527695, + "p90": 36.479998379945755, + "p95": 37.31200098991394, + "p99": 39.32800143957138 + }, + "roundtrip": { + "p50": 147.5519984960556, + "p90": 151.58399939537048, + "p95": 157.24800527095795, + "p99": 177.21599340438843 + }, + "isolatedSum": { + "p50": 151.58399939537048, + "p90": 182.0800043642521, + "p95": 201.1840045452118, + "p99": 244.03200298547745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.90400338172913, + "p90": 119.03999745845795, + "p95": 120.03199756145477, + "p99": 123.52000176906586 + }, + "combine": { + "p50": 34.4959981739521, + "p90": 36.288000643253326, + "p95": 36.67199984192848, + "p99": 41.439998894929886 + }, + "roundtrip": { + "p50": 146.84799313545227, + "p90": 151.67999267578125, + "p95": 154.01600301265717, + "p99": 170.43200135231018 + }, + "isolatedSum": { + "p50": 150.40000155568123, + "p90": 155.32799810171127, + "p95": 156.70399740338326, + "p99": 164.96000066399574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 119.13599818944931, + "p90": 130.17599284648895, + "p95": 135.42400300502777, + "p99": 146.55999839305878 + }, + "combine": { + "p50": 35.42400151491165, + "p90": 37.376001477241516, + "p95": 37.82400116324425, + "p99": 40.64000025391579 + }, + "roundtrip": { + "p50": 147.96799421310425, + "p90": 150.87999403476715, + "p95": 152.5759994983673, + "p99": 165.92000424861908 + }, + "isolatedSum": { + "p50": 154.55999970436096, 
+ "p90": 167.55199432373047, + "p95": 173.24800416827202, + "p99": 187.19999864697456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 120.15999853610992, + "p90": 123.26399981975555, + "p95": 125.15200674533844, + "p99": 175.1679927110672 + }, + "combine": { + "p50": 35.51999852061272, + "p90": 37.21600025892258, + "p95": 37.50399872660637, + "p99": 41.31200164556503 + }, + "roundtrip": { + "p50": 152.96000242233276, + "p90": 157.8879952430725, + "p95": 160.38399934768677, + "p99": 177.50400304794312 + }, + "isolatedSum": { + "p50": 155.67999705672264, + "p90": 160.48000007867813, + "p95": 162.6560054719448, + "p99": 216.47999435663223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 125.66399574279785, + "p90": 149.59999918937683, + "p95": 168.41599345207214, + "p99": 219.07199919223785 + }, + "combine": { + "p50": 37.408001720905304, + "p90": 42.78400167822838, + "p95": 43.96799951791763, + "p99": 45.56800052523613 + }, + "roundtrip": { + "p50": 159.67999398708344, + "p90": 163.32800686359406, + "p95": 165.3759926557541, + "p99": 199.74400103092194 + }, + "isolatedSum": { + "p50": 163.07199746370316, + "p90": 192.3840008676052, + "p95": 212.38399296998978, + "p99": 264.639999717474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 
256, + "dispatch": { + "p50": 133.4719955921173, + "p90": 136.48000359535217, + "p95": 137.56799697875977, + "p99": 144.83200013637543 + }, + "combine": { + "p50": 37.696000188589096, + "p90": 39.51999917626381, + "p95": 40.511999279260635, + "p99": 50.84799975156784 + }, + "roundtrip": { + "p50": 170.3999936580658, + "p90": 184.64000523090363, + "p95": 193.05600225925446, + "p99": 255.0399899482727 + }, + "isolatedSum": { + "p50": 171.1679957807064, + "p90": 176.00000277161598, + "p95": 178.0799962580204, + "p99": 195.67999988794327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 149.02399480342865, + "p90": 162.4000072479248, + "p95": 171.55200242996216, + "p99": 267.3279941082001 + }, + "combine": { + "p50": 41.728001087903976, + "p90": 43.58400031924248, + "p95": 44.576000422239304, + "p99": 60.92799827456474 + }, + "roundtrip": { + "p50": 187.9040002822876, + "p90": 191.71200692653656, + "p95": 193.79200041294098, + "p99": 213.47199380397797 + }, + "isolatedSum": { + "p50": 190.75199589133263, + "p90": 205.98400756716728, + "p95": 216.12800285220146, + "p99": 328.2559923827648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 149.1200029850006, + "p90": 152.63999998569489, + "p95": 153.6639928817749, + "p99": 161.43999993801117 + }, + "combine": { + "p50": 52.960000932216644, + "p90": 55.16799911856651, + "p95": 55.80800026655197, + "p99": 61.72800064086914 + }, + "roundtrip": { + "p50": 200.99200308322906, + "p90": 214.20800685882568, + 
"p95": 225.50399601459503, + "p99": 265.6320035457611 + }, + "isolatedSum": { + "p50": 202.08000391721725, + "p90": 207.8079991042614, + "p95": 209.47199314832687, + "p99": 223.1680005788803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9500a3d6", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b300_e57a98e0", + "comparisonKey": "80e302f7f550fbfc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:29.628338+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": 
true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.15199798345566, + "p90": 120.28799951076508, + "p95": 122.36800044775009, + "p99": 136.4479959011078 + }, + "combine": { + "p50": 35.551998764276505, + "p90": 37.408001720905304, + "p95": 38.24000060558319, + "p99": 59.20000001788139 + }, + "roundtrip": { + "p50": 148.8640010356903, + "p90": 152.6080071926117, + "p95": 154.88000214099884, + "p99": 222.6880043745041 + }, + "isolatedSum": { + "p50": 152.70399674773216, + "p90": 157.69600123167038, + "p95": 160.60800105333328, + "p99": 195.64799591898918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.28799951076508, + "p90": 133.15199315547943, + "p95": 138.047993183136, + "p99": 149.98400211334229 + }, + "combine": { + "p50": 35.48799827694893, + "p90": 37.18400001525879, + "p95": 37.408001720905304, + "p99": 40.511999279260635 + }, + "roundtrip": { + "p50": 150.87999403476715, + "p90": 155.45600652694702, + "p95": 157.50400722026825, + "p99": 179.00800704956055 + }, + "isolatedSum": { + "p50": 155.775997787714, + "p90": 170.33599317073822, + "p95": 175.4559949040413, + "p99": 190.49600139260292 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.3360002040863, + "p90": 133.18400084972382, + "p95": 139.3280029296875, + "p99": 149.27999675273895 + }, + "combine": { + "p50": 39.583999663591385, + "p90": 41.471999138593674, + "p95": 41.95199906826019, + "p99": 46.33599892258644 + }, + "roundtrip": { + "p50": 157.79200196266174, + "p90": 160.96000373363495, + "p95": 163.2319986820221, + "p99": 177.08800733089447 + }, + "isolatedSum": { + "p50": 161.9199998676777, + "p90": 174.6559999883175, + "p95": 181.2800019979477, + "p99": 195.6159956753254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.48000228405, + "p90": 131.3599944114685, + "p95": 133.12000036239624, + "p99": 145.91999351978302 + }, + "combine": { + "p50": 39.5519994199276, + "p90": 41.439998894929886, + "p95": 42.01599955558777, + "p99": 62.20800057053566 + }, + "roundtrip": { + "p50": 165.69599509239197, + "p90": 169.47199404239655, + "p95": 175.32800137996674, + "p99": 207.45599269866943 + }, + "isolatedSum": { + "p50": 168.03200170397758, + "p90": 172.7999933063984, + "p95": 175.135999917984, + "p99": 208.12799409031868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 142.11200177669525, + "p90": 146.11199498176575, + "p95": 
149.6960073709488, + "p99": 274.7519910335541 + }, + "combine": { + "p50": 42.399998754262924, + "p90": 66.0799965262413, + "p95": 71.3919997215271, + "p99": 83.61600339412689 + }, + "roundtrip": { + "p50": 182.68799781799316, + "p90": 187.391996383667, + "p95": 189.5039975643158, + "p99": 205.02400398254395 + }, + "isolatedSum": { + "p50": 184.51200053095818, + "p90": 212.19199150800705, + "p95": 221.0880070924759, + "p99": 358.36799442768097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 167.4879938364029, + "p90": 182.52800405025482, + "p95": 196.60800695419312, + "p99": 215.7440036535263 + }, + "combine": { + "p50": 44.096000492572784, + "p90": 46.20800167322159, + "p95": 47.13600128889084, + "p99": 49.60000142455101 + }, + "roundtrip": { + "p50": 206.56000077724457, + "p90": 211.16800606250763, + "p95": 213.95200490951538, + "p99": 230.335995554924 + }, + "isolatedSum": { + "p50": 211.58399432897568, + "p90": 228.7360057234764, + "p95": 243.74400824308395, + "p99": 265.3440050780773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 204.12799715995789, + "p90": 207.2640061378479, + "p95": 209.18400585651398, + "p99": 228.7999987602234 + }, + "combine": { + "p50": 47.648001462221146, + "p90": 49.56800118088722, + "p95": 50.08000135421753, + "p99": 53.47200110554695 + }, + "roundtrip": { + "p50": 250.36799907684326, + "p90": 254.65598702430725, + "p95": 256.99201226234436, + "p99": 273.50398898124695 + }, + "isolatedSum": { + "p50": 
251.77599862217903, + "p90": 256.8320073187351, + "p95": 259.2640072107315, + "p99": 282.27199986577034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 207.20000565052032, + "p90": 223.64799678325653, + "p95": 239.23200368881226, + "p99": 411.9360148906708 + }, + "combine": { + "p50": 57.69599974155426, + "p90": 59.74400043487549, + "p95": 60.127999633550644, + "p99": 68.86400282382965 + }, + "roundtrip": { + "p50": 258.62398743629456, + "p90": 265.4080092906952, + "p95": 273.3759880065918, + "p99": 328.5439908504486 + }, + "isolatedSum": { + "p50": 264.8960053920746, + "p90": 283.391997218132, + "p95": 299.3600033223629, + "p99": 480.8000177145004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-906ff7b9", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b300_ad2ff111", + "comparisonKey": "a720e28b5167c37a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:41.181149+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.91999638080597, + "p90": 127.3919939994812, + "p95": 132.57600367069244, + "p99": 145.05599439144135 + }, + "combine": { + "p50": 35.51999852061272, + "p90": 36.99199855327606, + "p95": 37.43999823927879, + "p99": 39.423998445272446 + }, + "roundtrip": { + "p50": 148.54399859905243, + "p90": 151.90400183200836, + "p95": 154.84799444675446, + "p99": 176.35199427604675 + }, + "isolatedSum": { + "p50": 153.4399949014187, + "p90": 164.38399255275726, + "p95": 170.01600190997124, + "p99": 184.4799928367138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 
8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.12799829244614, + "p90": 125.47199428081512, + "p95": 133.34399461746216, + "p99": 145.88800072669983 + }, + "combine": { + "p50": 36.320000886917114, + "p90": 38.68800029158592, + "p95": 42.59200021624565, + "p99": 58.27200040221214 + }, + "roundtrip": { + "p50": 151.48800611495972, + "p90": 155.93600273132324, + "p95": 160.89600324630737, + "p99": 181.98400735855103 + }, + "isolatedSum": { + "p50": 156.44799917936325, + "p90": 164.15999457240105, + "p95": 175.9359948337078, + "p99": 204.16000112891197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 121.88799679279327, + "p90": 125.02400577068329, + "p95": 127.03999876976013, + "p99": 145.4080045223236 + }, + "combine": { + "p50": 38.27200084924698, + "p90": 40.32000154256821, + "p95": 41.280001401901245, + "p99": 42.91199892759323 + }, + "roundtrip": { + "p50": 157.72800147533417, + "p90": 175.64800381660461, + "p95": 185.15199422836304, + "p99": 222.52799570560455 + }, + "isolatedSum": { + "p50": 160.15999764204025, + "p90": 165.3440073132515, + "p95": 168.32000017166138, + "p99": 188.32000344991684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.00800597667694, + "p90": 129.72800433635712, + "p95": 131.77600502967834, + "p99": 150.30400454998016 + }, + "combine": { + "p50": 39.23200070858002, + "p90": 40.76800122857094, + "p95": 
41.471999138593674, + "p99": 62.01599910855293 + }, + "roundtrip": { + "p50": 162.91199624538422, + "p90": 166.04800522327423, + "p95": 168.2559996843338, + "p99": 190.0479942560196 + }, + "isolatedSum": { + "p50": 166.24000668525696, + "p90": 170.49600556492805, + "p95": 173.24800416827202, + "p99": 212.3200036585331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 137.7599984407425, + "p90": 140.83200693130493, + "p95": 142.65599846839905, + "p99": 150.2400040626526 + }, + "combine": { + "p50": 41.53599962592125, + "p90": 43.39199885725975, + "p95": 43.87199878692627, + "p99": 45.9199994802475 + }, + "roundtrip": { + "p50": 179.29600179195404, + "p90": 205.85599541664124, + "p95": 225.40800273418427, + "p99": 299.9359965324402 + }, + "isolatedSum": { + "p50": 179.29599806666374, + "p90": 184.22400578856468, + "p95": 186.52799725532532, + "p99": 196.16000354290009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 159.2639982700348, + "p90": 166.20799899101257, + "p95": 172.2559928894043, + "p99": 181.8239986896515 + }, + "combine": { + "p50": 43.616000562906265, + "p90": 45.632001012563705, + "p95": 46.271998435258865, + "p99": 47.90399968624115 + }, + "roundtrip": { + "p50": 200.70399343967438, + "p90": 223.32799434661865, + "p95": 236.9920015335083, + "p99": 270.52798867225647 + }, + "isolatedSum": { + "p50": 202.87999883294106, + "p90": 211.84000000357628, + "p95": 218.52799132466316, + "p99": 229.72799837589264 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 200.32000541687012, + "p90": 203.80799472332, + "p95": 206.56000077724457, + "p99": 225.0880002975464 + }, + "combine": { + "p50": 48.70399832725525, + "p90": 50.81599950790405, + "p95": 51.13599821925163, + "p99": 58.17599967122078 + }, + "roundtrip": { + "p50": 243.8720017671585, + "p90": 247.99999594688416, + "p95": 253.6959946155548, + "p99": 304.1279911994934 + }, + "isolatedSum": { + "p50": 249.02400374412537, + "p90": 254.62399423122406, + "p95": 257.6959989964962, + "p99": 283.26399996876717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 200.3840059041977, + "p90": 203.39199900627136, + "p95": 205.50400018692017, + "p99": 227.1679937839508 + }, + "combine": { + "p50": 59.039998799562454, + "p90": 60.28800085186958, + "p95": 61.15199998021126, + "p99": 67.19999760389328 + }, + "roundtrip": { + "p50": 254.8159956932068, + "p90": 258.432000875473, + "p95": 265.4719948768616, + "p99": 412.83199191093445 + }, + "isolatedSum": { + "p50": 259.42400470376015, + "p90": 263.67999985814095, + "p95": 266.6560001671314, + "p99": 294.3679913878441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a189933", + "identity": 
"b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b300_926f0fdd", + "comparisonKey": "111ac5be09d664b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:08.041507+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + 
"createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.23199892044067, + "p90": 123.19999933242798, + "p95": 124.44800138473511, + "p99": 129.63199615478516 + }, + "combine": { + "p50": 36.25600039958954, + "p90": 37.53599897027016, + "p95": 38.24000060558319, + "p99": 47.68000170588493 + }, + "roundtrip": { + "p50": 151.36000514030457, + "p90": 154.55999970436096, + "p95": 156.15999698638916, + "p99": 173.34400117397308 + }, + "isolatedSum": { + "p50": 155.4879993200302, + "p90": 160.73599830269814, + "p95": 162.6880019903183, + "p99": 177.3119978606701 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.7360029220581, + "p90": 123.48800152540207, + "p95": 124.38400089740753, + "p99": 129.34400141239166 + }, + "combine": { + "p50": 36.57599911093712, + "p90": 37.66399994492531, + "p95": 38.7520007789135, + "p99": 44.19200122356415 + }, + "roundtrip": { + "p50": 154.23999726772308, + "p90": 157.6319932937622, + "p95": 158.9760035276413, + "p99": 182.49599635601044 + }, + "isolatedSum": { + "p50": 157.31200203299522, + "p90": 161.15200147032738, + "p95": 163.13600167632103, + "p99": 173.5360026359558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 123.77600371837616, + "p90": 127.42400169372559, + "p95": 129.2479932308197, + "p99": 144.86399292945862 + }, + "combine": { + "p50": 40.12800008058548, + "p90": 41.53599962592125, + "p95": 
42.399998754262924, + "p99": 46.78399860858917 + }, + "roundtrip": { + "p50": 159.87199544906616, + "p90": 163.5199934244156, + "p95": 166.36799275875092, + "p99": 187.19999492168427 + }, + "isolatedSum": { + "p50": 163.90400379896164, + "p90": 168.96000131964684, + "p95": 171.64799198508263, + "p99": 191.6479915380478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.88799810409546, + "p90": 133.40799510478973, + "p95": 135.8720064163208, + "p99": 149.72800016403198 + }, + "combine": { + "p50": 40.32000154256821, + "p90": 41.50399938225746, + "p95": 42.43199899792671, + "p99": 47.74399846792221 + }, + "roundtrip": { + "p50": 167.55199432373047, + "p90": 171.80800437927246, + "p95": 180.4479956626892, + "p99": 273.79199862480164 + }, + "isolatedSum": { + "p50": 170.20799964666367, + "p90": 174.9119944870472, + "p95": 178.3040054142475, + "p99": 197.4719986319542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 142.11200177669525, + "p90": 144.96000111103058, + "p95": 146.7839926481247, + "p99": 154.30399775505066 + }, + "combine": { + "p50": 43.55200007557869, + "p90": 46.68800160288811, + "p95": 47.39199951291084, + "p99": 52.000001072883606 + }, + "roundtrip": { + "p50": 183.61599743366241, + "p90": 187.6160055398941, + "p95": 189.4720047712326, + "p99": 206.04799687862396 + }, + "isolatedSum": { + "p50": 185.66400185227394, + "p90": 191.6480027139187, + "p95": 194.17599216103554, + "p99": 206.30399882793427 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 166.143998503685, + "p90": 170.30400037765503, + "p95": 174.43199455738068, + "p99": 196.96000218391418 + }, + "combine": { + "p50": 44.096000492572784, + "p90": 45.88799923658371, + "p95": 46.94399982690811, + "p99": 55.16799911856651 + }, + "roundtrip": { + "p50": 208.25600624084473, + "p90": 211.74399554729462, + "p95": 213.85599672794342, + "p99": 231.10400140285492 + }, + "isolatedSum": { + "p50": 210.23999899625778, + "p90": 216.19199961423874, + "p95": 221.3759943842888, + "p99": 252.1280013024807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 209.24800634384155, + "p90": 213.34399282932281, + "p95": 216.60800278186798, + "p99": 263.5839879512787 + }, + "combine": { + "p50": 48.54400083422661, + "p90": 50.71999877691269, + "p95": 51.58400163054466, + "p99": 68.44799965620041 + }, + "roundtrip": { + "p50": 253.85600328445435, + "p90": 258.30399990081787, + "p95": 263.13599944114685, + "p99": 315.2639865875244 + }, + "isolatedSum": { + "p50": 257.79200717806816, + "p90": 264.0639916062355, + "p95": 268.19200441241264, + "p99": 332.0319876074791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.0959975719452, + "p90": 211.90400421619415, + "p95": 214.11199867725372, 
+ "p99": 244.1280037164688 + }, + "combine": { + "p50": 55.87200075387955, + "p90": 57.82400071620941, + "p95": 58.14399942755699, + "p99": 59.36000123620033 + }, + "roundtrip": { + "p50": 261.50399446487427, + "p90": 265.5999958515167, + "p95": 273.8560140132904, + "p99": 333.1199884414673 + }, + "isolatedSum": { + "p50": 263.96799832582474, + "p90": 269.72800493240356, + "p95": 272.2559981048107, + "p99": 303.48800495266914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-169d1549", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_48835cfe", + "comparisonKey": "099801a2eb871a8d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:35.451898+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": 
"fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.32799965143204, + "p90": 123.29600006341934, + "p95": 126.91199779510498, + "p99": 257.82400369644165 + }, + "combine": { + "p50": 36.19199991226196, + "p90": 37.567999213933945, + "p95": 39.20000046491623, + "p99": 50.335999578237534 + }, + "roundtrip": { + "p50": 148.44800531864166, + "p90": 151.296004652977, + "p95": 152.8639942407608, + "p99": 165.3759926557541 + }, + "isolatedSum": { + "p50": 155.519999563694, + "p90": 160.8639992773533, + "p95": 166.1119982600212, + "p99": 308.1600032746792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.06399780511856, + "p90": 123.19999933242798, + "p95": 125.34399330615997, + "p99": 145.82400023937225 + }, + "combine": { + "p50": 36.57599911093712, + "p90": 38.015998899936676, + "p95": 38.816001266241074, + "p99": 45.56800052523613 + }, + "roundtrip": { + "p50": 152.19199657440186, + "p90": 156.41599893569946, + 
"p95": 159.19999778270721, + "p99": 226.01599991321564 + }, + "isolatedSum": { + "p50": 156.63999691605568, + "p90": 161.21599823236465, + "p95": 164.15999457240105, + "p99": 191.39200076460838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.49600142240524, + "p90": 126.27199292182922, + "p95": 127.83999741077423, + "p99": 143.74400675296783 + }, + "combine": { + "p50": 38.84800150990486, + "p90": 40.832001715898514, + "p95": 41.31200164556503, + "p99": 43.776001781225204 + }, + "roundtrip": { + "p50": 157.18400478363037, + "p90": 160.73599457740784, + "p95": 162.9440039396286, + "p99": 177.59999632835388 + }, + "isolatedSum": { + "p50": 161.3440029323101, + "p90": 167.10399463772774, + "p95": 169.15199905633926, + "p99": 187.52000853419304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.11199915409088, + "p90": 129.15199995040894, + "p95": 130.75199723243713, + "p99": 144.48000490665436 + }, + "combine": { + "p50": 38.656000047922134, + "p90": 40.031999349594116, + "p95": 40.89599847793579, + "p99": 58.6559996008873 + }, + "roundtrip": { + "p50": 161.18399798870087, + "p90": 165.12000560760498, + "p95": 169.11999881267548, + "p99": 187.32799589633942 + }, + "isolatedSum": { + "p50": 164.76799920201302, + "p90": 169.18399930000305, + "p95": 171.64799571037292, + "p99": 203.13600450754166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 136.3839954137802, + "p90": 139.67999815940857, + "p95": 141.27999544143677, + "p99": 163.55200111865997 + }, + "combine": { + "p50": 40.192000567913055, + "p90": 43.20000112056732, + "p95": 43.807998299598694, + "p99": 45.823998749256134 + }, + "roundtrip": { + "p50": 175.23199319839478, + "p90": 179.48800325393677, + "p95": 181.88799917697906, + "p99": 227.48799622058868 + }, + "isolatedSum": { + "p50": 176.57599598169327, + "p90": 182.8799992799759, + "p95": 185.08799374103546, + "p99": 209.3759998679161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 149.53599870204926, + "p90": 152.54400670528412, + "p95": 154.01600301265717, + "p99": 167.58400201797485 + }, + "combine": { + "p50": 41.600000113248825, + "p90": 43.58400031924248, + "p95": 44.064000248909, + "p99": 47.93599992990494 + }, + "roundtrip": { + "p50": 190.8160001039505, + "p90": 194.07999515533447, + "p95": 196.06399536132812, + "p99": 215.29600024223328 + }, + "isolatedSum": { + "p50": 191.13599881529808, + "p90": 196.1280070245266, + "p95": 198.08000326156616, + "p99": 215.5200019478798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 176.70400440692902, + "p90": 179.87200617790222, + "p95": 181.72800540924072, + "p99": 202.5279998779297 + }, + "combine": { + "p50": 45.951999723911285, + "p90": 47.775998711586, + "p95": 
48.67200180888176, + "p99": 62.111999839544296 + }, + "roundtrip": { + "p50": 220.96000611782074, + "p90": 224.73600506782532, + "p95": 228.41599583625793, + "p99": 325.6320059299469 + }, + "isolatedSum": { + "p50": 222.6560041308403, + "p90": 227.64800488948822, + "p95": 230.40000721812248, + "p99": 264.639999717474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 178.30400168895721, + "p90": 181.21600151062012, + "p95": 183.03999304771423, + "p99": 194.65599954128265 + }, + "combine": { + "p50": 58.81600081920624, + "p90": 60.447998344898224, + "p95": 61.535999178886414, + "p99": 74.72000271081924 + }, + "roundtrip": { + "p50": 232.89600014686584, + "p90": 235.87200045585632, + "p95": 239.3600046634674, + "p99": 338.591992855072 + }, + "isolatedSum": { + "p50": 237.12000250816345, + "p90": 241.66399985551834, + "p95": 244.57599222660065, + "p99": 269.3760022521019 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-68739a97", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_cfba45f0", + "comparisonKey": "7ec2637013dcdd87", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:02.630387+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 118.30399930477142, + "p90": 122.01599776744843, + "p95": 125.11999905109406, + "p99": 143.26399564743042 + }, + "combine": { + "p50": 35.64799949526787, + "p90": 37.59999945759773, + "p95": 40.47999903559685, + "p99": 104.76800054311752 + }, + "roundtrip": { + "p50": 148.73600006103516, + "p90": 154.94400262832642, + "p95": 167.1999990940094, + "p99": 
187.48800456523895 + }, + "isolatedSum": { + "p50": 153.9519988000393, + "p90": 159.61599722504616, + "p95": 165.5999980866909, + "p99": 248.03199619054794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.95999717712402, + "p90": 124.76799637079239, + "p95": 127.51999497413635, + "p99": 164.57599401474 + }, + "combine": { + "p50": 35.74400022625923, + "p90": 37.408001720905304, + "p95": 37.88800165057182, + "p99": 47.839999198913574 + }, + "roundtrip": { + "p50": 153.6960005760193, + "p90": 175.7120043039322, + "p95": 192.09599494934082, + "p99": 344.1919982433319 + }, + "isolatedSum": { + "p50": 156.70399740338326, + "p90": 162.1759980916977, + "p95": 165.40799662470818, + "p99": 212.41599321365356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.56000190973282, + "p90": 126.0479986667633, + "p95": 127.36000120639801, + "p99": 143.0400013923645 + }, + "combine": { + "p50": 39.23200070858002, + "p90": 40.54399952292442, + "p95": 40.991999208927155, + "p99": 44.79999840259552 + }, + "roundtrip": { + "p50": 157.3760062456131, + "p90": 161.98399662971497, + "p95": 168.16000640392303, + "p99": 280.5440127849579 + }, + "isolatedSum": { + "p50": 161.79200261831284, + "p90": 166.59199818968773, + "p95": 168.35200041532516, + "p99": 187.83999979496002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.2800009250641, + "p90": 133.2480013370514, + "p95": 137.5039964914322, + "p99": 224.70399737358093 + }, + "combine": { + "p50": 39.583999663591385, + "p90": 41.280001401901245, + "p95": 41.471999138593674, + "p99": 44.47999969124794 + }, + "roundtrip": { + "p50": 166.01599752902985, + "p90": 170.01600563526154, + "p95": 172.19200730323792, + "p99": 261.28000020980835 + }, + "isolatedSum": { + "p50": 168.86400058865547, + "p90": 174.52800273895264, + "p95": 178.97599563002586, + "p99": 269.1839970648289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 141.2159949541092, + "p90": 149.82399344444275, + "p95": 163.2319986820221, + "p99": 260.3519856929779 + }, + "combine": { + "p50": 42.59200021624565, + "p90": 45.184001326560974, + "p95": 46.78399860858917, + "p99": 56.063998490571976 + }, + "roundtrip": { + "p50": 183.6480051279068, + "p90": 218.30399334430695, + "p95": 245.95199525356293, + "p99": 346.2719917297363 + }, + "isolatedSum": { + "p50": 183.80799517035484, + "p90": 195.00799477100372, + "p95": 210.01599729061127, + "p99": 316.4159841835499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 162.20800578594208, + "p90": 166.36799275875092, + "p95": 171.32799327373505, + "p99": 196.76800072193146 + }, + "combine": { + "p50": 44.47999969124794, + "p90": 46.751998364925385, + "p95": 47.58400097489357, + "p99": 52.38400027155876 + }, + "roundtrip": { + 
"p50": 205.02400398254395, + "p90": 215.36000072956085, + "p95": 223.87200593948364, + "p99": 277.24799513816833 + }, + "isolatedSum": { + "p50": 206.68800547719002, + "p90": 213.1199911236763, + "p95": 218.91199424862862, + "p99": 249.15200099349022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 203.39199900627136, + "p90": 210.11200547218323, + "p95": 219.4560021162033, + "p99": 249.08800423145294 + }, + "combine": { + "p50": 48.09600114822388, + "p90": 50.464000552892685, + "p95": 51.58400163054466, + "p99": 65.79200178384781 + }, + "roundtrip": { + "p50": 249.08800423145294, + "p90": 254.46400046348572, + "p95": 274.6559977531433, + "p99": 413.9519929885864 + }, + "isolatedSum": { + "p50": 251.48800015449524, + "p90": 260.5760060250759, + "p95": 271.04000374674797, + "p99": 314.88000601530075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.73600244522095, + "p90": 212.3199999332428, + "p95": 219.10400688648224, + "p99": 421.1199879646301 + }, + "combine": { + "p50": 56.32000043988228, + "p90": 58.30400064587593, + "p95": 59.07199904322624, + "p99": 70.49600034952164 + }, + "roundtrip": { + "p50": 260.19200682640076, + "p90": 271.1679935455322, + "p95": 283.55199098587036, + "p99": 314.5279884338379 + }, + "isolatedSum": { + "p50": 265.0560028851032, + "p90": 270.62400057911873, + "p95": 278.1760059297085, + "p99": 491.61598831415176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 
76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5e104525", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_8b963cf2", + "comparisonKey": "c09233637cf6e3cb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:43.962171+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", 
+ "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.13599818944931, + "p90": 130.46400249004364, + "p95": 136.83199882507324, + "p99": 149.75999295711517 + }, + "combine": { + "p50": 35.51999852061272, + "p90": 37.21600025892258, + "p95": 37.696000188589096, + "p99": 42.62400045990944 + }, + "roundtrip": { + "p50": 149.50400590896606, + "p90": 152.54400670528412, + "p95": 154.55999970436096, + "p99": 169.15200650691986 + }, + "isolatedSum": { + "p50": 154.65599671006203, + "p90": 167.68000274896622, + "p95": 174.52799901366234, + "p99": 192.3839934170246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 119.77600306272507, + "p90": 123.4240010380745, + "p95": 124.86399710178375, + "p99": 135.45599579811096 + }, + "combine": { + "p50": 35.711999982595444, + "p90": 37.31200098991394, + "p95": 37.88800165057182, + "p99": 41.63200035691261 + }, + "roundtrip": { + "p50": 152.70400047302246, + "p90": 156.70399367809296, + "p95": 159.2320054769516, + "p99": 177.5680035352707 + }, + "isolatedSum": { + "p50": 155.4880030453205, + "p90": 160.73600202798843, + "p95": 162.75199875235558, + "p99": 177.08799615502357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 123.36000055074692, + 
"p90": 133.18400084972382, + "p95": 140.44800400733948, + "p99": 149.82399344444275 + }, + "combine": { + "p50": 39.32800143957138, + "p90": 40.832001715898514, + "p95": 41.37599840760231, + "p99": 51.58400163054466 + }, + "roundtrip": { + "p50": 157.6640009880066, + "p90": 162.11199760437012, + "p95": 163.90399634838104, + "p99": 175.4560023546219 + }, + "isolatedSum": { + "p50": 162.6880019903183, + "p90": 174.01600256562233, + "p95": 181.8240024149418, + "p99": 201.4079950749874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.4479945898056, + "p90": 139.52000439167023, + "p95": 145.91999351978302, + "p99": 164.8319959640503 + }, + "combine": { + "p50": 39.51999917626381, + "p90": 41.120000183582306, + "p95": 41.471999138593674, + "p99": 42.33599826693535 + }, + "roundtrip": { + "p50": 165.47200083732605, + "p90": 178.6240041255951, + "p95": 200.8640021085739, + "p99": 230.78399896621704 + }, + "isolatedSum": { + "p50": 167.9679937660694, + "p90": 180.64000457525253, + "p95": 187.3919926583767, + "p99": 207.16799423098564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 140.99200069904327, + "p90": 146.04799449443817, + "p95": 151.5199989080429, + "p99": 159.42400693893433 + }, + "combine": { + "p50": 42.527999728918076, + "p90": 44.576000422239304, + "p95": 45.43999955058098, + "p99": 54.496001452207565 + }, + "roundtrip": { + "p50": 181.88799917697906, + "p90": 185.47199666500092, + "p95": 187.71199882030487, + "p99": 199.64799284934998 + }, + 
"isolatedSum": { + "p50": 183.52000042796135, + "p90": 190.62399491667747, + "p95": 196.9599984586239, + "p99": 213.9200083911419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 161.3759994506836, + "p90": 165.02399742603302, + "p95": 167.26399958133698, + "p99": 184.2239946126938 + }, + "combine": { + "p50": 44.38399896025658, + "p90": 46.560000628232956, + "p95": 47.42399975657463, + "p99": 49.79199916124344 + }, + "roundtrip": { + "p50": 203.64800095558167, + "p90": 207.07200467586517, + "p95": 209.72800254821777, + "p99": 228.35199534893036 + }, + "isolatedSum": { + "p50": 205.75999841094017, + "p90": 211.58399805426598, + "p95": 214.6879993379116, + "p99": 234.01599377393723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 203.61599326133728, + "p90": 206.7199945449829, + "p95": 208.8640034198761, + "p99": 237.31200397014618 + }, + "combine": { + "p50": 48.25599864125252, + "p90": 50.20799860358238, + "p95": 50.71999877691269, + "p99": 56.703999638557434 + }, + "roundtrip": { + "p50": 249.15200471878052, + "p90": 253.08799743652344, + "p95": 254.97600436210632, + "p99": 274.6880054473877 + }, + "isolatedSum": { + "p50": 251.8719919025898, + "p90": 256.9279931485653, + "p95": 259.5840021967888, + "p99": 294.0160036087036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 209.56799387931824, + "p90": 224.16000068187714, + "p95": 246.7840015888214, + "p99": 298.7839877605438 + }, + "combine": { + "p50": 56.48000165820122, + "p90": 58.59199911355972, + "p95": 58.94400179386139, + "p99": 63.13599646091461 + }, + "roundtrip": { + "p50": 259.90399718284607, + "p90": 263.2319927215576, + "p95": 265.6320035457611, + "p99": 274.2080092430115 + }, + "isolatedSum": { + "p50": 266.04799553751945, + "p90": 282.75199979543686, + "p95": 305.7280033826828, + "p99": 361.91998422145844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fb072dd8", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_9d3b385d", + "comparisonKey": "e35b7ffee4d4fef7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:43.528897+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.28799903392792, + "p90": 211.99999749660492, + "p95": 214.23999965190887, + "p99": 231.1999946832657 + }, + "combine": { + "p50": 55.80800026655197, + "p90": 57.95200169086456, + "p95": 58.848001062870026, + "p99": 66.3679987192154 + }, + "roundtrip": { + "p50": 259.2960000038147, + "p90": 263.3279860019684, + "p95": 265.76000452041626, + "p99": 285.6000065803528 + }, + "isolatedSum": { + "p50": 264.0959993004799, + "p90": 269.9519991874695, + "p95": 273.0880007147789, + "p99": 297.5679934024811 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 212.79999613761902, + "p90": 216.15999937057495, + "p95": 217.6000028848648, + "p99": 229.91999983787537 + }, + "combine": { + "p50": 74.81600344181061, + 
"p90": 76.31999999284744, + "p95": 76.51200145483017, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 280.7359993457794, + "p90": 283.9680016040802, + "p95": 286.0479950904846, + "p99": 303.0399978160858 + }, + "isolatedSum": { + "p50": 287.6159995794296, + "p90": 292.4799993634224, + "p95": 294.112004339695, + "p99": 308.5760027170181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 225.600004196167, + "p90": 229.88800704479218, + "p95": 232.4800044298172, + "p99": 243.52000653743744 + }, + "combine": { + "p50": 107.93600231409073, + "p90": 110.11199653148651, + "p95": 110.97600311040878, + "p99": 114.17599767446518 + }, + "roundtrip": { + "p50": 328.15998792648315, + "p90": 331.8080008029938, + "p95": 333.5359990596771, + "p99": 352.1279990673065 + }, + "isolatedSum": { + "p50": 333.5360065102577, + "p90": 340.0000035762787, + "p95": 343.456007540226, + "p99": 357.6960042119026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 285.5679988861084, + "p90": 288.92800211906433, + "p95": 290.5600070953369, + "p99": 306.71998858451843 + }, + "combine": { + "p50": 186.78399920463562, + "p90": 188.9919936656952, + "p95": 189.69599902629852, + "p99": 191.39200448989868 + }, + "roundtrip": { + "p50": 468.095988035202, + "p90": 472.00000286102295, + "p95": 474.2079973220825, + "p99": 560.8000159263611 + }, + "isolatedSum": { + "p50": 472.351998090744, + "p90": 477.9199957847595, + "p95": 480.25600612163544, + "p99": 
498.1119930744171 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 456.2239944934845, + "p90": 459.9359929561615, + "p95": 461.60000562667847, + "p99": 491.2320077419281 + }, + "combine": { + "p50": 323.199987411499, + "p90": 325.50400495529175, + "p95": 326.04798674583435, + "p99": 331.13598823547363 + }, + "roundtrip": { + "p50": 776.0319709777832, + "p90": 780.2240252494812, + "p95": 783.0399870872498, + "p99": 839.8399949073792 + }, + "isolatedSum": { + "p50": 779.4239819049835, + "p90": 785.4399979114532, + "p95": 787.6479923725128, + "p99": 822.3679959774017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 728.2559871673584, + "p90": 731.7119836807251, + "p95": 733.0560088157654, + "p99": 741.5680289268494 + }, + "combine": { + "p50": 595.0400233268738, + "p90": 597.4720120429993, + "p95": 598.7200140953064, + "p99": 620.032012462616 + }, + "roundtrip": { + "p50": 1320.863962173462, + "p90": 1326.0799646377563, + "p95": 1331.9679498672485, + "p99": 1542.1439409255981 + }, + "isolatedSum": { + "p50": 1323.2960104942322, + "p90": 1329.1839957237244, + "p95": 1331.7760229110718, + "p99": 1361.6000413894653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ca9fc45b", + "identity": 
"b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b300_9e0f093f", + "comparisonKey": "1825f275b5286425", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:42.080488+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 247.99999594688416, + "p90": 251.8399953842163, + "p95": 255.13601303100586, + "p99": 280.16000986099243 + }, + "combine": { + "p50": 65.85600227117538, + "p90": 67.87200272083282, + "p95": 68.31999868154526, + "p99": 74.14399832487106 + }, + "roundtrip": { + "p50": 308.51200222969055, + "p90": 315.0720000267029, + "p95": 322.6880133152008, + "p99": 357.7280044555664 + }, + "isolatedSum": { + "p50": 313.85599821805954, + "p90": 319.71199810504913, + "p95": 323.4560117125511, + "p99": 354.3040081858635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 252.44799256324768, + "p90": 256.3839852809906, + "p95": 259.10401344299316, + "p99": 344.0000116825104 + }, + "combine": { + "p50": 88.86399865150452, + "p90": 90.91199934482574, + "p95": 91.71199798583984, + "p99": 112.92800307273865 + }, + "roundtrip": { + "p50": 336.2559974193573, + "p90": 340.67198634147644, + "p95": 349.727988243103, + "p99": 440.5120015144348 + }, + "isolatedSum": { + "p50": 341.3119912147522, + "p90": 347.29598462581635, + "p95": 350.816011428833, + "p99": 456.928014755249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 265.56798815727234, + "p90": 269.82399821281433, + "p95": 272.6080119609833, + "p99": 292.1279966831207 + }, + "combine": { + "p50": 143.8719928264618, + "p90": 146.17599546909332, + "p95": 147.10399508476257, + "p99": 
156.3519984483719 + }, + "roundtrip": { + "p50": 403.328001499176, + "p90": 411.16800904273987, + "p95": 425.7279932498932, + "p99": 461.40798926353455 + }, + "isolatedSum": { + "p50": 409.43998098373413, + "p90": 415.99999368190765, + "p95": 419.71200704574585, + "p99": 448.4799951314926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 365.28000235557556, + "p90": 369.376003742218, + "p95": 371.93599343299866, + "p99": 430.55999279022217 + }, + "combine": { + "p50": 249.9839961528778, + "p90": 252.19199061393738, + "p95": 252.9599964618683, + "p99": 258.432000875473 + }, + "roundtrip": { + "p50": 609.0880036354065, + "p90": 615.1360273361206, + "p95": 625.4720091819763, + "p99": 754.8159956932068 + }, + "isolatedSum": { + "p50": 615.2639985084534, + "p90": 621.5679943561554, + "p95": 624.895989894867, + "p99": 688.9919936656952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 616.8320178985596, + "p90": 621.3439702987671, + "p95": 624.4159936904907, + "p99": 712.3200297355652 + }, + "combine": { + "p50": 454.5600116252899, + "p90": 457.18398690223694, + "p95": 458.624005317688, + "p99": 475.96800327301025 + }, + "roundtrip": { + "p50": 1068.4800148010254, + "p90": 1074.336051940918, + "p95": 1082.368016242981, + "p99": 1207.2319984436035 + }, + "isolatedSum": { + "p50": 1071.3920295238495, + "p90": 1078.527957201004, + "p95": 1083.0399990081787, + "p99": 1188.2880330085754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + 
"combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1057.088017463684, + "p90": 1065.7919645309448, + "p95": 1070.464015007019, + "p99": 1168.8319444656372 + }, + "combine": { + "p50": 864.9600148200989, + "p90": 868.9919710159302, + "p95": 872.223973274231, + "p99": 977.9520034790039 + }, + "roundtrip": { + "p50": 1919.6480512619019, + "p90": 1930.7199716567993, + "p95": 1941.248059272766, + "p99": 2175.391912460327 + }, + "isolatedSum": { + "p50": 1922.048032283783, + "p90": 1934.783935546875, + "p95": 1942.68798828125, + "p99": 2146.783947944641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-066706ff", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_bbf50286", + "comparisonKey": "306981d7def5dc0a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:31.999775+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.64800536632538, + "p90": 142.7839994430542, + "p95": 144.31999623775482, + "p99": 163.7759953737259 + }, + "combine": { + "p50": 38.14399987459183, + "p90": 40.063999593257904, + "p95": 41.02399945259094, + "p99": 47.29599878191948 + }, + "roundtrip": { + "p50": 174.78400468826294, + "p90": 178.43200266361237, + "p95": 184.1920018196106, + "p99": 240.03200232982635 + }, + "isolatedSum": { + "p50": 177.7920052409172, + "p90": 182.8479990363121, + "p95": 185.34399569034576, + "p99": 211.07199415564537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 
4096, + "dispatch": { + "p50": 141.184002161026, + "p90": 144.19199526309967, + "p95": 146.68799936771393, + "p99": 163.87200355529785 + }, + "combine": { + "p50": 50.4320003092289, + "p90": 52.319999784231186, + "p95": 53.408000618219376, + "p99": 58.49599838256836 + }, + "roundtrip": { + "p50": 186.39999628067017, + "p90": 189.63199853897095, + "p95": 191.96799397468567, + "p99": 212.73599565029144 + }, + "isolatedSum": { + "p50": 191.6160024702549, + "p90": 196.51199504733086, + "p95": 200.0959999859333, + "p99": 222.3680019378662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 204.96000349521637, + "p90": 208.44799280166626, + "p95": 211.32799983024597, + "p99": 234.55999791622162 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 99.13600236177444, + "p95": 99.5199978351593, + "p99": 102.84800082445145 + }, + "roundtrip": { + "p50": 297.85600304603577, + "p90": 301.88798904418945, + "p95": 303.3599853515625, + "p99": 331.2639892101288 + }, + "isolatedSum": { + "p50": 302.2080063819885, + "p90": 307.5839951634407, + "p95": 310.8479976654053, + "p99": 337.40799874067307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9847fc9a", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b300_4f12e45f", + "comparisonKey": "463ee6e6f95003b8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:17.340332+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.23999726772308, + "p90": 161.53599321842194, + "p95": 166.4000004529953, + "p99": 183.80799889564514 + }, + "combine": { + "p50": 43.71200129389763, + "p90": 45.56800052523613, + "p95": 45.88799923658371, + 
"p99": 49.375999718904495 + }, + "roundtrip": { + "p50": 193.50400567054749, + "p90": 196.79999351501465, + "p95": 198.46400618553162, + "p99": 216.19200706481934 + }, + "isolatedSum": { + "p50": 197.9519985616207, + "p90": 207.10399374365807, + "p95": 212.287999689579, + "p99": 233.18399861454964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 156.12800419330597, + "p90": 175.23199319839478, + "p95": 188.1600022315979, + "p99": 217.056006193161 + }, + "combine": { + "p50": 51.42400041222572, + "p90": 54.30399999022484, + "p95": 55.80800026655197, + "p99": 65.18399715423584 + }, + "roundtrip": { + "p50": 202.81599462032318, + "p90": 206.40000700950623, + "p95": 208.54400098323822, + "p99": 273.9199995994568 + }, + "isolatedSum": { + "p50": 207.5520046055317, + "p90": 229.5359931886196, + "p95": 243.96800249814987, + "p99": 282.24000334739685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 158.59200060367584, + "p90": 161.82400286197662, + "p95": 164.86400365829468, + "p99": 290.6239926815033 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 70.3359991312027, + "p95": 71.1359977722168, + "p99": 86.71999722719193 + }, + "roundtrip": { + "p50": 223.13599288463593, + "p90": 258.4959864616394, + "p95": 277.2800028324127, + "p99": 349.98399019241333 + }, + "isolatedSum": { + "p50": 227.13600099086761, + "p90": 232.16000199317932, + "p95": 236.00000143051147, + "p99": 377.3439899086952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 192.09599494934082, + "p90": 195.48800587654114, + "p95": 196.70400023460388, + "p99": 212.64000236988068 + }, + "combine": { + "p50": 104.86400127410889, + "p90": 106.4319983124733, + "p95": 107.00800269842148, + "p99": 110.6560006737709 + }, + "roundtrip": { + "p50": 290.0159955024719, + "p90": 295.9040105342865, + "p95": 304.9600124359131, + "p99": 424.9599874019623 + }, + "isolatedSum": { + "p50": 296.9599962234497, + "p90": 301.92000418901443, + "p95": 303.71200293302536, + "p99": 323.2960030436516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 293.37599873542786, + "p90": 296.7039942741394, + "p95": 298.8159954547882, + "p99": 321.3120102882385 + }, + "combine": { + "p50": 177.59999632835388, + "p90": 179.55200374126434, + "p95": 180.51199615001678, + "p99": 185.69600582122803 + }, + "roundtrip": { + "p50": 467.9360091686249, + "p90": 471.9040095806122, + "p95": 475.71200132369995, + "p99": 552.7359843254089 + }, + "isolatedSum": { + "p50": 470.97599506378174, + "p90": 476.25599801540375, + "p95": 479.327991604805, + "p99": 507.00801610946655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 474.33599829673767, + "p90": 478.2080054283142, + "p95": 480.96001148223877, + "p99": 
524.0319967269897 + }, + "combine": { + "p50": 311.6160035133362, + "p90": 313.6959969997406, + "p95": 314.39998745918274, + "p99": 316.6080117225647 + }, + "roundtrip": { + "p50": 784.1600179672241, + "p90": 788.864016532898, + "p95": 791.2960052490234, + "p99": 873.5359907150269 + }, + "isolatedSum": { + "p50": 785.9520018100739, + "p90": 791.9040024280548, + "p95": 795.3599989414215, + "p99": 840.6400084495544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c819ff13", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b300_7febeea9", + "comparisonKey": "cbb26668caf1635b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:24.547430+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + 
"conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 205.1520049571991, + "p90": 208.28799903392792, + "p95": 210.01599729061127, + "p99": 223.77599775791168 + }, + "combine": { + "p50": 58.59199911355972, + "p90": 59.58399921655655, + "p95": 60.7680007815361, + "p99": 65.0240033864975 + }, + "roundtrip": { + "p50": 261.02399826049805, + "p90": 265.31198620796204, + "p95": 271.87201380729675, + "p99": 590.2400016784668 + }, + "isolatedSum": { + "p50": 263.7440040707588, + "p90": 267.87199825048447, + "p95": 270.78399807214737, + "p99": 288.8000011444092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 213.59999477863312, + "p90": 217.02399849891663, + "p95": 218.27200055122375, + "p99": 231.77599906921387 + }, + "combine": { + "p50": 81.82399719953537, + "p90": 83.5840031504631, + "p95": 84.06399935483932, + "p99": 86.7839977145195 + }, + "roundtrip": { + "p50": 290.43200612068176, + "p90": 294.0160036087036, + 
"p95": 295.1680123806, + "p99": 305.7279884815216 + }, + "isolatedSum": { + "p50": 295.4239919781685, + "p90": 300.60800164937973, + "p95": 302.3359999060631, + "p99": 318.55999678373337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 222.08000719547272, + "p90": 225.66400468349457, + "p95": 227.7120053768158, + "p99": 241.31199717521667 + }, + "combine": { + "p50": 127.55200266838074, + "p90": 129.60000336170197, + "p95": 130.17599284648895, + "p99": 132.79999792575836 + }, + "roundtrip": { + "p50": 347.1679985523224, + "p90": 350.6239950656891, + "p95": 352.06401348114014, + "p99": 364.9919927120209 + }, + "isolatedSum": { + "p50": 349.63200986385345, + "p90": 355.26400804519653, + "p95": 357.88799822330475, + "p99": 374.11199510097504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.3520016670227, + "p90": 315.90399146080017, + "p95": 317.7599906921387, + "p99": 332.2240114212036 + }, + "combine": { + "p50": 222.24000096321106, + "p90": 224.67200458049774, + "p95": 225.40800273418427, + "p99": 240.31999707221985 + }, + "roundtrip": { + "p50": 533.1839919090271, + "p90": 536.8000268936157, + "p95": 538.9119982719421, + "p99": 549.9200224876404 + }, + "isolatedSum": { + "p50": 534.5920026302338, + "p90": 540.5759960412979, + "p95": 543.1679934263229, + "p99": 572.5440084934235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + 
"recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 515.1680111885071, + "p90": 518.7519788742065, + "p95": 521.1520195007324, + "p99": 550.3039956092834 + }, + "combine": { + "p50": 403.4239947795868, + "p90": 406.43200278282166, + "p95": 407.0720076560974, + "p99": 410.5919897556305 + }, + "roundtrip": { + "p50": 918.5280203819275, + "p90": 923.0719804763794, + "p95": 925.599992275238, + "p99": 964.031994342804 + }, + "isolatedSum": { + "p50": 918.5920059680939, + "p90": 925.1839816570282, + "p95": 928.2240271568298, + "p99": 960.8959853649139 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 944.8320269584656, + "p90": 948.6079812049866, + "p95": 949.567973613739, + "p99": 992.576003074646 + }, + "combine": { + "p50": 767.2320008277893, + "p90": 769.8559761047363, + "p95": 771.2640166282654, + "p99": 947.3919868469238 + }, + "roundtrip": { + "p50": 1713.9840126037598, + "p90": 1719.3280458450317, + "p95": 1727.7439832687378, + "p99": 1977.0879745483398 + }, + "isolatedSum": { + "p50": 1712.0640277862549, + "p90": 1718.463957309723, + "p95": 1720.8319902420044, + "p99": 1939.9679899215698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-121cdea2", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": 
"b300_9d04a365", + "comparisonKey": "b2e44f295d5e7349", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:57.045498+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 210.1760059595108, + "p90": 213.34399282932281, + "p95": 215.32799303531647, + "p99": 234.78400707244873 + }, + "combine": { + "p50": 57.920001447200775, + "p90": 59.74400043487549, + "p95": 60.80000102519989, + "p99": 80.60800284147263 + }, + "roundtrip": { + "p50": 262.7840042114258, + "p90": 265.855997800827, + "p95": 267.16798543930054, + "p99": 283.90398621559143 + }, + "isolatedSum": { + "p50": 268.0960074067116, + "p90": 273.0879932641983, + "p95": 276.12799406051636, + "p99": 315.39200991392136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 213.53599429130554, + "p90": 216.8319970369339, + "p95": 218.52800250053406, + "p99": 237.95199394226074 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 75.74400305747986, + "p95": 76.38400048017502, + "p99": 92.92799979448318 + }, + "roundtrip": { + "p50": 282.81599283218384, + "p90": 286.4319980144501, + "p95": 288.09601068496704, + "p99": 303.6479949951172 + }, + "isolatedSum": { + "p50": 287.03999519348145, + "p90": 292.57600009441376, + "p95": 294.9120029807091, + "p99": 330.8799937367439 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 223.7440049648285, + "p90": 227.48799622058868, + "p95": 228.64000499248505, + "p99": 253.05598974227905 + }, + "combine": { + "p50": 105.92000186443329, + "p90": 108.25599730014801, + "p95": 109.31199789047241, + "p99": 115.58400094509125 + }, + "roundtrip": { + "p50": 326.52801275253296, + 
"p90": 330.78399300575256, + "p95": 332.3200047016144, + "p99": 349.2799997329712 + }, + "isolatedSum": { + "p50": 329.6640068292618, + "p90": 335.7439935207367, + "p95": 337.95200288295746, + "p99": 368.6399906873703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 287.80800104141235, + "p90": 291.26399755477905, + "p95": 292.7680015563965, + "p99": 307.3279857635498 + }, + "combine": { + "p50": 184.32000279426575, + "p90": 186.91200017929077, + "p95": 187.99999356269836, + "p99": 206.59199357032776 + }, + "roundtrip": { + "p50": 467.9360091686249, + "p90": 471.8399941921234, + "p95": 473.471999168396, + "p99": 491.0080134868622 + }, + "isolatedSum": { + "p50": 472.1280038356781, + "p90": 478.1759977340698, + "p95": 480.76799511909485, + "p99": 513.9199793338776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 459.9680006504059, + "p90": 464.2240107059479, + "p95": 466.5600061416626, + "p99": 552.3520112037659 + }, + "combine": { + "p50": 320.8320140838623, + "p90": 323.10399413108826, + "p95": 323.743999004364, + "p99": 332.15999603271484 + }, + "roundtrip": { + "p50": 777.4720191955566, + "p90": 782.3039889335632, + "p95": 788.2239818572998, + "p99": 887.6479864120483 + }, + "isolatedSum": { + "p50": 780.8000147342682, + "p90": 787.3280048370361, + "p95": 790.3040051460266, + "p99": 884.5120072364807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + 
"fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 736.5120053291321, + "p90": 740.2560114860535, + "p95": 742.143988609314, + "p99": 799.9359965324402 + }, + "combine": { + "p50": 594.1759943962097, + "p90": 596.992015838623, + "p95": 597.9200005531311, + "p99": 615.4239773750305 + }, + "roundtrip": { + "p50": 1326.8159627914429, + "p90": 1331.7760229110718, + "p95": 1334.8159790039062, + "p99": 1674.880027770996 + }, + "isolatedSum": { + "p50": 1330.6879997253418, + "p90": 1337.2480273246765, + "p95": 1340.063989162445, + "p99": 1415.3599739074707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b77c74d3", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b300_87cc08e1", + "comparisonKey": "ad338a0129d3e8c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:22.661159+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 207.2959989309311, + "p90": 212.3199999332428, + "p95": 217.31199324131012, + "p99": 350.94401240348816 + }, + "combine": { + "p50": 56.86400085687637, + "p90": 58.88000130653381, + "p95": 59.74400043487549, + "p99": 69.37599927186966 + }, + "roundtrip": { + "p50": 260.22401452064514, + "p90": 278.49599719047546, + "p95": 298.14401268959045, + "p99": 347.29599952697754 + }, + "isolatedSum": { + "p50": 264.15999978780746, + "p90": 271.2000012397766, + "p95": 277.0559936761856, + "p99": 420.3200116753578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 
2048, + "dispatch": { + "p50": 212.47999370098114, + "p90": 216.63999557495117, + "p95": 220.38400173187256, + "p99": 235.9679937362671 + }, + "combine": { + "p50": 73.47200065851212, + "p90": 75.48800110816956, + "p95": 76.38400048017502, + "p99": 78.49600166082382 + }, + "roundtrip": { + "p50": 280.67201375961304, + "p90": 284.7039997577667, + "p95": 290.3999984264374, + "p99": 348.60798716545105 + }, + "isolatedSum": { + "p50": 285.95199435949326, + "p90": 292.1279966831207, + "p95": 296.7680022120476, + "p99": 314.4639953970909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 223.07200729846954, + "p90": 226.84800624847412, + "p95": 229.34399545192719, + "p99": 249.15200471878052 + }, + "combine": { + "p50": 108.73600095510483, + "p90": 110.91200262308121, + "p95": 111.32799834012985, + "p99": 114.68800157308578 + }, + "roundtrip": { + "p50": 327.42398977279663, + "p90": 331.36001229286194, + "p95": 334.23998951911926, + "p99": 406.6239893436432 + }, + "isolatedSum": { + "p50": 331.80800825357437, + "p90": 337.7600088715553, + "p95": 340.67199379205704, + "p99": 363.8400062918663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 286.72000765800476, + "p90": 290.46401381492615, + "p95": 292.54400730133057, + "p99": 305.63199520111084 + }, + "combine": { + "p50": 187.00799345970154, + "p90": 189.2479956150055, + "p95": 190.36799669265747, + "p99": 197.31199741363525 + }, + "roundtrip": { + "p50": 467.6800072193146, + "p90": 
472.00000286102295, + "p95": 474.07999634742737, + "p99": 532.7360033988953 + }, + "isolatedSum": { + "p50": 473.7280011177063, + "p90": 479.71200942993164, + "p95": 482.91200399398804, + "p99": 502.9439926147461 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 458.75200629234314, + "p90": 462.5599980354309, + "p95": 465.7599925994873, + "p99": 638.975977897644 + }, + "combine": { + "p50": 320.8959996700287, + "p90": 323.2960104942322, + "p95": 325.0559866428375, + "p99": 342.1759903430939 + }, + "roundtrip": { + "p50": 776.8319845199585, + "p90": 781.4720273017883, + "p95": 790.4000282287598, + "p99": 912.5760197639465 + }, + "isolatedSum": { + "p50": 779.6480059623718, + "p90": 785.8560085296631, + "p95": 790.8159792423248, + "p99": 981.1519682407379 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 737.6000285148621, + "p90": 741.6639924049377, + "p95": 744.0639734268188, + "p99": 818.7839984893799 + }, + "combine": { + "p50": 591.3919806480408, + "p90": 595.1359868049622, + "p95": 596.4800119400024, + "p99": 656.3199758529663 + }, + "roundtrip": { + "p50": 1329.5999765396118, + "p90": 1335.360050201416, + "p95": 1346.8159437179565, + "p99": 1612.671971321106 + }, + "isolatedSum": { + "p50": 1328.9920091629028, + "p90": 1336.7999792099, + "p95": 1340.5439853668213, + "p99": 1475.1039743423462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + 
"fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cbd255cc", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_1b42769c", + "comparisonKey": "64b471173ccd100c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:28.935129+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", 
+ "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 179.07199263572693, + "p90": 182.91200697422028, + "p95": 184.2239946126938, + "p99": 200.95999538898468 + }, + "combine": { + "p50": 58.94400179386139, + "p90": 60.32000109553337, + "p95": 61.08799949288368, + "p99": 67.90400296449661 + }, + "roundtrip": { + "p50": 232.54400491714478, + "p90": 236.83199286460876, + "p95": 238.39999735355377, + "p99": 248.22400510311127 + }, + "isolatedSum": { + "p50": 238.01599442958832, + "p90": 243.23200806975365, + "p95": 245.31199410557747, + "p99": 268.8639983534813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 187.51999735832214, + "p90": 191.42399728298187, + "p95": 193.88799369335175, + "p99": 209.50399339199066 + }, + "combine": { + "p50": 78.91199737787247, + "p90": 81.05599880218506, + "p95": 82.30400085449219, + "p99": 87.48800307512283 + }, + "roundtrip": { + "p50": 260.3519856929779, + "p90": 265.0879919528961, + "p95": 267.7119970321655, + "p99": 279.3920040130615 + }, + "isolatedSum": { + "p50": 266.4319947361946, + "p90": 272.47999608516693, + "p95": 276.19199454784393, + "p99": 296.9919964671135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 215.00800549983978, + "p90": 
218.4000015258789, + "p95": 220.70400416851044, + "p99": 268.0320143699646 + }, + "combine": { + "p50": 122.43200093507767, + "p90": 124.51200187206268, + "p95": 125.59999525547028, + "p99": 131.45600259304047 + }, + "roundtrip": { + "p50": 336.95998787879944, + "p90": 341.8239951133728, + "p95": 347.135990858078, + "p99": 360.9279990196228 + }, + "isolatedSum": { + "p50": 337.44000643491745, + "p90": 342.9120033979416, + "p95": 346.3039994239807, + "p99": 399.48801696300507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 318.9440071582794, + "p90": 322.4000036716461, + "p95": 323.42401146888733, + "p99": 332.96000957489014 + }, + "combine": { + "p50": 216.41600131988525, + "p90": 218.87999773025513, + "p95": 219.9680060148239, + "p99": 224.35200214385986 + }, + "roundtrip": { + "p50": 537.4720096588135, + "p90": 541.5679812431335, + "p95": 545.8239912986755, + "p99": 594.43199634552 + }, + "isolatedSum": { + "p50": 535.3600084781647, + "p90": 541.2800014019012, + "p95": 543.3920174837112, + "p99": 557.31201171875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 512.83198595047, + "p90": 516.9919729232788, + "p95": 518.8800096511841, + "p99": 533.2480072975159 + }, + "combine": { + "p50": 386.81599497795105, + "p90": 389.0239894390106, + "p95": 389.95200395584106, + "p99": 394.9440121650696 + }, + "roundtrip": { + "p50": 901.9839763641357, + "p90": 907.4239730834961, + "p95": 910.9119772911072, + "p99": 
1008.1599950790405 + }, + "isolatedSum": { + "p50": 899.647980928421, + "p90": 906.0159623622894, + "p95": 908.8320136070251, + "p99": 928.1920194625854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 910.3360176086426, + "p90": 914.7199988365173, + "p95": 916.4479970932007, + "p99": 1018.3360576629639 + }, + "combine": { + "p50": 738.1119728088379, + "p90": 741.536021232605, + "p95": 742.2720193862915, + "p99": 744.7360157966614 + }, + "roundtrip": { + "p50": 1653.5040140151978, + "p90": 1659.775972366333, + "p95": 1665.5679941177368, + "p99": 1813.439965248108 + }, + "isolatedSum": { + "p50": 1648.4479904174805, + "p90": 1656.2560200691223, + "p95": 1658.7200164794922, + "p99": 1763.0720734596252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-910c630e", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b300_1c62942e", + "comparisonKey": "308ce86fad6aa446", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:27.161927+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 
8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 149.6639996767044, + "p90": 173.8239973783493, + "p95": 196.60800695419312, + "p99": 234.8800003528595 + }, + "combine": { + "p50": 52.76799947023392, + "p90": 57.0559985935688, + "p95": 58.17599967122078, + "p99": 62.49599903821945 + }, + "roundtrip": { + "p50": 202.36800611019135, + "p90": 215.13600647449493, + "p95": 224.2880016565323, + "p99": 255.39198517799377 + }, + "isolatedSum": { + "p50": 202.43199914693832, + "p90": 230.8799959719181, + "p95": 254.7840066254139, + "p99": 297.37599939107895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.9760035276413, + "p90": 162.30399906635284, + "p95": 163.4880006313324, + "p99": 175.10400712490082 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 72.35199958086014, + "p95": 73.34399968385696, + "p99": 74.62400197982788 + }, + "roundtrip": { + "p50": 229.0560007095337, + "p90": 232.5119972229004, + "p95": 235.3920042514801, + "p99": 262.2399926185608 + }, + "isolatedSum": { + "p50": 229.50400412082672, + "p90": 234.65599864721298, + "p95": 236.83200031518936, + "p99": 249.7280091047287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 209.27999913692474, + "p90": 212.351992726326, + "p95": 213.34399282932281, + "p99": 257.56800174713135 + }, + "combine": { + "p50": 116.64000153541565, + "p90": 118.56000125408173, + "p95": 119.29599940776825, + "p99": 123.74400347471237 + }, + "roundtrip": { + "p50": 325.6640136241913, + "p90": 329.9199938774109, + "p95": 336.95998787879944, + "p99": 454.27200198173523 + }, + "isolatedSum": { + "p50": 325.9200006723404, + "p90": 330.9119939804077, + "p95": 332.63999223709106, + "p99": 381.3120052218437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 309.6959888935089, + "p90": 316.6399896144867, + "p95": 334.23998951911926, + 
"p99": 353.69598865509033 + }, + "combine": { + "p50": 202.2400051355362, + "p90": 204.03200387954712, + "p95": 204.57600057125092, + "p99": 206.68800175189972 + }, + "roundtrip": { + "p50": 511.1680030822754, + "p90": 514.847993850708, + "p95": 515.999972820282, + "p99": 530.9759974479675 + }, + "isolatedSum": { + "p50": 511.9359940290451, + "p90": 520.6719934940338, + "p95": 538.8159900903702, + "p99": 560.38399040699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 506.84797763824463, + "p90": 510.3039741516113, + "p95": 511.7120146751404, + "p99": 606.5599918365479 + }, + "combine": { + "p50": 369.8880076408386, + "p90": 372.3520040512085, + "p95": 373.27998876571655, + "p99": 379.96798753738403 + }, + "roundtrip": { + "p50": 881.4079761505127, + "p90": 885.4079842567444, + "p95": 890.2720212936401, + "p99": 947.1039772033691 + }, + "isolatedSum": { + "p50": 876.7359852790833, + "p90": 882.6559782028198, + "p95": 884.9920034408569, + "p99": 986.5279793739319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 899.9680280685425, + "p90": 903.328001499176, + "p95": 904.5439958572388, + "p99": 925.599992275238 + }, + "combine": { + "p50": 707.6799869537354, + "p90": 710.7200026512146, + "p95": 711.7760181427002, + "p99": 714.0799760818481 + }, + "roundtrip": { + "p50": 1613.9839887619019, + "p90": 1620.4479932785034, + "p95": 1637.7919912338257, + "p99": 2321.824073791504 + }, + "isolatedSum": { + "p50": 
1607.6480150222778, + "p90": 1614.0480041503906, + "p95": 1616.320013999939, + "p99": 1639.6799683570862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7cac29c1", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b300_e57a98e0", + "comparisonKey": "6589c44aa5928999", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:59.948036+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 211.67999505996704, + "p90": 215.32799303531647, + "p95": 218.72000396251678, + "p99": 337.92001008987427 + }, + "combine": { + "p50": 58.20799991488457, + "p90": 60.15999987721443, + "p95": 60.736000537872314, + "p99": 80.86399734020233 + }, + "roundtrip": { + "p50": 262.2080147266388, + "p90": 265.5999958515167, + "p95": 267.90401339530945, + "p99": 290.94401001930237 + }, + "isolatedSum": { + "p50": 269.8879949748516, + "p90": 275.4879929125309, + "p95": 279.4560045003891, + "p99": 418.7840074300766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 217.24799275398254, + "p90": 230.78399896621704, + "p95": 240.03200232982635, + "p99": 269.3440020084381 + }, + "combine": { + "p50": 74.81600344181061, + "p90": 76.9599974155426, + "p95": 78.36800068616867, + "p99": 82.56000280380249 + }, + "roundtrip": { + "p50": 286.1120104789734, + "p90": 289.792001247406, + "p95": 291.9999957084656, + "p99": 309.63200330734253 + }, + "isolatedSum": { + "p50": 292.06399619579315, + "p90": 307.74399638175964, + "p95": 318.400003015995, + "p99": 351.9040048122406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 226.75199806690216, + "p90": 230.68800568580627, + "p95": 235.48799753189087, + "p99": 268.0320143699646 + }, + "combine": { + "p50": 109.72800105810165, + "p90": 111.48799955844879, + "p95": 112.28799819946289, + "p99": 120.95999717712402 + }, + "roundtrip": { + "p50": 332.0640027523041, + "p90": 336.2880051136017, + "p95": 337.5680148601532, + "p99": 352.4160087108612 + }, + "isolatedSum": { + "p50": 336.4799991250038, + "p90": 342.17600524425507, + "p95": 347.77599573135376, + "p99": 388.9920115470886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 291.4240062236786, + "p90": 295.55198550224304, + "p95": 297.1520125865936, + "p99": 315.744012594223 + }, + "combine": { + "p50": 187.26399540901184, + "p90": 189.60000574588776, + "p95": 190.14400243759155, + "p99": 197.11999595165253 + }, + "roundtrip": { + "p50": 473.82399439811707, + "p90": 477.2160053253174, + "p95": 480.22401332855225, + "p99": 512.9920244216919 + }, + "isolatedSum": { + "p50": 478.68800163269043, + "p90": 485.1519912481308, + "p95": 487.2960150241852, + "p99": 512.8640085458755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 465.9520089626312, + "p90": 470.0799882411957, + "p95": 
474.9760031700134, + "p99": 594.1439867019653 + }, + "combine": { + "p50": 328.19199562072754, + "p90": 330.78399300575256, + "p95": 331.4880132675171, + "p99": 385.919988155365 + }, + "roundtrip": { + "p50": 792.3840284347534, + "p90": 796.8000173568726, + "p95": 802.1759986877441, + "p99": 906.2399864196777 + }, + "isolatedSum": { + "p50": 794.1440045833588, + "p90": 800.8639812469482, + "p95": 806.4640164375305, + "p99": 980.0639748573303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 753.279983997345, + "p90": 757.6959729194641, + "p95": 760.6080174446106, + "p99": 784.991979598999 + }, + "combine": { + "p50": 608.9919805526733, + "p90": 611.7119789123535, + "p95": 612.9599809646606, + "p99": 630.079984664917 + }, + "roundtrip": { + "p50": 1361.3760471343994, + "p90": 1365.280032157898, + "p95": 1367.743968963623, + "p99": 1669.8880195617676 + }, + "isolatedSum": { + "p50": 1362.2719645500183, + "p90": 1369.4079518318176, + "p95": 1373.5679984092712, + "p99": 1415.071964263916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d6883932", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b300_ad2ff111", + "comparisonKey": "18d0ba60c0c5400f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:38.743185+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": 
"deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 200.95999538898468, + "p90": 204.70400154590607, + "p95": 207.23199844360352, + "p99": 273.79199862480164 + }, + "combine": { + "p50": 58.75200033187866, + "p90": 60.35200133919716, + "p95": 60.95999851822853, + "p99": 64.92800265550613 + }, + "roundtrip": { + "p50": 
256.9279968738556, + "p90": 260.5760097503662, + "p95": 263.5839879512787, + "p99": 326.3680040836334 + }, + "isolatedSum": { + "p50": 259.71199572086334, + "p90": 265.0560028851032, + "p95": 268.19199696183205, + "p99": 338.72000128030777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 208.15999805927277, + "p90": 211.84000372886658, + "p95": 214.08000588417053, + "p99": 240.03200232982635 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 83.93599838018417, + "p95": 84.60800349712372, + "p99": 87.90399879217148 + }, + "roundtrip": { + "p50": 284.7360074520111, + "p90": 288.2240116596222, + "p95": 290.912002325058, + "p99": 304.80000376701355 + }, + "isolatedSum": { + "p50": 289.8240014910698, + "p90": 295.77600210905075, + "p95": 298.68800938129425, + "p99": 327.93600112199783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 216.5759950876236, + "p90": 221.0559993982315, + "p95": 222.9440063238144, + "p99": 244.28799748420715 + }, + "combine": { + "p50": 123.48800152540207, + "p90": 125.5359947681427, + "p95": 126.30400061607361, + "p99": 131.1360001564026 + }, + "roundtrip": { + "p50": 335.7119858264923, + "p90": 340.0000035762787, + "p95": 343.1360125541687, + "p99": 381.18401169776917 + }, + "isolatedSum": { + "p50": 340.06399661302567, + "p90": 346.5919941663742, + "p95": 349.248006939888, + "p99": 375.42399764060974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 
282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 313.3760094642639, + "p90": 317.440003156662, + "p95": 318.9760148525238, + "p99": 335.7439935207367 + }, + "combine": { + "p50": 219.39200162887573, + "p90": 221.88800573349, + "p95": 222.6880043745041, + "p99": 235.29599606990814 + }, + "roundtrip": { + "p50": 531.1999917030334, + "p90": 534.4319939613342, + "p95": 535.8719825744629, + "p99": 559.4879984855652 + }, + "isolatedSum": { + "p50": 532.7680110931396, + "p90": 539.328008890152, + "p95": 541.6640192270279, + "p99": 571.0399895906448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 508.28802585601807, + "p90": 512.1600031852722, + "p95": 513.3119821548462, + "p99": 530.5280089378357 + }, + "combine": { + "p50": 384.99200344085693, + "p90": 387.84000277519226, + "p95": 388.51198554039, + "p99": 399.80798959732056 + }, + "roundtrip": { + "p50": 895.6480026245117, + "p90": 901.6960263252258, + "p95": 916.3519740104675, + "p99": 1002.6240348815918 + }, + "isolatedSum": { + "p50": 893.280029296875, + "p90": 900.0000059604645, + "p95": 901.8239676952362, + "p99": 930.3359985351562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 906.9439768791199, + "p90": 909.8560214042664, + "p95": 910.7840061187744, + "p99": 913.1199717521667 + }, + 
"combine": { + "p50": 735.6160283088684, + "p90": 739.6159768104553, + "p95": 741.0240173339844, + "p99": 743.6479926109314 + }, + "roundtrip": { + "p50": 1643.455982208252, + "p90": 1650.4319906234741, + "p95": 1657.9840183258057, + "p99": 1732.6079607009888 + }, + "isolatedSum": { + "p50": 1642.5600051879883, + "p90": 1649.4719982147217, + "p95": 1651.8080234527588, + "p99": 1656.7679643630981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-81f2c9f2", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b300_926f0fdd", + "comparisonKey": "5ca1f1d90f7f967f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:09.111278+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + 
"conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 209.1519981622696, + "p90": 214.6880030632019, + "p95": 218.33600103855133, + "p99": 235.87200045585632 + }, + "combine": { + "p50": 56.703999638557434, + "p90": 58.52799862623215, + "p95": 59.13599953055382, + "p99": 68.92800331115723 + }, + "roundtrip": { + "p50": 262.5280022621155, + "p90": 270.3999876976013, + "p95": 277.6319980621338, + "p99": 319.0400004386902 + }, + "isolatedSum": { + "p50": 265.855997800827, + "p90": 273.21600168943405, + "p95": 277.47200056910515, + "p99": 304.80000376701355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 214.88000452518463, + "p90": 225.3119945526123, + "p95": 232.7679991722107, + "p99": 246.14399671554565 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 76.09599828720093, + "p95": 76.54400169849396, + "p99": 78.20799946784973 + }, + "roundtrip": { + "p50": 282.75200724601746, + "p90": 
286.20800375938416, + "p95": 288.83200883865356, + "p99": 311.8079900741577 + }, + "isolatedSum": { + "p50": 289.2480045557022, + "p90": 301.40799283981323, + "p95": 309.31200087070465, + "p99": 324.3519961833954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 225.055992603302, + "p90": 228.60799729824066, + "p95": 231.07199370861053, + "p99": 254.11200523376465 + }, + "combine": { + "p50": 109.15199667215347, + "p90": 111.77600175142288, + "p95": 114.27199840545654, + "p99": 123.6800029873848 + }, + "roundtrip": { + "p50": 327.13600993156433, + "p90": 330.9119939804077, + "p95": 334.1119885444641, + "p99": 350.3679931163788 + }, + "isolatedSum": { + "p50": 334.2079892754555, + "p90": 340.38399904966354, + "p95": 345.3439921140671, + "p99": 377.79200822114944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 288.57600688934326, + "p90": 292.64000058174133, + "p95": 293.7279939651489, + "p99": 306.68801069259644 + }, + "combine": { + "p50": 186.20799481868744, + "p90": 189.31199610233307, + "p95": 190.40000438690186, + "p99": 204.8639953136444 + }, + "roundtrip": { + "p50": 470.2720046043396, + "p90": 489.53598737716675, + "p95": 513.5040283203125, + "p99": 621.1519837379456 + }, + "isolatedSum": { + "p50": 474.7840017080307, + "p90": 481.9519966840744, + "p95": 484.1279983520508, + "p99": 511.55200600624084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 
5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 461.08800172805786, + "p90": 464.7360146045685, + "p95": 469.8239862918854, + "p99": 604.6079993247986 + }, + "combine": { + "p50": 321.727991104126, + "p90": 323.90400767326355, + "p95": 324.44798946380615, + "p99": 338.4320139884949 + }, + "roundtrip": { + "p50": 779.8720002174377, + "p90": 793.0880188941956, + "p95": 808.4480166435242, + "p99": 1184.2880249023438 + }, + "isolatedSum": { + "p50": 782.8159928321838, + "p90": 788.640022277832, + "p95": 794.2719757556915, + "p99": 943.0400133132935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 738.3999824523926, + "p90": 743.615984916687, + "p95": 757.8880190849304, + "p99": 845.3119993209839 + }, + "combine": { + "p50": 597.3439812660217, + "p90": 599.9360084533691, + "p95": 600.9600162506104, + "p99": 611.9359731674194 + }, + "roundtrip": { + "p50": 1332.319974899292, + "p90": 1340.000033378601, + "p95": 1360.5760335922241, + "p99": 1493.5040473937988 + }, + "isolatedSum": { + "p50": 1335.7439637184143, + "p90": 1343.5519933700562, + "p95": 1358.8480353355408, + "p99": 1457.2479724884033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-97545e08", + "identity": "b300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + 
"colorKey": "b300_48835cfe", + "comparisonKey": "59500faed52a21da", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:32.930881+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + 
"p50": 179.51999604701996, + "p90": 185.85599958896637, + "p95": 190.46400487422943, + "p99": 207.48800039291382 + }, + "combine": { + "p50": 59.23200026154518, + "p90": 63.61600011587143, + "p95": 73.11999797821045, + "p99": 83.39200168848038 + }, + "roundtrip": { + "p50": 233.95200073719025, + "p90": 242.5920069217682, + "p95": 250.17601251602173, + "p99": 334.0800106525421 + }, + "isolatedSum": { + "p50": 238.75199630856514, + "p90": 249.4719997048378, + "p95": 263.5840028524399, + "p99": 290.8800020813942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.81600689888, + "p90": 191.00800156593323, + "p95": 194.65599954128265, + "p99": 212.09600567817688 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 81.56800270080566, + "p95": 82.46400207281113, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 260.3519856929779, + "p90": 268.99200677871704, + "p95": 275.9999930858612, + "p99": 297.2480058670044 + }, + "isolatedSum": { + "p50": 266.33600890636444, + "p90": 272.5760042667389, + "p95": 277.1200016140938, + "p99": 301.9520044326782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 214.33599293231964, + "p90": 217.82399713993073, + "p95": 220.44800221920013, + "p99": 262.4000012874603 + }, + "combine": { + "p50": 122.43200093507767, + "p90": 124.64000284671783, + "p95": 125.59999525547028, + "p99": 133.91999900341034 + }, + "roundtrip": { + "p50": 336.64000034332275, + "p90": 342.1440124511719, + "p95": 
349.69601035118103, + "p99": 373.8879859447479 + }, + "isolatedSum": { + "p50": 336.7679938673973, + "p90": 342.46399998664856, + "p95": 346.0479974746704, + "p99": 396.32000029087067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 318.87999176979065, + "p90": 322.56001234054565, + "p95": 323.90400767326355, + "p99": 370.59199810028076 + }, + "combine": { + "p50": 216.22399985790253, + "p90": 219.10400688648224, + "p95": 220.12799978256226, + "p99": 222.88000583648682 + }, + "roundtrip": { + "p50": 537.2480154037476, + "p90": 546.0799932479858, + "p95": 566.2400126457214, + "p99": 640.9919857978821 + }, + "isolatedSum": { + "p50": 535.1039916276932, + "p90": 541.6640192270279, + "p95": 544.0320074558258, + "p99": 593.4720039367676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 512.0319724082947, + "p90": 516.7040228843689, + "p95": 519.7759866714478, + "p99": 641.4399743080139 + }, + "combine": { + "p50": 386.75200939178467, + "p90": 389.2799913883209, + "p95": 389.98401165008545, + "p99": 391.7439877986908 + }, + "roundtrip": { + "p50": 902.400016784668, + "p90": 910.8160138130188, + "p95": 928.5119771957397, + "p99": 1163.2640361785889 + }, + "isolatedSum": { + "p50": 898.7839818000793, + "p90": 905.9840142726898, + "p95": 909.7599983215332, + "p99": 1033.1839621067047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + 
"recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 909.6959829330444, + "p90": 913.6959910392761, + "p95": 915.2640104293823, + "p99": 959.3920111656189 + }, + "combine": { + "p50": 737.7600073814392, + "p90": 741.4720058441162, + "p95": 742.9440021514893, + "p99": 759.9359750747681 + }, + "roundtrip": { + "p50": 1653.216004371643, + "p90": 1659.8399877548218, + "p95": 1666.0799980163574, + "p99": 1849.1840362548828 + }, + "isolatedSum": { + "p50": 1647.4559903144836, + "p90": 1655.1679968833923, + "p95": 1658.2080125808716, + "p99": 1719.327986240387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-98964e42", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_cfba45f0", + "comparisonKey": "37c8c95f7703ac6c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:03.133752+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", 
+ "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.639994263649, + "p90": 212.22400665283203, + "p95": 214.9440050125122, + "p99": 259.5840096473694 + }, + "combine": { + "p50": 57.151999324560165, + "p90": 59.07199904322624, + "p95": 59.7120001912117, + "p99": 76.22399926185608 + }, + "roundtrip": { + "p50": 260.4160010814667, + "p90": 264.1279995441437, + "p95": 268.70399713516235, + "p99": 371.93599343299866 + }, + "isolatedSum": { + "p50": 265.79199358820915, + "p90": 271.2960056960583, + "p95": 274.6560052037239, + "p99": 335.80800890922546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + 
"dispatch": { + "p50": 213.6639952659607, + "p90": 217.66400337219238, + "p95": 220.41599452495575, + "p99": 253.05598974227905 + }, + "combine": { + "p50": 74.0479975938797, + "p90": 85.1840004324913, + "p95": 89.4400030374527, + "p99": 93.88799965381622 + }, + "roundtrip": { + "p50": 282.46399760246277, + "p90": 287.07200288772583, + "p95": 293.88800263404846, + "p99": 373.1519877910614 + }, + "isolatedSum": { + "p50": 287.7119928598404, + "p90": 302.8480038046837, + "p95": 309.85599756240845, + "p99": 346.9439893960953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 222.97599911689758, + "p90": 226.78400576114655, + "p95": 229.312002658844, + "p99": 318.84801387786865 + }, + "combine": { + "p50": 105.43999820947647, + "p90": 109.15199667215347, + "p95": 111.23199760913849, + "p99": 116.06399714946747 + }, + "roundtrip": { + "p50": 325.76000690460205, + "p90": 330.9760093688965, + "p95": 337.2800052165985, + "p99": 454.5919895172119 + }, + "isolatedSum": { + "p50": 328.41599732637405, + "p90": 335.9360024333, + "p95": 340.5440002679825, + "p99": 434.9120110273361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 287.200003862381, + "p90": 291.20001196861267, + "p95": 292.6720082759857, + "p99": 352.9919981956482 + }, + "combine": { + "p50": 184.86399948596954, + "p90": 187.3600035905838, + "p95": 188.4479969739914, + "p99": 193.34399700164795 + }, + "roundtrip": { + "p50": 468.83198618888855, + "p90": 477.1200120449066, + 
"p95": 496.41600251197815, + "p99": 536.6399884223938 + }, + "isolatedSum": { + "p50": 472.0640033483505, + "p90": 478.5600155591965, + "p95": 481.1200052499771, + "p99": 546.3359951972961 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 460.28798818588257, + "p90": 464.54399824142456, + "p95": 472.57599234580994, + "p99": 660.6400012969971 + }, + "combine": { + "p50": 320.8320140838623, + "p90": 327.7119994163513, + "p95": 332.5119912624359, + "p99": 465.60001373291016 + }, + "roundtrip": { + "p50": 778.7520289421082, + "p90": 785.6000065803528, + "p95": 801.3120293617249, + "p99": 1032.096028327942 + }, + "isolatedSum": { + "p50": 781.1200022697449, + "p90": 792.2559976577759, + "p95": 805.0879836082458, + "p99": 1126.2400150299072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 738.6879920959473, + "p90": 744.7360157966614, + "p95": 758.624017238617, + "p99": 800.927996635437 + }, + "combine": { + "p50": 591.9679999351501, + "p90": 594.2720174789429, + "p95": 595.2000021934509, + "p99": 607.4240207672119 + }, + "roundtrip": { + "p50": 1328.06396484375, + "p90": 1333.791971206665, + "p95": 1347.648024559021, + "p99": 1474.6559858322144 + }, + "isolatedSum": { + "p50": 1330.6559920310974, + "p90": 1339.0080332756042, + "p95": 1353.8240194320679, + "p99": 1408.352017402649 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, 
+ "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bd260c24", + "identity": "b300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_8b963cf2", + "comparisonKey": "cf44df3c56876141", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:14.579402+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_08", + "sku": "b300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 207.32800662517548, + "p90": 210.52800118923187, + "p95": 212.25599944591522, + "p99": 225.53600370883942 + }, + "combine": { + "p50": 57.0559985935688, + "p90": 58.81600081920624, + "p95": 59.26400050520897, + "p99": 68.03199648857117 + }, + "roundtrip": { + "p50": 258.65599513053894, + "p90": 261.79200410842896, + "p95": 264.41600918769836, + "p99": 286.27198934555054 + }, + "isolatedSum": { + "p50": 264.3840052187443, + "p90": 269.3440020084381, + "p95": 271.5199999511242, + "p99": 293.5680001974106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 211.93599700927734, + "p90": 215.32799303531647, + "p95": 217.43999421596527, + "p99": 234.01600122451782 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 75.32799988985062, + "p95": 75.77600330114365, + "p99": 83.5840031504631 + }, + "roundtrip": { + "p50": 280.2560031414032, + "p90": 284.0000092983246, + "p95": 286.78399324417114, + "p99": 299.23200607299805 + }, + "isolatedSum": { + "p50": 285.2799966931343, + "p90": 290.6559929251671, + "p95": 293.2159975171089, + "p99": 317.6000043749809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
221.91999852657318, + "p90": 225.3440022468567, + "p95": 227.29599475860596, + "p99": 245.7599937915802 + }, + "combine": { + "p50": 104.86400127410889, + "p90": 107.07200318574905, + "p95": 107.32799768447876, + "p99": 114.30399864912033 + }, + "roundtrip": { + "p50": 324.6400058269501, + "p90": 328.6080062389374, + "p95": 330.4640054702759, + "p99": 343.80799531936646 + }, + "isolatedSum": { + "p50": 326.78399980068207, + "p90": 332.41600543260574, + "p95": 334.6239924430847, + "p99": 360.06399244070053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 286.3039970397949, + "p90": 289.5680069923401, + "p95": 290.5600070953369, + "p99": 301.60000920295715 + }, + "combine": { + "p50": 184.67199802398682, + "p90": 186.88000738620758, + "p95": 187.16800212860107, + "p99": 192.3840045928955 + }, + "roundtrip": { + "p50": 467.6479995250702, + "p90": 475.67999362945557, + "p95": 489.6639883518219, + "p99": 504.15998697280884 + }, + "isolatedSum": { + "p50": 470.97599506378174, + "p90": 476.44801437854767, + "p95": 477.728009223938, + "p99": 493.98401379585266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 459.3920111656189, + "p90": 462.6559913158417, + "p95": 465.2799963951111, + "p99": 500.44798851013184 + }, + "combine": { + "p50": 320.51199674606323, + "p90": 322.62399792671204, + "p95": 323.0080008506775, + "p99": 330.1759958267212 + }, + "roundtrip": { + "p50": 778.0159711837769, + "p90": 781.9200158119202, + "p95": 
784.38401222229, + "p99": 851.967990398407 + }, + "isolatedSum": { + "p50": 779.9040079116821, + "p90": 785.2799892425537, + "p95": 788.2879972457886, + "p99": 830.623984336853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 737.3759746551514, + "p90": 740.8639788627625, + "p95": 742.4319982528687, + "p99": 763.5520100593567 + }, + "combine": { + "p50": 591.4559960365295, + "p90": 594.0799713134766, + "p95": 594.9119925498962, + "p99": 613.2479906082153 + }, + "roundtrip": { + "p50": 1327.1360397338867, + "p90": 1331.3920497894287, + "p95": 1333.8240385055542, + "p99": 1481.6960096359253 + }, + "isolatedSum": { + "p50": 1328.831970691681, + "p90": 1334.943950176239, + "p95": 1337.343990802765, + "p99": 1376.800000667572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-086b7bdd", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_6300ebb7", + "comparisonKey": "0f567db5f9c07223", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:55:43.486088+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, 
+ "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 48.608001321554184, + "p90": 50.335999578237534, + "p95": 50.71999877691269, + "p99": 62.07999959588051 + }, + "combine": { + "p50": 48.608001321554184, + "p90": 50.335999578237534, + "p95": 50.71999877691269, + "p99": 62.07999959588051 + }, + "roundtrip": { + "p50": 48.608001321554184, + "p90": 50.335999578237534, + "p95": 50.71999877691269, + "p99": 62.07999959588051 + }, + "isolatedSum": { + "p50": 97.21600264310837, + "p90": 100.67199915647507, + "p95": 101.43999755382538, + "p99": 124.15999919176102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + 
"combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 50.04800111055374, + "p90": 51.42400041222572, + "p95": 52.41600051522255, + "p99": 61.08799949288368 + }, + "combine": { + "p50": 50.04800111055374, + "p90": 51.42400041222572, + "p95": 52.41600051522255, + "p99": 61.08799949288368 + }, + "roundtrip": { + "p50": 50.04800111055374, + "p90": 51.42400041222572, + "p95": 52.41600051522255, + "p99": 61.08799949288368 + }, + "isolatedSum": { + "p50": 100.09600222110748, + "p90": 102.84800082445145, + "p95": 104.8320010304451, + "p99": 122.17599898576736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 50.303999334573746, + "p90": 51.45600065588951, + "p95": 52.191998809576035, + "p99": 55.26399984955788 + }, + "combine": { + "p50": 50.303999334573746, + "p90": 51.45600065588951, + "p95": 52.191998809576035, + "p99": 55.26399984955788 + }, + "roundtrip": { + "p50": 50.303999334573746, + "p90": 51.45600065588951, + "p95": 52.191998809576035, + "p99": 55.26399984955788 + }, + "isolatedSum": { + "p50": 100.60799866914749, + "p90": 102.91200131177902, + "p95": 104.38399761915207, + "p99": 110.52799969911575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 50.624001771211624, + "p90": 52.41600051522255, + "p95": 53.95200103521347, + "p99": 70.04799693822861 + }, + "combine": { + "p50": 
50.624001771211624, + "p90": 52.41600051522255, + "p95": 53.95200103521347, + "p99": 70.04799693822861 + }, + "roundtrip": { + "p50": 50.624001771211624, + "p90": 52.41600051522255, + "p95": 53.95200103521347, + "p99": 70.04799693822861 + }, + "isolatedSum": { + "p50": 101.24800354242325, + "p90": 104.8320010304451, + "p95": 107.90400207042694, + "p99": 140.09599387645721 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.247999399900436, + "p90": 55.00800162553787, + "p95": 55.743999779224396, + "p99": 61.184000223875046 + }, + "combine": { + "p50": 53.247999399900436, + "p90": 55.00800162553787, + "p95": 55.743999779224396, + "p99": 61.184000223875046 + }, + "roundtrip": { + "p50": 53.247999399900436, + "p90": 55.00800162553787, + "p95": 55.743999779224396, + "p99": 61.184000223875046 + }, + "isolatedSum": { + "p50": 106.49599879980087, + "p90": 110.01600325107574, + "p95": 111.48799955844879, + "p99": 122.36800044775009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.00800162553787, + "p90": 56.8000003695488, + "p95": 57.5999990105629, + "p99": 63.74400109052658 + }, + "combine": { + "p50": 55.00800162553787, + "p90": 56.8000003695488, + "p95": 57.5999990105629, + "p99": 63.74400109052658 + }, + "roundtrip": { + "p50": 55.00800162553787, + "p90": 56.8000003695488, + "p95": 57.5999990105629, + "p99": 63.74400109052658 + }, + "isolatedSum": { + "p50": 110.01600325107574, + "p90": 113.6000007390976, + "p95": 115.1999980211258, + "p99": 
127.48800218105316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.29600074887276, + "p90": 60.83200126886368, + "p95": 61.59999966621399, + "p99": 72.22399860620499 + }, + "combine": { + "p50": 59.29600074887276, + "p90": 60.83200126886368, + "p95": 61.59999966621399, + "p99": 72.22399860620499 + }, + "roundtrip": { + "p50": 59.29600074887276, + "p90": 60.83200126886368, + "p95": 61.59999966621399, + "p99": 72.22399860620499 + }, + "isolatedSum": { + "p50": 118.59200149774551, + "p90": 121.66400253772736, + "p95": 123.19999933242798, + "p99": 144.44799721240997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.34399902820587, + "p90": 70.81600278615952, + "p95": 71.26399874687195, + "p99": 81.98399841785431 + }, + "combine": { + "p50": 69.34399902820587, + "p90": 70.81600278615952, + "p95": 71.26399874687195, + "p99": 81.98399841785431 + }, + "roundtrip": { + "p50": 69.34399902820587, + "p90": 70.81600278615952, + "p95": 71.26399874687195, + "p99": 81.98399841785431 + }, + "isolatedSum": { + "p50": 138.68799805641174, + "p90": 141.63200557231903, + "p95": 142.5279974937439, + "p99": 163.96799683570862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d4b99c4", + "identity": 
"b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b300_1fa8fca1", + "comparisonKey": "a0a44bd8fbe428bb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:53.633058+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 49.855999648571014, + "p90": 51.35999992489815, + "p95": 52.09600180387497, + "p99": 55.71199953556061 + }, + "combine": { + "p50": 49.855999648571014, + "p90": 51.35999992489815, + "p95": 52.09600180387497, + "p99": 55.71199953556061 + }, + "roundtrip": { + "p50": 49.855999648571014, + "p90": 51.35999992489815, + "p95": 52.09600180387497, + "p99": 55.71199953556061 + }, + "isolatedSum": { + "p50": 99.71199929714203, + "p90": 102.7199998497963, + "p95": 104.19200360774994, + "p99": 111.42399907112122 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 51.29599943757057, + "p90": 52.51200124621391, + "p95": 52.928000688552856, + "p99": 56.63999915122986 + }, + "combine": { + "p50": 51.29599943757057, + "p90": 52.51200124621391, + "p95": 52.928000688552856, + "p99": 56.63999915122986 + }, + "roundtrip": { + "p50": 51.29599943757057, + "p90": 52.51200124621391, + "p95": 52.928000688552856, + "p99": 56.63999915122986 + }, + "isolatedSum": { + "p50": 102.59199887514114, + "p90": 105.02400249242783, + "p95": 105.85600137710571, + "p99": 113.27999830245972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.42400041222572, + "p90": 53.02400141954422, + "p95": 53.79199981689453, + "p99": 58.33600088953972 + }, + "combine": { + "p50": 51.42400041222572, + "p90": 53.02400141954422, + "p95": 53.79199981689453, + "p99": 58.33600088953972 + }, + "roundtrip": 
{ + "p50": 51.42400041222572, + "p90": 53.02400141954422, + "p95": 53.79199981689453, + "p99": 58.33600088953972 + }, + "isolatedSum": { + "p50": 102.84800082445145, + "p90": 106.04800283908844, + "p95": 107.58399963378906, + "p99": 116.67200177907944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.671998739242554, + "p90": 54.59199845790863, + "p95": 55.36000058054924, + "p99": 68.31999868154526 + }, + "combine": { + "p50": 52.671998739242554, + "p90": 54.59199845790863, + "p95": 55.36000058054924, + "p99": 68.31999868154526 + }, + "roundtrip": { + "p50": 52.671998739242554, + "p90": 54.59199845790863, + "p95": 55.36000058054924, + "p99": 68.31999868154526 + }, + "isolatedSum": { + "p50": 105.34399747848511, + "p90": 109.18399691581726, + "p95": 110.72000116109848, + "p99": 136.63999736309052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 55.58399856090546, + "p90": 58.17599967122078, + "p95": 59.61599946022034, + "p99": 77.37600058317184 + }, + "combine": { + "p50": 55.58399856090546, + "p90": 58.17599967122078, + "p95": 59.61599946022034, + "p99": 77.37600058317184 + }, + "roundtrip": { + "p50": 55.58399856090546, + "p90": 58.17599967122078, + "p95": 59.61599946022034, + "p99": 77.37600058317184 + }, + "isolatedSum": { + "p50": 111.16799712181091, + "p90": 116.35199934244156, + "p95": 119.23199892044067, + "p99": 154.7520011663437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + 
"recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 57.920001447200775, + "p90": 60.22400036454201, + "p95": 61.02399900555611, + "p99": 69.98399645090103 + }, + "combine": { + "p50": 57.920001447200775, + "p90": 60.22400036454201, + "p95": 61.02399900555611, + "p99": 69.98399645090103 + }, + "roundtrip": { + "p50": 57.920001447200775, + "p90": 60.22400036454201, + "p95": 61.02399900555611, + "p99": 69.98399645090103 + }, + "isolatedSum": { + "p50": 115.84000289440155, + "p90": 120.44800072908401, + "p95": 122.04799801111221, + "p99": 139.96799290180206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 65.21599739789963, + "p90": 66.46399945020676, + "p95": 67.07199662923813, + "p99": 84.48000252246857 + }, + "combine": { + "p50": 65.21599739789963, + "p90": 66.46399945020676, + "p95": 67.07199662923813, + "p99": 84.48000252246857 + }, + "roundtrip": { + "p50": 65.21599739789963, + "p90": 66.46399945020676, + "p95": 67.07199662923813, + "p99": 84.48000252246857 + }, + "isolatedSum": { + "p50": 130.43199479579926, + "p90": 132.9279989004135, + "p95": 134.14399325847626, + "p99": 168.96000504493713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.49600231647491, + "p90": 84.89599823951721, + "p95": 86.30400151014328, + "p99": 100.25600343942642 + }, + "combine": { + "p50": 82.49600231647491, + "p90": 84.89599823951721, + "p95": 
86.30400151014328, + "p99": 100.25600343942642 + }, + "roundtrip": { + "p50": 82.49600231647491, + "p90": 84.89599823951721, + "p95": 86.30400151014328, + "p99": 100.25600343942642 + }, + "isolatedSum": { + "p50": 164.99200463294983, + "p90": 169.79199647903442, + "p95": 172.60800302028656, + "p99": 200.51200687885284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8b67445", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_1ad937d8", + "comparisonKey": "97d25f2784fe7e9a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:58.540622+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 39.872001856565475, + "p90": 41.53599962592125, + "p95": 42.4639992415905, + "p99": 47.45600000023842 + }, + "combine": { + "p50": 39.872001856565475, + "p90": 41.53599962592125, + "p95": 42.4639992415905, + "p99": 47.45600000023842 + }, + "roundtrip": { + "p50": 39.872001856565475, + "p90": 41.53599962592125, + "p95": 42.4639992415905, + "p99": 47.45600000023842 + }, + "isolatedSum": { + "p50": 79.74400371313095, + "p90": 83.0719992518425, + "p95": 84.927998483181, + "p99": 94.91200000047684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.527999728918076, + "p90": 44.16000097990036, + "p95": 45.3759990632534, + "p99": 53.31199988722801 + }, + "combine": { + "p50": 42.527999728918076, + "p90": 44.16000097990036, + "p95": 45.3759990632534, + "p99": 53.31199988722801 + }, + "roundtrip": { + "p50": 42.527999728918076, + "p90": 44.16000097990036, + "p95": 45.3759990632534, + "p99": 53.31199988722801 + }, + "isolatedSum": { + "p50": 85.05599945783615, + "p90": 88.32000195980072, + "p95": 
90.7519981265068, + "p99": 106.62399977445602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.92799937725067, + "p90": 46.911999583244324, + "p95": 47.520000487565994, + "p99": 59.007998555898666 + }, + "combine": { + "p50": 44.92799937725067, + "p90": 46.911999583244324, + "p95": 47.520000487565994, + "p99": 59.007998555898666 + }, + "roundtrip": { + "p50": 44.92799937725067, + "p90": 46.911999583244324, + "p95": 47.520000487565994, + "p99": 59.007998555898666 + }, + "isolatedSum": { + "p50": 89.85599875450134, + "p90": 93.82399916648865, + "p95": 95.04000097513199, + "p99": 118.01599711179733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 47.45600000023842, + "p90": 49.375999718904495, + "p95": 50.144001841545105, + "p99": 58.59199911355972 + }, + "combine": { + "p50": 47.45600000023842, + "p90": 49.375999718904495, + "p95": 50.144001841545105, + "p99": 58.59199911355972 + }, + "roundtrip": { + "p50": 47.45600000023842, + "p90": 49.375999718904495, + "p95": 50.144001841545105, + "p99": 58.59199911355972 + }, + "isolatedSum": { + "p50": 94.91200000047684, + "p90": 98.75199943780899, + "p95": 100.28800368309021, + "p99": 117.18399822711945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7b48fcc0", + "identity": 
"b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b300_06cd6e2d", + "comparisonKey": "dc053af507ba01e6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:31:55.032534+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.991999208927155, + "p90": 43.20000112056732, + "p95": 50.144001841545105, + "p99": 59.167999774217606 + }, + "combine": { + "p50": 40.991999208927155, + "p90": 43.20000112056732, + "p95": 50.144001841545105, + "p99": 59.167999774217606 + }, + "roundtrip": { + "p50": 40.991999208927155, + "p90": 43.20000112056732, + "p95": 50.144001841545105, + "p99": 59.167999774217606 + }, + "isolatedSum": { + "p50": 81.98399841785431, + "p90": 86.40000224113464, + "p95": 100.28800368309021, + "p99": 118.33599954843521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.367998510599136, + "p90": 44.35199871659279, + "p95": 48.895999789237976, + "p99": 58.88000130653381 + }, + "combine": { + "p50": 42.367998510599136, + "p90": 44.35199871659279, + "p95": 48.895999789237976, + "p99": 58.88000130653381 + }, + "roundtrip": { + "p50": 42.367998510599136, + "p90": 44.35199871659279, + "p95": 48.895999789237976, + "p99": 58.88000130653381 + }, + "isolatedSum": { + "p50": 84.73599702119827, + "p90": 88.70399743318558, + "p95": 97.79199957847595, + "p99": 117.76000261306763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 43.68000105023384, + "p90": 46.1760014295578, + "p95": 53.727999329566956, + "p99": 63.10400366783142 + }, + "combine": { + "p50": 43.68000105023384, + "p90": 46.1760014295578, + "p95": 53.727999329566956, + "p99": 63.10400366783142 + }, + 
"roundtrip": { + "p50": 43.68000105023384, + "p90": 46.1760014295578, + "p95": 53.727999329566956, + "p99": 63.10400366783142 + }, + "isolatedSum": { + "p50": 87.36000210046768, + "p90": 92.3520028591156, + "p95": 107.45599865913391, + "p99": 126.20800733566284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 47.00800031423569, + "p90": 49.02400076389313, + "p95": 52.99200117588043, + "p99": 62.65600025653839 + }, + "combine": { + "p50": 47.00800031423569, + "p90": 49.02400076389313, + "p95": 52.99200117588043, + "p99": 62.65600025653839 + }, + "roundtrip": { + "p50": 47.00800031423569, + "p90": 49.02400076389313, + "p95": 52.99200117588043, + "p99": 62.65600025653839 + }, + "isolatedSum": { + "p50": 94.01600062847137, + "p90": 98.04800152778625, + "p95": 105.98400235176086, + "p99": 125.31200051307678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 50.944000482559204, + "p90": 52.73599922657013, + "p95": 56.063998490571976, + "p99": 65.50399959087372 + }, + "combine": { + "p50": 50.944000482559204, + "p90": 52.73599922657013, + "p95": 56.063998490571976, + "p99": 65.50399959087372 + }, + "roundtrip": { + "p50": 50.944000482559204, + "p90": 52.73599922657013, + "p95": 56.063998490571976, + "p99": 65.50399959087372 + }, + "isolatedSum": { + "p50": 101.88800096511841, + "p90": 105.47199845314026, + "p95": 112.12799698114395, + "p99": 131.00799918174744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, 
+ "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 50.592001527547836, + "p90": 52.99200117588043, + "p95": 56.48000165820122, + "p99": 71.23199850320816 + }, + "combine": { + "p50": 50.592001527547836, + "p90": 52.99200117588043, + "p95": 56.48000165820122, + "p99": 71.23199850320816 + }, + "roundtrip": { + "p50": 50.592001527547836, + "p90": 52.99200117588043, + "p95": 56.48000165820122, + "p99": 71.23199850320816 + }, + "isolatedSum": { + "p50": 101.18400305509567, + "p90": 105.98400235176086, + "p95": 112.96000331640244, + "p99": 142.46399700641632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 51.90400034189224, + "p90": 54.687999188899994, + "p95": 56.384000927209854, + "p99": 65.63200056552887 + }, + "combine": { + "p50": 51.90400034189224, + "p90": 54.687999188899994, + "p95": 56.384000927209854, + "p99": 65.63200056552887 + }, + "roundtrip": { + "p50": 51.90400034189224, + "p90": 54.687999188899994, + "p95": 56.384000927209854, + "p99": 65.63200056552887 + }, + "isolatedSum": { + "p50": 103.80800068378448, + "p90": 109.37599837779999, + "p95": 112.76800185441971, + "p99": 131.26400113105774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 56.12799897789955, + "p90": 58.20799991488457, + "p95": 59.67999994754791, + "p99": 68.09599697589874 + }, + "combine": { + "p50": 56.12799897789955, + "p90": 58.20799991488457, + "p95": 
59.67999994754791, + "p99": 68.09599697589874 + }, + "roundtrip": { + "p50": 56.12799897789955, + "p90": 58.20799991488457, + "p95": 59.67999994754791, + "p99": 68.09599697589874 + }, + "isolatedSum": { + "p50": 112.2559979557991, + "p90": 116.41599982976913, + "p95": 119.35999989509583, + "p99": 136.19199395179749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-20a72693", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b300_632c3d77", + "comparisonKey": "04a234350eec6e4e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:29:46.156047+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 48.25599864125252, + "p90": 49.79199916124344, + "p95": 50.56000128388405, + "p99": 56.543998420238495 + }, + "combine": { + "p50": 48.25599864125252, + "p90": 49.79199916124344, + "p95": 50.56000128388405, + "p99": 56.543998420238495 + }, + "roundtrip": { + "p50": 48.25599864125252, + "p90": 49.79199916124344, + "p95": 50.56000128388405, + "p99": 56.543998420238495 + }, + "isolatedSum": { + "p50": 96.51199728250504, + "p90": 99.58399832248688, + "p95": 101.1200025677681, + "p99": 113.08799684047699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.855999648571014, + "p90": 51.45600065588951, + "p95": 51.93600058555603, + "p99": 64.64000046253204 + }, + "combine": { + "p50": 49.855999648571014, + "p90": 51.45600065588951, + "p95": 51.93600058555603, + "p99": 64.64000046253204 + }, + "roundtrip": { + "p50": 49.855999648571014, + "p90": 51.45600065588951, + "p95": 51.93600058555603, + "p99": 64.64000046253204 + }, + "isolatedSum": { + "p50": 99.71199929714203, + "p90": 102.91200131177902, + "p95": 
103.87200117111206, + "p99": 129.2800009250641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 50.75199902057648, + "p90": 52.60799825191498, + "p95": 54.368000477552414, + "p99": 61.85600161552429 + }, + "combine": { + "p50": 50.75199902057648, + "p90": 52.60799825191498, + "p95": 54.368000477552414, + "p99": 61.85600161552429 + }, + "roundtrip": { + "p50": 50.75199902057648, + "p90": 52.60799825191498, + "p95": 54.368000477552414, + "p99": 61.85600161552429 + }, + "isolatedSum": { + "p50": 101.50399804115295, + "p90": 105.21599650382996, + "p95": 108.73600095510483, + "p99": 123.71200323104858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.38400027155876, + "p90": 54.4000007212162, + "p95": 55.64799904823303, + "p99": 70.78400254249573 + }, + "combine": { + "p50": 52.38400027155876, + "p90": 54.4000007212162, + "p95": 55.64799904823303, + "p99": 70.78400254249573 + }, + "roundtrip": { + "p50": 52.38400027155876, + "p90": 54.4000007212162, + "p95": 55.64799904823303, + "p99": 70.78400254249573 + }, + "isolatedSum": { + "p50": 104.76800054311752, + "p90": 108.8000014424324, + "p95": 111.29599809646606, + "p99": 141.56800508499146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 54.016001522541046, 
+ "p90": 56.51199817657471, + "p95": 57.5999990105629, + "p99": 64.51199948787689 + }, + "combine": { + "p50": 54.016001522541046, + "p90": 56.51199817657471, + "p95": 57.5999990105629, + "p99": 64.51199948787689 + }, + "roundtrip": { + "p50": 54.016001522541046, + "p90": 56.51199817657471, + "p95": 57.5999990105629, + "p99": 64.51199948787689 + }, + "isolatedSum": { + "p50": 108.03200304508209, + "p90": 113.02399635314941, + "p95": 115.1999980211258, + "p99": 129.02399897575378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.39200082421303, + "p90": 57.631999254226685, + "p95": 58.49599838256836, + "p99": 62.68800050020218 + }, + "combine": { + "p50": 55.39200082421303, + "p90": 57.631999254226685, + "p95": 58.49599838256836, + "p99": 62.68800050020218 + }, + "roundtrip": { + "p50": 55.39200082421303, + "p90": 57.631999254226685, + "p95": 58.49599838256836, + "p99": 62.68800050020218 + }, + "isolatedSum": { + "p50": 110.78400164842606, + "p90": 115.26399850845337, + "p95": 116.99199676513672, + "p99": 125.37600100040436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.967998415231705, + "p90": 61.85600161552429, + "p95": 62.84800171852112, + "p99": 86.75199747085571 + }, + "combine": { + "p50": 59.967998415231705, + "p90": 61.85600161552429, + "p95": 62.84800171852112, + "p99": 86.75199747085571 + }, + "roundtrip": { + "p50": 59.967998415231705, + "p90": 61.85600161552429, + "p95": 62.84800171852112, + "p99": 86.75199747085571 + }, + 
"isolatedSum": { + "p50": 119.93599683046341, + "p90": 123.71200323104858, + "p95": 125.69600343704224, + "p99": 173.50399494171143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.93599706888199, + "p90": 77.53600180149078, + "p95": 78.3040001988411, + "p99": 85.15200018882751 + }, + "combine": { + "p50": 75.93599706888199, + "p90": 77.53600180149078, + "p95": 78.3040001988411, + "p99": 85.15200018882751 + }, + "roundtrip": { + "p50": 75.93599706888199, + "p90": 77.53600180149078, + "p95": 78.3040001988411, + "p99": 85.15200018882751 + }, + "isolatedSum": { + "p50": 151.87199413776398, + "p90": 155.07200360298157, + "p95": 156.6080003976822, + "p99": 170.30400037765503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9b7d4da8", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b300_5a363747", + "comparisonKey": "7aa3f3f4b31df8d9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:12.999599+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · 
hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.38399982452393, + "p90": 90.2400016784668, + "p95": 92.32000261545181, + "p99": 94.84799951314926 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 90.2400016784668, + "p95": 92.32000261545181, + "p99": 94.84799951314926 + }, + "roundtrip": { + "p50": 72.38399982452393, + "p90": 90.2400016784668, + "p95": 92.32000261545181, + "p99": 94.84799951314926 + }, + "isolatedSum": { + "p50": 144.76799964904785, + "p90": 180.4800033569336, + "p95": 184.64000523090363, + "p99": 189.69599902629852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + 
"combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 50.52800104022026, + "p90": 59.167999774217606, + "p95": 100.44799745082855, + "p99": 106.1440035700798 + }, + "combine": { + "p50": 50.52800104022026, + "p90": 59.167999774217606, + "p95": 100.44799745082855, + "p99": 106.1440035700798 + }, + "roundtrip": { + "p50": 50.52800104022026, + "p90": 59.167999774217606, + "p95": 100.44799745082855, + "p99": 106.1440035700798 + }, + "isolatedSum": { + "p50": 101.05600208044052, + "p90": 118.33599954843521, + "p95": 200.8959949016571, + "p99": 212.2880071401596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 52.44800075888634, + "p90": 98.04800152778625, + "p95": 101.88800096511841, + "p99": 106.23999685049057 + }, + "combine": { + "p50": 52.44800075888634, + "p90": 98.04800152778625, + "p95": 101.88800096511841, + "p99": 106.23999685049057 + }, + "roundtrip": { + "p50": 52.44800075888634, + "p90": 98.04800152778625, + "p95": 101.88800096511841, + "p99": 106.23999685049057 + }, + "isolatedSum": { + "p50": 104.89600151777267, + "p90": 196.0960030555725, + "p95": 203.77600193023682, + "p99": 212.47999370098114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 85.53600311279297, + "p90": 105.24799674749374, + "p95": 107.90400207042694, + "p99": 111.87200248241425 + }, + "combine": { + "p50": 
85.53600311279297, + "p90": 105.24799674749374, + "p95": 107.90400207042694, + "p99": 111.87200248241425 + }, + "roundtrip": { + "p50": 85.53600311279297, + "p90": 105.24799674749374, + "p95": 107.90400207042694, + "p99": 111.87200248241425 + }, + "isolatedSum": { + "p50": 171.07200622558594, + "p90": 210.4959934949875, + "p95": 215.80800414085388, + "p99": 223.7440049648285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.888000547885895, + "p90": 73.27999919652939, + "p95": 97.69599884748459, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 53.888000547885895, + "p90": 73.27999919652939, + "p95": 97.69599884748459, + "p99": 107.96800255775452 + }, + "roundtrip": { + "p50": 53.888000547885895, + "p90": 73.27999919652939, + "p95": 97.69599884748459, + "p99": 107.96800255775452 + }, + "isolatedSum": { + "p50": 107.77600109577179, + "p90": 146.55999839305878, + "p95": 195.39199769496918, + "p99": 215.93600511550903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.87200075387955, + "p90": 98.62399846315384, + "p95": 103.5199984908104, + "p99": 108.06400328874588 + }, + "combine": { + "p50": 55.87200075387955, + "p90": 98.62399846315384, + "p95": 103.5199984908104, + "p99": 108.06400328874588 + }, + "roundtrip": { + "p50": 55.87200075387955, + "p90": 98.62399846315384, + "p95": 103.5199984908104, + "p99": 108.06400328874588 + }, + "isolatedSum": { + "p50": 111.7440015077591, + "p90": 197.24799692630768, + "p95": 207.0399969816208, + "p99": 
216.12800657749176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 62.111999839544296, + "p90": 102.08000242710114, + "p95": 105.92000186443329, + "p99": 108.41599851846695 + }, + "combine": { + "p50": 62.111999839544296, + "p90": 102.08000242710114, + "p95": 105.92000186443329, + "p99": 108.41599851846695 + }, + "roundtrip": { + "p50": 62.111999839544296, + "p90": 102.08000242710114, + "p95": 105.92000186443329, + "p99": 108.41599851846695 + }, + "isolatedSum": { + "p50": 124.22399967908859, + "p90": 204.16000485420227, + "p95": 211.84000372886658, + "p99": 216.8319970369339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 76.89599692821503, + "p90": 119.32799965143204, + "p95": 122.11199849843979, + "p99": 128.06400656700134 + }, + "combine": { + "p50": 76.89599692821503, + "p90": 119.32799965143204, + "p95": 122.11199849843979, + "p99": 128.06400656700134 + }, + "roundtrip": { + "p50": 76.89599692821503, + "p90": 119.32799965143204, + "p95": 122.11199849843979, + "p99": 128.06400656700134 + }, + "isolatedSum": { + "p50": 153.79199385643005, + "p90": 238.65599930286407, + "p95": 244.22399699687958, + "p99": 256.1280131340027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dcaf13f1", + "identity": 
"b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b300_82d29707", + "comparisonKey": "e0c86076ba4233b8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:30:51.609594+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 45.823998749256134, + "p90": 47.32799902558327, + "p95": 48.35199937224388, + "p99": 57.312000542879105 + }, + "combine": { + "p50": 45.823998749256134, + "p90": 47.32799902558327, + "p95": 48.35199937224388, + "p99": 57.312000542879105 + }, + "roundtrip": { + "p50": 45.823998749256134, + "p90": 47.32799902558327, + "p95": 48.35199937224388, + "p99": 57.312000542879105 + }, + "isolatedSum": { + "p50": 91.64799749851227, + "p90": 94.65599805116653, + "p95": 96.70399874448776, + "p99": 114.62400108575821 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 48.67200180888176, + "p90": 50.4320003092289, + "p95": 50.912000238895416, + "p99": 60.095999389886856 + }, + "combine": { + "p50": 48.67200180888176, + "p90": 50.4320003092289, + "p95": 50.912000238895416, + "p99": 60.095999389886856 + }, + "roundtrip": { + "p50": 48.67200180888176, + "p90": 50.4320003092289, + "p95": 50.912000238895416, + "p99": 60.095999389886856 + }, + "isolatedSum": { + "p50": 97.34400361776352, + "p90": 100.8640006184578, + "p95": 101.82400047779083, + "p99": 120.19199877977371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.45600065588951, + "p90": 53.50400134921074, + "p95": 57.69599974155426, + "p99": 74.68800246715546 + }, + "combine": { + "p50": 51.45600065588951, + "p90": 53.50400134921074, + "p95": 57.69599974155426, + "p99": 74.68800246715546 + }, + 
"roundtrip": { + "p50": 51.45600065588951, + "p90": 53.50400134921074, + "p95": 57.69599974155426, + "p99": 74.68800246715546 + }, + "isolatedSum": { + "p50": 102.91200131177902, + "p90": 107.00800269842148, + "p95": 115.39199948310852, + "p99": 149.3760049343109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 51.10400170087814, + "p90": 52.960000932216644, + "p95": 53.63199859857559, + "p99": 56.352000683546066 + }, + "combine": { + "p50": 51.10400170087814, + "p90": 52.960000932216644, + "p95": 53.63199859857559, + "p99": 56.352000683546066 + }, + "roundtrip": { + "p50": 51.10400170087814, + "p90": 52.960000932216644, + "p95": 53.63199859857559, + "p99": 56.352000683546066 + }, + "isolatedSum": { + "p50": 102.20800340175629, + "p90": 105.92000186443329, + "p95": 107.26399719715118, + "p99": 112.70400136709213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 52.86400020122528, + "p90": 55.00800162553787, + "p95": 55.67999929189682, + "p99": 57.37600103020668 + }, + "combine": { + "p50": 52.86400020122528, + "p90": 55.00800162553787, + "p95": 55.67999929189682, + "p99": 57.37600103020668 + }, + "roundtrip": { + "p50": 52.86400020122528, + "p90": 55.00800162553787, + "p95": 55.67999929189682, + "p99": 57.37600103020668 + }, + "isolatedSum": { + "p50": 105.72800040245056, + "p90": 110.01600325107574, + "p95": 111.35999858379364, + "p99": 114.75200206041336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + 
"fanoutMean": 5.09375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 54.816000163555145, + "p90": 56.992001831531525, + "p95": 57.792000472545624, + "p99": 65.24799764156342 + }, + "combine": { + "p50": 54.816000163555145, + "p90": 56.992001831531525, + "p95": 57.792000472545624, + "p99": 65.24799764156342 + }, + "roundtrip": { + "p50": 54.816000163555145, + "p90": 56.992001831531525, + "p95": 57.792000472545624, + "p99": 65.24799764156342 + }, + "isolatedSum": { + "p50": 109.63200032711029, + "p90": 113.98400366306305, + "p95": 115.58400094509125, + "p99": 130.49599528312683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.039998799562454, + "p90": 60.99199876189232, + "p95": 61.37600168585777, + "p99": 65.21599739789963 + }, + "combine": { + "p50": 59.039998799562454, + "p90": 60.99199876189232, + "p95": 61.37600168585777, + "p99": 65.21599739789963 + }, + "roundtrip": { + "p50": 59.039998799562454, + "p90": 60.99199876189232, + "p95": 61.37600168585777, + "p99": 65.21599739789963 + }, + "isolatedSum": { + "p50": 118.07999759912491, + "p90": 121.98399752378464, + "p95": 122.75200337171555, + "p99": 130.43199479579926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.24799829721451, + "p90": 70.81600278615952, + "p95": 71.3919997215271, + "p99": 77.37600058317184 + }, + "combine": { + "p50": 
69.24799829721451, + "p90": 70.81600278615952, + "p95": 71.3919997215271, + "p99": 77.37600058317184 + }, + "roundtrip": { + "p50": 69.24799829721451, + "p90": 70.81600278615952, + "p95": 71.3919997215271, + "p99": 77.37600058317184 + }, + "isolatedSum": { + "p50": 138.49599659442902, + "p90": 141.63200557231903, + "p95": 142.7839994430542, + "p99": 154.7520011663437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-181ba5c3", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_461b4b32", + "comparisonKey": "8ec6f853102cb32c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:00.845912+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 47.29599878191948, + "p90": 49.0880012512207, + "p95": 50.04800111055374, + "p99": 60.19200012087822 + }, + "combine": { + "p50": 47.29599878191948, + "p90": 49.0880012512207, + "p95": 50.04800111055374, + "p99": 60.19200012087822 + }, + "roundtrip": { + "p50": 47.29599878191948, + "p90": 49.0880012512207, + "p95": 50.04800111055374, + "p99": 60.19200012087822 + }, + "isolatedSum": { + "p50": 94.59199756383896, + "p90": 98.1760025024414, + "p95": 100.09600222110748, + "p99": 120.38400024175644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 48.73599857091904, + "p90": 49.8879998922348, + "p95": 50.40000006556511, + "p99": 51.32799968123436 + }, + "combine": { + "p50": 48.73599857091904, + "p90": 49.8879998922348, + "p95": 50.40000006556511, + "p99": 51.32799968123436 + }, + "roundtrip": { + "p50": 48.73599857091904, + "p90": 49.8879998922348, + "p95": 50.40000006556511, + "p99": 51.32799968123436 + }, + "isolatedSum": { + "p50": 97.47199714183807, + "p90": 99.7759997844696, + "p95": 
100.80000013113022, + "p99": 102.65599936246872 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 49.6320016682148, + "p90": 51.45600065588951, + "p95": 53.31199988722801, + "p99": 65.66400080919266 + }, + "combine": { + "p50": 49.6320016682148, + "p90": 51.45600065588951, + "p95": 53.31199988722801, + "p99": 65.66400080919266 + }, + "roundtrip": { + "p50": 49.6320016682148, + "p90": 51.45600065588951, + "p95": 53.31199988722801, + "p99": 65.66400080919266 + }, + "isolatedSum": { + "p50": 99.2640033364296, + "p90": 102.91200131177902, + "p95": 106.62399977445602, + "p99": 131.32800161838531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 50.912000238895416, + "p90": 53.0879981815815, + "p95": 54.016001522541046, + "p99": 62.3680017888546 + }, + "combine": { + "p50": 50.912000238895416, + "p90": 53.0879981815815, + "p95": 54.016001522541046, + "p99": 62.3680017888546 + }, + "roundtrip": { + "p50": 50.912000238895416, + "p90": 53.0879981815815, + "p95": 54.016001522541046, + "p99": 62.3680017888546 + }, + "isolatedSum": { + "p50": 101.82400047779083, + "p90": 106.175996363163, + "p95": 108.03200304508209, + "p99": 124.7360035777092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 52.89600044488907, + "p90": 
55.26399984955788, + "p95": 56.063998490571976, + "p99": 62.880001962184906 + }, + "combine": { + "p50": 52.89600044488907, + "p90": 55.26399984955788, + "p95": 56.063998490571976, + "p99": 62.880001962184906 + }, + "roundtrip": { + "p50": 52.89600044488907, + "p90": 55.26399984955788, + "p95": 56.063998490571976, + "p99": 62.880001962184906 + }, + "isolatedSum": { + "p50": 105.79200088977814, + "p90": 110.52799969911575, + "p95": 112.12799698114395, + "p99": 125.76000392436981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 53.95200103521347, + "p90": 56.15999922156334, + "p95": 57.312000542879105, + "p99": 68.35199892520905 + }, + "combine": { + "p50": 53.95200103521347, + "p90": 56.15999922156334, + "p95": 57.312000542879105, + "p99": 68.35199892520905 + }, + "roundtrip": { + "p50": 53.95200103521347, + "p90": 56.15999922156334, + "p95": 57.312000542879105, + "p99": 68.35199892520905 + }, + "isolatedSum": { + "p50": 107.90400207042694, + "p90": 112.31999844312668, + "p95": 114.62400108575821, + "p99": 136.7039978504181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 58.079998940229416, + "p90": 59.67999994754791, + "p95": 60.22400036454201, + "p99": 67.1359971165657 + }, + "combine": { + "p50": 58.079998940229416, + "p90": 59.67999994754791, + "p95": 60.22400036454201, + "p99": 67.1359971165657 + }, + "roundtrip": { + "p50": 58.079998940229416, + "p90": 59.67999994754791, + "p95": 60.22400036454201, + "p99": 67.1359971165657 + }, + "isolatedSum": 
{ + "p50": 116.15999788045883, + "p90": 119.35999989509583, + "p95": 120.44800072908401, + "p99": 134.2719942331314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 74.65600222349167, + "p90": 76.51200145483017, + "p95": 76.89599692821503, + "p99": 78.015998005867 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 76.51200145483017, + "p95": 76.89599692821503, + "p99": 78.015998005867 + }, + "roundtrip": { + "p50": 74.65600222349167, + "p90": 76.51200145483017, + "p95": 76.89599692821503, + "p99": 78.015998005867 + }, + "isolatedSum": { + "p50": 149.31200444698334, + "p90": 153.02400290966034, + "p95": 153.79199385643005, + "p99": 156.031996011734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4c333ef0", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b300_2c1ac2d8", + "comparisonKey": "de27adbe30350744", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:06.845798+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-heavy", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 44.51199993491173, + "p90": 46.08000069856644, + "p95": 47.00800031423569, + "p99": 50.49600079655647 + }, + "combine": { + "p50": 44.51199993491173, + "p90": 46.08000069856644, + "p95": 47.00800031423569, + "p99": 50.49600079655647 + }, + "roundtrip": { + "p50": 44.51199993491173, + "p90": 46.08000069856644, + "p95": 47.00800031423569, + "p99": 50.49600079655647 + }, + "isolatedSum": { + "p50": 89.02399986982346, + "p90": 92.16000139713287, + "p95": 94.01600062847137, + "p99": 100.99200159311295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 
7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 45.69600149989128, + "p90": 47.39199951291084, + "p95": 48.0320006608963, + "p99": 52.44800075888634 + }, + "combine": { + "p50": 45.69600149989128, + "p90": 47.39199951291084, + "p95": 48.0320006608963, + "p99": 52.44800075888634 + }, + "roundtrip": { + "p50": 45.69600149989128, + "p90": 47.39199951291084, + "p95": 48.0320006608963, + "p99": 52.44800075888634 + }, + "isolatedSum": { + "p50": 91.39200299978256, + "p90": 94.78399902582169, + "p95": 96.0640013217926, + "p99": 104.89600151777267 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 45.9199994802475, + "p90": 48.09600114822388, + "p95": 49.12000149488449, + "p99": 59.039998799562454 + }, + "combine": { + "p50": 45.9199994802475, + "p90": 48.09600114822388, + "p95": 49.12000149488449, + "p99": 59.039998799562454 + }, + "roundtrip": { + "p50": 45.9199994802475, + "p90": 48.09600114822388, + "p95": 49.12000149488449, + "p99": 59.039998799562454 + }, + "isolatedSum": { + "p50": 91.839998960495, + "p90": 96.19200229644775, + "p95": 98.24000298976898, + "p99": 118.07999759912491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 47.26399853825569, + "p90": 48.99200052022934, + "p95": 49.82399940490723, + "p99": 55.36000058054924 + }, + "combine": { + "p50": 47.26399853825569, + "p90": 48.99200052022934, + "p95": 49.82399940490723, + "p99": 55.36000058054924 + }, + "roundtrip": { 
+ "p50": 47.26399853825569, + "p90": 48.99200052022934, + "p95": 49.82399940490723, + "p99": 55.36000058054924 + }, + "isolatedSum": { + "p50": 94.52799707651138, + "p90": 97.98400104045868, + "p95": 99.64799880981445, + "p99": 110.72000116109848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 50.783999264240265, + "p90": 52.25599929690361, + "p95": 52.99200117588043, + "p99": 55.80800026655197 + }, + "combine": { + "p50": 50.783999264240265, + "p90": 52.25599929690361, + "p95": 52.99200117588043, + "p99": 55.80800026655197 + }, + "roundtrip": { + "p50": 50.783999264240265, + "p90": 52.25599929690361, + "p95": 52.99200117588043, + "p99": 55.80800026655197 + }, + "isolatedSum": { + "p50": 101.56799852848053, + "p90": 104.51199859380722, + "p95": 105.98400235176086, + "p99": 111.61600053310394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 52.2879995405674, + "p90": 54.91200089454651, + "p95": 55.615998804569244, + "p99": 65.37599861621857 + }, + "combine": { + "p50": 52.2879995405674, + "p90": 54.91200089454651, + "p95": 55.615998804569244, + "p99": 65.37599861621857 + }, + "roundtrip": { + "p50": 52.2879995405674, + "p90": 54.91200089454651, + "p95": 55.615998804569244, + "p99": 65.37599861621857 + }, + "isolatedSum": { + "p50": 104.5759990811348, + "p90": 109.82400178909302, + "p95": 111.23199760913849, + "p99": 130.75199723243713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 
1.5078125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 56.832000613212585, + "p90": 58.27200040221214, + "p95": 59.20000001788139, + "p99": 75.83999633789062 + }, + "combine": { + "p50": 56.832000613212585, + "p90": 58.27200040221214, + "p95": 59.20000001788139, + "p99": 75.83999633789062 + }, + "roundtrip": { + "p50": 56.832000613212585, + "p90": 58.27200040221214, + "p95": 59.20000001788139, + "p99": 75.83999633789062 + }, + "isolatedSum": { + "p50": 113.66400122642517, + "p90": 116.54400080442429, + "p95": 118.40000003576279, + "p99": 151.67999267578125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 72.54400104284286, + "p90": 74.49600100517273, + "p95": 75.1039981842041, + "p99": 85.50400286912918 + }, + "combine": { + "p50": 72.54400104284286, + "p90": 74.49600100517273, + "p95": 75.1039981842041, + "p99": 85.50400286912918 + }, + "roundtrip": { + "p50": 72.54400104284286, + "p90": 74.49600100517273, + "p95": 75.1039981842041, + "p99": 85.50400286912918 + }, + "isolatedSum": { + "p50": 145.08800208568573, + "p90": 148.99200201034546, + "p95": 150.2079963684082, + "p99": 171.00800573825836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a8e4c30", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b300_83d3b3b6", + "comparisonKey": 
"24cad37ed2e9bcd1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:07.601622+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 48.00000041723251, + "p90": 
49.60000142455101, + "p95": 50.36799982190132, + "p99": 60.38400158286095 + }, + "combine": { + "p50": 48.00000041723251, + "p90": 49.60000142455101, + "p95": 50.36799982190132, + "p99": 60.38400158286095 + }, + "roundtrip": { + "p50": 48.00000041723251, + "p90": 49.60000142455101, + "p95": 50.36799982190132, + "p99": 60.38400158286095 + }, + "isolatedSum": { + "p50": 96.00000083446503, + "p90": 99.20000284910202, + "p95": 100.73599964380264, + "p99": 120.7680031657219 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 48.99200052022934, + "p90": 50.27199909090996, + "p95": 50.783999264240265, + "p99": 53.21599915623665 + }, + "combine": { + "p50": 48.99200052022934, + "p90": 50.27199909090996, + "p95": 50.783999264240265, + "p99": 53.21599915623665 + }, + "roundtrip": { + "p50": 48.99200052022934, + "p90": 50.27199909090996, + "p95": 50.783999264240265, + "p99": 53.21599915623665 + }, + "isolatedSum": { + "p50": 97.98400104045868, + "p90": 100.54399818181992, + "p95": 101.56799852848053, + "p99": 106.4319983124733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.67999863624573, + "p90": 53.66399884223938, + "p95": 54.368000477552414, + "p99": 67.77600198984146 + }, + "combine": { + "p50": 51.67999863624573, + "p90": 53.66399884223938, + "p95": 54.368000477552414, + "p99": 67.77600198984146 + }, + "roundtrip": { + "p50": 51.67999863624573, + "p90": 53.66399884223938, + "p95": 54.368000477552414, + "p99": 67.77600198984146 + }, + "isolatedSum": { + "p50": 
103.35999727249146, + "p90": 107.32799768447876, + "p95": 108.73600095510483, + "p99": 135.55200397968292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 51.80799961090088, + "p90": 53.75999957323074, + "p95": 54.23999950289726, + "p99": 57.82400071620941 + }, + "combine": { + "p50": 51.80799961090088, + "p90": 53.75999957323074, + "p95": 54.23999950289726, + "p99": 57.82400071620941 + }, + "roundtrip": { + "p50": 51.80799961090088, + "p90": 53.75999957323074, + "p95": 54.23999950289726, + "p99": 57.82400071620941 + }, + "isolatedSum": { + "p50": 103.61599922180176, + "p90": 107.51999914646149, + "p95": 108.47999900579453, + "p99": 115.64800143241882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.79199981689453, + "p90": 56.063998490571976, + "p95": 57.023998349905014, + "p99": 63.32799792289734 + }, + "combine": { + "p50": 53.79199981689453, + "p90": 56.063998490571976, + "p95": 57.023998349905014, + "p99": 63.32799792289734 + }, + "roundtrip": { + "p50": 53.79199981689453, + "p90": 56.063998490571976, + "p95": 57.023998349905014, + "p99": 63.32799792289734 + }, + "isolatedSum": { + "p50": 107.58399963378906, + "p90": 112.12799698114395, + "p95": 114.04799669981003, + "p99": 126.65599584579468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + 
"globalTokens": 256, + "dispatch": { + "p50": 55.64799904823303, + "p90": 57.920001447200775, + "p95": 58.78400057554245, + "p99": 68.51200014352798 + }, + "combine": { + "p50": 55.64799904823303, + "p90": 57.920001447200775, + "p95": 58.78400057554245, + "p99": 68.51200014352798 + }, + "roundtrip": { + "p50": 55.64799904823303, + "p90": 57.920001447200775, + "p95": 58.78400057554245, + "p99": 68.51200014352798 + }, + "isolatedSum": { + "p50": 111.29599809646606, + "p90": 115.84000289440155, + "p95": 117.5680011510849, + "p99": 137.02400028705597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.55199897289276, + "p90": 61.3120011985302, + "p95": 61.95199862122536, + "p99": 64.83200192451477 + }, + "combine": { + "p50": 59.55199897289276, + "p90": 61.3120011985302, + "p95": 61.95199862122536, + "p99": 64.83200192451477 + }, + "roundtrip": { + "p50": 59.55199897289276, + "p90": 61.3120011985302, + "p95": 61.95199862122536, + "p99": 64.83200192451477 + }, + "isolatedSum": { + "p50": 119.10399794578552, + "p90": 122.6240023970604, + "p95": 123.90399724245071, + "p99": 129.66400384902954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.76000219583511, + "p90": 71.10399752855301, + "p95": 71.84000313282013, + "p99": 76.1599987745285 + }, + "combine": { + "p50": 69.76000219583511, + "p90": 71.10399752855301, + "p95": 71.84000313282013, + "p99": 76.1599987745285 + }, + "roundtrip": { + "p50": 69.76000219583511, + "p90": 71.10399752855301, + 
"p95": 71.84000313282013, + "p99": 76.1599987745285 + }, + "isolatedSum": { + "p50": 139.52000439167023, + "p90": 142.20799505710602, + "p95": 143.68000626564026, + "p99": 152.319997549057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-329cbb1b", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b300_ffeff7f3", + "comparisonKey": "0fad0d1cea455fcc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:34:00.790595+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", 
+ "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 47.71199822425842, + "p90": 48.99200052022934, + "p95": 50.36799982190132, + "p99": 55.10399863123894 + }, + "combine": { + "p50": 47.71199822425842, + "p90": 48.99200052022934, + "p95": 50.36799982190132, + "p99": 55.10399863123894 + }, + "roundtrip": { + "p50": 47.71199822425842, + "p90": 48.99200052022934, + "p95": 50.36799982190132, + "p99": 55.10399863123894 + }, + "isolatedSum": { + "p50": 95.42399644851685, + "p90": 97.98400104045868, + "p95": 100.73599964380264, + "p99": 110.20799726247787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.984000623226166, + "p90": 51.52000114321709, + "p95": 52.5440014898777, + "p99": 66.94400310516357 + }, + "combine": { + "p50": 49.984000623226166, + "p90": 51.52000114321709, + "p95": 52.5440014898777, + "p99": 66.94400310516357 + }, + "roundtrip": { + "p50": 49.984000623226166, + "p90": 51.52000114321709, + "p95": 52.5440014898777, + "p99": 66.94400310516357 + }, + "isolatedSum": { + "p50": 99.96800124645233, + "p90": 103.04000228643417, + "p95": 105.0880029797554, + "p99": 133.88800621032715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + 
"fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.83999985456467, + "p90": 53.727999329566956, + "p95": 54.55999821424484, + "p99": 68.67200136184692 + }, + "combine": { + "p50": 51.83999985456467, + "p90": 53.727999329566956, + "p95": 54.55999821424484, + "p99": 68.67200136184692 + }, + "roundtrip": { + "p50": 51.83999985456467, + "p90": 53.727999329566956, + "p95": 54.55999821424484, + "p99": 68.67200136184692 + }, + "isolatedSum": { + "p50": 103.67999970912933, + "p90": 107.45599865913391, + "p95": 109.11999642848969, + "p99": 137.34400272369385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 51.90400034189224, + "p90": 53.95200103521347, + "p95": 54.816000163555145, + "p99": 66.68800115585327 + }, + "combine": { + "p50": 51.90400034189224, + "p90": 53.95200103521347, + "p95": 54.816000163555145, + "p99": 66.68800115585327 + }, + "roundtrip": { + "p50": 51.90400034189224, + "p90": 53.95200103521347, + "p95": 54.816000163555145, + "p99": 66.68800115585327 + }, + "isolatedSum": { + "p50": 103.80800068378448, + "p90": 107.90400207042694, + "p95": 109.63200032711029, + "p99": 133.37600231170654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.279999643564224, + "p90": 55.16799911856651, + "p95": 56.543998420238495, + "p99": 70.52800059318542 + }, + "combine": { + "p50": 53.279999643564224, + "p90": 
55.16799911856651, + "p95": 56.543998420238495, + "p99": 70.52800059318542 + }, + "roundtrip": { + "p50": 53.279999643564224, + "p90": 55.16799911856651, + "p95": 56.543998420238495, + "p99": 70.52800059318542 + }, + "isolatedSum": { + "p50": 106.55999928712845, + "p90": 110.33599823713303, + "p95": 113.08799684047699, + "p99": 141.05600118637085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 54.88000065088272, + "p90": 57.151999324560165, + "p95": 58.62399935722351, + "p99": 75.39200037717819 + }, + "combine": { + "p50": 54.88000065088272, + "p90": 57.151999324560165, + "p95": 58.62399935722351, + "p99": 75.39200037717819 + }, + "roundtrip": { + "p50": 54.88000065088272, + "p90": 57.151999324560165, + "p95": 58.62399935722351, + "p99": 75.39200037717819 + }, + "isolatedSum": { + "p50": 109.76000130176544, + "p90": 114.30399864912033, + "p95": 117.24799871444702, + "p99": 150.78400075435638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.10399928689003, + "p90": 60.896001756191254, + "p95": 61.95199862122536, + "p99": 71.55200093984604 + }, + "combine": { + "p50": 59.10399928689003, + "p90": 60.896001756191254, + "p95": 61.95199862122536, + "p99": 71.55200093984604 + }, + "roundtrip": { + "p50": 59.10399928689003, + "p90": 60.896001756191254, + "p95": 61.95199862122536, + "p99": 71.55200093984604 + }, + "isolatedSum": { + "p50": 118.20799857378006, + "p90": 121.79200351238251, + "p95": 123.90399724245071, + "p99": 143.10400187969208 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 74.43200051784515, + "p90": 75.6480023264885, + "p95": 76.22399926185608, + "p99": 86.59200370311737 + }, + "combine": { + "p50": 74.43200051784515, + "p90": 75.6480023264885, + "p95": 76.22399926185608, + "p99": 86.59200370311737 + }, + "roundtrip": { + "p50": 74.43200051784515, + "p90": 75.6480023264885, + "p95": 76.22399926185608, + "p99": 86.59200370311737 + }, + "isolatedSum": { + "p50": 148.8640010356903, + "p90": 151.296004652977, + "p95": 152.44799852371216, + "p99": 173.18400740623474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0acd72b1", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b300_7f54372b", + "comparisonKey": "e0422d5a205eed2d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:34:31.363511+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + 
"routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 46.20800167322159, + "p90": 47.74399846792221, + "p95": 48.31999912858009, + "p99": 54.1439987719059 + }, + "combine": { + "p50": 46.20800167322159, + "p90": 47.74399846792221, + "p95": 48.31999912858009, + "p99": 54.1439987719059 + }, + "roundtrip": { + "p50": 46.20800167322159, + "p90": 47.74399846792221, + "p95": 48.31999912858009, + "p99": 54.1439987719059 + }, + "isolatedSum": { + "p50": 92.41600334644318, + "p90": 95.48799693584442, + "p95": 96.63999825716019, + "p99": 108.2879975438118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
2, + "globalTokens": 16, + "dispatch": { + "p50": 49.0880012512207, + "p90": 51.10400170087814, + "p95": 51.61599814891815, + "p99": 63.19999694824219 + }, + "combine": { + "p50": 49.0880012512207, + "p90": 51.10400170087814, + "p95": 51.61599814891815, + "p99": 63.19999694824219 + }, + "roundtrip": { + "p50": 49.0880012512207, + "p90": 51.10400170087814, + "p95": 51.61599814891815, + "p99": 63.19999694824219 + }, + "isolatedSum": { + "p50": 98.1760025024414, + "p90": 102.20800340175629, + "p95": 103.2319962978363, + "p99": 126.39999389648438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 52.000001072883606, + "p90": 53.75999957323074, + "p95": 54.687999188899994, + "p99": 63.87200206518173 + }, + "combine": { + "p50": 52.000001072883606, + "p90": 53.75999957323074, + "p95": 54.687999188899994, + "p99": 63.87200206518173 + }, + "roundtrip": { + "p50": 52.000001072883606, + "p90": 53.75999957323074, + "p95": 54.687999188899994, + "p99": 63.87200206518173 + }, + "isolatedSum": { + "p50": 104.00000214576721, + "p90": 107.51999914646149, + "p95": 109.37599837779999, + "p99": 127.74400413036346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.480001002550125, + "p90": 54.43200096487999, + "p95": 56.32000043988228, + "p99": 74.27199929952621 + }, + "combine": { + "p50": 52.480001002550125, + "p90": 54.43200096487999, + "p95": 56.32000043988228, + "p99": 74.27199929952621 + }, + "roundtrip": { + "p50": 52.480001002550125, + "p90": 54.43200096487999, + "p95": 
56.32000043988228, + "p99": 74.27199929952621 + }, + "isolatedSum": { + "p50": 104.96000200510025, + "p90": 108.86400192975998, + "p95": 112.64000087976456, + "p99": 148.54399859905243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 54.46400120854378, + "p90": 56.63999915122986, + "p95": 57.21599981188774, + "p99": 68.76800209283829 + }, + "combine": { + "p50": 54.46400120854378, + "p90": 56.63999915122986, + "p95": 57.21599981188774, + "p99": 68.76800209283829 + }, + "roundtrip": { + "p50": 54.46400120854378, + "p90": 56.63999915122986, + "p95": 57.21599981188774, + "p99": 68.76800209283829 + }, + "isolatedSum": { + "p50": 108.92800241708755, + "p90": 113.27999830245972, + "p95": 114.43199962377548, + "p99": 137.53600418567657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.424001067876816, + "p90": 57.53599852323532, + "p95": 58.75200033187866, + "p99": 71.3919997215271 + }, + "combine": { + "p50": 55.424001067876816, + "p90": 57.53599852323532, + "p95": 58.75200033187866, + "p99": 71.3919997215271 + }, + "roundtrip": { + "p50": 55.424001067876816, + "p90": 57.53599852323532, + "p95": 58.75200033187866, + "p99": 71.3919997215271 + }, + "isolatedSum": { + "p50": 110.84800213575363, + "p90": 115.07199704647064, + "p95": 117.50400066375732, + "p99": 142.7839994430542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 8, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.51999872922897, + "p90": 61.43999844789505, + "p95": 61.95199862122536, + "p99": 81.66400343179703 + }, + "combine": { + "p50": 59.51999872922897, + "p90": 61.43999844789505, + "p95": 61.95199862122536, + "p99": 81.66400343179703 + }, + "roundtrip": { + "p50": 59.51999872922897, + "p90": 61.43999844789505, + "p95": 61.95199862122536, + "p99": 81.66400343179703 + }, + "isolatedSum": { + "p50": 119.03999745845795, + "p90": 122.8799968957901, + "p95": 123.90399724245071, + "p99": 163.32800686359406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.43999975919724, + "p90": 71.32799923419952, + "p95": 71.87200337648392, + "p99": 79.00799810886383 + }, + "combine": { + "p50": 69.43999975919724, + "p90": 71.32799923419952, + "p95": 71.87200337648392, + "p99": 79.00799810886383 + }, + "roundtrip": { + "p50": 69.43999975919724, + "p90": 71.32799923419952, + "p95": 71.87200337648392, + "p99": 79.00799810886383 + }, + "isolatedSum": { + "p50": 138.87999951839447, + "p90": 142.65599846839905, + "p95": 143.74400675296783, + "p99": 158.01599621772766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56c230c3", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_c6a488a8", + "comparisonKey": "d469000091116e9f", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T08:56:58.017242+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 46.9760000705719, + "p90": 48.48000034689903, + "p95": 49.47200044989586, + "p99": 63.71200084686279 + }, + "combine": { + 
"p50": 46.9760000705719, + "p90": 48.48000034689903, + "p95": 49.47200044989586, + "p99": 63.71200084686279 + }, + "roundtrip": { + "p50": 46.9760000705719, + "p90": 48.48000034689903, + "p95": 49.47200044989586, + "p99": 63.71200084686279 + }, + "isolatedSum": { + "p50": 93.9520001411438, + "p90": 96.96000069379807, + "p95": 98.94400089979172, + "p99": 127.42400169372559 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 48.67200180888176, + "p90": 50.016000866889954, + "p95": 51.231998950242996, + "p99": 65.18399715423584 + }, + "combine": { + "p50": 48.67200180888176, + "p90": 50.016000866889954, + "p95": 51.231998950242996, + "p99": 65.18399715423584 + }, + "roundtrip": { + "p50": 48.67200180888176, + "p90": 50.016000866889954, + "p95": 51.231998950242996, + "p99": 65.18399715423584 + }, + "isolatedSum": { + "p50": 97.34400361776352, + "p90": 100.03200173377991, + "p95": 102.46399790048599, + "p99": 130.36799430847168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 50.75199902057648, + "p90": 52.671998739242554, + "p95": 53.21599915623665, + "p99": 55.71199953556061 + }, + "combine": { + "p50": 50.75199902057648, + "p90": 52.671998739242554, + "p95": 53.21599915623665, + "p99": 55.71199953556061 + }, + "roundtrip": { + "p50": 50.75199902057648, + "p90": 52.671998739242554, + "p95": 53.21599915623665, + "p99": 55.71199953556061 + }, + "isolatedSum": { + "p50": 101.50399804115295, + "p90": 105.34399747848511, + "p95": 106.4319983124733, + "p99": 111.42399907112122 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 51.7439991235733, + "p90": 53.727999329566956, + "p95": 54.55999821424484, + "p99": 59.99999865889549 + }, + "combine": { + "p50": 51.7439991235733, + "p90": 53.727999329566956, + "p95": 54.55999821424484, + "p99": 59.99999865889549 + }, + "roundtrip": { + "p50": 51.7439991235733, + "p90": 53.727999329566956, + "p95": 54.55999821424484, + "p99": 59.99999865889549 + }, + "isolatedSum": { + "p50": 103.4879982471466, + "p90": 107.45599865913391, + "p95": 109.11999642848969, + "p99": 119.99999731779099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.02400141954422, + "p90": 55.39200082421303, + "p95": 56.832000613212585, + "p99": 64.06400352716446 + }, + "combine": { + "p50": 53.02400141954422, + "p90": 55.39200082421303, + "p95": 56.832000613212585, + "p99": 64.06400352716446 + }, + "roundtrip": { + "p50": 53.02400141954422, + "p90": 55.39200082421303, + "p95": 56.832000613212585, + "p99": 64.06400352716446 + }, + "isolatedSum": { + "p50": 106.04800283908844, + "p90": 110.78400164842606, + "p95": 113.66400122642517, + "p99": 128.12800705432892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 54.30399999022484, + "p90": 57.023998349905014, + "p95": 
57.5999990105629, + "p99": 61.37600168585777 + }, + "combine": { + "p50": 54.30399999022484, + "p90": 57.023998349905014, + "p95": 57.5999990105629, + "p99": 61.37600168585777 + }, + "roundtrip": { + "p50": 54.30399999022484, + "p90": 57.023998349905014, + "p95": 57.5999990105629, + "p99": 61.37600168585777 + }, + "isolatedSum": { + "p50": 108.60799998044968, + "p90": 114.04799669981003, + "p95": 115.1999980211258, + "p99": 122.75200337171555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 57.82400071620941, + "p90": 59.51999872922897, + "p95": 60.32000109553337, + "p99": 66.97600334882736 + }, + "combine": { + "p50": 57.82400071620941, + "p90": 59.51999872922897, + "p95": 60.32000109553337, + "p99": 66.97600334882736 + }, + "roundtrip": { + "p50": 57.82400071620941, + "p90": 59.51999872922897, + "p95": 60.32000109553337, + "p99": 66.97600334882736 + }, + "isolatedSum": { + "p50": 115.64800143241882, + "p90": 119.03999745845795, + "p95": 120.64000219106674, + "p99": 133.95200669765472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.79200309515, + "p90": 75.52000135183334, + "p95": 75.93599706888199, + "p99": 77.98399776220322 + }, + "combine": { + "p50": 73.79200309515, + "p90": 75.52000135183334, + "p95": 75.93599706888199, + "p99": 77.98399776220322 + }, + "roundtrip": { + "p50": 73.79200309515, + "p90": 75.52000135183334, + "p95": 75.93599706888199, + "p99": 77.98399776220322 + }, + "isolatedSum": { + "p50": 147.5840061903, + "p90": 
151.0400027036667, + "p95": 151.87199413776398, + "p99": 155.96799552440643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-71c44151", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_d5a1b306", + "comparisonKey": "6731fce53a4bdbf6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:29.209582+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 48.0320006608963, + "p90": 49.95200037956238, + "p95": 50.84799975156784, + "p99": 57.792000472545624 + }, + "combine": { + "p50": 48.0320006608963, + "p90": 49.95200037956238, + "p95": 50.84799975156784, + "p99": 57.792000472545624 + }, + "roundtrip": { + "p50": 48.0320006608963, + "p90": 49.95200037956238, + "p95": 50.84799975156784, + "p99": 57.792000472545624 + }, + "isolatedSum": { + "p50": 96.0640013217926, + "p90": 99.90400075912476, + "p95": 101.69599950313568, + "p99": 115.58400094509125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.247998744249344, + "p90": 50.624001771211624, + "p95": 51.711998879909515, + "p99": 55.52000179886818 + }, + "combine": { + "p50": 49.247998744249344, + "p90": 50.624001771211624, + "p95": 51.711998879909515, + "p99": 55.52000179886818 + }, + "roundtrip": { + "p50": 49.247998744249344, + "p90": 50.624001771211624, + "p95": 51.711998879909515, + "p99": 55.52000179886818 + }, + "isolatedSum": { + "p50": 98.49599748849869, + "p90": 101.24800354242325, + "p95": 103.42399775981903, + "p99": 111.04000359773636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 
8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.83999985456467, + "p90": 53.408000618219376, + "p95": 53.85600030422211, + "p99": 56.57599866390228 + }, + "combine": { + "p50": 51.83999985456467, + "p90": 53.408000618219376, + "p95": 53.85600030422211, + "p99": 56.57599866390228 + }, + "roundtrip": { + "p50": 51.83999985456467, + "p90": 53.408000618219376, + "p95": 53.85600030422211, + "p99": 56.57599866390228 + }, + "isolatedSum": { + "p50": 103.67999970912933, + "p90": 106.81600123643875, + "p95": 107.71200060844421, + "p99": 113.15199732780457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.25599929690361, + "p90": 54.368000477552414, + "p95": 56.15999922156334, + "p99": 66.30399823188782 + }, + "combine": { + "p50": 52.25599929690361, + "p90": 54.368000477552414, + "p95": 56.15999922156334, + "p99": 66.30399823188782 + }, + "roundtrip": { + "p50": 52.25599929690361, + "p90": 54.368000477552414, + "p95": 56.15999922156334, + "p99": 66.30399823188782 + }, + "isolatedSum": { + "p50": 104.51199859380722, + "p90": 108.73600095510483, + "p95": 112.31999844312668, + "p99": 132.60799646377563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.279999643564224, + "p90": 55.52000179886818, + "p95": 56.44800141453743, + "p99": 69.40799951553345 + }, + "combine": { + "p50": 53.279999643564224, + "p90": 55.52000179886818, + "p95": 
56.44800141453743, + "p99": 69.40799951553345 + }, + "roundtrip": { + "p50": 53.279999643564224, + "p90": 55.52000179886818, + "p95": 56.44800141453743, + "p99": 69.40799951553345 + }, + "isolatedSum": { + "p50": 106.55999928712845, + "p90": 111.04000359773636, + "p95": 112.89600282907486, + "p99": 138.8159990310669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.07199838757515, + "p90": 57.69599974155426, + "p95": 58.75200033187866, + "p99": 68.70400160551071 + }, + "combine": { + "p50": 55.07199838757515, + "p90": 57.69599974155426, + "p95": 58.75200033187866, + "p99": 68.70400160551071 + }, + "roundtrip": { + "p50": 55.07199838757515, + "p90": 57.69599974155426, + "p95": 58.75200033187866, + "p99": 68.70400160551071 + }, + "isolatedSum": { + "p50": 110.1439967751503, + "p90": 115.39199948310852, + "p95": 117.50400066375732, + "p99": 137.40800321102142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.13599953055382, + "p90": 61.37600168585777, + "p95": 62.84800171852112, + "p99": 70.91200351715088 + }, + "combine": { + "p50": 59.13599953055382, + "p90": 61.37600168585777, + "p95": 62.84800171852112, + "p99": 70.91200351715088 + }, + "roundtrip": { + "p50": 59.13599953055382, + "p90": 61.37600168585777, + "p95": 62.84800171852112, + "p99": 70.91200351715088 + }, + "isolatedSum": { + "p50": 118.27199906110764, + "p90": 122.75200337171555, + "p95": 125.69600343704224, + "p99": 141.82400703430176 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.023996591568, + "p90": 70.36799937486649, + "p95": 70.68800181150436, + "p99": 71.71200215816498 + }, + "combine": { + "p50": 69.023996591568, + "p90": 70.36799937486649, + "p95": 70.68800181150436, + "p99": 71.71200215816498 + }, + "roundtrip": { + "p50": 69.023996591568, + "p90": 70.36799937486649, + "p95": 70.68800181150436, + "p99": 71.71200215816498 + }, + "isolatedSum": { + "p50": 138.047993183136, + "p90": 140.73599874973297, + "p95": 141.37600362300873, + "p99": 143.42400431632996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-539b87cb", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_f4952abc", + "comparisonKey": "f71129e0a8e323cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:32:56.789834+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + 
"unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 47.45600000023842, + "p90": 48.86399954557419, + "p95": 49.8879998922348, + "p99": 64.54399973154068 + }, + "combine": { + "p50": 47.45600000023842, + "p90": 48.86399954557419, + "p95": 49.8879998922348, + "p99": 64.54399973154068 + }, + "roundtrip": { + "p50": 47.45600000023842, + "p90": 48.86399954557419, + "p95": 49.8879998922348, + "p99": 64.54399973154068 + }, + "isolatedSum": { + "p50": 94.91200000047684, + "p90": 97.72799909114838, + "p95": 99.7759997844696, + "p99": 129.08799946308136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
49.56800118088722, + "p90": 50.912000238895416, + "p95": 52.319999784231186, + "p99": 63.519999384880066 + }, + "combine": { + "p50": 49.56800118088722, + "p90": 50.912000238895416, + "p95": 52.319999784231186, + "p99": 63.519999384880066 + }, + "roundtrip": { + "p50": 49.56800118088722, + "p90": 50.912000238895416, + "p95": 52.319999784231186, + "p99": 63.519999384880066 + }, + "isolatedSum": { + "p50": 99.13600236177444, + "p90": 101.82400047779083, + "p95": 104.63999956846237, + "p99": 127.03999876976013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 52.191998809576035, + "p90": 53.02400141954422, + "p95": 54.07999828457832, + "p99": 57.72799998521805 + }, + "combine": { + "p50": 52.191998809576035, + "p90": 53.02400141954422, + "p95": 54.07999828457832, + "p99": 57.72799998521805 + }, + "roundtrip": { + "p50": 52.191998809576035, + "p90": 53.02400141954422, + "p95": 54.07999828457832, + "p99": 57.72799998521805 + }, + "isolatedSum": { + "p50": 104.38399761915207, + "p90": 106.04800283908844, + "p95": 108.15999656915665, + "p99": 115.4559999704361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.2879995405674, + "p90": 53.69599908590317, + "p95": 54.84800040721893, + "p99": 65.08799642324448 + }, + "combine": { + "p50": 52.2879995405674, + "p90": 53.69599908590317, + "p95": 54.84800040721893, + "p99": 65.08799642324448 + }, + "roundtrip": { + "p50": 52.2879995405674, + "p90": 53.69599908590317, + "p95": 54.84800040721893, + "p99": 65.08799642324448 + }, 
+ "isolatedSum": { + "p50": 104.5759990811348, + "p90": 107.39199817180634, + "p95": 109.69600081443787, + "p99": 130.17599284648895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 53.408000618219376, + "p90": 55.296000093221664, + "p95": 56.671999394893646, + "p99": 60.28800085186958 + }, + "combine": { + "p50": 53.408000618219376, + "p90": 55.296000093221664, + "p95": 56.671999394893646, + "p99": 60.28800085186958 + }, + "roundtrip": { + "p50": 53.408000618219376, + "p90": 55.296000093221664, + "p95": 56.671999394893646, + "p99": 60.28800085186958 + }, + "isolatedSum": { + "p50": 106.81600123643875, + "p90": 110.59200018644333, + "p95": 113.34399878978729, + "p99": 120.57600170373917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 54.91200089454651, + "p90": 57.0559985935688, + "p95": 57.920001447200775, + "p99": 65.98400324583054 + }, + "combine": { + "p50": 54.91200089454651, + "p90": 57.0559985935688, + "p95": 57.920001447200775, + "p99": 65.98400324583054 + }, + "roundtrip": { + "p50": 54.91200089454651, + "p90": 57.0559985935688, + "p95": 57.920001447200775, + "p99": 65.98400324583054 + }, + "isolatedSum": { + "p50": 109.82400178909302, + "p90": 114.1119971871376, + "p95": 115.84000289440155, + "p99": 131.96800649166107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 59.039998799562454, + "p90": 60.864001512527466, + "p95": 61.535999178886414, + "p99": 71.19999825954437 + }, + "combine": { + "p50": 59.039998799562454, + "p90": 60.864001512527466, + "p95": 61.535999178886414, + "p99": 71.19999825954437 + }, + "roundtrip": { + "p50": 59.039998799562454, + "p90": 60.864001512527466, + "p95": 61.535999178886414, + "p99": 71.19999825954437 + }, + "isolatedSum": { + "p50": 118.07999759912491, + "p90": 121.72800302505493, + "p95": 123.07199835777283, + "p99": 142.39999651908875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.43999975919724, + "p90": 71.3919997215271, + "p95": 75.16799867153168, + "p99": 94.91200000047684 + }, + "combine": { + "p50": 69.43999975919724, + "p90": 71.3919997215271, + "p95": 75.16799867153168, + "p99": 94.91200000047684 + }, + "roundtrip": { + "p50": 69.43999975919724, + "p90": 71.3919997215271, + "p95": 75.16799867153168, + "p99": 94.91200000047684 + }, + "isolatedSum": { + "p50": 138.87999951839447, + "p90": 142.7839994430542, + "p95": 150.33599734306335, + "p99": 189.82400000095367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bb62e88", + "identity": "b300|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_d22735ae", + "comparisonKey": "46230412bf8dc722", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:49.333435+00:00", + "status": 
"valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 50.56000128388405, + "p90": 52.06400156021118, + "p95": 53.21599915623665, + "p99": 58.720000088214874 + }, + "combine": { + "p50": 50.56000128388405, + "p90": 52.06400156021118, + "p95": 
53.21599915623665, + "p99": 58.720000088214874 + }, + "roundtrip": { + "p50": 50.56000128388405, + "p90": 52.06400156021118, + "p95": 53.21599915623665, + "p99": 58.720000088214874 + }, + "isolatedSum": { + "p50": 101.1200025677681, + "p90": 104.12800312042236, + "p95": 106.4319983124733, + "p99": 117.44000017642975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 52.86400020122528, + "p90": 54.52800169587135, + "p95": 56.32000043988228, + "p99": 65.72800129652023 + }, + "combine": { + "p50": 52.86400020122528, + "p90": 54.52800169587135, + "p95": 56.32000043988228, + "p99": 65.72800129652023 + }, + "roundtrip": { + "p50": 52.86400020122528, + "p90": 54.52800169587135, + "p95": 56.32000043988228, + "p99": 65.72800129652023 + }, + "isolatedSum": { + "p50": 105.72800040245056, + "p90": 109.0560033917427, + "p95": 112.64000087976456, + "p99": 131.45600259304047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 53.53600159287453, + "p90": 55.58399856090546, + "p95": 56.384000927209854, + "p99": 60.416001826524734 + }, + "combine": { + "p50": 53.53600159287453, + "p90": 55.58399856090546, + "p95": 56.384000927209854, + "p99": 60.416001826524734 + }, + "roundtrip": { + "p50": 53.53600159287453, + "p90": 55.58399856090546, + "p95": 56.384000927209854, + "p99": 60.416001826524734 + }, + "isolatedSum": { + "p50": 107.07200318574905, + "p90": 111.16799712181091, + "p95": 112.76800185441971, + "p99": 120.83200365304947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 53.63199859857559, + "p90": 55.96800148487091, + "p95": 56.832000613212585, + "p99": 71.07199728488922 + }, + "combine": { + "p50": 53.63199859857559, + "p90": 55.96800148487091, + "p95": 56.832000613212585, + "p99": 71.07199728488922 + }, + "roundtrip": { + "p50": 53.63199859857559, + "p90": 55.96800148487091, + "p95": 56.832000613212585, + "p99": 71.07199728488922 + }, + "isolatedSum": { + "p50": 107.26399719715118, + "p90": 111.93600296974182, + "p95": 113.66400122642517, + "p99": 142.14399456977844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 55.48800155520439, + "p90": 57.66399949789047, + "p95": 60.15999987721443, + "p99": 71.35999947786331 + }, + "combine": { + "p50": 55.48800155520439, + "p90": 57.66399949789047, + "p95": 60.15999987721443, + "p99": 71.35999947786331 + }, + "roundtrip": { + "p50": 55.48800155520439, + "p90": 57.66399949789047, + "p95": 60.15999987721443, + "p99": 71.35999947786331 + }, + "isolatedSum": { + "p50": 110.97600311040878, + "p90": 115.32799899578094, + "p95": 120.31999975442886, + "p99": 142.71999895572662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 56.2559999525547, + "p90": 58.6559996008873, + "p95": 60.32000109553337, + "p99": 68.51200014352798 + }, + "combine": { 
+ "p50": 56.2559999525547, + "p90": 58.6559996008873, + "p95": 60.32000109553337, + "p99": 68.51200014352798 + }, + "roundtrip": { + "p50": 56.2559999525547, + "p90": 58.6559996008873, + "p95": 60.32000109553337, + "p99": 68.51200014352798 + }, + "isolatedSum": { + "p50": 112.5119999051094, + "p90": 117.3119992017746, + "p95": 120.64000219106674, + "p99": 137.02400028705597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 58.079998940229416, + "p90": 59.93599817156792, + "p95": 61.535999178886414, + "p99": 75.93599706888199 + }, + "combine": { + "p50": 58.079998940229416, + "p90": 59.93599817156792, + "p95": 61.535999178886414, + "p99": 75.93599706888199 + }, + "roundtrip": { + "p50": 58.079998940229416, + "p90": 59.93599817156792, + "p95": 61.535999178886414, + "p99": 75.93599706888199 + }, + "isolatedSum": { + "p50": 116.15999788045883, + "p90": 119.87199634313583, + "p95": 123.07199835777283, + "p99": 151.87199413776398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 64.12799656391144, + "p90": 65.92000275850296, + "p95": 66.3679987192154, + "p99": 77.08799839019775 + }, + "combine": { + "p50": 64.12799656391144, + "p90": 65.92000275850296, + "p95": 66.3679987192154, + "p99": 77.08799839019775 + }, + "roundtrip": { + "p50": 64.12799656391144, + "p90": 65.92000275850296, + "p95": 66.3679987192154, + "p99": 77.08799839019775 + }, + "isolatedSum": { + "p50": 128.25599312782288, + "p90": 131.84000551700592, + "p95": 132.7359974384308, + "p99": 
154.1759967803955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49e169da", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_6300ebb7", + "comparisonKey": "01804e6d9a96754e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:17.026963+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.66400146484375, + "p90": 71.10399752855301, + "p95": 71.87200337648392, + "p99": 87.71199733018875 + }, + "combine": { + "p50": 69.66400146484375, + "p90": 71.10399752855301, + "p95": 71.87200337648392, + "p99": 87.71199733018875 + }, + "roundtrip": { + "p50": 69.66400146484375, + "p90": 71.10399752855301, + "p95": 71.87200337648392, + "p99": 87.71199733018875 + }, + "isolatedSum": { + "p50": 139.3280029296875, + "p90": 142.20799505710602, + "p95": 143.74400675296783, + "p99": 175.4239946603775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.51199728250504, + "p90": 98.27200323343277, + "p95": 99.04000163078308, + "p99": 103.4879982471466 + }, + "combine": { + "p50": 96.51199728250504, + "p90": 98.27200323343277, + "p95": 99.04000163078308, + "p99": 103.4879982471466 + }, + "roundtrip": { + "p50": 96.51199728250504, + "p90": 98.27200323343277, + "p95": 99.04000163078308, + "p99": 103.4879982471466 + }, + "isolatedSum": { + "p50": 193.02399456501007, + "p90": 196.54400646686554, + "p95": 198.08000326156616, + "p99": 206.9759964942932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 153.08800339698792, + "p90": 155.2640050649643, + "p95": 156.031996011734, + "p99": 162.04799711704254 + }, + "combine": { + "p50": 153.08800339698792, + "p90": 155.2640050649643, + "p95": 156.031996011734, + "p99": 162.04799711704254 + }, + "roundtrip": { + "p50": 153.08800339698792, + "p90": 155.2640050649643, + "p95": 156.031996011734, + "p99": 162.04799711704254 + }, + "isolatedSum": { + "p50": 306.17600679397583, + "p90": 310.5280101299286, + "p95": 312.063992023468, + "p99": 324.0959942340851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 270.687997341156, + "p90": 274.04800057411194, + "p95": 274.84801411628723, + "p99": 278.6239981651306 + }, + "combine": { + "p50": 270.687997341156, + "p90": 274.04800057411194, + "p95": 274.84801411628723, + "p99": 278.6239981651306 + }, + "roundtrip": { + "p50": 270.687997341156, + "p90": 274.04800057411194, + "p95": 274.84801411628723, + "p99": 278.6239981651306 + }, + "isolatedSum": { + "p50": 541.375994682312, + "p90": 548.0960011482239, + "p95": 549.6960282325745, + "p99": 557.2479963302612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 498.23999404907227, + "p90": 501.5680193901062, + "p95": 502.81602144241333, + "p99": 516.2879824638367 + }, + "combine": { + "p50": 498.23999404907227, + "p90": 501.5680193901062, + "p95": 502.81602144241333, + "p99": 516.2879824638367 + }, + "roundtrip": { + "p50": 
498.23999404907227, + "p90": 501.5680193901062, + "p95": 502.81602144241333, + "p99": 516.2879824638367 + }, + "isolatedSum": { + "p50": 996.4799880981445, + "p90": 1003.1360387802124, + "p95": 1005.6320428848267, + "p99": 1032.5759649276733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 942.8160190582275, + "p90": 945.9199905395508, + "p95": 946.943998336792, + "p99": 953.4080028533936 + }, + "combine": { + "p50": 942.8160190582275, + "p90": 945.9199905395508, + "p95": 946.943998336792, + "p99": 953.4080028533936 + }, + "roundtrip": { + "p50": 942.8160190582275, + "p90": 945.9199905395508, + "p95": 946.943998336792, + "p99": 953.4080028533936 + }, + "isolatedSum": { + "p50": 1885.632038116455, + "p90": 1891.8399810791016, + "p95": 1893.887996673584, + "p99": 1906.816005706787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7aff3b91", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b300_1fa8fca1", + "comparisonKey": "3c7385255c2add4b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:27.865424+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 82.59200304746628, + "p90": 84.79999750852585, + "p95": 85.34400165081024, + "p99": 89.59999680519104 + }, + "combine": { + "p50": 82.59200304746628, + "p90": 84.79999750852585, + "p95": 85.34400165081024, + "p99": 89.59999680519104 + }, + "roundtrip": { + "p50": 82.59200304746628, + "p90": 84.79999750852585, + "p95": 85.34400165081024, + "p99": 89.59999680519104 + }, + "isolatedSum": { + "p50": 165.18400609493256, + "p90": 169.5999950170517, + "p95": 170.68800330162048, + "p99": 179.19999361038208 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 121.72800302505493, + "p90": 123.99999797344208, + "p95": 127.68000364303589, + "p99": 140.6719982624054 + }, + "combine": { + "p50": 121.72800302505493, + "p90": 123.99999797344208, + "p95": 127.68000364303589, + "p99": 140.6719982624054 + }, + "roundtrip": { + "p50": 121.72800302505493, + "p90": 123.99999797344208, + "p95": 127.68000364303589, + "p99": 140.6719982624054 + }, + "isolatedSum": { + "p50": 243.45600605010986, + "p90": 247.99999594688416, + "p95": 255.36000728607178, + "p99": 281.3439965248108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 209.21599864959717, + "p90": 211.5200012922287, + "p95": 212.76800334453583, + "p99": 226.8799990415573 + }, + "combine": { + "p50": 209.21599864959717, + "p90": 211.5200012922287, + "p95": 212.76800334453583, + "p99": 226.8799990415573 + }, + "roundtrip": { + "p50": 209.21599864959717, + "p90": 211.5200012922287, + "p95": 212.76800334453583, + "p99": 226.8799990415573 + }, + "isolatedSum": { + "p50": 418.43199729919434, + "p90": 423.0400025844574, + "p95": 425.53600668907166, + "p99": 453.7599980831146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 382.01600313186646, + "p90": 385.0879967212677, + "p95": 
385.6959939002991, + "p99": 388.5439932346344 + }, + "combine": { + "p50": 382.01600313186646, + "p90": 385.0879967212677, + "p95": 385.6959939002991, + "p99": 388.5439932346344 + }, + "roundtrip": { + "p50": 382.01600313186646, + "p90": 385.0879967212677, + "p95": 385.6959939002991, + "p99": 388.5439932346344 + }, + "isolatedSum": { + "p50": 764.0320062637329, + "p90": 770.1759934425354, + "p95": 771.3919878005981, + "p99": 777.0879864692688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 720.192015171051, + "p90": 723.3920097351074, + "p95": 724.8640060424805, + "p99": 767.3599720001221 + }, + "combine": { + "p50": 720.192015171051, + "p90": 723.3920097351074, + "p95": 724.8640060424805, + "p99": 767.3599720001221 + }, + "roundtrip": { + "p50": 720.192015171051, + "p90": 723.3920097351074, + "p95": 724.8640060424805, + "p99": 767.3599720001221 + }, + "isolatedSum": { + "p50": 1440.384030342102, + "p90": 1446.7840194702148, + "p95": 1449.728012084961, + "p99": 1534.7199440002441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1392.2239542007446, + "p90": 1396.5760469436646, + "p95": 1399.6800184249878, + "p99": 1512.0960474014282 + }, + "combine": { + "p50": 1392.2239542007446, + "p90": 1396.5760469436646, + "p95": 1399.6800184249878, + "p99": 1512.0960474014282 + }, + "roundtrip": { + "p50": 1392.2239542007446, + "p90": 1396.5760469436646, + "p95": 1399.6800184249878, + "p99": 1512.0960474014282 + }, + "isolatedSum": { + "p50": 
2784.4479084014893, + "p90": 2793.152093887329, + "p95": 2799.3600368499756, + "p99": 3024.1920948028564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2a305bb5", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_1ad937d8", + "comparisonKey": "5e4f70e922637c02", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:29.486632+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 47.488000243902206, + "p90": 49.18399825692177, + "p95": 49.855999648571014, + "p99": 58.559998869895935 + }, + "combine": { + "p50": 47.488000243902206, + "p90": 49.18399825692177, + "p95": 49.855999648571014, + "p99": 58.559998869895935 + }, + "roundtrip": { + "p50": 47.488000243902206, + "p90": 49.18399825692177, + "p95": 49.855999648571014, + "p99": 58.559998869895935 + }, + "isolatedSum": { + "p50": 94.97600048780441, + "p90": 98.36799651384354, + "p95": 99.71199929714203, + "p99": 117.11999773979187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 57.56799876689911, + "p90": 59.58399921655655, + "p95": 60.67200005054474, + "p99": 72.03199714422226 + }, + "combine": { + "p50": 57.56799876689911, + "p90": 59.58399921655655, + "p95": 60.67200005054474, + "p99": 72.03199714422226 + }, + "roundtrip": { + "p50": 57.56799876689911, + "p90": 59.58399921655655, + "p95": 60.67200005054474, + "p99": 72.03199714422226 + }, + "isolatedSum": { + "p50": 115.13599753379822, + "p90": 119.1679984331131, + "p95": 121.34400010108948, + "p99": 144.06399428844452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + 
"recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 127.80800461769104, + "p90": 130.62399625778198, + "p95": 131.58400356769562, + "p99": 140.44800400733948 + }, + "combine": { + "p50": 127.80800461769104, + "p90": 130.62399625778198, + "p95": 131.58400356769562, + "p99": 140.44800400733948 + }, + "roundtrip": { + "p50": 127.80800461769104, + "p90": 130.62399625778198, + "p95": 131.58400356769562, + "p99": 140.44800400733948 + }, + "isolatedSum": { + "p50": 255.61600923538208, + "p90": 261.24799251556396, + "p95": 263.16800713539124, + "p99": 280.89600801467896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5fa4cc14", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b300_06cd6e2d", + "comparisonKey": "f535f3c2d529108d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:32:26.016839+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": 
"bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 56.32000043988228, + "p90": 58.079998940229416, + "p95": 61.37600168585777, + "p99": 79.58400249481201 + }, + "combine": { + "p50": 56.32000043988228, + "p90": 58.079998940229416, + "p95": 61.37600168585777, + "p99": 79.58400249481201 + }, + "roundtrip": { + "p50": 56.32000043988228, + "p90": 58.079998940229416, + "p95": 61.37600168585777, + "p99": 79.58400249481201 + }, + "isolatedSum": { + "p50": 112.64000087976456, + "p90": 116.15999788045883, + "p95": 122.75200337171555, + "p99": 159.16800498962402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 68.2239979505539, + "p90": 69.63200122117996, + "p95": 70.43199986219406, + 
"p99": 81.79199695587158 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 69.63200122117996, + "p95": 70.43199986219406, + "p99": 81.79199695587158 + }, + "roundtrip": { + "p50": 68.2239979505539, + "p90": 69.63200122117996, + "p95": 70.43199986219406, + "p99": 81.79199695587158 + }, + "isolatedSum": { + "p50": 136.4479959011078, + "p90": 139.26400244235992, + "p95": 140.86399972438812, + "p99": 163.58399391174316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 97.63199836015701, + "p90": 99.20000284910202, + "p95": 99.67999905347824, + "p99": 104.35199737548828 + }, + "combine": { + "p50": 97.63199836015701, + "p90": 99.20000284910202, + "p95": 99.67999905347824, + "p99": 104.35199737548828 + }, + "roundtrip": { + "p50": 97.63199836015701, + "p90": 99.20000284910202, + "p95": 99.67999905347824, + "p99": 104.35199737548828 + }, + "isolatedSum": { + "p50": 195.26399672031403, + "p90": 198.40000569820404, + "p95": 199.35999810695648, + "p99": 208.70399475097656 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 157.79200196266174, + "p90": 159.61599349975586, + "p95": 160.51200032234192, + "p99": 164.57599401474 + }, + "combine": { + "p50": 157.79200196266174, + "p90": 159.61599349975586, + "p95": 160.51200032234192, + "p99": 164.57599401474 + }, + "roundtrip": { + "p50": 157.79200196266174, + "p90": 159.61599349975586, + "p95": 160.51200032234192, + "p99": 164.57599401474 + }, + "isolatedSum": { + "p50": 315.5840039253235, + "p90": 319.2319869995117, 
+ "p95": 321.02400064468384, + "p99": 329.15198802948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 282.5919985771179, + "p90": 284.09600257873535, + "p95": 284.7360074520111, + "p99": 288.09601068496704 + }, + "combine": { + "p50": 282.5919985771179, + "p90": 284.09600257873535, + "p95": 284.7360074520111, + "p99": 288.09601068496704 + }, + "roundtrip": { + "p50": 282.5919985771179, + "p90": 284.09600257873535, + "p95": 284.7360074520111, + "p99": 288.09601068496704 + }, + "isolatedSum": { + "p50": 565.1839971542358, + "p90": 568.1920051574707, + "p95": 569.4720149040222, + "p99": 576.1920213699341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 533.2159996032715, + "p90": 535.5520248413086, + "p95": 536.4159941673279, + "p99": 553.7919998168945 + }, + "combine": { + "p50": 533.2159996032715, + "p90": 535.5520248413086, + "p95": 536.4159941673279, + "p99": 553.7919998168945 + }, + "roundtrip": { + "p50": 533.2159996032715, + "p90": 535.5520248413086, + "p95": 536.4159941673279, + "p99": 553.7919998168945 + }, + "isolatedSum": { + "p50": 1066.431999206543, + "p90": 1071.1040496826172, + "p95": 1072.8319883346558, + "p99": 1107.583999633789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c1f671ed", + "identity": 
"b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b300_632c3d77", + "comparisonKey": "e64ae7f524bed889", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:30:20.938191+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.52000135183334, + "p90": 77.53600180149078, + "p95": 78.015998005867, + "p99": 80.64000308513641 + }, + "combine": { + "p50": 75.52000135183334, + "p90": 77.53600180149078, + "p95": 78.015998005867, + "p99": 80.64000308513641 + }, + "roundtrip": { + "p50": 75.52000135183334, + "p90": 77.53600180149078, + "p95": 78.015998005867, + "p99": 80.64000308513641 + }, + "isolatedSum": { + "p50": 151.0400027036667, + "p90": 155.07200360298157, + "p95": 156.031996011734, + "p99": 161.28000617027283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 113.37599903345108, + "p90": 114.62400108575821, + "p95": 114.88000303506851, + "p99": 116.06399714946747 + }, + "combine": { + "p50": 113.37599903345108, + "p90": 114.62400108575821, + "p95": 114.88000303506851, + "p99": 116.06399714946747 + }, + "roundtrip": { + "p50": 113.37599903345108, + "p90": 114.62400108575821, + "p95": 114.88000303506851, + "p99": 116.06399714946747 + }, + "isolatedSum": { + "p50": 226.75199806690216, + "p90": 229.24800217151642, + "p95": 229.76000607013702, + "p99": 232.12799429893494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.75199282169342, + "p90": 196.25599682331085, + "p95": 196.79999351501465, + "p99": 199.26400482654572 + }, + "combine": { + "p50": 194.75199282169342, + "p90": 196.25599682331085, + "p95": 196.79999351501465, + 
"p99": 199.26400482654572 + }, + "roundtrip": { + "p50": 194.75199282169342, + "p90": 196.25599682331085, + "p95": 196.79999351501465, + "p99": 199.26400482654572 + }, + "isolatedSum": { + "p50": 389.50398564338684, + "p90": 392.5119936466217, + "p95": 393.5999870300293, + "p99": 398.52800965309143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 369.6959912776947, + "p90": 372.1599876880646, + "p95": 372.99200892448425, + "p99": 376.1279881000519 + }, + "combine": { + "p50": 369.6959912776947, + "p90": 372.1599876880646, + "p95": 372.99200892448425, + "p99": 376.1279881000519 + }, + "roundtrip": { + "p50": 369.6959912776947, + "p90": 372.1599876880646, + "p95": 372.99200892448425, + "p99": 376.1279881000519 + }, + "isolatedSum": { + "p50": 739.3919825553894, + "p90": 744.3199753761292, + "p95": 745.9840178489685, + "p99": 752.2559762001038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 745.0240254402161, + "p90": 749.2160201072693, + "p95": 751.2000203132629, + "p99": 753.5359859466553 + }, + "combine": { + "p50": 745.0240254402161, + "p90": 749.2160201072693, + "p95": 751.2000203132629, + "p99": 753.5359859466553 + }, + "roundtrip": { + "p50": 745.0240254402161, + "p90": 749.2160201072693, + "p95": 751.2000203132629, + "p99": 753.5359859466553 + }, + "isolatedSum": { + "p50": 1490.0480508804321, + "p90": 1498.4320402145386, + "p95": 1502.4000406265259, + "p99": 1507.0719718933105 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1492.319941520691, + "p90": 1496.9279766082764, + "p95": 1498.687982559204, + "p99": 1510.3039741516113 + }, + "combine": { + "p50": 1492.319941520691, + "p90": 1496.9279766082764, + "p95": 1498.687982559204, + "p99": 1510.3039741516113 + }, + "roundtrip": { + "p50": 1492.319941520691, + "p90": 1496.9279766082764, + "p95": 1498.687982559204, + "p99": 1510.3039741516113 + }, + "isolatedSum": { + "p50": 2984.639883041382, + "p90": 2993.8559532165527, + "p95": 2997.375965118408, + "p99": 3020.6079483032227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a9c2b0c", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b300_5a363747", + "comparisonKey": "6be3492988aaab41", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:46.352508+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": 
"hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.63200122117996, + "p90": 70.94399631023407, + "p95": 71.84000313282013, + "p99": 77.2479996085167 + }, + "combine": { + "p50": 69.63200122117996, + "p90": 70.94399631023407, + "p95": 71.84000313282013, + "p99": 77.2479996085167 + }, + "roundtrip": { + "p50": 69.63200122117996, + "p90": 70.94399631023407, + "p95": 71.84000313282013, + "p99": 77.2479996085167 + }, + "isolatedSum": { + "p50": 139.26400244235992, + "p90": 141.88799262046814, + "p95": 143.68000626564026, + "p99": 154.4959992170334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.51199728250504, + "p90": 98.33600372076035, + "p95": 98.91200065612793, + "p99": 120.99199742078781 + }, + "combine": { + "p50": 96.51199728250504, + "p90": 98.33600372076035, + "p95": 98.91200065612793, + "p99": 120.99199742078781 + }, + "roundtrip": { + "p50": 96.51199728250504, + "p90": 98.33600372076035, + "p95": 98.91200065612793, + "p99": 120.99199742078781 + }, + "isolatedSum": { + "p50": 193.02399456501007, + "p90": 196.6720074415207, + "p95": 197.82400131225586, + "p99": 241.98399484157562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 152.25599706172943, + "p90": 154.4959992170334, + "p95": 155.68000078201294, + "p99": 161.1199975013733 + }, + "combine": { + "p50": 152.25599706172943, + "p90": 154.4959992170334, + "p95": 155.68000078201294, + "p99": 161.1199975013733 + }, + "roundtrip": { + "p50": 152.25599706172943, + "p90": 154.4959992170334, + "p95": 155.68000078201294, + "p99": 161.1199975013733 + }, + "isolatedSum": { + "p50": 304.51199412345886, + "p90": 308.9919984340668, + "p95": 311.3600015640259, + "p99": 322.2399950027466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 270.1759934425354, + "p90": 273.21600914001465, + "p95": 274.4640111923218, + "p99": 285.12001037597656 + }, + "combine": { + "p50": 270.1759934425354, + "p90": 273.21600914001465, + "p95": 274.4640111923218, + "p99": 285.12001037597656 + 
}, + "roundtrip": { + "p50": 270.1759934425354, + "p90": 273.21600914001465, + "p95": 274.4640111923218, + "p99": 285.12001037597656 + }, + "isolatedSum": { + "p50": 540.3519868850708, + "p90": 546.4320182800293, + "p95": 548.9280223846436, + "p99": 570.2400207519531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 494.7200119495392, + "p90": 497.9200065135956, + "p95": 499.4879961013794, + "p99": 508.12798738479614 + }, + "combine": { + "p50": 494.7200119495392, + "p90": 497.9200065135956, + "p95": 499.4879961013794, + "p99": 508.12798738479614 + }, + "roundtrip": { + "p50": 494.7200119495392, + "p90": 497.9200065135956, + "p95": 499.4879961013794, + "p99": 508.12798738479614 + }, + "isolatedSum": { + "p50": 989.4400238990784, + "p90": 995.8400130271912, + "p95": 998.9759922027588, + "p99": 1016.2559747695923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 941.215991973877, + "p90": 945.1519846916199, + "p95": 946.943998336792, + "p99": 2227.7119159698486 + }, + "combine": { + "p50": 941.215991973877, + "p90": 945.1519846916199, + "p95": 946.943998336792, + "p99": 2227.7119159698486 + }, + "roundtrip": { + "p50": 941.215991973877, + "p90": 945.1519846916199, + "p95": 946.943998336792, + "p99": 2227.7119159698486 + }, + "isolatedSum": { + "p50": 1882.431983947754, + "p90": 1890.3039693832397, + "p95": 1893.887996673584, + "p99": 4455.423831939697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + 
"combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-edc4ab11", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b300_82d29707", + "comparisonKey": "4a50e4ca41ac1466", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:31:24.471673+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.24799829721451, + "p90": 70.97599655389786, + "p95": 71.35999947786331, + "p99": 91.839998960495 + }, + "combine": { + "p50": 69.24799829721451, + "p90": 70.97599655389786, + "p95": 71.35999947786331, + "p99": 91.839998960495 + }, + "roundtrip": { + "p50": 69.24799829721451, + "p90": 70.97599655389786, + "p95": 71.35999947786331, + "p99": 91.839998960495 + }, + "isolatedSum": { + "p50": 138.49599659442902, + "p90": 141.95199310779572, + "p95": 142.71999895572662, + "p99": 183.67999792099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.51199728250504, + "p90": 98.30400347709656, + "p95": 99.64799880981445, + "p99": 113.37599903345108 + }, + "combine": { + "p50": 96.51199728250504, + "p90": 98.30400347709656, + "p95": 99.64799880981445, + "p99": 113.37599903345108 + }, + "roundtrip": { + "p50": 96.51199728250504, + "p90": 98.30400347709656, + "p95": 99.64799880981445, + "p99": 113.37599903345108 + }, + "isolatedSum": { + "p50": 193.02399456501007, + "p90": 196.60800695419312, + "p95": 199.2959976196289, + "p99": 226.75199806690216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 152.67199277877808, + "p90": 154.9759954214096, + "p95": 155.87200224399567, + "p99": 166.30400717258453 + }, + "combine": { + "p50": 152.67199277877808, + "p90": 154.9759954214096, + "p95": 155.87200224399567, + "p99": 166.30400717258453 + }, + "roundtrip": { + "p50": 152.67199277877808, + "p90": 154.9759954214096, + "p95": 155.87200224399567, + "p99": 166.30400717258453 + }, + "isolatedSum": { + "p50": 305.34398555755615, + "p90": 309.9519908428192, + "p95": 311.74400448799133, + "p99": 332.60801434516907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 269.79199051856995, + "p90": 273.5680043697357, + "p95": 275.29600262641907, + "p99": 279.04000878334045 + }, + "combine": { + "p50": 269.79199051856995, + "p90": 273.5680043697357, + "p95": 275.29600262641907, + "p99": 279.04000878334045 + }, + "roundtrip": { + "p50": 269.79199051856995, + "p90": 273.5680043697357, + "p95": 275.29600262641907, + "p99": 279.04000878334045 + }, + "isolatedSum": { + "p50": 539.5839810371399, + "p90": 547.1360087394714, + "p95": 550.5920052528381, + "p99": 558.0800175666809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 496.22398614883423, + "p90": 499.32798743247986, + "p95": 500.63997507095337, + "p99": 510.8479857444763 + }, + "combine": { + "p50": 496.22398614883423, + "p90": 499.32798743247986, + "p95": 500.63997507095337, + "p99": 510.8479857444763 + }, + "roundtrip": 
{ + "p50": 496.22398614883423, + "p90": 499.32798743247986, + "p95": 500.63997507095337, + "p99": 510.8479857444763 + }, + "isolatedSum": { + "p50": 992.4479722976685, + "p90": 998.6559748649597, + "p95": 1001.2799501419067, + "p99": 1021.6959714889526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 942.143976688385, + "p90": 945.9519982337952, + "p95": 948.8319754600525, + "p99": 1125.3440380096436 + }, + "combine": { + "p50": 942.143976688385, + "p90": 945.9519982337952, + "p95": 948.8319754600525, + "p99": 1125.3440380096436 + }, + "roundtrip": { + "p50": 942.143976688385, + "p90": 945.9519982337952, + "p95": 948.8319754600525, + "p99": 1125.3440380096436 + }, + "isolatedSum": { + "p50": 1884.28795337677, + "p90": 1891.9039964675903, + "p95": 1897.663950920105, + "p99": 2250.688076019287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-15208ada", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_461b4b32", + "comparisonKey": "8db2c0db35e7638e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:35.435850+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 74.5600014925003, + "p90": 76.19199901819229, + "p95": 76.7040029168129, + "p99": 80.03199845552444 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 76.19199901819229, + "p95": 76.7040029168129, + "p99": 80.03199845552444 + }, + "roundtrip": { + "p50": 74.5600014925003, + "p90": 76.19199901819229, + "p95": 76.7040029168129, + "p99": 80.03199845552444 + }, + "isolatedSum": { + "p50": 149.1200029850006, + "p90": 152.38399803638458, + "p95": 153.4080058336258, + "p99": 160.0639969110489 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 111.64800077676773, + "p90": 112.96000331640244, + "p95": 113.24799805879593, + "p99": 114.33599889278412 + }, + "combine": { + "p50": 111.64800077676773, + "p90": 112.96000331640244, + "p95": 113.24799805879593, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 111.64800077676773, + "p90": 112.96000331640244, + "p95": 113.24799805879593, + "p99": 114.33599889278412 + }, + "isolatedSum": { + "p50": 223.29600155353546, + "p90": 225.92000663280487, + "p95": 226.49599611759186, + "p99": 228.67199778556824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 190.40000438690186, + "p90": 192.28799641132355, + "p95": 192.6400065422058, + "p99": 194.46399807929993 + }, + "combine": { + "p50": 190.40000438690186, + "p90": 192.28799641132355, + "p95": 192.6400065422058, + "p99": 194.46399807929993 + }, + "roundtrip": { + "p50": 190.40000438690186, + "p90": 192.28799641132355, + "p95": 192.6400065422058, + "p99": 194.46399807929993 + }, + "isolatedSum": { + "p50": 380.8000087738037, + "p90": 384.5759928226471, + "p95": 385.2800130844116, + "p99": 388.92799615859985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 395.7439959049225, + "p90": 399.04001355171204, 
+ "p95": 399.77601170539856, + "p99": 402.3039937019348 + }, + "combine": { + "p50": 395.7439959049225, + "p90": 399.04001355171204, + "p95": 399.77601170539856, + "p99": 402.3039937019348 + }, + "roundtrip": { + "p50": 395.7439959049225, + "p90": 399.04001355171204, + "p95": 399.77601170539856, + "p99": 402.3039937019348 + }, + "isolatedSum": { + "p50": 791.487991809845, + "p90": 798.0800271034241, + "p95": 799.5520234107971, + "p99": 804.6079874038696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 767.3919796943665, + "p90": 769.9199914932251, + "p95": 770.687997341156, + "p99": 783.3279967308044 + }, + "combine": { + "p50": 767.3919796943665, + "p90": 769.9199914932251, + "p95": 770.687997341156, + "p99": 783.3279967308044 + }, + "roundtrip": { + "p50": 767.3919796943665, + "p90": 769.9199914932251, + "p95": 770.687997341156, + "p99": 783.3279967308044 + }, + "isolatedSum": { + "p50": 1534.783959388733, + "p90": 1539.8399829864502, + "p95": 1541.375994682312, + "p99": 1566.655993461609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1494.5600032806396, + "p90": 1497.439980506897, + "p95": 1498.4960556030273, + "p99": 1503.3279657363892 + }, + "combine": { + "p50": 1494.5600032806396, + "p90": 1497.439980506897, + "p95": 1498.4960556030273, + "p99": 1503.3279657363892 + }, + "roundtrip": { + "p50": 1494.5600032806396, + "p90": 1497.439980506897, + "p95": 1498.4960556030273, + "p99": 1503.3279657363892 + }, + 
"isolatedSum": { + "p50": 2989.1200065612793, + "p90": 2994.879961013794, + "p95": 2996.9921112060547, + "p99": 3006.6559314727783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-66fc3dc0", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b300_2c1ac2d8", + "comparisonKey": "845030a82268131c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:40.423184+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 72.03199714422226, + "p90": 79.52000200748444, + "p95": 83.36000144481659, + "p99": 92.67199784517288 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 79.52000200748444, + "p95": 83.36000144481659, + "p99": 92.67199784517288 + }, + "roundtrip": { + "p50": 72.03199714422226, + "p90": 79.52000200748444, + "p95": 83.36000144481659, + "p99": 92.67199784517288 + }, + "isolatedSum": { + "p50": 144.06399428844452, + "p90": 159.04000401496887, + "p95": 166.72000288963318, + "p99": 185.34399569034576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 107.90400207042694, + "p90": 109.24799740314484, + "p95": 109.98400300741196, + "p99": 112.06399649381638 + }, + "combine": { + "p50": 107.90400207042694, + "p90": 109.24799740314484, + "p95": 109.98400300741196, + "p99": 112.06399649381638 + }, + "roundtrip": { + "p50": 107.90400207042694, + "p90": 109.24799740314484, + "p95": 109.98400300741196, + "p99": 112.06399649381638 + }, + "isolatedSum": { + "p50": 215.80800414085388, + "p90": 218.49599480628967, + "p95": 219.9680060148239, + "p99": 224.12799298763275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + 
"fanoutMean": 1.55615234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 189.53600525856018, + "p90": 190.94400107860565, + "p95": 191.48799777030945, + "p99": 195.6160068511963 + }, + "combine": { + "p50": 189.53600525856018, + "p90": 190.94400107860565, + "p95": 191.48799777030945, + "p99": 195.6160068511963 + }, + "roundtrip": { + "p50": 189.53600525856018, + "p90": 190.94400107860565, + "p95": 191.48799777030945, + "p99": 195.6160068511963 + }, + "isolatedSum": { + "p50": 379.07201051712036, + "p90": 381.8880021572113, + "p95": 382.9759955406189, + "p99": 391.2320137023926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 358.7520122528076, + "p90": 360.4159951210022, + "p95": 360.9279990196228, + "p99": 362.7200126647949 + }, + "combine": { + "p50": 358.7520122528076, + "p90": 360.4159951210022, + "p95": 360.9279990196228, + "p99": 362.7200126647949 + }, + "roundtrip": { + "p50": 358.7520122528076, + "p90": 360.4159951210022, + "p95": 360.9279990196228, + "p99": 362.7200126647949 + }, + "isolatedSum": { + "p50": 717.5040245056152, + "p90": 720.8319902420044, + "p95": 721.8559980392456, + "p99": 725.4400253295898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 679.3919801712036, + "p90": 681.8879842758179, + "p95": 682.5600266456604, + "p99": 684.2560172080994 + }, + "combine": { + "p50": 
679.3919801712036, + "p90": 681.8879842758179, + "p95": 682.5600266456604, + "p99": 684.2560172080994 + }, + "roundtrip": { + "p50": 679.3919801712036, + "p90": 681.8879842758179, + "p95": 682.5600266456604, + "p99": 684.2560172080994 + }, + "isolatedSum": { + "p50": 1358.7839603424072, + "p90": 1363.7759685516357, + "p95": 1365.1200532913208, + "p99": 1368.5120344161987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1324.895977973938, + "p90": 1327.712059020996, + "p95": 1328.4480571746826, + "p99": 1336.7040157318115 + }, + "combine": { + "p50": 1324.895977973938, + "p90": 1327.712059020996, + "p95": 1328.4480571746826, + "p99": 1336.7040157318115 + }, + "roundtrip": { + "p50": 1324.895977973938, + "p90": 1327.712059020996, + "p95": 1328.4480571746826, + "p99": 1336.7040157318115 + }, + "isolatedSum": { + "p50": 2649.791955947876, + "p90": 2655.424118041992, + "p95": 2656.8961143493652, + "p99": 2673.408031463623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d6181a83", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b300_83d3b3b6", + "comparisonKey": "22809447254ab834", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:40.877493+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + 
"suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 70.14399766921997, + "p90": 71.6480016708374, + "p95": 72.28799909353256, + "p99": 78.49600166082382 + }, + "combine": { + "p50": 70.14399766921997, + "p90": 71.6480016708374, + "p95": 72.28799909353256, + "p99": 78.49600166082382 + }, + "roundtrip": { + "p50": 70.14399766921997, + "p90": 71.6480016708374, + "p95": 
72.28799909353256, + "p99": 78.49600166082382 + }, + "isolatedSum": { + "p50": 140.28799533843994, + "p90": 143.2960033416748, + "p95": 144.57599818706512, + "p99": 156.99200332164764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 98.1760025024414, + "p90": 99.71199929714203, + "p95": 100.44799745082855, + "p99": 105.98400235176086 + }, + "combine": { + "p50": 98.1760025024414, + "p90": 99.71199929714203, + "p95": 100.44799745082855, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 98.1760025024414, + "p90": 99.71199929714203, + "p95": 100.44799745082855, + "p99": 105.98400235176086 + }, + "isolatedSum": { + "p50": 196.3520050048828, + "p90": 199.42399859428406, + "p95": 200.8959949016571, + "p99": 211.96800470352173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 156.47999942302704, + "p90": 158.4320068359375, + "p95": 159.16800498962402, + "p99": 162.4000072479248 + }, + "combine": { + "p50": 156.47999942302704, + "p90": 158.4320068359375, + "p95": 159.16800498962402, + "p99": 162.4000072479248 + }, + "roundtrip": { + "p50": 156.47999942302704, + "p90": 158.4320068359375, + "p95": 159.16800498962402, + "p99": 162.4000072479248 + }, + "isolatedSum": { + "p50": 312.9599988460541, + "p90": 316.864013671875, + "p95": 318.33600997924805, + "p99": 324.8000144958496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 8, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 276.8639922142029, + "p90": 279.35999631881714, + "p95": 280.44798970222473, + "p99": 293.5360074043274 + }, + "combine": { + "p50": 276.8639922142029, + "p90": 279.35999631881714, + "p95": 280.44798970222473, + "p99": 293.5360074043274 + }, + "roundtrip": { + "p50": 276.8639922142029, + "p90": 279.35999631881714, + "p95": 280.44798970222473, + "p99": 293.5360074043274 + }, + "isolatedSum": { + "p50": 553.7279844284058, + "p90": 558.7199926376343, + "p95": 560.8959794044495, + "p99": 587.0720148086548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 514.7519707679749, + "p90": 518.3359980583191, + "p95": 519.2639827728271, + "p99": 536.2240076065063 + }, + "combine": { + "p50": 514.7519707679749, + "p90": 518.3359980583191, + "p95": 519.2639827728271, + "p99": 536.2240076065063 + }, + "roundtrip": { + "p50": 514.7519707679749, + "p90": 518.3359980583191, + "p95": 519.2639827728271, + "p99": 536.2240076065063 + }, + "isolatedSum": { + "p50": 1029.5039415359497, + "p90": 1036.6719961166382, + "p95": 1038.5279655456543, + "p99": 1072.4480152130127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 986.240029335022, + "p90": 990.015983581543, + "p95": 991.8720126152039, + "p99": 998.9439845085144 + }, + "combine": { + "p50": 986.240029335022, + "p90": 990.015983581543, + 
"p95": 991.8720126152039, + "p99": 998.9439845085144 + }, + "roundtrip": { + "p50": 986.240029335022, + "p90": 990.015983581543, + "p95": 991.8720126152039, + "p99": 998.9439845085144 + }, + "isolatedSum": { + "p50": 1972.480058670044, + "p90": 1980.031967163086, + "p95": 1983.7440252304077, + "p99": 1997.8879690170288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4fd75660", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b300_ffeff7f3", + "comparisonKey": "0880112104f2ce03", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:04.515672+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.34399968385696, + "p90": 75.07199794054031, + "p95": 75.52000135183334, + "p99": 85.28000116348267 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 75.07199794054031, + "p95": 75.52000135183334, + "p99": 85.28000116348267 + }, + "roundtrip": { + "p50": 73.34399968385696, + "p90": 75.07199794054031, + "p95": 75.52000135183334, + "p99": 85.28000116348267 + }, + "isolatedSum": { + "p50": 146.68799936771393, + "p90": 150.14399588108063, + "p95": 151.0400027036667, + "p99": 170.56000232696533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 110.30399799346924, + "p90": 111.93600296974182, + "p95": 112.28799819946289, + "p99": 128.7360042333603 + }, + "combine": { + "p50": 110.30399799346924, + "p90": 111.93600296974182, + "p95": 112.28799819946289, + "p99": 128.7360042333603 + }, + "roundtrip": { + "p50": 110.30399799346924, + "p90": 111.93600296974182, + "p95": 112.28799819946289, + "p99": 128.7360042333603 + }, + "isolatedSum": { + "p50": 220.60799598693848, + "p90": 
223.87200593948364, + "p95": 224.57599639892578, + "p99": 257.4720084667206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 187.9040002822876, + "p90": 189.60000574588776, + "p95": 189.95200097560883, + "p99": 191.71200692653656 + }, + "combine": { + "p50": 187.9040002822876, + "p90": 189.60000574588776, + "p95": 189.95200097560883, + "p99": 191.71200692653656 + }, + "roundtrip": { + "p50": 187.9040002822876, + "p90": 189.60000574588776, + "p95": 189.95200097560883, + "p99": 191.71200692653656 + }, + "isolatedSum": { + "p50": 375.8080005645752, + "p90": 379.2000114917755, + "p95": 379.90400195121765, + "p99": 383.4240138530731 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 356.57599568367004, + "p90": 359.9039912223816, + "p95": 361.34400963783264, + "p99": 370.1759874820709 + }, + "combine": { + "p50": 356.57599568367004, + "p90": 359.9039912223816, + "p95": 361.34400963783264, + "p99": 370.1759874820709 + }, + "roundtrip": { + "p50": 356.57599568367004, + "p90": 359.9039912223816, + "p95": 361.34400963783264, + "p99": 370.1759874820709 + }, + "isolatedSum": { + "p50": 713.1519913673401, + "p90": 719.8079824447632, + "p95": 722.6880192756653, + "p99": 740.3519749641418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 721.88800573349, + "p90": 726.0159850120544, + "p95": 727.3920178413391, + "p99": 733.4079742431641 + }, + "combine": { + "p50": 721.88800573349, + "p90": 726.0159850120544, + "p95": 727.3920178413391, + "p99": 733.4079742431641 + }, + "roundtrip": { + "p50": 721.88800573349, + "p90": 726.0159850120544, + "p95": 727.3920178413391, + "p99": 733.4079742431641 + }, + "isolatedSum": { + "p50": 1443.77601146698, + "p90": 1452.031970024109, + "p95": 1454.7840356826782, + "p99": 1466.8159484863281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1441.3440227508545, + "p90": 1447.167992591858, + "p95": 1449.728012084961, + "p99": 1526.2720584869385 + }, + "combine": { + "p50": 1441.3440227508545, + "p90": 1447.167992591858, + "p95": 1449.728012084961, + "p99": 1526.2720584869385 + }, + "roundtrip": { + "p50": 1441.3440227508545, + "p90": 1447.167992591858, + "p95": 1449.728012084961, + "p99": 1526.2720584869385 + }, + "isolatedSum": { + "p50": 2882.688045501709, + "p90": 2894.335985183716, + "p95": 2899.456024169922, + "p99": 3052.544116973877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4c1fe270", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b300_7f54372b", + "comparisonKey": "823f2517dbe02270", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:37.795495+00:00", + "status": "valid", 
+ "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.60000097751617, + "p90": 71.26399874687195, + "p95": 71.77600264549255, + "p99": 81.02399855852127 + }, + "combine": { + "p50": 
69.60000097751617, + "p90": 71.26399874687195, + "p95": 71.77600264549255, + "p99": 81.02399855852127 + }, + "roundtrip": { + "p50": 69.60000097751617, + "p90": 71.26399874687195, + "p95": 71.77600264549255, + "p99": 81.02399855852127 + }, + "isolatedSum": { + "p50": 139.20000195503235, + "p90": 142.5279974937439, + "p95": 143.5520052909851, + "p99": 162.04799711704254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 97.43999689817429, + "p90": 98.75199943780899, + "p95": 99.5199978351593, + "p99": 105.31199723482132 + }, + "combine": { + "p50": 97.43999689817429, + "p90": 98.75199943780899, + "p95": 99.5199978351593, + "p99": 105.31199723482132 + }, + "roundtrip": { + "p50": 97.43999689817429, + "p90": 98.75199943780899, + "p95": 99.5199978351593, + "p99": 105.31199723482132 + }, + "isolatedSum": { + "p50": 194.87999379634857, + "p90": 197.50399887561798, + "p95": 199.0399956703186, + "p99": 210.62399446964264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 152.76800096035004, + "p90": 155.008003115654, + "p95": 156.25600516796112, + "p99": 174.0799993276596 + }, + "combine": { + "p50": 152.76800096035004, + "p90": 155.008003115654, + "p95": 156.25600516796112, + "p99": 174.0799993276596 + }, + "roundtrip": { + "p50": 152.76800096035004, + "p90": 155.008003115654, + "p95": 156.25600516796112, + "p99": 174.0799993276596 + }, + "isolatedSum": { + "p50": 305.5360019207001, + "p90": 310.016006231308, + "p95": 312.51201033592224, + "p99": 
348.1599986553192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 269.98400688171387, + "p90": 273.6319899559021, + "p95": 274.6880054473877, + "p99": 284.1919958591461 + }, + "combine": { + "p50": 269.98400688171387, + "p90": 273.6319899559021, + "p95": 274.6880054473877, + "p99": 284.1919958591461 + }, + "roundtrip": { + "p50": 269.98400688171387, + "p90": 273.6319899559021, + "p95": 274.6880054473877, + "p99": 284.1919958591461 + }, + "isolatedSum": { + "p50": 539.9680137634277, + "p90": 547.2639799118042, + "p95": 549.3760108947754, + "p99": 568.3839917182922 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 497.0879852771759, + "p90": 500.70399045944214, + "p95": 501.72799825668335, + "p99": 675.9679913520813 + }, + "combine": { + "p50": 497.0879852771759, + "p90": 500.70399045944214, + "p95": 501.72799825668335, + "p99": 675.9679913520813 + }, + "roundtrip": { + "p50": 497.0879852771759, + "p90": 500.70399045944214, + "p95": 501.72799825668335, + "p99": 675.9679913520813 + }, + "isolatedSum": { + "p50": 994.1759705543518, + "p90": 1001.4079809188843, + "p95": 1003.4559965133667, + "p99": 1351.9359827041626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + 
"p50": 943.6479806900024, + "p90": 946.3040232658386, + "p95": 947.808027267456, + "p99": 1111.232042312622 + }, + "combine": { + "p50": 943.6479806900024, + "p90": 946.3040232658386, + "p95": 947.808027267456, + "p99": 1111.232042312622 + }, + "roundtrip": { + "p50": 943.6479806900024, + "p90": 946.3040232658386, + "p95": 947.808027267456, + "p99": 1111.232042312622 + }, + "isolatedSum": { + "p50": 1887.2959613800049, + "p90": 1892.6080465316772, + "p95": 1895.616054534912, + "p99": 2222.464084625244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b54d5bda", + "identity": "b300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_c6a488a8", + "comparisonKey": "7693a081b531505a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:02.826546+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 
null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 74.46400076150894, + "p90": 76.12799853086472, + "p95": 76.54400169849396, + "p99": 81.98399841785431 + }, + "combine": { + "p50": 74.46400076150894, + "p90": 76.12799853086472, + "p95": 76.54400169849396, + "p99": 81.98399841785431 + }, + "roundtrip": { + "p50": 74.46400076150894, + "p90": 76.12799853086472, + "p95": 76.54400169849396, + "p99": 81.98399841785431 + }, + "isolatedSum": { + "p50": 148.92800152301788, + "p90": 152.25599706172943, + "p95": 153.08800339698792, + "p99": 163.96799683570862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 111.35999858379364, + "p90": 112.83200234174728, + "p95": 113.27999830245972, + "p99": 114.23999816179276 + }, + "combine": { + "p50": 111.35999858379364, + "p90": 112.83200234174728, + "p95": 113.27999830245972, + "p99": 114.23999816179276 + 
}, + "roundtrip": { + "p50": 111.35999858379364, + "p90": 112.83200234174728, + "p95": 113.27999830245972, + "p99": 114.23999816179276 + }, + "isolatedSum": { + "p50": 222.71999716758728, + "p90": 225.66400468349457, + "p95": 226.55999660491943, + "p99": 228.4799963235855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 190.75199961662292, + "p90": 192.57600605487823, + "p95": 193.31200420856476, + "p99": 198.62399995326996 + }, + "combine": { + "p50": 190.75199961662292, + "p90": 192.57600605487823, + "p95": 193.31200420856476, + "p99": 198.62399995326996 + }, + "roundtrip": { + "p50": 190.75199961662292, + "p90": 192.57600605487823, + "p95": 193.31200420856476, + "p99": 198.62399995326996 + }, + "isolatedSum": { + "p50": 381.50399923324585, + "p90": 385.15201210975647, + "p95": 386.6240084171295, + "p99": 397.2479999065399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 397.024005651474, + "p90": 400.7039964199066, + "p95": 402.3039937019348, + "p99": 573.4080076217651 + }, + "combine": { + "p50": 397.024005651474, + "p90": 400.7039964199066, + "p95": 402.3039937019348, + "p99": 573.4080076217651 + }, + "roundtrip": { + "p50": 397.024005651474, + "p90": 400.7039964199066, + "p95": 402.3039937019348, + "p99": 573.4080076217651 + }, + "isolatedSum": { + "p50": 794.048011302948, + "p90": 801.4079928398132, + "p95": 804.6079874038696, + "p99": 1146.8160152435303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, 
+ "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 767.8719758987427, + "p90": 770.687997341156, + "p95": 772.2240090370178, + "p99": 787.5199913978577 + }, + "combine": { + "p50": 767.8719758987427, + "p90": 770.687997341156, + "p95": 772.2240090370178, + "p99": 787.5199913978577 + }, + "roundtrip": { + "p50": 767.8719758987427, + "p90": 770.687997341156, + "p95": 772.2240090370178, + "p99": 787.5199913978577 + }, + "isolatedSum": { + "p50": 1535.7439517974854, + "p90": 1541.375994682312, + "p95": 1544.4480180740356, + "p99": 1575.0399827957153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1496.2879419326782, + "p90": 1500.0640153884888, + "p95": 1501.2480020523071, + "p99": 1552.5120496749878 + }, + "combine": { + "p50": 1496.2879419326782, + "p90": 1500.0640153884888, + "p95": 1501.2480020523071, + "p99": 1552.5120496749878 + }, + "roundtrip": { + "p50": 1496.2879419326782, + "p90": 1500.0640153884888, + "p95": 1501.2480020523071, + "p99": 1552.5120496749878 + }, + "isolatedSum": { + "p50": 2992.5758838653564, + "p90": 3000.1280307769775, + "p95": 3002.4960041046143, + "p99": 3105.0240993499756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a4ed5f30", + "identity": 
"b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_d5a1b306", + "comparisonKey": "c18afd56c5b4ea67", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:36.248379+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_04", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + 
"createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.69600170850754, + "p90": 71.10399752855301, + "p95": 71.74400240182877, + "p99": 78.59200239181519 + }, + "combine": { + "p50": 69.69600170850754, + "p90": 71.10399752855301, + "p95": 71.74400240182877, + "p99": 78.59200239181519 + }, + "roundtrip": { + "p50": 69.69600170850754, + "p90": 71.10399752855301, + "p95": 71.74400240182877, + "p99": 78.59200239181519 + }, + "isolatedSum": { + "p50": 139.39200341701508, + "p90": 142.20799505710602, + "p95": 143.48800480365753, + "p99": 157.18400478363037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.70399874448776, + "p90": 98.43199700117111, + "p95": 99.13600236177444, + "p99": 110.07999628782272 + }, + "combine": { + "p50": 96.70399874448776, + "p90": 98.43199700117111, + "p95": 99.13600236177444, + "p99": 110.07999628782272 + }, + "roundtrip": { + "p50": 96.70399874448776, + "p90": 98.43199700117111, + "p95": 99.13600236177444, + "p99": 110.07999628782272 + }, + "isolatedSum": { + "p50": 193.40799748897552, + "p90": 196.86399400234222, + "p95": 198.2720047235489, + "p99": 220.15999257564545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 152.70400047302246, + "p90": 155.96799552440643, + "p95": 157.47199952602386, + "p99": 161.76000237464905 + }, + "combine": { + "p50": 152.70400047302246, + "p90": 
155.96799552440643, + "p95": 157.47199952602386, + "p99": 161.76000237464905 + }, + "roundtrip": { + "p50": 152.70400047302246, + "p90": 155.96799552440643, + "p95": 157.47199952602386, + "p99": 161.76000237464905 + }, + "isolatedSum": { + "p50": 305.4080009460449, + "p90": 311.93599104881287, + "p95": 314.94399905204773, + "p99": 323.5200047492981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 268.44799518585205, + "p90": 271.263986825943, + "p95": 272.3200023174286, + "p99": 289.5359992980957 + }, + "combine": { + "p50": 268.44799518585205, + "p90": 271.263986825943, + "p95": 272.3200023174286, + "p99": 289.5359992980957 + }, + "roundtrip": { + "p50": 268.44799518585205, + "p90": 271.263986825943, + "p95": 272.3200023174286, + "p99": 289.5359992980957 + }, + "isolatedSum": { + "p50": 536.8959903717041, + "p90": 542.527973651886, + "p95": 544.6400046348572, + "p99": 579.0719985961914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 497.5360035896301, + "p90": 500.7359981536865, + "p95": 502.9119849205017, + "p99": 655.8719873428345 + }, + "combine": { + "p50": 497.5360035896301, + "p90": 500.7359981536865, + "p95": 502.9119849205017, + "p99": 655.8719873428345 + }, + "roundtrip": { + "p50": 497.5360035896301, + "p90": 500.7359981536865, + "p95": 502.9119849205017, + "p99": 655.8719873428345 + }, + "isolatedSum": { + "p50": 995.0720071792603, + "p90": 1001.471996307373, + "p95": 1005.8239698410034, + "p99": 1311.743974685669 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 948.8319754600525, + "p90": 953.1199932098389, + "p95": 955.1360011100769, + "p99": 1154.4959545135498 + }, + "combine": { + "p50": 948.8319754600525, + "p90": 953.1199932098389, + "p95": 955.1360011100769, + "p99": 1154.4959545135498 + }, + "roundtrip": { + "p50": 948.8319754600525, + "p90": 953.1199932098389, + "p95": 955.1360011100769, + "p99": 1154.4959545135498 + }, + "isolatedSum": { + "p50": 1897.663950920105, + "p90": 1906.2399864196777, + "p95": 1910.2720022201538, + "p99": 2308.9919090270996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6d6938a6", + "identity": "b300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_f4952abc", + "comparisonKey": "5045578e6160fd2b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:33:29.675503+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + 
"routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.31199878454208, + "p90": 70.97599655389786, + "p95": 71.35999947786331, + "p99": 86.496002972126 + }, + "combine": { + "p50": 69.31199878454208, + "p90": 70.97599655389786, + "p95": 71.35999947786331, + "p99": 86.496002972126 + }, + "roundtrip": { + "p50": 69.31199878454208, + "p90": 70.97599655389786, + "p95": 71.35999947786331, + "p99": 86.496002972126 + }, + "isolatedSum": { + "p50": 138.62399756908417, + "p90": 141.95199310779572, + "p95": 142.71999895572662, + "p99": 172.992005944252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.89600020647049, + "p90": 98.39999675750732, + "p95": 99.55199807882309, + "p99": 120.2239990234375 + }, + "combine": { + "p50": 96.89600020647049, + "p90": 98.39999675750732, + "p95": 99.55199807882309, + "p99": 120.2239990234375 + }, + "roundtrip": { + "p50": 96.89600020647049, + "p90": 98.39999675750732, + "p95": 99.55199807882309, + "p99": 120.2239990234375 + }, + "isolatedSum": { + "p50": 193.79200041294098, + "p90": 196.79999351501465, + "p95": 199.10399615764618, + "p99": 240.447998046875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 152.0960032939911, + "p90": 154.65599298477173, + "p95": 155.13600409030914, + "p99": 157.98400342464447 + }, + "combine": { + "p50": 152.0960032939911, + "p90": 154.65599298477173, + "p95": 155.13600409030914, + "p99": 157.98400342464447 + }, + "roundtrip": { + "p50": 152.0960032939911, + "p90": 154.65599298477173, + "p95": 155.13600409030914, + "p99": 157.98400342464447 + }, + "isolatedSum": { + "p50": 304.1920065879822, + "p90": 309.31198596954346, + "p95": 310.2720081806183, + "p99": 315.96800684928894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 268.99200677871704, + "p90": 271.7759907245636, + "p95": 272.8320062160492, + "p99": 274.52799677848816 + }, + "combine": { + "p50": 268.99200677871704, + "p90": 271.7759907245636, + "p95": 272.8320062160492, + "p99": 274.52799677848816 + }, + 
"roundtrip": { + "p50": 268.99200677871704, + "p90": 271.7759907245636, + "p95": 272.8320062160492, + "p99": 274.52799677848816 + }, + "isolatedSum": { + "p50": 537.9840135574341, + "p90": 543.5519814491272, + "p95": 545.6640124320984, + "p99": 549.0559935569763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 497.6640045642853, + "p90": 500.86402893066406, + "p95": 502.1439790725708, + "p99": 522.1760272979736 + }, + "combine": { + "p50": 497.6640045642853, + "p90": 500.86402893066406, + "p95": 502.1439790725708, + "p99": 522.1760272979736 + }, + "roundtrip": { + "p50": 497.6640045642853, + "p90": 500.86402893066406, + "p95": 502.1439790725708, + "p99": 522.1760272979736 + }, + "isolatedSum": { + "p50": 995.3280091285706, + "p90": 1001.7280578613281, + "p95": 1004.2879581451416, + "p99": 1044.3520545959473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 948.5120177268982, + "p90": 952.6079893112183, + "p95": 954.2080163955688, + "p99": 967.8720235824585 + }, + "combine": { + "p50": 948.5120177268982, + "p90": 952.6079893112183, + "p95": 954.2080163955688, + "p99": 967.8720235824585 + }, + "roundtrip": { + "p50": 948.5120177268982, + "p90": 952.6079893112183, + "p95": 954.2080163955688, + "p99": 967.8720235824585 + }, + "isolatedSum": { + "p50": 1897.0240354537964, + "p90": 1905.2159786224365, + "p95": 1908.4160327911377, + "p99": 1935.744047164917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + 
"combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bdaa4895", + "identity": "b300|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_d22735ae", + "comparisonKey": "e30791951192637e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:22.130812+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_14", + "sku": "b300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 64.99200314283371, + "p90": 66.72000139951706, + "p95": 67.71200150251389, + "p99": 74.30399954319 + }, + "combine": { + "p50": 64.99200314283371, + "p90": 66.72000139951706, + "p95": 67.71200150251389, + "p99": 74.30399954319 + }, + "roundtrip": { + "p50": 64.99200314283371, + "p90": 66.72000139951706, + "p95": 67.71200150251389, + "p99": 74.30399954319 + }, + "isolatedSum": { + "p50": 129.98400628566742, + "p90": 133.44000279903412, + "p95": 135.42400300502777, + "p99": 148.60799908638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 84.16000008583069, + "p90": 86.04799956083298, + "p95": 86.62399649620056, + "p99": 99.58399832248688 + }, + "combine": { + "p50": 84.16000008583069, + "p90": 86.04799956083298, + "p95": 86.62399649620056, + "p99": 99.58399832248688 + }, + "roundtrip": { + "p50": 84.16000008583069, + "p90": 86.04799956083298, + "p95": 86.62399649620056, + "p99": 99.58399832248688 + }, + "isolatedSum": { + "p50": 168.32000017166138, + "p90": 172.09599912166595, + "p95": 173.24799299240112, + "p99": 199.16799664497375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 130.5920034646988, + "p90": 
132.6719969511032, + "p95": 133.4719955921173, + "p99": 137.11999356746674 + }, + "combine": { + "p50": 130.5920034646988, + "p90": 132.6719969511032, + "p95": 133.4719955921173, + "p99": 137.11999356746674 + }, + "roundtrip": { + "p50": 130.5920034646988, + "p90": 132.6719969511032, + "p95": 133.4719955921173, + "p99": 137.11999356746674 + }, + "isolatedSum": { + "p50": 261.1840069293976, + "p90": 265.3439939022064, + "p95": 266.9439911842346, + "p99": 274.2399871349335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 229.69600558280945, + "p90": 233.2800030708313, + "p95": 234.23999547958374, + "p99": 249.82400238513947 + }, + "combine": { + "p50": 229.69600558280945, + "p90": 233.2800030708313, + "p95": 234.23999547958374, + "p99": 249.82400238513947 + }, + "roundtrip": { + "p50": 229.69600558280945, + "p90": 233.2800030708313, + "p95": 234.23999547958374, + "p99": 249.82400238513947 + }, + "isolatedSum": { + "p50": 459.3920111656189, + "p90": 466.5600061416626, + "p95": 468.4799909591675, + "p99": 499.64800477027893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 404.7679901123047, + "p90": 407.3919951915741, + "p95": 408.60798954963684, + "p99": 414.0479862689972 + }, + "combine": { + "p50": 404.7679901123047, + "p90": 407.3919951915741, + "p95": 408.60798954963684, + "p99": 414.0479862689972 + }, + "roundtrip": { + "p50": 404.7679901123047, + "p90": 407.3919951915741, + "p95": 408.60798954963684, + "p99": 414.0479862689972 + 
}, + "isolatedSum": { + "p50": 809.5359802246094, + "p90": 814.7839903831482, + "p95": 817.2159790992737, + "p99": 828.0959725379944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 756.0960054397583, + "p90": 759.9999904632568, + "p95": 762.9439830780029, + "p99": 952.5120258331299 + }, + "combine": { + "p50": 756.0960054397583, + "p90": 759.9999904632568, + "p95": 762.9439830780029, + "p99": 952.5120258331299 + }, + "roundtrip": { + "p50": 756.0960054397583, + "p90": 759.9999904632568, + "p95": 762.9439830780029, + "p99": 952.5120258331299 + }, + "isolatedSum": { + "p50": 1512.1920108795166, + "p90": 1519.9999809265137, + "p95": 1525.8879661560059, + "p99": 1905.0240516662598 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2fad8f5d", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_231adb37", + "comparisonKey": "789209c0a36d7502", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:08.084538+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16", + 
"model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 190.40000438690186, + "p90": 199.35999810695648, + "p95": 215.00800549983978, + "p99": 4880.064010620117 + }, + "combine": { + "p50": 62.3680017888546, + "p90": 67.32799857854843, + "p95": 78.62400263547897, + "p99": 5893.407821655273 + }, + "roundtrip": { + "p50": 261.31200790405273, + "p90": 274.399995803833, + "p95": 283.52001309394836, + "p99": 5786.59200668335 + }, + "isolatedSum": { + "p50": 252.76800617575645, + "p90": 266.6879966855049, + "p95": 293.63200813531876, + "p99": 10773.47183227539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + 
"stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 189.85599279403687, + "p90": 198.40000569820404, + "p95": 209.9200040102005, + "p99": 5843.520164489746 + }, + "combine": { + "p50": 63.1679967045784, + "p90": 67.29599833488464, + "p95": 70.88000327348709, + "p99": 2628.2880306243896 + }, + "roundtrip": { + "p50": 260.3519856929779, + "p90": 272.41599559783936, + "p95": 280.7680070400238, + "p99": 5590.65580368042 + }, + "isolatedSum": { + "p50": 253.02398949861526, + "p90": 265.6960040330887, + "p95": 280.8000072836876, + "p99": 8471.808195114136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 196.28800451755524, + "p90": 207.23199844360352, + "p95": 212.70400285720825, + "p99": 4633.9521408081055 + }, + "combine": { + "p50": 62.463998794555664, + "p90": 69.023996591568, + "p95": 74.14399832487106, + "p99": 3724.735975265503 + }, + "roundtrip": { + "p50": 269.3119943141937, + "p90": 283.26401114463806, + "p95": 299.1360127925873, + "p99": 5338.912010192871 + }, + "isolatedSum": { + "p50": 258.7520033121109, + "p90": 276.2559950351715, + "p95": 286.8480011820793, + "p99": 8358.688116073608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 199.0399956703186, + "p90": 207.61600136756897, + "p95": 213.31200003623962, + "p99": 5150.368213653564 + }, + "combine": { + "p50": 63.231997191905975, + "p90": 68.38399916887283, + "p95": 71.19999825954437, + "p99": 
4930.592060089111 + }, + "roundtrip": { + "p50": 271.263986825943, + "p90": 284.2240035533905, + "p95": 309.1199994087219, + "p99": 5298.079967498779 + }, + "isolatedSum": { + "p50": 262.2719928622246, + "p90": 276.0000005364418, + "p95": 284.511998295784, + "p99": 10080.960273742676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 202.14399695396423, + "p90": 212.3199999332428, + "p95": 223.1999933719635, + "p99": 4280.191898345947 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 77.15199887752533, + "p95": 78.87999713420868, + "p99": 331.36001229286194 + }, + "roundtrip": { + "p50": 268.95999908447266, + "p90": 281.40801191329956, + "p95": 290.5920147895813, + "p99": 5434.175968170166 + }, + "isolatedSum": { + "p50": 275.32799541950226, + "p90": 289.4719988107681, + "p95": 302.0799905061722, + "p99": 4611.551910638809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 227.48799622058868, + "p90": 235.6480062007904, + "p95": 240.31999707221985, + "p99": 2677.92010307312 + }, + "combine": { + "p50": 92.41600334644318, + "p90": 96.22400254011154, + "p95": 100.63999891281128, + "p99": 4249.248027801514 + }, + "roundtrip": { + "p50": 303.74398827552795, + "p90": 314.04799222946167, + "p95": 357.7600121498108, + "p99": 4856.512069702148 + }, + "isolatedSum": { + "p50": 319.90399956703186, + "p90": 331.87200874090195, + "p95": 340.9599959850311, + "p99": 6927.168130874634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + 
"combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 244.28799748420715, + "p90": 256.6719949245453, + "p95": 267.2959864139557, + "p99": 4451.903820037842 + }, + "combine": { + "p50": 126.24000012874603, + "p90": 130.0799995660782, + "p95": 133.31200182437897, + "p99": 2464.639902114868 + }, + "roundtrip": { + "p50": 355.29598593711853, + "p90": 366.36799573898315, + "p95": 375.90399384498596, + "p99": 3801.85604095459 + }, + "isolatedSum": { + "p50": 370.5279976129532, + "p90": 386.7519944906235, + "p95": 400.60798823833466, + "p99": 6916.54372215271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 247.80799448490143, + "p90": 257.9199969768524, + "p95": 263.64800333976746, + "p99": 3994.335889816284 + }, + "combine": { + "p50": 181.0240000486374, + "p90": 187.80800700187683, + "p95": 192.47999787330627, + "p99": 3546.3359355926514 + }, + "roundtrip": { + "p50": 414.8479998111725, + "p90": 424.67200756073, + "p95": 501.3759732246399, + "p99": 3831.7439556121826 + }, + "isolatedSum": { + "p50": 428.8319945335388, + "p90": 445.72800397872925, + "p95": 456.12800121307373, + "p99": 7540.671825408936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a504c44", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": 
"b300_087d0321", + "comparisonKey": "712aacd9f3ecdd78", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:34.745424+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 193.59999895095825, + "p90": 202.81599462032318, + 
"p95": 260.51199436187744, + "p99": 4250.016212463379 + }, + "combine": { + "p50": 61.63199990987778, + "p90": 66.04799628257751, + "p95": 67.55200028419495, + "p99": 136.9599997997284 + }, + "roundtrip": { + "p50": 266.1759853363037, + "p90": 280.16000986099243, + "p95": 294.9120104312897, + "p99": 5096.735954284668 + }, + "isolatedSum": { + "p50": 255.23199886083603, + "p90": 268.8639909029007, + "p95": 328.0639946460724, + "p99": 4386.976212263107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 193.6960071325302, + "p90": 202.04800367355347, + "p95": 210.11200547218323, + "p99": 4475.776195526123 + }, + "combine": { + "p50": 64.7680014371872, + "p90": 69.69600170850754, + "p95": 74.30399954319, + "p99": 4410.272121429443 + }, + "roundtrip": { + "p50": 266.400009393692, + "p90": 278.1440019607544, + "p95": 345.2160060405731, + "p99": 4882.143974304199 + }, + "isolatedSum": { + "p50": 258.4640085697174, + "p90": 271.744005382061, + "p95": 284.41600501537323, + "p99": 8886.048316955566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 201.6959935426712, + "p90": 213.69600296020508, + "p95": 221.37600183486938, + "p99": 4144.288063049316 + }, + "combine": { + "p50": 63.13599646091461, + "p90": 69.08799707889557, + "p95": 71.00799679756165, + "p99": 151.90400183200836 + }, + "roundtrip": { + "p50": 275.519996881485, + "p90": 293.3439910411835, + "p95": 302.7519881725311, + "p99": 4885.7598304748535 + }, + "isolatedSum": { + "p50": 264.8319900035858, + "p90": 
282.78400003910065, + "p95": 292.38399863243103, + "p99": 4296.192064881325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 203.5199999809265, + "p90": 212.38400042057037, + "p95": 216.67200326919556, + "p99": 4312.416076660156 + }, + "combine": { + "p50": 63.519999384880066, + "p90": 69.56800073385239, + "p95": 83.93599838018417, + "p99": 4470.751762390137 + }, + "roundtrip": { + "p50": 277.0560085773468, + "p90": 289.11998867988586, + "p95": 301.5359938144684, + "p99": 4827.807903289795 + }, + "isolatedSum": { + "p50": 267.0399993658066, + "p90": 281.95200115442276, + "p95": 300.60800164937973, + "p99": 8783.167839050293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 202.33599841594696, + "p90": 212.0320051908493, + "p95": 218.1120067834854, + "p99": 3893.02396774292 + }, + "combine": { + "p50": 71.99999690055847, + "p90": 75.71200281381607, + "p95": 78.46400141716003, + "p99": 3892.7040100097656 + }, + "roundtrip": { + "p50": 273.3120024204254, + "p90": 287.200003862381, + "p95": 345.8879888057709, + "p99": 4879.327774047852 + }, + "isolatedSum": { + "p50": 274.33599531650543, + "p90": 287.7440080046654, + "p95": 296.57600820064545, + "p99": 7785.727977752686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 
228.64000499248505, + "p90": 237.69600689411163, + "p95": 244.80000138282776, + "p99": 4145.664215087891 + }, + "combine": { + "p50": 89.66399729251862, + "p90": 93.75999867916107, + "p95": 98.81599992513657, + "p99": 3855.3919792175293 + }, + "roundtrip": { + "p50": 305.56800961494446, + "p90": 314.7839903831482, + "p95": 328.575998544693, + "p99": 4436.672210693359 + }, + "isolatedSum": { + "p50": 318.30400228500366, + "p90": 331.4560055732727, + "p95": 343.6160013079643, + "p99": 8001.05619430542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 244.03199553489685, + "p90": 260.51199436187744, + "p95": 276.70401334762573, + "p99": 3981.1840057373047 + }, + "combine": { + "p50": 125.15200674533844, + "p90": 129.82399761676788, + "p95": 132.57600367069244, + "p99": 3383.8400840759277 + }, + "roundtrip": { + "p50": 356.9599986076355, + "p90": 369.56799030303955, + "p95": 415.5519902706146, + "p99": 4039.872169494629 + }, + "isolatedSum": { + "p50": 369.1840022802353, + "p90": 390.3359919786453, + "p95": 409.2800170183182, + "p99": 7365.024089813232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 249.56800043582916, + "p90": 258.7519884109497, + "p95": 265.7279968261719, + "p99": 4058.5598945617676 + }, + "combine": { + "p50": 180.54400384426117, + "p90": 188.73600661754608, + "p95": 388.63998651504517, + "p99": 3816.6720867156982 + }, + "roundtrip": { + "p50": 416.31999611854553, + "p90": 456.60799741744995, + "p95": 544.2879796028137, + "p99": 
3632.767915725708 + }, + "isolatedSum": { + "p50": 430.11200428009033, + "p90": 447.4879950284958, + "p95": 654.367983341217, + "p99": 7875.231981277466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7ee42fc5", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_b2980f58", + "comparisonKey": "4356993ad0ca99aa", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:57.519848+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 196.70400023460388, + "p90": 205.37599921226501, + "p95": 215.83999693393707, + "p99": 4728.415966033936 + }, + "combine": { + "p50": 63.58399987220764, + "p90": 69.34399902820587, + "p95": 85.1840004324913, + "p99": 5060.319900512695 + }, + "roundtrip": { + "p50": 270.1759934425354, + "p90": 286.46400570869446, + "p95": 487.93599009513855, + "p99": 5890.848159790039 + }, + "isolatedSum": { + "p50": 260.2880001068115, + "p90": 274.7199982404709, + "p95": 301.0239973664284, + "p99": 9788.73586654663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 209.6640020608902, + "p90": 222.3680019378662, + "p95": 1980.5760383605957, + "p99": 5272.384166717529 + }, + "combine": { + "p50": 66.0799965262413, + "p90": 73.47200065851212, + "p95": 105.59999942779541, + "p99": 4456.416130065918 + }, + "roundtrip": { + "p50": 278.6880135536194, + "p90": 296.86400294303894, + "p95": 1274.3359804153442, + "p99": 6639.647960662842 + }, + "isolatedSum": { + "p50": 275.7439985871315, + "p90": 295.8400025963783, + "p95": 2086.176037788391, + "p99": 9728.800296783447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + 
"fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 232.96000063419342, + "p90": 249.40800666809082, + "p95": 261.28000020980835, + "p99": 4103.616237640381 + }, + "combine": { + "p50": 90.97599983215332, + "p90": 96.67199850082397, + "p95": 99.87200051546097, + "p99": 336.5760147571564 + }, + "roundtrip": { + "p50": 310.5599880218506, + "p90": 324.16000962257385, + "p95": 339.711993932724, + "p99": 4660.223960876465 + }, + "isolatedSum": { + "p50": 323.93600046634674, + "p90": 346.0800051689148, + "p95": 361.1520007252693, + "p99": 4440.192252397537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 250.65600872039795, + "p90": 262.1760070323944, + "p95": 268.92799139022827, + "p99": 4700.831890106201 + }, + "combine": { + "p50": 181.05599284172058, + "p90": 188.7039989233017, + "p95": 195.51999866962433, + "p99": 3866.4638996124268 + }, + "roundtrip": { + "p50": 416.54399037361145, + "p90": 427.71199345588684, + "p95": 515.7439708709717, + "p99": 4197.343826293945 + }, + "isolatedSum": { + "p50": 431.71200156211853, + "p90": 450.8800059556961, + "p95": 464.4479900598526, + "p99": 8567.295789718628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e4de7440", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b300_6b9be1ad", + "comparisonKey": 
"a4559637464c8dcc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:30.172331+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 192.9599940776825, + "p90": 202.97600328922272, + "p95": 
327.7760148048401, + "p99": 5132.063865661621 + }, + "combine": { + "p50": 62.30400130152702, + "p90": 66.97600334882736, + "p95": 70.88000327348709, + "p99": 3720.832109451294 + }, + "roundtrip": { + "p50": 264.3519937992096, + "p90": 285.72800755500793, + "p95": 2775.6481170654297, + "p99": 5273.471832275391 + }, + "isolatedSum": { + "p50": 255.26399537920952, + "p90": 269.9520066380501, + "p95": 398.6560180783272, + "p99": 8852.895975112915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 9, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 193.27999651432037, + "p90": 201.9519954919815, + "p95": 982.9760193824768, + "p99": 4682.176113128662 + }, + "combine": { + "p50": 64.99200314283371, + "p90": 71.29599899053574, + "p95": 81.40800148248672, + "p99": 4475.520133972168 + }, + "roundtrip": { + "p50": 263.2960081100464, + "p90": 286.8160009384155, + "p95": 398.1119990348816, + "p99": 5286.752223968506 + }, + "isolatedSum": { + "p50": 258.2719996571541, + "p90": 273.24799448251724, + "p95": 1064.3840208649635, + "p99": 9157.69624710083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 18, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 199.71199333667755, + "p90": 212.54399418830872, + "p95": 283.488005399704, + "p99": 4692.319869995117 + }, + "combine": { + "p50": 62.55999952554703, + "p90": 68.64000111818314, + "p95": 71.61600142717361, + "p99": 474.3039906024933 + }, + "roundtrip": { + "p50": 272.2879946231842, + "p90": 300.6719946861267, + "p95": 397.66401052474976, + "p99": 5431.903839111328 + }, + "isolatedSum": { + "p50": 262.2719928622246, + "p90": 281.18399530649185, 
+ "p95": 355.1040068268776, + "p99": 5166.6238605976105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 36, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 201.02399587631226, + "p90": 209.27999913692474, + "p95": 212.41599321365356, + "p99": 399.8720049858093 + }, + "combine": { + "p50": 62.94400244951248, + "p90": 69.5360004901886, + "p95": 84.03199911117554, + "p99": 4422.751903533936 + }, + "roundtrip": { + "p50": 272.96000719070435, + "p90": 282.04798698425293, + "p95": 286.5920066833496, + "p99": 5003.3278465271 + }, + "isolatedSum": { + "p50": 263.96799832582474, + "p90": 278.81599962711334, + "p95": 296.4479923248291, + "p99": 4822.623908519745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 72, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 202.07999646663666, + "p90": 209.34399962425232, + "p95": 212.64000236988068, + "p99": 348.9280045032501 + }, + "combine": { + "p50": 72.48000055551529, + "p90": 75.93599706888199, + "p95": 80.51200211048126, + "p99": 4324.3842124938965 + }, + "roundtrip": { + "p50": 271.263986825943, + "p90": 281.95199370384216, + "p95": 290.5600070953369, + "p99": 4862.207889556885 + }, + "isolatedSum": { + "p50": 274.55999702215195, + "p90": 285.2799966931343, + "p95": 293.15200448036194, + "p99": 4673.312216997147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 228.7999987602234, + 
"p90": 235.77600717544556, + "p95": 239.04000222682953, + "p99": 3743.135929107666 + }, + "combine": { + "p50": 90.30400216579437, + "p90": 94.46399658918381, + "p95": 104.16000336408615, + "p99": 3847.9039669036865 + }, + "roundtrip": { + "p50": 304.51199412345886, + "p90": 313.85600566864014, + "p95": 350.40000081062317, + "p99": 4507.328033447266 + }, + "isolatedSum": { + "p50": 319.10400092601776, + "p90": 330.24000376462936, + "p95": 343.2000055909157, + "p99": 7591.0398960113525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 245.44000625610352, + "p90": 261.59998774528503, + "p95": 328.5439908504486, + "p99": 3570.336103439331 + }, + "combine": { + "p50": 127.55200266838074, + "p90": 132.4480026960373, + "p95": 137.63199746608734, + "p99": 3433.151960372925 + }, + "roundtrip": { + "p50": 361.5359961986542, + "p90": 376.1279881000519, + "p95": 470.68798542022705, + "p99": 3966.655969619751 + }, + "isolatedSum": { + "p50": 372.99200892448425, + "p90": 394.0479904413223, + "p95": 466.17598831653595, + "p99": 7003.488063812256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 576, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 250.20799040794373, + "p90": 260.0640058517456, + "p95": 267.67998933792114, + "p99": 3949.1519927978516 + }, + "combine": { + "p50": 183.3599954843521, + "p90": 190.11199474334717, + "p95": 242.2720044851303, + "p99": 3440.000057220459 + }, + "roundtrip": { + "p50": 418.39998960494995, + "p90": 430.33599853515625, + "p95": 534.4319939613342, + "p99": 3439.0718936920166 + }, + 
"isolatedSum": { + "p50": 433.56798589229584, + "p90": 450.1760005950928, + "p95": 509.95199382305145, + "p99": 7389.152050018311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d6f24513", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b300_942c0ff7", + "comparisonKey": "d01ed8aa43ea0748", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:35.186333+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 195.8719938993454, + "p90": 204.67199385166168, + "p95": 218.9120054244995, + "p99": 5547.999858856201 + }, + "combine": { + "p50": 65.43999910354614, + "p90": 74.07999783754349, + "p95": 96.79999947547913, + "p99": 4347.871780395508 + }, + "roundtrip": { + "p50": 270.4319953918457, + "p90": 283.1360101699829, + "p95": 298.5920011997223, + "p99": 5857.408046722412 + }, + "isolatedSum": { + "p50": 261.31199300289154, + "p90": 278.75199168920517, + "p95": 315.71200489997864, + "p99": 9895.871639251709 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 195.10400295257568, + "p90": 203.77600193023682, + "p95": 224.41600263118744, + "p99": 6950.751781463623 + }, + "combine": { + "p50": 67.07199662923813, + "p90": 73.63200187683105, + "p95": 79.42400127649307, + "p99": 3976.6080379486084 + }, + "roundtrip": { + "p50": 271.263986825943, + "p90": 284.60800647735596, + "p95": 342.0160114765167, + "p99": 5323.616027832031 + }, + "isolatedSum": { + "p50": 262.1759995818138, + "p90": 277.40800380706787, + "p95": 303.8400039076805, + "p99": 10927.359819412231 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 
23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 203.93599569797516, + "p90": 214.84799683094025, + "p95": 223.61600399017334, + "p99": 4698.65608215332 + }, + "combine": { + "p50": 65.72800129652023, + "p90": 71.61600142717361, + "p95": 74.20799881219864, + "p99": 2850.4960536956787 + }, + "roundtrip": { + "p50": 281.47199749946594, + "p90": 298.68799448013306, + "p95": 311.48800253868103, + "p99": 5455.776214599609 + }, + "isolatedSum": { + "p50": 269.6639969944954, + "p90": 286.46399825811386, + "p95": 297.824002802372, + "p99": 7549.152135848999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 50, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 205.34400641918182, + "p90": 215.29600024223328, + "p95": 250.75200200080872, + "p99": 4837.152004241943 + }, + "combine": { + "p50": 65.88800251483917, + "p90": 71.32799923419952, + "p95": 74.62400197982788, + "p99": 982.9760193824768 + }, + "roundtrip": { + "p50": 280.19198775291443, + "p90": 294.68798637390137, + "p95": 358.5920035839081, + "p99": 5785.088062286377 + }, + "isolatedSum": { + "p50": 271.232008934021, + "p90": 286.6239994764328, + "p95": 325.3760039806366, + "p99": 5820.12802362442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 206.62400126457214, + "p90": 217.631995677948, + "p95": 340.7680094242096, + "p99": 4409.599781036377 + }, + "combine": { + "p50": 74.49600100517273, + "p90": 78.17599922418594, + "p95": 80.73599636554718, + 
"p99": 4069.888114929199 + }, + "roundtrip": { + "p50": 279.5200049877167, + "p90": 293.15200448036194, + "p95": 349.37599301338196, + "p99": 5989.120006561279 + }, + "isolatedSum": { + "p50": 281.1200022697449, + "p90": 295.80799490213394, + "p95": 421.5040057897568, + "p99": 8479.487895965576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 224, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 232.80000686645508, + "p90": 241.95200204849243, + "p95": 247.871994972229, + "p99": 4347.807884216309 + }, + "combine": { + "p50": 96.16000205278397, + "p90": 99.58399832248688, + "p95": 103.26399654150009, + "p99": 3832.9920768737793 + }, + "roundtrip": { + "p50": 318.2399868965149, + "p90": 327.2640109062195, + "p95": 353.40800881385803, + "p99": 4471.551895141602 + }, + "isolatedSum": { + "p50": 328.96000891923904, + "p90": 341.5360003709793, + "p95": 351.1359915137291, + "p99": 8180.799961090088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 249.11999702453613, + "p90": 263.264000415802, + "p95": 271.2959945201874, + "p99": 3731.487989425659 + }, + "combine": { + "p50": 129.88799810409546, + "p90": 133.63200426101685, + "p95": 136.1279934644699, + "p99": 3651.3280868530273 + }, + "roundtrip": { + "p50": 368.8960075378418, + "p90": 406.0479998588562, + "p95": 576.2240290641785, + "p99": 4475.2960205078125 + }, + "isolatedSum": { + "p50": 379.0079951286316, + "p90": 396.89600467681885, + "p95": 407.4239879846573, + "p99": 7382.8160762786865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 925, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 262.04800605773926, + "p90": 272.19200134277344, + "p95": 284.5759987831116, + "p99": 4157.18412399292 + }, + "combine": { + "p50": 187.71199882030487, + "p90": 193.7599927186966, + "p95": 199.26400482654572, + "p99": 3007.5840950012207 + }, + "roundtrip": { + "p50": 448.60801100730896, + "p90": 459.1679871082306, + "p95": 584.8000049591064, + "p99": 3878.528118133545 + }, + "isolatedSum": { + "p50": 449.7600048780441, + "p90": 465.95199406147003, + "p95": 483.8400036096573, + "p99": 7164.768218994141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-31ab0928", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b300_f1f50ec7", + "comparisonKey": "aee57b7f9b164a81", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:50.026915+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 191.3280040025711, + "p90": 204.22400534152985, + "p95": 344.7360098361969, + "p99": 5083.2319259643555 + }, + "combine": { + "p50": 63.29599767923355, + "p90": 70.68800181150436, + "p95": 75.9039968252182, + "p99": 4541.4719581604 + }, + "roundtrip": { + "p50": 265.6320035457611, + "p90": 282.81599283218384, + "p95": 465.5359983444214, + "p99": 5265.952110290527 + }, + "isolatedSum": { + "p50": 254.62400168180466, + "p90": 274.9120071530342, + "p95": 420.6400066614151, + "p99": 9624.703884124756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + 
"dispatch": { + "p50": 191.0720020532608, + "p90": 199.93600249290466, + "p95": 217.75999665260315, + "p99": 5141.8561935424805 + }, + "combine": { + "p50": 64.70400094985962, + "p90": 68.57600063085556, + "p95": 70.8480030298233, + "p99": 4529.151916503906 + }, + "roundtrip": { + "p50": 264.0959918498993, + "p90": 275.39199590682983, + "p95": 354.2720079421997, + "p99": 5303.487777709961 + }, + "isolatedSum": { + "p50": 255.77600300312042, + "p90": 268.5120031237602, + "p95": 288.60799968242645, + "p99": 9671.008110046387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 22, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 200.19200444221497, + "p90": 211.84000372886658, + "p95": 233.3119958639145, + "p99": 4206.016063690186 + }, + "combine": { + "p50": 63.61600011587143, + "p90": 69.95200365781784, + "p95": 73.5040009021759, + "p99": 4584.832191467285 + }, + "roundtrip": { + "p50": 272.67199754714966, + "p90": 290.2719974517822, + "p95": 351.29600763320923, + "p99": 5053.760051727295 + }, + "isolatedSum": { + "p50": 263.8080045580864, + "p90": 281.7920073866844, + "p95": 306.8159967660904, + "p99": 8790.84825515747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 200.6080001592636, + "p90": 210.4959934949875, + "p95": 224.95999932289124, + "p99": 4450.463771820068 + }, + "combine": { + "p50": 63.07200342416763, + "p90": 67.74400174617767, + "p95": 71.19999825954437, + "p99": 4362.527847290039 + }, + "roundtrip": { + "p50": 271.93599939346313, + "p90": 281.8560004234314, + "p95": 301.472008228302, + "p99": 
4973.087787628174 + }, + "isolatedSum": { + "p50": 263.68000358343124, + "p90": 278.23999524116516, + "p95": 296.1599975824356, + "p99": 8812.991619110107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 79, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 201.664000749588, + "p90": 210.94399690628052, + "p95": 221.11999988555908, + "p99": 4107.52010345459 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 77.15199887752533, + "p95": 85.21600067615509, + "p99": 4030.4641723632812 + }, + "roundtrip": { + "p50": 272.7999985218048, + "p90": 292.54400730133057, + "p95": 382.81598687171936, + "p99": 4979.743957519531 + }, + "isolatedSum": { + "p50": 275.04000067710876, + "p90": 288.09599578380585, + "p95": 306.3360005617142, + "p99": 8137.984275817871 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 134, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 228.60799729824066, + "p90": 236.4799976348877, + "p95": 250.4960000514984, + "p99": 4469.024181365967 + }, + "combine": { + "p50": 91.48799628019333, + "p90": 94.52799707651138, + "p95": 96.51199728250504, + "p99": 2889.4400596618652 + }, + "roundtrip": { + "p50": 305.37599325180054, + "p90": 316.19200110435486, + "p95": 350.20801424980164, + "p99": 4521.728038787842 + }, + "isolatedSum": { + "p50": 320.095993578434, + "p90": 331.0079947113991, + "p95": 347.00799733400345, + "p99": 7358.464241027832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 268, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 245.27999758720398, + "p90": 257.53599405288696, + "p95": 265.28000831604004, + "p99": 3784.9600315093994 + }, + "combine": { + "p50": 126.94400548934937, + "p90": 131.71200454235077, + "p95": 134.07999277114868, + "p99": 3590.2719497680664 + }, + "roundtrip": { + "p50": 355.1360070705414, + "p90": 370.7520067691803, + "p95": 534.3040227890015, + "p99": 4016.6401863098145 + }, + "isolatedSum": { + "p50": 372.22400307655334, + "p90": 389.24799859523773, + "p95": 399.3600010871887, + "p99": 7375.231981277466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 533, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 249.439999461174, + "p90": 297.37600684165955, + "p95": 317.7599906921387, + "p99": 4091.4878845214844 + }, + "combine": { + "p50": 179.967999458313, + "p90": 185.82400679588318, + "p95": 193.4400051832199, + "p99": 3418.463945388794 + }, + "roundtrip": { + "p50": 414.0799939632416, + "p90": 425.28000473976135, + "p95": 514.3679976463318, + "p99": 3644.831895828247 + }, + "isolatedSum": { + "p50": 429.407998919487, + "p90": 483.2000136375427, + "p95": 511.1999958753586, + "p99": 7509.951829910278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 1027, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cfec6e71", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b300_44808587", + "comparisonKey": "a286b771eaf5b192", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:14:03.009745+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 192.60799884796143, + "p90": 202.2400051355362, + "p95": 220.86399793624878, + "p99": 5523.4880447387695 + }, + 
"combine": { + "p50": 62.81600147485733, + "p90": 69.5360004901886, + "p95": 84.1279998421669, + "p99": 4517.280101776123 + }, + "roundtrip": { + "p50": 264.8960053920746, + "p90": 276.67200565338135, + "p95": 314.4640028476715, + "p99": 5948.448181152344 + }, + "isolatedSum": { + "p50": 255.42400032281876, + "p90": 271.7760056257248, + "p95": 304.9919977784157, + "p99": 10040.768146514893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 15, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 193.15199553966522, + "p90": 203.74399423599243, + "p95": 253.34399938583374, + "p99": 5708.127975463867 + }, + "combine": { + "p50": 65.31199812889099, + "p90": 70.39999961853027, + "p95": 76.28799974918365, + "p99": 1810.655951499939 + }, + "roundtrip": { + "p50": 264.76800441741943, + "p90": 282.943993806839, + "p95": 439.8080110549927, + "p99": 5950.528144836426 + }, + "isolatedSum": { + "p50": 258.4639936685562, + "p90": 274.1439938545227, + "p95": 329.6319991350174, + "p99": 7518.783926963806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 199.2959976196289, + "p90": 215.26400744915009, + "p95": 239.45599794387817, + "p99": 5069.183826446533 + }, + "combine": { + "p50": 63.35999816656113, + "p90": 70.78400254249573, + "p95": 219.26400065422058, + "p99": 4301.407814025879 + }, + "roundtrip": { + "p50": 273.4079957008362, + "p90": 295.23199796676636, + "p95": 400.38400888442993, + "p99": 5889.920234680176 + }, + "isolatedSum": { + "p50": 262.65599578619003, + "p90": 286.0480099916458, + "p95": 458.71999859809875, + "p99": 
9370.591640472412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 43, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 203.07199656963348, + "p90": 213.24799954891205, + "p95": 225.8239984512329, + "p99": 5556.159973144531 + }, + "combine": { + "p50": 63.58399987220764, + "p90": 69.18399780988693, + "p95": 71.35999947786331, + "p99": 2229.9840450286865 + }, + "roundtrip": { + "p50": 274.9119997024536, + "p90": 289.8240089416504, + "p95": 336.7359936237335, + "p99": 5832.479953765869 + }, + "isolatedSum": { + "p50": 266.6559964418411, + "p90": 282.431997358799, + "p95": 297.1839979290962, + "p99": 7786.144018173218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 73, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 204.16000485420227, + "p90": 215.16799926757812, + "p95": 220.73599696159363, + "p99": 4615.935802459717 + }, + "combine": { + "p50": 73.82400333881378, + "p90": 78.72000336647034, + "p95": 82.59200304746628, + "p99": 3874.1118907928467 + }, + "roundtrip": { + "p50": 272.0640003681183, + "p90": 286.624014377594, + "p95": 356.83199763298035, + "p99": 4809.919834136963 + }, + "isolatedSum": { + "p50": 277.98400819301605, + "p90": 293.88800263404846, + "p95": 303.3280000090599, + "p99": 8490.047693252563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 142, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 230.68800568580627, + "p90": 
240.447998046875, + "p95": 401.98400616645813, + "p99": 4342.016220092773 + }, + "combine": { + "p50": 91.61599725484848, + "p90": 95.39200365543365, + "p95": 98.01600128412247, + "p99": 3907.2000980377197 + }, + "roundtrip": { + "p50": 306.7519962787628, + "p90": 318.39999556541443, + "p95": 376.8959939479828, + "p99": 5127.744197845459 + }, + "isolatedSum": { + "p50": 322.30400294065475, + "p90": 335.84000170230865, + "p95": 500.0000074505806, + "p99": 8249.216318130493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 274, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 245.12000381946564, + "p90": 259.2320144176483, + "p95": 267.4880027770996, + "p99": 3731.3599586486816 + }, + "combine": { + "p50": 125.85599720478058, + "p90": 130.62399625778198, + "p95": 137.34400272369385, + "p99": 4276.895999908447 + }, + "roundtrip": { + "p50": 360.7040047645569, + "p90": 374.783992767334, + "p95": 516.864001750946, + "p99": 4700.575828552246 + }, + "isolatedSum": { + "p50": 370.9760010242462, + "p90": 389.8560106754303, + "p95": 404.83200550079346, + "p99": 8008.255958557129 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 250.17601251602173, + "p90": 260.127991437912, + "p95": 272.6399898529053, + "p99": 4259.200096130371 + }, + "combine": { + "p50": 180.25599420070648, + "p90": 187.391996383667, + "p95": 336.8319869041443, + "p99": 3460.736036300659 + }, + "roundtrip": { + "p50": 415.1360094547272, + "p90": 427.13600397109985, + "p95": 565.1199817657471, + "p99": 4020.127773284912 + }, + 
"isolatedSum": { + "p50": 430.4320067167282, + "p90": 447.519987821579, + "p95": 609.4719767570496, + "p99": 7719.93613243103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 1042, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d2a23043", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_9d63e9b2", + "comparisonKey": "f5ed5a4cbbf5ae55", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:12.058414+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, 
+ "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 191.55199825763702, + "p90": 199.8720020055771, + "p95": 214.6880030632019, + "p99": 4711.423873901367 + }, + "combine": { + "p50": 61.43999844789505, + "p90": 65.66400080919266, + "p95": 68.60800087451935, + "p99": 648.6080288887024 + }, + "roundtrip": { + "p50": 260.8959972858429, + "p90": 273.8879919052124, + "p95": 290.3999984264374, + "p99": 5324.76806640625 + }, + "isolatedSum": { + "p50": 252.99199670553207, + "p90": 265.53600281476974, + "p95": 283.29600393772125, + "p99": 5360.03190279007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 192.06400215625763, + "p90": 200.03199577331543, + "p95": 207.13600516319275, + "p99": 4987.48779296875 + }, + "combine": { + "p50": 65.60000032186508, + "p90": 70.04799693822861, + "p95": 73.37599992752075, + "p99": 4649.343967437744 + }, + "roundtrip": { + "p50": 260.8959972858429, + "p90": 273.3440101146698, + "p95": 283.9359939098358, + "p99": 5201.791763305664 + }, + "isolatedSum": { + "p50": 257.6640024781227, + "p90": 270.07999271154404, + "p95": 280.5120050907135, + "p99": 9636.831760406494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 197.60000705718994, + "p90": 208.44799280166626, + "p95": 230.0799936056137, + "p99": 4322.591781616211 + }, + "combine": { + "p50": 62.01599910855293, + "p90": 67.45599955320358, + "p95": 72.25599884986877, + "p99": 4442.272186279297 + }, + "roundtrip": { + "p50": 268.12800765037537, + "p90": 285.98400950431824, + "p95": 304.32000756263733, + "p99": 5243.103981018066 + }, + "isolatedSum": { + "p50": 259.6160061657429, + "p90": 275.90399235486984, + "p95": 302.3359924554825, + "p99": 8764.863967895508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 204.51200008392334, + "p90": 214.11199867725372, + "p95": 220.47999501228333, + "p99": 4296.351909637451 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 71.77600264549255, + "p95": 73.98399710655212, + "p99": 4503.424167633057 + }, + "roundtrip": { + "p50": 268.41598749160767, + "p90": 283.7440073490143, + "p95": 296.3840067386627, + "p99": 5065.055847167969 + }, + "isolatedSum": { + "p50": 272.12800085544586, + "p90": 285.8880013227463, + "p95": 294.46399211883545, + "p99": 8799.776077270508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 211.45600080490112, + "p90": 219.9680060148239, + "p95": 226.23999416828156, + "p99": 4102.848052978516 + }, + "combine": { + "p50": 84.19200032949448, + "p90": 88.60799670219421, + "p95": 90.94399958848953, + "p99": 3922.7840900421143 + }, + 
"roundtrip": { + "p50": 286.3039970397949, + "p90": 296.31999135017395, + "p95": 311.48800253868103, + "p99": 4838.8800621032715 + }, + "isolatedSum": { + "p50": 295.6480011343956, + "p90": 308.5760027170181, + "p95": 317.1839937567711, + "p99": 8025.63214302063 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 237.5040054321289, + "p90": 245.79200148582458, + "p95": 251.6160011291504, + "p99": 4204.927921295166 + }, + "combine": { + "p50": 102.75200009346008, + "p90": 105.85600137710571, + "p95": 108.31999778747559, + "p99": 3528.1920433044434 + }, + "roundtrip": { + "p50": 338.55998516082764, + "p90": 346.17599844932556, + "p95": 461.2480103969574, + "p99": 4323.423862457275 + }, + "isolatedSum": { + "p50": 340.256005525589, + "p90": 351.6480028629303, + "p95": 359.935998916626, + "p99": 7733.119964599609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 270.9119915962219, + "p90": 278.7199914455414, + "p95": 288.2559895515442, + "p99": 3405.888080596924 + }, + "combine": { + "p50": 154.52800691127777, + "p90": 159.61599349975586, + "p95": 172.4800020456314, + "p99": 3527.1360874176025 + }, + "roundtrip": { + "p50": 427.8079867362976, + "p90": 437.21601366996765, + "p95": 535.1359844207764, + "p99": 3706.6879272460938 + }, + "isolatedSum": { + "p50": 425.4399985074997, + "p90": 438.33598494529724, + "p95": 460.7359915971756, + "p99": 6933.024168014526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 
25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 334.27199721336365, + "p90": 346.97601199150085, + "p95": 458.9439928531647, + "p99": 3564.9919509887695 + }, + "combine": { + "p50": 242.97599494457245, + "p90": 248.25599789619446, + "p95": 339.9679958820343, + "p99": 2355.9679985046387 + }, + "roundtrip": { + "p50": 579.7439813613892, + "p90": 590.0800228118896, + "p95": 1053.7919998168945, + "p99": 3110.7840538024902 + }, + "isolatedSum": { + "p50": 577.2479921579361, + "p90": 595.2320098876953, + "p95": 798.911988735199, + "p99": 5920.959949493408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-85317570", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b300_c32a6d58", + "comparisonKey": "04937b83b7b15cd2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:49.497146+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 195.0400024652481, + "p90": 249.15200471878052, + "p95": 268.38400959968567, + "p99": 5330.560207366943 + }, + "combine": { + "p50": 61.47199869155884, + "p90": 67.03999638557434, + "p95": 69.85600292682648, + "p99": 166.143998503685 + }, + "roundtrip": { + "p50": 263.0079984664917, + "p90": 277.72799134254456, + "p95": 301.56800150871277, + "p99": 5648.831844329834 + }, + "isolatedSum": { + "p50": 256.51200115680695, + "p90": 316.19200110435486, + "p95": 338.24001252651215, + "p99": 5496.704205870628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 59, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 193.05600225925446, + "p90": 203.5840004682541, + "p95": 
251.00800395011902, + "p99": 5511.45601272583 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 67.87200272083282, + "p95": 69.023996591568, + "p99": 75.52000135183334 + }, + "roundtrip": { + "p50": 262.36799359321594, + "p90": 277.24799513816833, + "p95": 415.039986371994, + "p99": 5968.512058258057 + }, + "isolatedSum": { + "p50": 256.8960040807724, + "p90": 271.4560031890869, + "p95": 320.032000541687, + "p99": 5586.976014077663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 121, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 201.9519954919815, + "p90": 214.23999965190887, + "p95": 225.40800273418427, + "p99": 5530.496120452881 + }, + "combine": { + "p50": 63.45599889755249, + "p90": 72.76800274848938, + "p95": 90.2400016784668, + "p99": 4832.543849945068 + }, + "roundtrip": { + "p50": 268.8960134983063, + "p90": 286.8799865245819, + "p95": 303.8719892501831, + "p99": 6332.064151763916 + }, + "isolatedSum": { + "p50": 265.407994389534, + "p90": 287.00800240039825, + "p95": 315.64800441265106, + "p99": 10363.03997039795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 244, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 212.70400285720825, + "p90": 222.88000583648682, + "p95": 339.00800347328186, + "p99": 5274.2719650268555 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 78.36800068616867, + "p95": 86.36800199747086, + "p99": 4304.512023925781 + }, + "roundtrip": { + "p50": 280.5120050907135, + "p90": 291.6159927845001, + "p95": 439.64800238609314, + "p99": 5749.023914337158 + }, + "isolatedSum": { + "p50": 285.66399961709976, + "p90": 
301.2480065226555, + "p95": 425.3760054707527, + "p99": 9578.783988952637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 478, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 212.19199895858765, + "p90": 220.92799842357635, + "p95": 227.87199914455414, + "p99": 4583.615779876709 + }, + "combine": { + "p50": 83.67999643087387, + "p90": 88.28800171613693, + "p95": 105.21599650382996, + "p99": 4195.968151092529 + }, + "roundtrip": { + "p50": 289.8879945278168, + "p90": 297.4720001220703, + "p95": 306.0159981250763, + "p99": 4634.7198486328125 + }, + "isolatedSum": { + "p50": 295.8719953894615, + "p90": 309.2160001397133, + "p95": 333.0879956483841, + "p99": 8779.583930969238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 953, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 253.76001000404358, + "p90": 260.47998666763306, + "p95": 444.2879855632782, + "p99": 4225.376129150391 + }, + "combine": { + "p50": 111.87200248241425, + "p90": 115.77600240707397, + "p95": 123.3920007944107, + "p99": 3959.0399265289307 + }, + "roundtrip": { + "p50": 364.25599455833435, + "p90": 372.79999256134033, + "p95": 578.2719850540161, + "p99": 4725.9521484375 + }, + "isolatedSum": { + "p50": 365.6320124864578, + "p90": 376.25598907470703, + "p95": 567.6799863576889, + "p99": 8184.416055679321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 1908, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + 
"dispatch": { + "p50": 303.6159873008728, + "p90": 317.31200218200684, + "p95": 525.9519815444946, + "p99": 3844.575881958008 + }, + "combine": { + "p50": 171.61600291728973, + "p90": 175.6799966096878, + "p95": 178.46399545669556, + "p99": 3439.2640590667725 + }, + "roundtrip": { + "p50": 474.8159945011139, + "p90": 486.9759976863861, + "p95": 2203.61590385437, + "p99": 3833.4081172943115 + }, + "isolatedSum": { + "p50": 475.23199021816254, + "p90": 492.99199879169464, + "p95": 704.4159770011902, + "p99": 7283.83994102478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 3804, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 380.22398948669434, + "p90": 390.1120126247406, + "p95": 822.8800296783447, + "p99": 4348.19221496582 + }, + "combine": { + "p50": 288.7679934501648, + "p90": 295.6160008907318, + "p95": 664.7679805755615, + "p99": 2670.527935028076 + }, + "roundtrip": { + "p50": 666.4959788322449, + "p90": 675.9679913520813, + "p95": 2066.9119358062744, + "p99": 3093.503952026367 + }, + "isolatedSum": { + "p50": 668.9919829368591, + "p90": 685.7280135154724, + "p95": 1487.6480102539062, + "p99": 7018.7201499938965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d2cf4fb0", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b300_268c1336", + "comparisonKey": "c820a60f75751bfd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:23.834347+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 194.43200528621674, + "p90": 203.23200523853302, + "p95": 216.89599752426147, + "p99": 4657.18412399292 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 68.89600306749344, + "p95": 155.008003115654, + 
"p99": 4194.431781768799 + }, + "roundtrip": { + "p50": 266.04801416397095, + "p90": 280.8000147342682, + "p95": 330.30399680137634, + "p99": 5405.727863311768 + }, + "isolatedSum": { + "p50": 258.2720071077347, + "p90": 272.12800830602646, + "p95": 371.90400063991547, + "p99": 8851.615905761719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 194.39999759197235, + "p90": 203.48800718784332, + "p95": 222.1119999885559, + "p99": 4567.71183013916 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 89.56799656152725, + "p95": 141.31200313568115, + "p99": 4727.583885192871 + }, + "roundtrip": { + "p50": 268.19199323654175, + "p90": 281.98400139808655, + "p95": 312.7039968967438, + "p99": 5118.752002716064 + }, + "isolatedSum": { + "p50": 261.695995926857, + "p90": 293.0560037493706, + "p95": 363.42400312423706, + "p99": 9295.295715332031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 201.34399831295013, + "p90": 214.39999341964722, + "p95": 242.23999679088593, + "p99": 4494.688034057617 + }, + "combine": { + "p50": 64.03200328350067, + "p90": 69.2799985408783, + "p95": 71.58400118350983, + "p99": 340.7999873161316 + }, + "roundtrip": { + "p50": 275.2000093460083, + "p90": 297.791987657547, + "p95": 386.6879940032959, + "p99": 4968.736171722412 + }, + "isolatedSum": { + "p50": 265.3760015964508, + "p90": 283.6799919605255, + "p95": 313.82399797439575, + "p99": 4835.488021373749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + 
"combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 40, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 202.43200659751892, + "p90": 212.19199895858765, + "p95": 218.36799383163452, + "p99": 4179.232120513916 + }, + "combine": { + "p50": 64.12799656391144, + "p90": 70.27199864387512, + "p95": 76.09599828720093, + "p99": 4496.799945831299 + }, + "roundtrip": { + "p50": 276.0320007801056, + "p90": 288.92800211906433, + "p95": 305.1519989967346, + "p99": 5077.216148376465 + }, + "isolatedSum": { + "p50": 266.56000316143036, + "p90": 282.46399760246277, + "p95": 294.46399211883545, + "p99": 8676.032066345215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 71, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 202.72000133991241, + "p90": 213.6320024728775, + "p95": 224.5440036058426, + "p99": 4031.8078994750977 + }, + "combine": { + "p50": 73.60000163316727, + "p90": 78.65600287914276, + "p95": 92.00000017881393, + "p99": 4682.528018951416 + }, + "roundtrip": { + "p50": 274.4959890842438, + "p90": 287.51999139785767, + "p95": 370.07999420166016, + "p99": 4811.232089996338 + }, + "isolatedSum": { + "p50": 276.3200029730797, + "p90": 292.28800535202026, + "p95": 316.5440037846565, + "p99": 8714.335918426514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 143, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 229.44000363349915, + "p90": 237.05600202083588, + "p95": 244.03199553489685, + "p99": 4018.9437866210938 + }, + "combine": { + 
"p50": 91.48799628019333, + "p90": 94.87999975681305, + "p95": 108.06400328874588, + "p99": 4151.552200317383 + }, + "roundtrip": { + "p50": 307.20001459121704, + "p90": 319.39199566841125, + "p95": 392.0319974422455, + "p99": 4361.504077911377 + }, + "isolatedSum": { + "p50": 320.9279999136925, + "p90": 331.9360017776489, + "p95": 352.09599882364273, + "p99": 8170.495986938477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 266, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 245.12000381946564, + "p90": 260.6079876422882, + "p95": 407.1680009365082, + "p99": 4418.68782043457 + }, + "combine": { + "p50": 126.27199292182922, + "p90": 131.6159963607788, + "p95": 134.5279961824417, + "p99": 3378.079891204834 + }, + "roundtrip": { + "p50": 358.72000455856323, + "p90": 374.208003282547, + "p95": 512.9280090332031, + "p99": 4577.311992645264 + }, + "isolatedSum": { + "p50": 371.39199674129486, + "p90": 392.223984003067, + "p95": 541.6959971189499, + "p99": 7796.767711639404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 534, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 250.11199712753296, + "p90": 261.34398579597473, + "p95": 395.58398723602295, + "p99": 4947.264194488525 + }, + "combine": { + "p50": 180.83199858665466, + "p90": 188.960000872612, + "p95": 292.1600043773651, + "p99": 3785.536050796509 + }, + "roundtrip": { + "p50": 416.0960018634796, + "p90": 429.85600233078003, + "p95": 604.9919724464417, + "p99": 3960.832118988037 + }, + "isolatedSum": { + "p50": 430.9439957141876, + "p90": 450.30398666858673, + "p95": 687.7439916133881, + 
"p99": 8732.800245285034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1044, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f101ba9b", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b300_05b63373", + "comparisonKey": "77091e64c9750c7a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:22.156938+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 192.06400215625763, + "p90": 200.32000541687012, + "p95": 328.70399951934814, + "p99": 5070.687770843506 + }, + "combine": { + "p50": 63.00800293684006, + "p90": 69.40799951553345, + "p95": 92.41600334644318, + "p99": 4562.016010284424 + }, + "roundtrip": { + "p50": 264.0320062637329, + "p90": 280.5759906768799, + "p95": 502.1759867668152, + "p99": 5258.975982666016 + }, + "isolatedSum": { + "p50": 255.0720050930977, + "p90": 269.72800493240356, + "p95": 421.1200028657913, + "p99": 9632.70378112793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 192.57600605487823, + "p90": 202.07999646663666, + "p95": 218.81599724292755, + "p99": 5338.592052459717 + }, + "combine": { + "p50": 64.57599997520447, + "p90": 70.30399888753891, + "p95": 94.2080020904541, + "p99": 4446.623802185059 + }, + "roundtrip": { + "p50": 262.7840042114258, + "p90": 275.10398626327515, + "p95": 306.62399530410767, + "p99": 5340.832233428955 + }, + "isolatedSum": { + "p50": 257.1520060300827, + "p90": 272.38399535417557, + "p95": 313.02399933338165, + "p99": 9785.215854644775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 40, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 199.8080015182495, + "p90": 213.47199380397797, + "p95": 227.00800001621246, + "p99": 4727.712154388428 + }, + "combine": { + "p50": 63.87200206518173, + "p90": 70.46400010585785, + "p95": 72.92799651622772, + "p99": 4522.0160484313965 + }, + "roundtrip": { + "p50": 272.2879946231842, + "p90": 294.2720055580139, + "p95": 325.408011674881, + "p99": 4939.7759437561035 + }, + "isolatedSum": { + "p50": 263.68000358343124, + "p90": 283.9359939098358, + "p95": 299.9359965324402, + "p99": 9249.728202819824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 81, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 202.59200036525726, + "p90": 210.81599593162537, + "p95": 222.33599424362183, + "p99": 4685.696125030518 + }, + "combine": { + "p50": 64.25599753856659, + "p90": 70.36799937486649, + "p95": 75.32799988985062, + "p99": 4333.792209625244 + }, + "roundtrip": { + "p50": 272.4800109863281, + "p90": 288.4159982204437, + "p95": 324.9279856681824, + "p99": 5158.048152923584 + }, + "isolatedSum": { + "p50": 266.84799790382385, + "p90": 281.18399530649185, + "p95": 297.66399413347244, + "p99": 9019.488334655762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 210.55999398231506, + "p90": 219.64800357818604, + "p95": 230.880007147789, + "p99": 4182.176113128662 + }, + "combine": { + "p50": 78.87999713420868, + "p90": 83.29600095748901, + "p95": 86.97599917650223, + "p99": 3934.175968170166 + }, + "roundtrip": { + "p50": 272.2879946231842, + "p90": 290.5600070953369, + "p95": 
408.32000970840454, + "p99": 5045.85599899292 + }, + "isolatedSum": { + "p50": 289.43999111652374, + "p90": 302.94400453567505, + "p95": 317.85600632429123, + "p99": 8116.352081298828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 339, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 234.30399596691132, + "p90": 242.8479939699173, + "p95": 259.68000292778015, + "p99": 4366.6558265686035 + }, + "combine": { + "p50": 98.68799895048141, + "p90": 101.79200023412704, + "p95": 103.90400141477585, + "p99": 3859.0400218963623 + }, + "roundtrip": { + "p50": 316.22400879859924, + "p90": 324.41601157188416, + "p95": 396.7359960079193, + "p99": 4502.175807952881 + }, + "isolatedSum": { + "p50": 332.99199491739273, + "p90": 344.63999420404434, + "p95": 363.584004342556, + "p99": 8225.695848464966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 676, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 251.77600979804993, + "p90": 265.82399010658264, + "p95": 278.75199913978577, + "p99": 3269.08802986145 + }, + "combine": { + "p50": 139.80799913406372, + "p90": 144.86399292945862, + "p95": 201.9840031862259, + "p99": 3515.8400535583496 + }, + "roundtrip": { + "p50": 378.81600856781006, + "p90": 393.5999870300293, + "p95": 524.5440006256104, + "p99": 3879.744052886963 + }, + "isolatedSum": { + "p50": 391.58400893211365, + "p90": 410.68798303604126, + "p95": 480.73600232601166, + "p99": 6784.9280834198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 1328, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 284.92799401283264, + "p90": 294.7840094566345, + "p95": 314.6879971027374, + "p99": 3891.3919925689697 + }, + "combine": { + "p50": 203.36000621318817, + "p90": 209.85600352287292, + "p95": 272.8320062160492, + "p99": 3012.255907058716 + }, + "roundtrip": { + "p50": 480.22401332855225, + "p90": 491.58400297164917, + "p95": 671.392023563385, + "p99": 3734.3039512634277 + }, + "isolatedSum": { + "p50": 488.2880002260208, + "p90": 504.64001297950745, + "p95": 587.5200033187866, + "p99": 6903.647899627686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c1189131", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b300_b05409ab", + "comparisonKey": "8166da57a6722efc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:58.638768+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": 
"n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 189.02400135993958, + "p90": 198.30399751663208, + "p95": 263.96799087524414, + "p99": 5403.456211090088 + }, + "combine": { + "p50": 60.256000608205795, + "p90": 64.99200314283371, + "p95": 68.41599941253662, + "p99": 4484.096050262451 + }, + "roundtrip": { + "p50": 258.8160037994385, + "p90": 271.0080146789551, + "p95": 359.51998829841614, + "p99": 5296.99182510376 + }, + "isolatedSum": { + "p50": 249.28000196814537, + "p90": 263.2960006594658, + "p95": 332.38399028778076, + "p99": 9887.552261352539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 188.83199989795685, + "p90": 197.37599790096283, + "p95": 210.4319930076599, + "p99": 
4707.520008087158 + }, + "combine": { + "p50": 63.391998410224915, + "p90": 68.09599697589874, + "p95": 70.68800181150436, + "p99": 4206.175804138184 + }, + "roundtrip": { + "p50": 259.3280076980591, + "p90": 272.0000147819519, + "p95": 352.2239923477173, + "p99": 5290.048122406006 + }, + "isolatedSum": { + "p50": 252.22399830818176, + "p90": 265.4719948768616, + "p95": 281.1199948191643, + "p99": 8913.695812225342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 22, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 196.383997797966, + "p90": 208.00000429153442, + "p95": 217.056006193161, + "p99": 4297.599792480469 + }, + "combine": { + "p50": 60.864001512527466, + "p90": 65.60000032186508, + "p95": 67.29599833488464, + "p99": 89.79199826717377 + }, + "roundtrip": { + "p50": 266.78401231765747, + "p90": 283.1360101699829, + "p95": 300.6719946861267, + "p99": 4962.81623840332 + }, + "isolatedSum": { + "p50": 257.24799931049347, + "p90": 273.6000046133995, + "p95": 284.35200452804565, + "p99": 4387.3917907476425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 39, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 199.10399615764618, + "p90": 207.10399746894836, + "p95": 213.50400149822235, + "p99": 4484.70401763916 + }, + "combine": { + "p50": 61.91999837756157, + "p90": 66.3679987192154, + "p95": 70.8480030298233, + "p99": 4381.120204925537 + }, + "roundtrip": { + "p50": 268.7680125236511, + "p90": 282.24000334739685, + "p95": 364.7040128707886, + "p99": 4999.072074890137 + }, + "isolatedSum": { + "p50": 261.02399453520775, + "p90": 273.47199618816376, + "p95": 
284.35200452804565, + "p99": 8865.824222564697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 73, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 201.47199928760529, + "p90": 209.60000157356262, + "p95": 214.33599293231964, + "p99": 3866.3039207458496 + }, + "combine": { + "p50": 73.56800138950348, + "p90": 77.37600058317184, + "p95": 79.77599650621414, + "p99": 1846.943974494934 + }, + "roundtrip": { + "p50": 266.88000559806824, + "p90": 279.04000878334045, + "p95": 287.9999876022339, + "p99": 4647.520065307617 + }, + "isolatedSum": { + "p50": 275.04000067710876, + "p90": 286.97600215673447, + "p95": 294.1119894385338, + "p99": 5713.247895240784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 138, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 227.64800488948822, + "p90": 236.9920015335083, + "p95": 247.29600548744202, + "p99": 4020.6079483032227 + }, + "combine": { + "p50": 91.42400324344635, + "p90": 94.52799707651138, + "p95": 98.08000177145004, + "p99": 3080.2559852600098 + }, + "roundtrip": { + "p50": 302.3360073566437, + "p90": 312.3199939727783, + "p95": 326.880007982254, + "p99": 4286.816120147705 + }, + "isolatedSum": { + "p50": 319.07200813293457, + "p90": 331.5199986100197, + "p95": 345.37600725889206, + "p99": 7100.863933563232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 273, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + 
"p50": 241.40800535678864, + "p90": 257.0880055427551, + "p95": 344.38401460647583, + "p99": 3927.7119636535645 + }, + "combine": { + "p50": 127.45599448680878, + "p90": 132.192000746727, + "p95": 134.94400680065155, + "p99": 2842.5281047821045 + }, + "roundtrip": { + "p50": 358.62401127815247, + "p90": 370.7840144634247, + "p95": 461.95200085639954, + "p99": 4116.799831390381 + }, + "isolatedSum": { + "p50": 368.8639998435974, + "p90": 389.2800062894821, + "p95": 479.3280214071274, + "p99": 6770.240068435669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 532, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 246.20799720287323, + "p90": 255.19999861717224, + "p95": 264.8000121116638, + "p99": 4533.279895782471 + }, + "combine": { + "p50": 180.9920072555542, + "p90": 188.09600174427032, + "p95": 198.91199469566345, + "p99": 3186.4960193634033 + }, + "roundtrip": { + "p50": 411.45598888397217, + "p90": 421.7279851436615, + "p95": 524.4799852371216, + "p99": 3575.007915496826 + }, + "isolatedSum": { + "p50": 427.20000445842743, + "p90": 443.29600036144257, + "p95": 463.71200680732727, + "p99": 7719.775915145874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 1041, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c057a843", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_2b72fc28", + "comparisonKey": "c5d7747eeab81f18", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:19.818338+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 195.5839991569519, + "p90": 208.03199708461761, + "p95": 259.90399718284607, + "p99": 6034.527778625488 + }, + "combine": { + "p50": 63.29599767923355, + "p90": 68.67200136184692, + "p95": 70.78400254249573, + "p99": 100.73599964380264 + }, + 
"roundtrip": { + "p50": 267.2320008277893, + "p90": 280.38400411605835, + "p95": 336.544007062912, + "p99": 6343.135833740234 + }, + "isolatedSum": { + "p50": 258.87999683618546, + "p90": 276.70399844646454, + "p95": 330.6879997253418, + "p99": 6135.263778269291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 194.5279985666275, + "p90": 202.81599462032318, + "p95": 208.639994263649, + "p99": 4796.383857727051 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 72.22399860620499, + "p95": 76.12799853086472, + "p99": 4640.5439376831055 + }, + "roundtrip": { + "p50": 266.400009393692, + "p90": 276.5440046787262, + "p95": 284.2560112476349, + "p99": 5408.83207321167 + }, + "isolatedSum": { + "p50": 260.79999655485153, + "p90": 275.03999322652817, + "p95": 284.7679927945137, + "p99": 9436.927795410156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 202.84800231456757, + "p90": 216.2880003452301, + "p95": 231.61600530147552, + "p99": 5609.344005584717 + }, + "combine": { + "p50": 63.680000603199005, + "p90": 69.72800195217133, + "p95": 73.85600358247757, + "p99": 4339.712142944336 + }, + "roundtrip": { + "p50": 272.96000719070435, + "p90": 290.336012840271, + "p95": 298.6559867858887, + "p99": 5087.103843688965 + }, + "isolatedSum": { + "p50": 266.52800291776657, + "p90": 286.0160022974014, + "p95": 305.4720088839531, + "p99": 9949.056148529053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 
3.6875, + "recvTokensMax": 154, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 209.6959948539734, + "p90": 217.3759937286377, + "p95": 220.99199891090393, + "p99": 4648.352146148682 + }, + "combine": { + "p50": 68.09599697589874, + "p90": 72.22399860620499, + "p95": 75.68000257015228, + "p99": 2176.543951034546 + }, + "roundtrip": { + "p50": 273.824006319046, + "p90": 285.66399216651917, + "p95": 301.503986120224, + "p99": 4617.472171783447 + }, + "isolatedSum": { + "p50": 277.79199182987213, + "p90": 289.5999923348427, + "p95": 296.6720014810562, + "p99": 6824.8960971832275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 216.86400473117828, + "p90": 226.01599991321564, + "p95": 232.80000686645508, + "p99": 3504.415988922119 + }, + "combine": { + "p50": 84.86399799585342, + "p90": 89.15200084447861, + "p95": 91.39200299978256, + "p99": 3491.136074066162 + }, + "roundtrip": { + "p50": 289.98398780822754, + "p90": 298.0160117149353, + "p95": 315.36000967025757, + "p99": 4452.991962432861 + }, + "isolatedSum": { + "p50": 301.7280027270317, + "p90": 315.16800075769424, + "p95": 324.19200986623764, + "p99": 6995.552062988281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 243.58400702476501, + "p90": 251.42401456832886, + "p95": 265.855997800827, + "p99": 4314.047813415527 + }, + "combine": { + "p50": 103.61599922180176, + "p90": 
106.81600123643875, + "p95": 109.11999642848969, + "p99": 2446.7520713806152 + }, + "roundtrip": { + "p50": 340.7360017299652, + "p90": 349.63199496269226, + "p95": 367.42401123046875, + "p99": 4176.896095275879 + }, + "isolatedSum": { + "p50": 347.2000062465668, + "p90": 358.2400158047676, + "p95": 374.9759942293167, + "p99": 6760.799884796143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 277.2800028324127, + "p90": 287.6160144805908, + "p95": 293.7279939651489, + "p99": 4628.7360191345215 + }, + "combine": { + "p50": 155.20000457763672, + "p90": 160.70400178432465, + "p95": 170.17599940299988, + "p99": 3969.9840545654297 + }, + "roundtrip": { + "p50": 429.82399463653564, + "p90": 440.12799859046936, + "p95": 497.0879852771759, + "p99": 3691.488027572632 + }, + "isolatedSum": { + "p50": 432.48000741004944, + "p90": 448.32001626491547, + "p95": 463.9039933681488, + "p99": 8598.720073699951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 340.1919901371002, + "p90": 348.9919900894165, + "p95": 358.17599296569824, + "p99": 3536.3199710845947 + }, + "combine": { + "p50": 243.93600225448608, + "p90": 250.30401349067688, + "p95": 261.9839906692505, + "p99": 2981.6958904266357 + }, + "roundtrip": { + "p50": 581.3760161399841, + "p90": 594.5280194282532, + "p95": 1981.727957725525, + "p99": 3525.023937225342 + }, + "isolatedSum": { + "p50": 584.1279923915863, + "p90": 599.2960035800934, + "p95": 620.1599836349487, + "p99": 6518.0158615112305 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4742c7d1", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_396c2d86", + "comparisonKey": "27f013095c8c802e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:56.129086+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 191.55199825763702, + "p90": 204.83200252056122, + "p95": 959.3600034713745, + "p99": 5571.392059326172 + }, + "combine": { + "p50": 62.272001057863235, + "p90": 67.48799979686737, + "p95": 73.82400333881378, + "p99": 3361.6960048675537 + }, + "roundtrip": { + "p50": 261.82401180267334, + "p90": 276.44801139831543, + "p95": 387.8079950809479, + "p99": 6258.240222930908 + }, + "isolatedSum": { + "p50": 253.82399931550026, + "p90": 272.3200023174286, + "p95": 1033.1840068101883, + "p99": 8933.088064193726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 190.88000059127808, + "p90": 202.07999646663666, + "p95": 380.8319866657257, + "p99": 5449.312210083008 + }, + "combine": { + "p50": 63.680000603199005, + "p90": 69.37599927186966, + "p95": 72.73600250482559, + "p99": 1924.064040184021 + }, + "roundtrip": { + "p50": 261.56800985336304, + "p90": 274.399995803833, + "p95": 425.05601048469543, + "p99": 5464.032173156738 + }, + "isolatedSum": { + "p50": 254.56000119447708, + "p90": 271.4559957385063, + "p95": 453.5679891705513, + "p99": 7373.376250267029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 199.13600385189056, + "p90": 220.06399929523468, + "p95": 278.9759933948517, + "p99": 4784.031867980957 + }, + "combine": { + "p50": 62.81600147485733, + "p90": 68.92800331115723, + "p95": 72.54400104284286, + "p99": 3134.4640254974365 + }, + "roundtrip": { + "p50": 269.27998661994934, + "p90": 287.55199909210205, + "p95": 307.93601274490356, + "p99": 5037.568092346191 + }, + "isolatedSum": { + "p50": 261.9520053267479, + "p90": 288.9920026063919, + "p95": 351.51999443769455, + "p99": 7918.495893478394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 200.19200444221497, + "p90": 208.67200195789337, + "p95": 215.7440036535263, + "p99": 3692.1279430389404 + }, + "combine": { + "p50": 63.680000603199005, + "p90": 72.15999811887741, + "p95": 93.40800344944, + "p99": 5336.607933044434 + }, + "roundtrip": { + "p50": 274.4640111923218, + "p90": 289.6000146865845, + "p95": 319.39199566841125, + "p99": 5127.007961273193 + }, + "isolatedSum": { + "p50": 263.87200504541397, + "p90": 280.8320000767708, + "p95": 309.1520071029663, + "p99": 9028.735876083374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 203.0400037765503, + "p90": 213.1199985742569, + "p95": 225.63199698925018, + "p99": 4033.0238342285156 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 78.11199873685837, + "p95": 83.93599838018417, + "p99": 4349.376201629639 + }, + "roundtrip": { + "p50": 
270.27198672294617, + "p90": 283.7759852409363, + "p95": 299.9359965324402, + "p99": 4749.887943267822 + }, + "isolatedSum": { + "p50": 276.7680063843727, + "p90": 291.23199731111526, + "p95": 309.56799536943436, + "p99": 8382.400035858154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 228.38400304317474, + "p90": 239.23200368881226, + "p95": 270.6240117549896, + "p99": 4620.160102844238 + }, + "combine": { + "p50": 91.39200299978256, + "p90": 94.59199756383896, + "p95": 96.83199971914291, + "p99": 3578.495979309082 + }, + "roundtrip": { + "p50": 305.7920038700104, + "p90": 316.0319924354553, + "p95": 400.4800021648407, + "p99": 4429.247856140137 + }, + "isolatedSum": { + "p50": 319.7760060429573, + "p90": 333.8240012526512, + "p95": 367.45601147413254, + "p99": 8198.65608215332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 245.5040067434311, + "p90": 260.47998666763306, + "p95": 281.0559868812561, + "p99": 3600.543975830078 + }, + "combine": { + "p50": 129.05600666999817, + "p90": 133.59999656677246, + "p95": 190.33600389957428, + "p99": 3458.336114883423 + }, + "roundtrip": { + "p50": 358.65598917007446, + "p90": 371.2320029735565, + "p95": 426.33599042892456, + "p99": 4063.9681816101074 + }, + "isolatedSum": { + "p50": 374.56001341342926, + "p90": 394.0799832344055, + "p95": 471.3919907808304, + "p99": 7058.880090713501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + 
"fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 248.60799312591553, + "p90": 258.2400143146515, + "p95": 272.7360129356384, + "p99": 3565.4399394989014 + }, + "combine": { + "p50": 180.31999468803406, + "p90": 188.12799453735352, + "p95": 230.49600422382355, + "p99": 3375.0081062316895 + }, + "roundtrip": { + "p50": 413.9519929885864, + "p90": 424.1600036621094, + "p95": 535.968005657196, + "p99": 3448.1279850006104 + }, + "isolatedSum": { + "p50": 428.9279878139496, + "p90": 446.368008852005, + "p95": 503.232017159462, + "p99": 6940.448045730591 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1200874b", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_fa5b663c", + "comparisonKey": "4c5022bcf499769e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:55.582703+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": 
"bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 194.62400674819946, + "p90": 203.19999754428864, + "p95": 430.62400817871094, + "p99": 6063.5199546813965 + }, + "combine": { + "p50": 62.78400123119354, + "p90": 68.03199648857117, + "p95": 71.16799801588058, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 266.36800169944763, + "p90": 280.19198775291443, + "p95": 313.85600566864014, + "p99": 6389.023780822754 + }, + "isolatedSum": { + "p50": 257.408007979393, + "p90": 271.2319940328598, + "p95": 501.7920061945915, + "p99": 6150.911957025528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 193.50400567054749, + "p90": 201.92000269889832, + "p95": 
209.9519968032837, + "p99": 5453.59992980957 + }, + "combine": { + "p50": 64.86400216817856, + "p90": 68.31999868154526, + "p95": 69.92000341415405, + "p99": 78.91199737787247 + }, + "roundtrip": { + "p50": 265.8880054950714, + "p90": 275.87199211120605, + "p95": 283.52001309394836, + "p99": 5159.6479415893555 + }, + "isolatedSum": { + "p50": 258.36800783872604, + "p90": 270.2400013804436, + "p95": 279.87200021743774, + "p99": 5532.511927187443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 201.47199928760529, + "p90": 213.02400529384613, + "p95": 220.19200026988983, + "p99": 4900.320053100586 + }, + "combine": { + "p50": 63.77600133419037, + "p90": 69.5360004901886, + "p95": 72.22399860620499, + "p99": 3947.3280906677246 + }, + "roundtrip": { + "p50": 274.2399871349335, + "p90": 292.32001304626465, + "p95": 304.86398935317993, + "p99": 5656.320095062256 + }, + "isolatedSum": { + "p50": 265.24800062179565, + "p90": 282.56000578403473, + "p95": 292.4159988760948, + "p99": 8847.64814376831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 203.0079960823059, + "p90": 212.8320038318634, + "p95": 217.8560048341751, + "p99": 4306.240081787109 + }, + "combine": { + "p50": 64.03200328350067, + "p90": 69.82400268316269, + "p95": 74.17599856853485, + "p99": 4095.263957977295 + }, + "roundtrip": { + "p50": 275.39199590682983, + "p90": 287.58400678634644, + "p95": 298.880010843277, + "p99": 5532.320022583008 + }, + "isolatedSum": { + "p50": 267.0399993658066, + "p90": 
282.6560065150261, + "p95": 292.03200340270996, + "p99": 8401.504039764404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 203.80799472332, + "p90": 213.21600675582886, + "p95": 222.20799326896667, + "p99": 5707.104206085205 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 77.82399654388428, + "p95": 79.9039974808693, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 271.93599939346313, + "p90": 284.89598631858826, + "p95": 297.69599437713623, + "p99": 5189.727783203125 + }, + "isolatedSum": { + "p50": 277.50399708747864, + "p90": 291.04000329971313, + "p95": 302.11199074983597, + "p99": 5805.600203573704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 230.30400276184082, + "p90": 236.95999383926392, + "p95": 240.03200232982635, + "p99": 255.42399287223816 + }, + "combine": { + "p50": 91.77599847316742, + "p90": 97.95200079679489, + "p95": 2228.543996810913, + "p99": 4884.607791900635 + }, + "roundtrip": { + "p50": 306.0480058193207, + "p90": 315.744012594223, + "p95": 331.29599690437317, + "p99": 4707.007884979248 + }, + "isolatedSum": { + "p50": 322.08000123500824, + "p90": 334.9119946360588, + "p95": 2468.5759991407394, + "p99": 5140.031784772873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 
512, + "dispatch": { + "p50": 246.97600305080414, + "p90": 266.33599400520325, + "p95": 328.73600721359253, + "p99": 3860.991954803467 + }, + "combine": { + "p50": 129.69599664211273, + "p90": 134.88000631332397, + "p95": 140.25600254535675, + "p99": 3453.471899032593 + }, + "roundtrip": { + "p50": 363.20000886917114, + "p90": 374.5279908180237, + "p95": 385.53598523139954, + "p99": 4526.400089263916 + }, + "isolatedSum": { + "p50": 376.67199969291687, + "p90": 401.2160003185272, + "p95": 468.9920097589493, + "p99": 7314.46385383606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 249.82400238513947, + "p90": 261.53600215911865, + "p95": 274.52799677848816, + "p99": 4944.575786590576 + }, + "combine": { + "p50": 180.67200481891632, + "p90": 186.3040030002594, + "p95": 189.53600525856018, + "p99": 3743.1039810180664 + }, + "roundtrip": { + "p50": 414.91198539733887, + "p90": 426.7520010471344, + "p95": 486.5280091762543, + "p99": 4090.3358459472656 + }, + "isolatedSum": { + "p50": 430.4960072040558, + "p90": 447.84000515937805, + "p95": 464.06400203704834, + "p99": 8687.679767608643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96e61c5a", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_231adb37", + "comparisonKey": "6202e2d8131eb13d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:58.577513+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 248.54399263858795, + "p90": 257.85601139068604, + "p95": 275.84001421928406, + "p99": 2818.9759254455566 + }, + "combine": { + "p50": 180.63999712467194, + "p90": 187.42400407791138, + "p95": 225.40800273418427, + "p99": 
3686.079978942871 + }, + "roundtrip": { + "p50": 415.74400663375854, + "p90": 427.8079867362976, + "p95": 577.4719715118408, + "p99": 3902.048110961914 + }, + "isolatedSum": { + "p50": 429.1839897632599, + "p90": 445.2800154685974, + "p95": 501.2480169534683, + "p99": 6505.055904388428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 276.06400847435, + "p90": 288.06400299072266, + "p95": 294.20799016952515, + "p99": 2533.5679054260254 + }, + "combine": { + "p50": 310.14400720596313, + "p90": 321.696013212204, + "p95": 1777.3760557174683, + "p99": 3418.911933898926 + }, + "roundtrip": { + "p50": 570.7520246505737, + "p90": 587.7439975738525, + "p95": 2255.199909210205, + "p99": 3413.952112197876 + }, + "isolatedSum": { + "p50": 586.2080156803131, + "p90": 609.7600162029266, + "p95": 2071.5840458869934, + "p99": 5952.479839324951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 324.12800192832947, + "p90": 384.2560052871704, + "p95": 476.608008146286, + "p99": 3439.6800994873047 + }, + "combine": { + "p50": 606.1760187149048, + "p90": 727.4240255355835, + "p95": 1419.2639589309692, + "p99": 2458.1758975982666 + }, + "roundtrip": { + "p50": 920.1599955558777, + "p90": 958.4000110626221, + "p95": 2130.4640769958496, + "p99": 2833.951950073242 + }, + "isolatedSum": { + "p50": 930.3040206432343, + "p90": 1111.680030822754, + "p95": 1895.8719670772552, + "p99": 5897.855997085571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 4198, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 480.0640046596527, + "p90": 500.38397312164307, + "p95": 620.959997177124, + "p99": 2692.1920776367188 + }, + "combine": { + "p50": 1151.4559984207153, + "p90": 1176.543951034546, + "p95": 1327.7440071105957, + "p99": 1879.0719509124756 + }, + "roundtrip": { + "p50": 1610.7200384140015, + "p90": 1675.935983657837, + "p95": 2173.151969909668, + "p99": 2822.943925857544 + }, + "isolatedSum": { + "p50": 1631.520003080368, + "p90": 1676.927924156189, + "p95": 1948.7040042877197, + "p99": 4571.264028549194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8294, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 681.984007358551, + "p90": 742.0160174369812, + "p95": 1745.9839582443237, + "p99": 2493.1840896606445 + }, + "combine": { + "p50": 2224.3199348449707, + "p90": 2234.6880435943604, + "p95": 2239.3600940704346, + "p99": 2271.2960243225098 + }, + "roundtrip": { + "p50": 2881.3118934631348, + "p90": 2923.840045928955, + "p95": 2983.135938644409, + "p99": 3381.4079761505127 + }, + "isolatedSum": { + "p50": 2906.3039422035217, + "p90": 2976.7040610313416, + "p95": 3985.3440523147583, + "p99": 4764.480113983154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1080.2240371704102, + "p90": 1229.2799949645996, + "p95": 
1659.9680185317993, + "p99": 2006.7839622497559 + }, + "combine": { + "p50": 4362.94412612915, + "p90": 4375.072002410889, + "p95": 4385.7598304748535, + "p99": 4449.471950531006 + }, + "roundtrip": { + "p50": 5416.48006439209, + "p90": 5437.82377243042, + "p95": 5448.8959312438965, + "p99": 5562.751770019531 + }, + "isolatedSum": { + "p50": 5443.168163299561, + "p90": 5604.351997375488, + "p95": 6045.727849006653, + "p99": 6456.255912780762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 32967, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-39643b11", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b300_087d0321", + "comparisonKey": "10b720223748125c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:23.721496+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 251.0719895362854, + "p90": 260.70401072502136, + "p95": 272.96000719070435, + "p99": 2355.679988861084 + }, + "combine": { + "p50": 180.54400384426117, + "p90": 189.60000574588776, + "p95": 330.52799105644226, + "p99": 3737.4401092529297 + }, + "roundtrip": { + "p50": 414.68799114227295, + "p90": 426.2720048427582, + "p95": 2455.3279876708984, + "p99": 4067.1682357788086 + }, + "isolatedSum": { + "p50": 431.61599338054657, + "p90": 450.3040164709091, + "p95": 603.4879982471466, + "p99": 6093.120098114014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 274.2399871349335, + "p90": 284.5759987831116, + "p95": 297.2800135612488, + "p99": 2504.4798851013184 + }, + "combine": { + "p50": 308.80001187324524, + "p90": 319.10398602485657, + "p95": 529.3440222740173, + "p99": 3043.9040660858154 + }, + "roundtrip": { + "p50": 567.520022392273, + "p90": 589.631974697113, + "p95": 
1369.3439960479736, + "p99": 3316.7359828948975 + }, + "isolatedSum": { + "p50": 583.0399990081787, + "p90": 603.6799848079681, + "p95": 826.6240358352661, + "p99": 5548.383951187134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 323.64800572395325, + "p90": 336.16000413894653, + "p95": 350.14399886131287, + "p99": 3634.3679428100586 + }, + "combine": { + "p50": 605.9200167655945, + "p90": 832.7999711036682, + "p95": 1420.7040071487427, + "p99": 2553.5359382629395 + }, + "roundtrip": { + "p50": 916.6079759597778, + "p90": 984.2560291290283, + "p95": 2262.079954147339, + "p99": 2835.3281021118164 + }, + "isolatedSum": { + "p50": 929.5680224895477, + "p90": 1168.9599752426147, + "p95": 1770.8480060100555, + "p99": 6187.903881072998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 485.3120148181915, + "p90": 502.1759867668152, + "p95": 574.6240019798279, + "p99": 2810.59193611145 + }, + "combine": { + "p50": 1149.9520540237427, + "p90": 1174.2719411849976, + "p95": 1231.2959432601929, + "p99": 1751.7759799957275 + }, + "roundtrip": { + "p50": 1614.9120330810547, + "p90": 1703.0080556869507, + "p95": 2299.5519638061523, + "p99": 2826.8160820007324 + }, + "isolatedSum": { + "p50": 1635.2640688419342, + "p90": 1676.4479279518127, + "p95": 1805.9199452400208, + "p99": 4562.367916107178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 
7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 681.6959977149963, + "p90": 733.951985836029, + "p95": 1072.383999824524, + "p99": 2444.3199634552 + }, + "combine": { + "p50": 2223.328113555908, + "p90": 2235.4559898376465, + "p95": 2243.936061859131, + "p99": 2288.640022277832 + }, + "roundtrip": { + "p50": 2880.8960914611816, + "p90": 2951.551914215088, + "p95": 3246.1440563201904, + "p99": 3815.5200481414795 + }, + "isolatedSum": { + "p50": 2905.0241112709045, + "p90": 2969.4079756736755, + "p95": 3316.320061683655, + "p99": 4732.959985733032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1078.8160562515259, + "p90": 1240.831971168518, + "p95": 1608.8000535964966, + "p99": 2012.3519897460938 + }, + "combine": { + "p50": 4360.544204711914, + "p90": 4372.543811798096, + "p95": 4376.927852630615, + "p99": 4414.495944976807 + }, + "roundtrip": { + "p50": 5415.167808532715, + "p90": 5443.488121032715, + "p95": 5466.527938842773, + "p99": 5647.264003753662 + }, + "isolatedSum": { + "p50": 5439.36026096344, + "p90": 5613.375782966614, + "p95": 5985.727906227112, + "p99": 6426.8479347229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-142e8635", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_b2980f58", + "comparisonKey": "6083d0e3890dfb1b", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:08:35.575762+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 252.22399830818176, + "p90": 263.10399174690247, + "p95": 268.15998554229736, + "p99": 2860.192060470581 + }, 
+ "combine": { + "p50": 180.54400384426117, + "p90": 187.99999356269836, + "p95": 245.60000002384186, + "p99": 4248.608112335205 + }, + "roundtrip": { + "p50": 415.8720076084137, + "p90": 429.24800515174866, + "p95": 467.51999855041504, + "p99": 4176.896095275879 + }, + "isolatedSum": { + "p50": 432.76800215244293, + "p90": 451.10398530960083, + "p95": 513.7599855661392, + "p99": 7108.800172805786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 331.7759931087494, + "p90": 348.4480082988739, + "p95": 392.35201478004456, + "p99": 2805.9520721435547 + }, + "combine": { + "p50": 606.2080264091492, + "p90": 658.5919857025146, + "p95": 1450.2719640731812, + "p99": 2651.871919631958 + }, + "roundtrip": { + "p50": 922.3039746284485, + "p90": 942.3679709434509, + "p95": 2106.4319610595703, + "p99": 3030.1759243011475 + }, + "isolatedSum": { + "p50": 937.9840195178986, + "p90": 1007.0399940013885, + "p95": 1842.6239788532257, + "p99": 5457.823991775513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 686.8799924850464, + "p90": 743.0400252342224, + "p95": 1715.6800031661987, + "p99": 2443.648099899292 + }, + "combine": { + "p50": 2224.3199348449707, + "p90": 2238.6560440063477, + "p95": 2245.055913925171, + "p99": 2258.3680152893066 + }, + "roundtrip": { + "p50": 2890.4640674591064, + "p90": 2933.311939239502, + "p95": 3065.920114517212, + "p99": 3392.575979232788 + }, + "isolatedSum": { + "p50": 2911.199927330017, + "p90": 2981.69606924057, + "p95": 
3960.7359170913696, + "p99": 4702.016115188599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-15efea94", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b300_6b9be1ad", + "comparisonKey": "c05d4e20dcd5bcbe", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:19.035968+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 249.08800423145294, + "p90": 257.1839988231659, + "p95": 263.71198892593384, + "p99": 2201.98392868042 + }, + "combine": { + "p50": 183.4239959716797, + "p90": 190.8479928970337, + "p95": 228.19200158119202, + "p99": 3351.167917251587 + }, + "roundtrip": { + "p50": 418.94400119781494, + "p90": 433.6960017681122, + "p95": 504.32002544403076, + "p99": 3357.3760986328125 + }, + "isolatedSum": { + "p50": 432.51200020313263, + "p90": 448.0319917201996, + "p95": 491.90399050712585, + "p99": 5553.151845932007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 277.44001150131226, + "p90": 287.1359884738922, + "p95": 295.9359884262085, + "p99": 2377.2480487823486 + }, + "combine": { + "p50": 319.07200813293457, + "p90": 327.7119994163513, + "p95": 393.40800046920776, + "p99": 2819.7760581970215 + }, + "roundtrip": { + "p50": 576.7679810523987, + "p90": 593.4720039367676, + "p95": 1619.488000869751, + "p99": 3102.4959087371826 + }, + "isolatedSum": { + "p50": 596.5120196342468, + "p90": 614.8479878902435, + "p95": 689.3439888954163, + "p99": 5197.02410697937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 2304, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 337.15200424194336, + "p90": 347.3280072212219, + "p95": 441.3439929485321, + "p99": 2997.920036315918 + }, + "combine": { + "p50": 610.8800172805786, + "p90": 841.5359854698181, + "p95": 1522.4640369415283, + "p99": 2276.671886444092 + }, + "roundtrip": { + "p50": 932.5119853019714, + "p90": 966.3680195808411, + "p95": 1919.1679954528809, + "p99": 2515.0399208068848 + }, + "isolatedSum": { + "p50": 948.032021522522, + "p90": 1188.86399269104, + "p95": 1963.8080298900604, + "p99": 5274.59192276001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 4608, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 500.67198276519775, + "p90": 521.727979183197, + "p95": 865.3119802474976, + "p99": 2285.759925842285 + }, + "combine": { + "p50": 1162.1119976043701, + "p90": 1175.1680374145508, + "p95": 1205.4400444030762, + "p99": 1732.5760126113892 + }, + "roundtrip": { + "p50": 1643.839955329895, + "p90": 1694.0799951553345, + "p95": 2174.015998840332, + "p99": 2552.191972732544 + }, + "isolatedSum": { + "p50": 1662.7839803695679, + "p90": 1696.8960165977478, + "p95": 2070.7520246505737, + "p99": 4018.3359384536743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 9216, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 708.3839774131775, + "p90": 767.4559950828552, + "p95": 1524.351954460144, + "p99": 2358.783960342407 + }, + "combine": { + "p50": 2251.6160011291504, + "p90": 2261.728048324585, + "p95": 2267.0719623565674, + "p99": 2291.4559841156006 + }, + "roundtrip": { + 
"p50": 2936.3200664520264, + "p90": 2984.800100326538, + "p95": 3042.7839756011963, + "p99": 3349.7281074523926 + }, + "isolatedSum": { + "p50": 2959.999978542328, + "p90": 3029.18404340744, + "p95": 3791.4239168167114, + "p99": 4650.239944458008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 18432, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1133.952021598816, + "p90": 1285.7279777526855, + "p95": 1547.8399991989136, + "p99": 2076.256036758423 + }, + "combine": { + "p50": 4423.8080978393555, + "p90": 4435.935974121094, + "p95": 4440.256118774414, + "p99": 4450.9758949279785 + }, + "roundtrip": { + "p50": 5527.455806732178, + "p90": 5545.631885528564, + "p95": 5553.120136260986, + "p99": 5617.824077606201 + }, + "isolatedSum": { + "p50": 5557.760119438171, + "p90": 5721.663951873779, + "p95": 5988.096117973328, + "p99": 6527.231931686401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 36864, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3a37756d", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b300_942c0ff7", + "comparisonKey": "380979897204840b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:26.686361+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 256.76798820495605, + "p90": 262.2399926185608, + "p95": 265.53601026535034, + "p99": 2408.1599712371826 + }, + "combine": { + "p50": 187.77599930763245, + "p90": 195.93599438667297, + "p95": 215.71199595928192, + "p99": 3231.872081756592 + }, + "roundtrip": { + "p50": 448.15999269485474, + "p90": 456.1919867992401, + "p95": 778.7200212478638, + "p99": 3495.8720207214355 + }, + "isolatedSum": { + "p50": 444.5439875125885, + "p90": 458.17598700523376, + "p95": 481.24800622463226, + "p99": 5640.032052993774 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 307.0079982280731, + "p90": 314.04799222946167, + "p95": 342.9119884967804, + "p99": 2678.368091583252 + }, + "combine": { + "p50": 327.13600993156433, + "p90": 340.60800075531006, + "p95": 1687.4879598617554, + "p99": 2537.4720096588135 + }, + "roundtrip": { + "p50": 639.2959952354431, + "p90": 646.3680267333984, + "p95": 717.8879976272583, + "p99": 2900.928020477295 + }, + "isolatedSum": { + "p50": 634.1440081596375, + "p90": 654.6559929847717, + "p95": 2030.3999483585358, + "p99": 5215.840101242065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 3755, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 386.24000549316406, + "p90": 395.61599493026733, + "p95": 564.2880201339722, + "p99": 3033.1199169158936 + }, + "combine": { + "p50": 650.8479714393616, + "p90": 665.3760075569153, + "p95": 1042.8160429000854, + "p99": 1611.5200519561768 + }, + "roundtrip": { + "p50": 1040.287971496582, + "p90": 1133.8880062103271, + "p95": 2080.415964126587, + "p99": 2474.4958877563477 + }, + "isolatedSum": { + "p50": 1037.0879769325256, + "p90": 1060.9920024871826, + "p95": 1607.1040630340576, + "p99": 4644.63996887207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 7556, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 606.3680052757263, + 
"p90": 620.2880144119263, + "p95": 1558.6559772491455, + "p99": 2275.2959728240967 + }, + "combine": { + "p50": 1249.5360374450684, + "p90": 1257.0559978485107, + "p95": 1260.256052017212, + "p99": 1543.4880256652832 + }, + "roundtrip": { + "p50": 1827.6159763336182, + "p90": 1881.2479972839355, + "p95": 2239.2959594726562, + "p99": 2503.0720233917236 + }, + "isolatedSum": { + "p50": 1855.9040427207947, + "p90": 1877.344012260437, + "p95": 2818.9120292663574, + "p99": 3818.78399848938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 15163, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 911.1359715461731, + "p90": 980.6399941444397, + "p95": 1438.7520551681519, + "p99": 1983.0080270767212 + }, + "combine": { + "p50": 2431.071996688843, + "p90": 2439.743995666504, + "p95": 2442.3680305480957, + "p99": 2448.256015777588 + }, + "roundtrip": { + "p50": 3352.3519039154053, + "p90": 3376.09601020813, + "p95": 3389.087915420532, + "p99": 3553.119897842407 + }, + "isolatedSum": { + "p50": 3342.207968235016, + "p90": 3420.3839898109436, + "p95": 3881.1200857162476, + "p99": 4431.264042854309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 30215, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1532.7039957046509, + "p90": 1576.9920349121094, + "p95": 1680.351972579956, + "p99": 1890.7840251922607 + }, + "combine": { + "p50": 4793.983936309814, + "p90": 4812.4799728393555, + "p95": 4816.415786743164, + "p99": 4827.968120574951 + }, + "roundtrip": { + "p50": 6426.8798828125, + "p90": 6445.47176361084, + "p95": 6453.567981719971, + 
"p99": 6493.023872375488 + }, + "isolatedSum": { + "p50": 6326.687932014465, + "p90": 6389.472007751465, + "p95": 6496.76775932312, + "p99": 6718.752145767212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 60512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-749a558c", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b300_f1f50ec7", + "comparisonKey": "757f604dcb402a77", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:39.753789+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 247.93599545955658, + "p90": 255.64798712730408, + "p95": 276.6079902648926, + "p99": 2649.3759155273438 + }, + "combine": { + "p50": 181.21600151062012, + "p90": 189.18399512767792, + "p95": 296.28801345825195, + "p99": 3125.5359649658203 + }, + "roundtrip": { + "p50": 414.7840142250061, + "p90": 424.47999119758606, + "p95": 459.1679871082306, + "p99": 3627.8719902038574 + }, + "isolatedSum": { + "p50": 429.1519969701767, + "p90": 444.831982254982, + "p95": 572.8960037231445, + "p99": 5774.911880493164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 1080, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 274.97598528862, + "p90": 283.90398621559143, + "p95": 288.7679934501648, + "p99": 2781.8241119384766 + }, + "combine": { + "p50": 309.28000807762146, + "p90": 318.1439936161041, + "p95": 1343.4879779815674, + "p99": 3089.951992034912 + }, + "roundtrip": { + "p50": 570.688009262085, + "p90": 584.2239856719971, + "p95": 2122.7519512176514, + "p99": 3127.648115158081 + }, + "isolatedSum": { + "p50": 584.2559933662415, + "p90": 602.0479798316956, + "p95": 1632.2559714317322, + "p99": 5871.776103973389 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 2102, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 322.04800844192505, + "p90": 332.3200047016144, + "p95": 346.3360071182251, + "p99": 3048.543930053711 + }, + "combine": { + "p50": 605.6640148162842, + "p90": 627.0400285720825, + "p95": 1240.5760288238525, + "p99": 2237.152099609375 + }, + "roundtrip": { + "p50": 918.0480241775513, + "p90": 960.7999920845032, + "p95": 2020.576000213623, + "p99": 2571.743965148926 + }, + "isolatedSum": { + "p50": 927.7120232582092, + "p90": 959.3600332736969, + "p95": 1586.9120359420776, + "p99": 5285.696029663086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 4207, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 480.4159998893738, + "p90": 492.41599440574646, + "p95": 556.0960173606873, + "p99": 2544.703960418701 + }, + "combine": { + "p50": 1150.8480310440063, + "p90": 1175.2640008926392, + "p95": 1314.560055732727, + "p99": 1757.7600479125977 + }, + "roundtrip": { + "p50": 1612.5760078430176, + "p90": 1669.119954109192, + "p95": 2028.6080837249756, + "p99": 2762.9120349884033 + }, + "isolatedSum": { + "p50": 1631.2640309333801, + "p90": 1667.6799952983856, + "p95": 1870.6560730934143, + "p99": 4302.464008331299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8365, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 
679.4880032539368, + "p90": 745.7280158996582, + "p95": 1769.7279453277588, + "p99": 2286.1759662628174 + }, + "combine": { + "p50": 2224.287986755371, + "p90": 2232.703924179077, + "p95": 2238.879919052124, + "p99": 2275.871992111206 + }, + "roundtrip": { + "p50": 2879.8398971557617, + "p90": 2927.295923233032, + "p95": 3045.504093170166, + "p99": 3407.968044281006 + }, + "isolatedSum": { + "p50": 2903.775990009308, + "p90": 2978.4319400787354, + "p95": 4008.607864379883, + "p99": 4562.047958374023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 16483, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1084.3199491500854, + "p90": 1228.7039756774902, + "p95": 1448.6720561981201, + "p99": 1870.6239461898804 + }, + "combine": { + "p50": 4362.175941467285, + "p90": 4375.1678466796875, + "p95": 4388.192176818848, + "p99": 4493.919849395752 + }, + "roundtrip": { + "p50": 5416.128158569336, + "p90": 5441.63179397583, + "p95": 5461.503982543945, + "p99": 5563.168048858643 + }, + "isolatedSum": { + "p50": 5446.495890617371, + "p90": 5603.871822357178, + "p95": 5836.864233016968, + "p99": 6364.543795585632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 32777, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-575a2291", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b300_44808587", + "comparisonKey": "f7c935cdbca8531a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:52.753517+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 249.4720071554184, + "p90": 258.39999318122864, + "p95": 266.1440074443817, + "p99": 2446.239948272705 + }, + "combine": { + "p50": 180.31999468803406, + "p90": 185.69600582122803, + "p95": 
189.4720047712326, + "p99": 3388.9598846435547 + }, + "roundtrip": { + "p50": 415.6480133533478, + "p90": 427.0080029964447, + "p95": 567.9360032081604, + "p99": 3615.936040878296 + }, + "isolatedSum": { + "p50": 429.79200184345245, + "p90": 444.09599900245667, + "p95": 455.6160122156143, + "p99": 5835.19983291626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 1064, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 277.69601345062256, + "p90": 288.672000169754, + "p95": 310.14400720596313, + "p99": 2257.6639652252197 + }, + "combine": { + "p50": 311.64801120758057, + "p90": 324.8000144958496, + "p95": 1730.720043182373, + "p99": 3134.079933166504 + }, + "roundtrip": { + "p50": 572.7360248565674, + "p90": 585.2800011634827, + "p95": 2191.135883331299, + "p99": 3098.047971725464 + }, + "isolatedSum": { + "p50": 589.3440246582031, + "p90": 613.4720146656036, + "p95": 2040.8640503883362, + "p99": 5391.743898391724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 2081, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 324.6079981327057, + "p90": 336.09598875045776, + "p95": 350.49599409103394, + "p99": 3247.3599910736084 + }, + "combine": { + "p50": 606.2399744987488, + "p90": 657.4400067329407, + "p95": 1340.8639430999756, + "p99": 2808.448076248169 + }, + "roundtrip": { + "p50": 920.2560186386108, + "p90": 947.488009929657, + "p95": 2043.936014175415, + "p99": 2842.3359394073486 + }, + "isolatedSum": { + "p50": 930.8479726314545, + "p90": 993.5359954833984, + "p95": 1691.3599371910095, + "p99": 6055.808067321777 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 4153, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 482.015997171402, + "p90": 505.72800636291504, + "p95": 1254.431962966919, + "p99": 2715.0719165802 + }, + "combine": { + "p50": 1151.4240503311157, + "p90": 1171.2960004806519, + "p95": 1268.3199644088745, + "p99": 1677.791953086853 + }, + "roundtrip": { + "p50": 1615.1679754257202, + "p90": 1671.0400581359863, + "p95": 2136.8958950042725, + "p99": 2764.6400928497314 + }, + "isolatedSum": { + "p50": 1633.4400475025177, + "p90": 1677.024006843567, + "p95": 2522.7519273757935, + "p99": 4392.863869667053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8313, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 683.3599805831909, + "p90": 738.6239767074585, + "p95": 1349.2480516433716, + "p99": 2510.240077972412 + }, + "combine": { + "p50": 2225.856065750122, + "p90": 2237.1199131011963, + "p95": 2247.040033340454, + "p99": 2280.6079387664795 + }, + "roundtrip": { + "p50": 2885.0879669189453, + "p90": 2926.815986633301, + "p95": 3044.287919998169, + "p99": 3400.576114654541 + }, + "isolatedSum": { + "p50": 2909.216046333313, + "p90": 2975.743889808655, + "p95": 3596.2880849838257, + "p99": 4790.848016738892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 16581, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1082.3359489440918, + "p90": 
1258.0159902572632, + "p95": 1614.0799522399902, + "p99": 1935.03999710083 + }, + "combine": { + "p50": 4360.735893249512, + "p90": 4375.103950500488, + "p95": 4380.512237548828, + "p99": 4410.560131072998 + }, + "roundtrip": { + "p50": 5413.568019866943, + "p90": 5438.176155090332, + "p95": 5458.303928375244, + "p99": 5598.336219787598 + }, + "isolatedSum": { + "p50": 5443.0718421936035, + "p90": 5633.1199407577515, + "p95": 5994.592189788818, + "p99": 6345.600128173828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 32887, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-36ec925a", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_9d63e9b2", + "comparisonKey": "c3550f4b44711480", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:12.957572+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": 
"backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 332.96000957489014, + "p90": 344.1599905490875, + "p95": 686.5919828414917, + "p99": 2343.456029891968 + }, + "combine": { + "p50": 242.65600740909576, + "p90": 249.66399371623993, + "p95": 258.62398743629456, + "p99": 2363.935947418213 + }, + "roundtrip": { + "p50": 576.416015625, + "p90": 584.3520164489746, + "p95": 635.6160044670105, + "p99": 3012.415885925293 + }, + "isolatedSum": { + "p50": 575.6160169839859, + "p90": 593.8239842653275, + "p95": 945.2159702777863, + "p99": 4707.391977310181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 427.13600397109985, + "p90": 439.32801485061646, + "p95": 976.1279821395874, + "p99": 2557.055950164795 + }, + "combine": { + "p50": 440.5759871006012, + "p90": 453.92000675201416, + "p95": 1345.855951309204, + "p99": 1838.7839794158936 + }, + "roundtrip": { + "p50": 864.6079897880554, + "p90": 896.7040181159973, + 
"p95": 2053.6320209503174, + "p99": 2523.008108139038 + }, + "isolatedSum": { + "p50": 867.711991071701, + "p90": 893.2480216026306, + "p95": 2321.9839334487915, + "p99": 4395.8399295806885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 614.5600080490112, + "p90": 700.9279727935791, + "p95": 1591.1680459976196, + "p99": 2152.8639793395996 + }, + "combine": { + "p50": 862.559974193573, + "p90": 869.8880076408386, + "p95": 878.4639835357666, + "p99": 1151.8399715423584 + }, + "roundtrip": { + "p50": 1473.7600088119507, + "p90": 1554.368019104004, + "p95": 1932.1279525756836, + "p99": 2371.74391746521 + }, + "isolatedSum": { + "p50": 1477.1199822425842, + "p90": 1570.8159804344177, + "p95": 2469.6320295333862, + "p99": 3304.703950881958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1053.1200170516968, + "p90": 1141.9199705123901, + "p95": 1273.8239765167236, + "p99": 1632.159948348999 + }, + "combine": { + "p50": 1669.75998878479, + "p90": 1677.888035774231, + "p95": 1681.2160015106201, + "p99": 1692.2880411148071 + }, + "roundtrip": { + "p50": 2741.4400577545166, + "p90": 2757.535934448242, + "p95": 2771.1360454559326, + "p99": 2876.607894897461 + }, + "isolatedSum": { + "p50": 2722.880005836487, + "p90": 2819.808006286621, + "p95": 2955.0399780273438, + "p99": 3324.447989463806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + 
"recvTokensMax": 40211, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1800.4800081253052, + "p90": 1818.8799619674683, + "p95": 1831.3599824905396, + "p99": 1920.3519821166992 + }, + "combine": { + "p50": 3340.672016143799, + "p90": 3352.0960807800293, + "p95": 3356.1599254608154, + "p99": 3365.6959533691406 + }, + "roundtrip": { + "p50": 5114.6559715271, + "p90": 5132.575988769531, + "p95": 5143.775939941406, + "p99": 5657.536029815674 + }, + "isolatedSum": { + "p50": 5141.152024269104, + "p90": 5170.976042747498, + "p95": 5187.519907951355, + "p99": 5286.04793548584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3314.8159980773926, + "p90": 3333.4081172943115, + "p95": 3338.815927505493, + "p99": 3357.343912124634 + }, + "combine": { + "p50": 6583.136081695557, + "p90": 6596.000194549561, + "p95": 6600.319862365723, + "p99": 6613.696098327637 + }, + "roundtrip": { + "p50": 9852.67162322998, + "p90": 9869.407653808594, + "p95": 9876.704216003418, + "p99": 9890.368461608887 + }, + "isolatedSum": { + "p50": 9897.95207977295, + "p90": 9929.408311843872, + "p95": 9939.135789871216, + "p99": 9971.04001045227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9f2c0940", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b300_c32a6d58", + 
"comparisonKey": "80c666b5c6a682e1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:59.050080+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 382.207989692688, + "p90": 392.2240138053894, + "p95": 
1387.1040344238281, + "p99": 2245.0881004333496 + }, + "combine": { + "p50": 288.4159982204437, + "p90": 296.06398940086365, + "p95": 1485.8239889144897, + "p99": 2130.9759616851807 + }, + "roundtrip": { + "p50": 662.4959707260132, + "p90": 670.5600023269653, + "p95": 733.2800030708313, + "p99": 2679.935932159424 + }, + "isolatedSum": { + "p50": 670.6239879131317, + "p90": 688.288003206253, + "p95": 2872.928023338318, + "p99": 4376.06406211853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 520.1280117034912, + "p90": 555.9679865837097, + "p95": 1031.391978263855, + "p99": 2216.12811088562 + }, + "combine": { + "p50": 528.0640125274658, + "p90": 539.903998374939, + "p95": 793.2159900665283, + "p99": 1223.0720520019531 + }, + "roundtrip": { + "p50": 1035.9359979629517, + "p90": 1054.4320344924927, + "p95": 1575.808048248291, + "p99": 2408.672094345093 + }, + "isolatedSum": { + "p50": 1048.192024230957, + "p90": 1095.8719849586487, + "p95": 1824.6079683303833, + "p99": 3439.2001628875732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 15151, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 793.2479977607727, + "p90": 825.8240222930908, + "p95": 1180.2560091018677, + "p99": 1876.863956451416 + }, + "combine": { + "p50": 1035.4880094528198, + "p90": 1042.9760217666626, + "p95": 1045.4720258712769, + "p99": 1065.4079914093018 + }, + "roundtrip": { + "p50": 1819.3600177764893, + "p90": 1834.5600366592407, + "p95": 1898.3999490737915, + "p99": 2210.400104522705 + }, + "isolatedSum": { + 
"p50": 1828.7360072135925, + "p90": 1868.8000440597534, + "p95": 2225.7280349731445, + "p99": 2942.271947860718 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 30290, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1410.3679656982422, + "p90": 1447.0080137252808, + "p95": 1499.2320537567139, + "p99": 1702.3040056228638 + }, + "combine": { + "p50": 2015.199899673462, + "p90": 2024.3840217590332, + "p95": 2028.7361145019531, + "p99": 2037.343978881836 + }, + "roundtrip": { + "p50": 3418.5280799865723, + "p90": 3433.120012283325, + "p95": 3447.999954223633, + "p99": 3476.4161109924316 + }, + "isolatedSum": { + "p50": 3425.567865371704, + "p90": 3471.392035484314, + "p95": 3527.968168258667, + "p99": 3739.6479845046997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 60548, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 2538.815975189209, + "p90": 2554.6560287475586, + "p95": 2563.136100769043, + "p99": 2667.3920154571533 + }, + "combine": { + "p50": 4052.9918670654297, + "p90": 4065.152168273926, + "p95": 4069.119930267334, + "p99": 4078.9761543273926 + }, + "roundtrip": { + "p50": 6570.8160400390625, + "p90": 6586.6241455078125, + "p95": 6596.03214263916, + "p99": 6622.015953063965 + }, + "isolatedSum": { + "p50": 6591.807842254639, + "p90": 6619.808197021484, + "p95": 6632.256031036377, + "p99": 6746.368169784546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 121046, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 4784.992218017578, + "p90": 4795.904159545898, + "p95": 4800.096035003662, + "p99": 4814.432144165039 + }, + "combine": { + "p50": 8044.12841796875, + "p90": 8060.735702514648, + "p95": 8065.823554992676, + "p99": 8078.144073486328 + }, + "roundtrip": { + "p50": 12812.064170837402, + "p90": 12831.040382385254, + "p95": 12840.415954589844, + "p99": 12893.119812011719 + }, + "isolatedSum": { + "p50": 12829.120635986328, + "p90": 12856.639862060547, + "p95": 12865.919589996338, + "p99": 12892.576217651367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 242154, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a4709d03", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b300_268c1336", + "comparisonKey": "a0cf6dd6b0b9b4b8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:13.246445+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 250.04801154136658, + "p90": 260.3839933872223, + "p95": 292.06401109695435, + "p99": 2561.631917953491 + }, + "combine": { + "p50": 179.61600422859192, + "p90": 184.12800133228302, + "p95": 188.28800320625305, + "p99": 3151.2320041656494 + }, + "roundtrip": { + "p50": 414.97600078582764, + "p90": 426.2399971485138, + "p95": 524.6400237083435, + "p99": 3932.0640563964844 + }, + "isolatedSum": { + "p50": 429.6640157699585, + "p90": 444.5119947195053, + "p95": 480.3520143032074, + "p99": 5712.863922119141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 1049, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 277.1199941635132, + "p90": 287.9999876022339, + "p95": 
292.86399483680725, + "p99": 1717.2800302505493 + }, + "combine": { + "p50": 309.9839985370636, + "p90": 323.64800572395325, + "p95": 731.3600182533264, + "p99": 3241.9519424438477 + }, + "roundtrip": { + "p50": 571.2000131607056, + "p90": 592.0000076293945, + "p95": 1848.9279747009277, + "p99": 3084.415912628174 + }, + "isolatedSum": { + "p50": 587.1039927005768, + "p90": 611.6479933261871, + "p95": 1024.2240130901337, + "p99": 4959.231972694397 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 2084, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 326.1120021343231, + "p90": 337.69598603248596, + "p95": 345.5039858818054, + "p99": 2544.6720123291016 + }, + "combine": { + "p50": 605.4720282554626, + "p90": 628.928005695343, + "p95": 1118.623971939087, + "p99": 1759.0399980545044 + }, + "roundtrip": { + "p50": 921.1519956588745, + "p90": 944.8959827423096, + "p95": 1935.3920221328735, + "p99": 2664.2239093780518 + }, + "isolatedSum": { + "p50": 931.5840303897858, + "p90": 966.623991727829, + "p95": 1464.1279578208923, + "p99": 4303.712010383606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 4126, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 482.1760058403015, + "p90": 503.39198112487793, + "p95": 1217.9839611053467, + "p99": 2571.2320804595947 + }, + "combine": { + "p50": 1150.1120328903198, + "p90": 1182.752013206482, + "p95": 1374.4640350341797, + "p99": 1799.1039752960205 + }, + "roundtrip": { + "p50": 1612.4160289764404, + "p90": 1879.647970199585, + "p95": 2298.3040809631348, + "p99": 2625.152111053467 + }, + 
"isolatedSum": { + "p50": 1632.2880387306213, + "p90": 1686.1439943313599, + "p95": 2592.4479961395264, + "p99": 4370.336055755615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8234, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 680.3200244903564, + "p90": 727.9999852180481, + "p95": 1390.4320001602173, + "p99": 2403.9039611816406 + }, + "combine": { + "p50": 2224.6079444885254, + "p90": 2234.9119186401367, + "p95": 2242.1441078186035, + "p99": 2280.479907989502 + }, + "roundtrip": { + "p50": 2881.56795501709, + "p90": 2922.976016998291, + "p95": 3081.120014190674, + "p99": 3361.3760471343994 + }, + "isolatedSum": { + "p50": 2904.927968978882, + "p90": 2962.911903858185, + "p95": 3632.576107978821, + "p99": 4684.383869171143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 16480, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1083.6479663848877, + "p90": 1207.8399658203125, + "p95": 1460.479974746704, + "p99": 2015.6800746917725 + }, + "combine": { + "p50": 4362.5922203063965, + "p90": 4374.015808105469, + "p95": 4378.047943115234, + "p99": 4395.232200622559 + }, + "roundtrip": { + "p50": 5415.040016174316, + "p90": 5438.752174377441, + "p95": 5452.672004699707, + "p99": 5583.392143249512 + }, + "isolatedSum": { + "p50": 5446.240186691284, + "p90": 5581.855773925781, + "p95": 5838.5279178619385, + "p99": 6410.912275314331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 32889, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-88d58ce0", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b300_05b63373", + "comparisonKey": "7136e13b88918020", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:52.601734+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 278.4639894962311, + "p90": 285.95200181007385, + "p95": 295.5839931964874, + "p99": 2803.2639026641846 + }, + "combine": { + "p50": 201.56799256801605, + "p90": 207.07200467586517, + "p95": 273.79199862480164, + "p99": 3119.9679374694824 + }, + "roundtrip": { + "p50": 485.82398891448975, + "p90": 495.7759976387024, + "p95": 2300.6720542907715, + "p99": 3599.008083343506 + }, + "isolatedSum": { + "p50": 480.03198206424713, + "p90": 493.024006485939, + "p95": 569.3759918212891, + "p99": 5923.231840133667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 339.4559919834137, + "p90": 347.4560081958771, + "p95": 364.03200030326843, + "p99": 2917.59991645813 + }, + "combine": { + "p50": 354.94399070739746, + "p90": 363.3599877357483, + "p95": 1586.624026298523, + "p99": 2609.15207862854 + }, + "roundtrip": { + "p50": 700.3840208053589, + "p90": 711.9680047035217, + "p95": 2245.5999851226807, + "p99": 2786.2401008605957 + }, + "isolatedSum": { + "p50": 694.3999826908112, + "p90": 710.8159959316254, + "p95": 1950.6560266017914, + "p99": 5526.75199508667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 5302, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 443.07199120521545, + "p90": 455.83999156951904, + "p95": 1488.9600276947021, + 
"p99": 2667.743921279907 + }, + "combine": { + "p50": 700.0960111618042, + "p90": 846.7839956283569, + "p95": 1219.9360132217407, + "p99": 1951.4880180358887 + }, + "roundtrip": { + "p50": 1136.415958404541, + "p90": 1216.480016708374, + "p95": 1999.6800422668457, + "p99": 2409.0240001678467 + }, + "isolatedSum": { + "p50": 1143.1680023670197, + "p90": 1302.623987197876, + "p95": 2708.896040916443, + "p99": 4619.231939315796 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 10587, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 715.8719897270203, + "p90": 781.0239791870117, + "p95": 1652.351975440979, + "p99": 2256.160020828247 + }, + "combine": { + "p50": 1335.0080251693726, + "p90": 1341.8879508972168, + "p95": 1345.728039741516, + "p99": 1567.296028137207 + }, + "roundtrip": { + "p50": 2034.3680381774902, + "p90": 2081.1519622802734, + "p95": 2188.6401176452637, + "p99": 2509.82403755188 + }, + "isolatedSum": { + "p50": 2050.880014896393, + "p90": 2122.9119300842285, + "p95": 2998.080015182495, + "p99": 3823.456048965454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 21014, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1115.2960062026978, + "p90": 1298.3360290527344, + "p95": 1466.912031173706, + "p99": 1872.320055961609 + }, + "combine": { + "p50": 2609.6959114074707, + "p90": 2622.015953063965, + "p95": 2626.3039112091064, + "p99": 2637.3119354248047 + }, + "roundtrip": { + "p50": 3789.247989654541, + "p90": 3812.7360343933105, + "p95": 3823.199987411499, + "p99": 3896.512031555176 + }, + "isolatedSum": { + "p50": 
3724.9919176101685, + "p90": 3920.351982116699, + "p95": 4093.2159423828125, + "p99": 4509.631991386414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 41814, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1940.9279823303223, + "p90": 1979.583978652954, + "p95": 1999.0719556808472, + "p99": 2059.040069580078 + }, + "combine": { + "p50": 5244.575977325439, + "p90": 5257.279872894287, + "p95": 5260.5438232421875, + "p99": 5272.736072540283 + }, + "roundtrip": { + "p50": 7209.856033325195, + "p90": 7235.104084014893, + "p95": 7244.192123413086, + "p99": 7274.784088134766 + }, + "isolatedSum": { + "p50": 7185.503959655762, + "p90": 7236.863851547241, + "p95": 7259.615778923035, + "p99": 7331.776142120361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 83417, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c460e5f0", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b300_b05409ab", + "comparisonKey": "74fefe1747166f89", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:43.128647+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + 
"model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 248.3839988708496, + "p90": 256.6080093383789, + "p95": 265.0560140609741, + "p99": 2996.6399669647217 + }, + "combine": { + "p50": 180.38399517536163, + "p90": 186.3040030002594, + "p95": 207.0080041885376, + "p99": 3219.1998958587646 + }, + "roundtrip": { + "p50": 414.62400555610657, + "p90": 424.44801330566406, + "p95": 516.543984413147, + "p99": 3532.5119495391846 + }, + "isolatedSum": { + "p50": 428.76799404621124, + "p90": 442.9120123386383, + "p95": 472.0640182495117, + "p99": 6215.839862823486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 
78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 1067, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 275.2000093460083, + "p90": 283.55199098587036, + "p95": 288.7359857559204, + "p99": 1719.264030456543 + }, + "combine": { + "p50": 311.1039996147156, + "p90": 323.39200377464294, + "p95": 1892.4479484558105, + "p99": 3175.679922103882 + }, + "roundtrip": { + "p50": 569.599986076355, + "p90": 585.5680108070374, + "p95": 2312.096118927002, + "p99": 3244.767904281616 + }, + "isolatedSum": { + "p50": 586.3040089607239, + "p90": 606.9439947605133, + "p95": 2181.183934211731, + "p99": 4894.943952560425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 2097, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 320.76799869537354, + "p90": 331.90399408340454, + "p95": 345.5039858818054, + "p99": 3095.7438945770264 + }, + "combine": { + "p50": 604.4800281524658, + "p90": 623.9680051803589, + "p95": 1284.1600179672241, + "p99": 1993.8559532165527 + }, + "roundtrip": { + "p50": 915.2640104293823, + "p90": 964.0960097312927, + "p95": 1941.1519765853882, + "p99": 2609.055995941162 + }, + "isolatedSum": { + "p50": 925.2480268478394, + "p90": 955.8719992637634, + "p95": 1629.6640038490295, + "p99": 5089.599847793579 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 4163, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 479.2639911174774, + "p90": 495.5199956893921, + "p95": 1314.8479461669922, + "p99": 2620.800018310547 + }, + 
"combine": { + "p50": 1151.8080234527588, + "p90": 1178.272008895874, + "p95": 1332.8640460968018, + "p99": 1738.5599613189697 + }, + "roundtrip": { + "p50": 1611.232042312622, + "p90": 1701.8239498138428, + "p95": 2166.208028793335, + "p99": 2465.5680656433105 + }, + "isolatedSum": { + "p50": 1631.0720145702362, + "p90": 1673.792004585266, + "p95": 2647.711992263794, + "p99": 4359.359979629517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8305, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 681.984007358551, + "p90": 738.8160228729248, + "p95": 1646.239995956421, + "p99": 2375.2639293670654 + }, + "combine": { + "p50": 2225.6319522857666, + "p90": 2235.487937927246, + "p95": 2242.3360347747803, + "p99": 2299.2000579833984 + }, + "roundtrip": { + "p50": 2884.320020675659, + "p90": 2942.944049835205, + "p95": 3053.407907485962, + "p99": 3340.1598930358887 + }, + "isolatedSum": { + "p50": 2907.6159596443176, + "p90": 2974.303960800171, + "p95": 3888.576030731201, + "p99": 4674.463987350464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 16529, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1080.1279544830322, + "p90": 1218.559980392456, + "p95": 1515.071988105774, + "p99": 1932.479977607727 + }, + "combine": { + "p50": 4363.296031951904, + "p90": 4379.231929779053, + "p95": 4384.2878341674805, + "p99": 4431.2639236450195 + }, + "roundtrip": { + "p50": 5417.856216430664, + "p90": 5440.192222595215, + "p95": 5452.383995056152, + "p99": 5610.943794250488 + }, + "isolatedSum": { + "p50": 5443.4239864349365, + "p90": 
5597.791910171509, + "p95": 5899.359822273254, + "p99": 6363.743901252747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 32880, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6b987a5a", + "identity": "b300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_2b72fc28", + "comparisonKey": "065f7724ed2cdc11", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:57.576636+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 338.46399188041687, + "p90": 346.46400809288025, + "p95": 351.936012506485, + "p99": 1810.1439476013184 + }, + "combine": { + "p50": 243.26400458812714, + "p90": 247.93599545955658, + "p95": 263.35999369621277, + "p99": 2701.024055480957 + }, + "roundtrip": { + "p50": 577.0559906959534, + "p90": 589.0880227088928, + "p95": 2291.071891784668, + "p99": 3145.18404006958 + }, + "isolatedSum": { + "p50": 581.727996468544, + "p90": 594.4000035524368, + "p95": 615.2960062026978, + "p99": 4511.168003082275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 434.3999922275543, + "p90": 443.87200474739075, + "p95": 469.31201219558716, + "p99": 2057.055950164795 + }, + "combine": { + "p50": 441.3760006427765, + "p90": 449.91999864578247, + "p95": 967.3600196838379, + "p99": 1881.119966506958 + }, + "roundtrip": { + "p50": 865.5040264129639, + "p90": 884.1599822044373, + "p95": 1925.1199960708618, + "p99": 2581.5041065216064 + }, + "isolatedSum": { + "p50": 875.7759928703308, + "p90": 893.7920033931732, + "p95": 1436.672031879425, + "p99": 3938.175916671753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 620.7039952278137, + "p90": 639.2639875411987, + "p95": 897.4720239639282, + "p99": 2166.719913482666 + }, + "combine": { + "p50": 862.8159761428833, + "p90": 872.4160194396973, + "p95": 951.1359930038452, + "p99": 1241.6640520095825 + }, + "roundtrip": { + "p50": 1474.1439819335938, + "p90": 1506.5280199050903, + "p95": 1989.5039796829224, + "p99": 2385.024070739746 + }, + "isolatedSum": { + "p50": 1483.519971370697, + "p90": 1511.680006980896, + "p95": 1848.6080169677734, + "p99": 3408.3839654922485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1052.2880554199219, + "p90": 1207.0080041885376, + "p95": 1307.647943496704, + "p99": 1850.816011428833 + }, + "combine": { + "p50": 1669.8880195617676, + "p90": 1678.048014640808, + "p95": 1681.8879842758179, + "p99": 1692.736029624939 + }, + "roundtrip": { + "p50": 2741.215944290161, + "p90": 2754.528045654297, + "p95": 2765.7599449157715, + "p99": 2805.2799701690674 + }, + "isolatedSum": { + "p50": 2722.1760749816895, + "p90": 2885.0560188293457, + "p95": 2989.535927772522, + "p99": 3543.552041053772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 40211, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1804.8640489578247, + "p90": 1823.3599662780762, + "p95": 1840.224027633667, + "p99": 1930.5280447006226 + }, + "combine": { + "p50": 3340.480089187622, + "p90": 
3352.479934692383, + "p95": 3357.151985168457, + "p99": 3367.680072784424 + }, + "roundtrip": { + "p50": 5114.560127258301, + "p90": 5126.688003540039, + "p95": 5133.887767791748, + "p99": 5180.0642013549805 + }, + "isolatedSum": { + "p50": 5145.344138145447, + "p90": 5175.839900970459, + "p95": 5197.376012802124, + "p99": 5298.208117485046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3317.5039291381836, + "p90": 3334.912061691284, + "p95": 3343.3279991149902, + "p99": 3366.8160438537598 + }, + "combine": { + "p50": 6583.583831787109, + "p90": 6596.831798553467, + "p95": 6602.880001068115, + "p99": 6615.551948547363 + }, + "roundtrip": { + "p50": 9855.29613494873, + "p90": 9871.295928955078, + "p95": 9879.936218261719, + "p99": 9901.375770568848 + }, + "isolatedSum": { + "p50": 9901.087760925293, + "p90": 9931.743860244751, + "p95": 9946.208000183105, + "p99": 9982.367992401123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3b1b1ab0", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_396c2d86", + "comparisonKey": "365416199a93ab8e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:47.320926+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 247.23200500011444, + "p90": 255.13601303100586, + "p95": 264.3199861049652, + "p99": 2533.3759784698486 + }, + "combine": { + "p50": 179.83999848365784, + "p90": 185.82400679588318, + "p95": 237.66399919986725, + "p99": 3429.311990737915 + }, + "roundtrip": { + "p50": 412.6400053501129, + "p90": 421.31200432777405, + "p95": 489.4079864025116, + "p99": 
3817.631959915161 + }, + "isolatedSum": { + "p50": 427.0720034837723, + "p90": 440.96001982688904, + "p95": 501.98398530483246, + "p99": 5962.687969207764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 274.78399872779846, + "p90": 291.6480004787445, + "p95": 334.04800295829773, + "p99": 2838.207960128784 + }, + "combine": { + "p50": 309.59999561309814, + "p90": 319.7439908981323, + "p95": 402.20800042152405, + "p99": 2940.351963043213 + }, + "roundtrip": { + "p50": 568.4159994125366, + "p90": 579.584002494812, + "p95": 693.6320066452026, + "p99": 3121.407985687256 + }, + "isolatedSum": { + "p50": 584.3839943408966, + "p90": 611.3919913768768, + "p95": 736.2560033798218, + "p99": 5778.559923171997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 323.35999608039856, + "p90": 337.3439908027649, + "p95": 814.8159980773926, + "p99": 3302.783966064453 + }, + "combine": { + "p50": 606.5599918365479, + "p90": 644.2239880561829, + "p95": 1533.3119630813599, + "p99": 2404.416084289551 + }, + "roundtrip": { + "p50": 919.0719723701477, + "p90": 963.808000087738, + "p95": 2201.119899749756, + "p99": 2617.503881454468 + }, + "isolatedSum": { + "p50": 929.9199879169464, + "p90": 981.5679788589478, + "p95": 2348.1279611587524, + "p99": 5707.200050354004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 478.91199588775635, + "p90": 495.9680140018463, + "p95": 596.2240099906921, + "p99": 2568.3200359344482 + }, + "combine": { + "p50": 1149.1520404815674, + "p90": 1174.6560335159302, + "p95": 1336.5440368652344, + "p99": 1732.800006866455 + }, + "roundtrip": { + "p50": 1609.7919940948486, + "p90": 1675.9999990463257, + "p95": 2063.1680488586426, + "p99": 2798.0799674987793 + }, + "isolatedSum": { + "p50": 1628.0640363693237, + "p90": 1670.6240475177765, + "p95": 1932.7680468559265, + "p99": 4301.120042800903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 681.5680265426636, + "p90": 740.4159903526306, + "p95": 1919.3919897079468, + "p99": 2539.9680137634277 + }, + "combine": { + "p50": 2227.519989013672, + "p90": 2235.680103302002, + "p95": 2238.464117050171, + "p99": 2250.175952911377 + }, + "roundtrip": { + "p50": 2882.496118545532, + "p90": 2927.8080463409424, + "p95": 3027.967929840088, + "p99": 3352.479934692383 + }, + "isolatedSum": { + "p50": 2909.0880155563354, + "p90": 2976.0960936546326, + "p95": 4157.856106758118, + "p99": 4790.143966674805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1080.5120468139648, + "p90": 1257.856011390686, + "p95": 1554.6879768371582, + "p99": 1971.9359874725342 + }, + "combine": { + "p50": 4362.304210662842, + "p90": 4378.24010848999, + 
"p95": 4386.144161224365, + "p99": 4465.3120040893555 + }, + "roundtrip": { + "p50": 5416.639804840088, + "p90": 5437.6959800720215, + "p95": 5456.480026245117, + "p99": 5553.055763244629 + }, + "isolatedSum": { + "p50": 5442.816257476807, + "p90": 5636.096119880676, + "p95": 5940.832138061523, + "p99": 6437.24799156189 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a6676f26", + "identity": "b300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_fa5b663c", + "comparisonKey": "8956e4e43db22205", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:45.631025+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_07", + "sku": "b300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 248.57600033283234, + "p90": 280.0000011920929, + "p95": 303.00799012184143, + "p99": 1948.6720561981201 + }, + "combine": { + "p50": 180.67200481891632, + "p90": 191.93600118160248, + "p95": 251.77600979804993, + "p99": 3441.215991973877 + }, + "roundtrip": { + "p50": 413.9840006828308, + "p90": 455.9360146522522, + "p95": 521.9200253486633, + "p99": 3497.472047805786 + }, + "isolatedSum": { + "p50": 429.24800515174866, + "p90": 471.9360023736954, + "p95": 554.7839999198914, + "p99": 5389.888048171997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 275.2000093460083, + "p90": 286.78399324417114, + "p95": 295.1039969921112, + "p99": 2451.2319564819336 + }, + "combine": { + "p50": 309.79201197624207, + "p90": 319.13599371910095, + "p95": 413.85599970817566, + "p99": 2965.696096420288 + }, + "roundtrip": { + "p50": 568.7680244445801, + "p90": 586.9119763374329, + "p95": 2078.495979309082, + "p99": 3135.296106338501 + }, + "isolatedSum": { + "p50": 
584.9920213222504, + "p90": 605.9199869632721, + "p95": 708.9599967002869, + "p99": 5416.928052902222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 323.93598556518555, + "p90": 338.9120101928711, + "p95": 373.088002204895, + "p99": 3161.2160205841064 + }, + "combine": { + "p50": 607.1680188179016, + "p90": 728.8960218429565, + "p95": 1313.5360479354858, + "p99": 2221.951961517334 + }, + "roundtrip": { + "p50": 919.5200204849243, + "p90": 953.0240297317505, + "p95": 2265.216112136841, + "p99": 2490.3359413146973 + }, + "isolatedSum": { + "p50": 931.1040043830872, + "p90": 1067.8080320358276, + "p95": 1686.6240501403809, + "p99": 5383.16798210144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 477.60000824928284, + "p90": 500.35202503204346, + "p95": 1039.903998374939, + "p99": 2533.792018890381 + }, + "combine": { + "p50": 1149.0240097045898, + "p90": 1175.1359701156616, + "p95": 1363.0399703979492, + "p99": 1712.607979774475 + }, + "roundtrip": { + "p50": 1610.7840538024902, + "p90": 1745.6320524215698, + "p95": 2010.9119415283203, + "p99": 2473.6320972442627 + }, + "isolatedSum": { + "p50": 1626.6240179538727, + "p90": 1675.487995147705, + "p95": 2402.943968772888, + "p99": 4246.399998664856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 679.2320013046265, + "p90": 744.0959811210632, + "p95": 1823.9680528640747, + "p99": 2440.3839111328125 + }, + "combine": { + "p50": 2227.776050567627, + "p90": 2238.0480766296387, + "p95": 2243.680000305176, + "p99": 2266.9119834899902 + }, + "roundtrip": { + "p50": 2884.8960399627686, + "p90": 2918.879985809326, + "p95": 3000.063896179199, + "p99": 3304.7680854797363 + }, + "isolatedSum": { + "p50": 2907.0080518722534, + "p90": 2982.144057750702, + "p95": 4067.6480531692505, + "p99": 4707.295894622803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1080.9600353240967, + "p90": 1221.824049949646, + "p95": 1444.4160461425781, + "p99": 1885.9519958496094 + }, + "combine": { + "p50": 4362.656116485596, + "p90": 4376.128196716309, + "p95": 4380.447864532471, + "p99": 4420.000076293945 + }, + "roundtrip": { + "p50": 5415.4558181762695, + "p90": 5433.407783508301, + "p95": 5440.3839111328125, + "p99": 5508.1281661987305 + }, + "isolatedSum": { + "p50": 5443.616151809692, + "p90": 5597.952246665955, + "p95": 5824.863910675049, + "p99": 6305.952072143555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6c6cdce1", + "identity": "b300|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_58db80e8", + "comparisonKey": "2e24cfc6fff54829", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:06:43.054875+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 85.4720026254654, + "p90": 95.67999839782715, + "p95": 98.68799895048141, + "p99": 108.38399827480316 + }, + "combine": { + "p50": 61.91999837756157, + "p90": 
63.93600255250931, + "p95": 64.60800021886826, + "p99": 76.57600194215775 + }, + "roundtrip": { + "p50": 124.22399967908859, + "p90": 130.36799430847168, + "p95": 131.6480040550232, + "p99": 138.94400000572205 + }, + "isolatedSum": { + "p50": 147.39200100302696, + "p90": 159.61600095033646, + "p95": 163.29599916934967, + "p99": 184.9600002169609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.48000186681747, + "p90": 87.16800063848495, + "p95": 87.99999952316284, + "p99": 97.98400104045868 + }, + "combine": { + "p50": 62.591999769210815, + "p90": 70.91200351715088, + "p95": 71.68000191450119, + "p99": 74.17599856853485 + }, + "roundtrip": { + "p50": 127.42400169372559, + "p90": 130.78400492668152, + "p95": 132.06399977207184, + "p99": 144.83200013637543 + }, + "isolatedSum": { + "p50": 143.0720016360283, + "p90": 158.08000415563583, + "p95": 159.68000143766403, + "p99": 172.15999960899353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 79.80799674987793, + "p90": 84.70399677753448, + "p95": 85.85599809885025, + "p99": 92.0960009098053 + }, + "combine": { + "p50": 63.10400366783142, + "p90": 71.77600264549255, + "p95": 72.4480003118515, + "p99": 76.9599974155426 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 131.55199587345123, + "p95": 139.55199718475342, + "p99": 162.04799711704254 + }, + "isolatedSum": { + "p50": 142.91200041770935, + "p90": 156.47999942302704, + "p95": 158.30399841070175, + "p99": 169.0559983253479 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.55200225114822, + "p90": 87.55200356245041, + "p95": 92.41600334644318, + "p99": 104.99200224876404 + }, + "combine": { + "p50": 64.41599875688553, + "p90": 73.08799773454666, + "p95": 73.47200065851212, + "p99": 86.2400010228157 + }, + "roundtrip": { + "p50": 123.99999797344208, + "p90": 126.81600451469421, + "p95": 128.60800325870514, + "p99": 143.90400052070618 + }, + "isolatedSum": { + "p50": 143.96800100803375, + "p90": 160.64000129699707, + "p95": 165.8880040049553, + "p99": 191.23200327157974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.16799932718277, + "p90": 81.24800026416779, + "p95": 83.03999900817871, + "p99": 89.88799899816513 + }, + "combine": { + "p50": 71.71200215816498, + "p90": 73.02399724721909, + "p95": 73.34399968385696, + "p99": 74.78400319814682 + }, + "roundtrip": { + "p50": 123.96799772977829, + "p90": 127.07200646400452, + "p95": 131.3599944114685, + "p99": 145.9839940071106 + }, + "isolatedSum": { + "p50": 150.88000148534775, + "p90": 154.27199751138687, + "p95": 156.38399869203568, + "p99": 164.67200219631195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.4720019698143, + "p90": 83.52000266313553, + "p95": 85.50400286912918, + 
"p99": 100.00000149011612 + }, + "combine": { + "p50": 72.80000299215317, + "p90": 73.47200065851212, + "p95": 74.0479975938797, + "p99": 76.51200145483017 + }, + "roundtrip": { + "p50": 137.5039964914322, + "p90": 142.11200177669525, + "p95": 143.64799857139587, + "p99": 163.83999586105347 + }, + "isolatedSum": { + "p50": 154.27200496196747, + "p90": 156.99200332164764, + "p95": 159.55200046300888, + "p99": 176.5120029449463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 92.12800115346909, + "p90": 96.12800180912018, + "p95": 97.9200005531311, + "p99": 109.56799983978271 + }, + "combine": { + "p50": 85.63199639320374, + "p90": 86.84799820184708, + "p95": 88.06400001049042, + "p99": 109.56799983978271 + }, + "roundtrip": { + "p50": 150.4960060119629, + "p90": 155.7759940624237, + "p95": 160.16000509262085, + "p99": 176.92799866199493 + }, + "isolatedSum": { + "p50": 177.75999754667282, + "p90": 182.97600001096725, + "p95": 185.98400056362152, + "p99": 219.13599967956543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.4319983124733, + "p90": 113.24799805879593, + "p95": 116.7680025100708, + "p99": 128.89599800109863 + }, + "combine": { + "p50": 97.98400104045868, + "p90": 100.38399696350098, + "p95": 101.24800354242325, + "p99": 109.37599837779999 + }, + "roundtrip": { + "p50": 184.76800620555878, + "p90": 191.64800643920898, + "p95": 193.24800372123718, + "p99": 206.33600652217865 + }, + "isolatedSum": { + "p50": 204.41599935293198, 
+ "p90": 213.6319950222969, + "p95": 218.01600605249405, + "p99": 238.27199637889862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8331e2f4", + "identity": "b300|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b300_58db80e8", + "comparisonKey": "e981d8dc9a02f2f0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:52.889964+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, 
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 85.63199639320374, + "p90": 90.68799763917923, + "p95": 98.75199943780899, + "p99": 104.86400127410889 + }, + "combine": { + "p50": 63.040003180503845, + "p90": 72.06399738788605, + "p95": 72.86400347948074, + "p99": 83.39200168848038 + }, + "roundtrip": { + "p50": 127.42400169372559, + "p90": 131.71200454235077, + "p95": 139.13600146770477, + "p99": 155.68000078201294 + }, + "isolatedSum": { + "p50": 148.67199957370758, + "p90": 162.75199502706528, + "p95": 171.61600291728973, + "p99": 188.25600296258926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.03199845552444, + "p90": 87.5839963555336, + "p95": 90.94399958848953, + "p99": 103.45599800348282 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 72.86400347948074, + "p95": 73.66400212049484, + "p99": 84.99199897050858 + }, + "roundtrip": { + "p50": 124.60800260305405, + "p90": 133.7279975414276, + "p95": 139.39200341701508, + "p99": 155.32800555229187 + }, + "isolatedSum": { + "p50": 143.8720002770424, + "p90": 160.44799983501434, + "p95": 164.60800170898438, + "p99": 188.4479969739914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.03199845552444, + "p90": 88.51200342178345, + "p95": 96.63999825716019, + "p99": 109.63200032711029 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 73.47200065851212, + "p95": 74.27199929952621, + "p99": 86.01599931716919 + }, + "roundtrip": { + "p50": 125.44000148773193, + "p90": 135.00800728797913, + "p95": 140.60799777507782, + "p99": 157.82399475574493 + }, + "isolatedSum": { + "p50": 152.70400047302246, + "p90": 161.98400408029556, + "p95": 170.9119975566864, + "p99": 195.64799964427948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.83999699354172, + "p90": 84.76799726486206, + "p95": 89.28000181913376, + "p99": 104.35199737548828 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 74.0479975938797, + "p95": 75.29599964618683, + "p99": 85.63199639320374 + }, + "roundtrip": { + "p50": 133.215993642807, + "p90": 141.6960060596466, + "p95": 146.88000082969666, + "p99": 162.27200627326965 + }, + "isolatedSum": { + "p50": 152.73600071668625, + "p90": 158.81599485874176, + "p95": 164.5760014653206, + "p99": 189.98399376869202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.53600245714188, + "p90": 95.45599669218063, + "p95": 101.21600329875946, + "p99": 113.3119985461235 + }, + "combine": { + "p50": 73.21599870920181, + "p90": 74.20799881219864, + "p95": 76.48000121116638, + "p99": 97.15200215578079 + }, + "roundtrip": { + "p50": 135.26399433612823, + "p90": 
143.45599710941315, + "p95": 147.74399995803833, + "p99": 158.55999290943146 + }, + "isolatedSum": { + "p50": 154.7520011663437, + "p90": 169.66399550437927, + "p95": 177.69600450992584, + "p99": 210.4640007019043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.33600109815598, + "p90": 88.16000074148178, + "p95": 94.27200257778168, + "p99": 109.27999764680862 + }, + "combine": { + "p50": 74.11199808120728, + "p90": 76.35200023651123, + "p95": 84.41600203514099, + "p99": 97.31200337409973 + }, + "roundtrip": { + "p50": 136.4479959011078, + "p90": 143.90400052070618, + "p95": 150.39999783039093, + "p99": 164.63999450206757 + }, + "isolatedSum": { + "p50": 156.44799917936325, + "p90": 164.512000977993, + "p95": 178.68800461292267, + "p99": 206.59200102090836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 92.67199784517288, + "p90": 100.99200159311295, + "p95": 105.59999942779541, + "p99": 124.4800016283989 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 96.92800045013428, + "p95": 97.59999811649323, + "p99": 109.63200032711029 + }, + "roundtrip": { + "p50": 167.23200678825378, + "p90": 171.10399901866913, + "p95": 175.04000663757324, + "p99": 191.39200448989868 + }, + "isolatedSum": { + "p50": 180.60799688100815, + "p90": 197.92000204324722, + "p95": 203.19999754428864, + "p99": 234.1120019555092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + 
"recvTokensMax": 355, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.21599650382996, + "p90": 111.58400028944016, + "p95": 117.21599847078323, + "p99": 128.57599556446075 + }, + "combine": { + "p50": 109.66400057077408, + "p90": 110.59200018644333, + "p95": 113.3119985461235, + "p99": 125.72799623012543 + }, + "roundtrip": { + "p50": 189.98399376869202, + "p90": 198.17599654197693, + "p95": 203.93599569797516, + "p99": 218.46400201320648 + }, + "isolatedSum": { + "p50": 214.87999707460403, + "p90": 222.17600047588348, + "p95": 230.52799701690674, + "p99": 254.30399179458618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fbabeb21", + "identity": "b300|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_58db80e8", + "comparisonKey": "88864d220d9ec37e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:06.367239+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.44000172615051, + "p90": 88.03199976682663, + "p95": 88.8959988951683, + "p99": 102.81600058078766 + }, + "combine": { + "p50": 72.80000299215317, + "p90": 73.60000163316727, + "p95": 74.20799881219864, + "p99": 79.6160027384758 + }, + "roundtrip": { + "p50": 128.80000472068787, + "p90": 133.69600474834442, + "p95": 135.1040005683899, + "p99": 142.2719955444336 + }, + "isolatedSum": { + "p50": 154.24000471830368, + "p90": 161.6320013999939, + "p95": 163.10399770736694, + "p99": 182.43200331926346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.60800284147263, + "p90": 83.45600217580795, + "p95": 86.30400151014328, + "p99": 93.05600076913834 + }, + "combine": { + "p50": 
72.9919970035553, + "p90": 73.5040009021759, + "p95": 73.79200309515, + "p99": 76.4160007238388 + }, + "roundtrip": { + "p50": 126.49600207805634, + "p90": 130.75199723243713, + "p95": 132.83200562000275, + "p99": 138.91200721263885 + }, + "isolatedSum": { + "p50": 153.59999984502792, + "p90": 156.96000307798386, + "p95": 160.09600460529327, + "p99": 169.47200149297714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.79999685287476, + "p90": 83.5840031504631, + "p95": 86.43200248479843, + "p99": 128.28800082206726 + }, + "combine": { + "p50": 73.40800017118454, + "p90": 74.23999905586243, + "p95": 74.68800246715546, + "p99": 76.7040029168129 + }, + "roundtrip": { + "p50": 134.71999764442444, + "p90": 141.92000031471252, + "p95": 143.0719941854477, + "p99": 154.40000593662262 + }, + "isolatedSum": { + "p50": 154.2079970240593, + "p90": 157.82400220632553, + "p95": 161.1200049519539, + "p99": 204.99200373888016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.64000308513641, + "p90": 82.36800134181976, + "p95": 84.35200154781342, + "p99": 94.43199634552002 + }, + "combine": { + "p50": 73.53600114583969, + "p90": 75.74400305747986, + "p95": 77.66400277614594, + "p99": 97.05600142478943 + }, + "roundtrip": { + "p50": 137.7599984407425, + "p90": 143.51999759674072, + "p95": 144.86399292945862, + "p99": 155.03999590873718 + }, + "isolatedSum": { + "p50": 154.1760042309761, + "p90": 158.11200439929962, + "p95": 162.01600432395935, + "p99": 191.48799777030945 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.18399977684021, + "p90": 92.06400066614151, + "p95": 96.16000205278397, + "p99": 106.65600001811981 + }, + "combine": { + "p50": 74.5920017361641, + "p90": 83.45600217580795, + "p95": 84.32000130414963, + "p99": 97.4079966545105 + }, + "roundtrip": { + "p50": 137.63199746608734, + "p90": 144.06399428844452, + "p95": 144.896000623703, + "p99": 152.73599326610565 + }, + "isolatedSum": { + "p50": 155.7760015130043, + "p90": 175.52000284194946, + "p95": 180.4800033569336, + "p99": 204.0639966726303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.87999910116196, + "p90": 96.96000069379807, + "p95": 98.78399968147278, + "p99": 129.31199371814728 + }, + "combine": { + "p50": 84.54400300979614, + "p90": 85.63199639320374, + "p95": 86.01599931716919, + "p99": 96.63999825716019 + }, + "roundtrip": { + "p50": 150.56000649929047, + "p90": 154.81600165367126, + "p95": 155.93600273132324, + "p99": 162.01600432395935 + }, + "isolatedSum": { + "p50": 175.4240021109581, + "p90": 182.5919970870018, + "p95": 184.79999899864197, + "p99": 225.95199197530746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.67999839782715, + "p90": 98.01600128412247, + "p95": 
99.84000027179718, + "p99": 107.2319969534874 + }, + "combine": { + "p50": 97.75999933481216, + "p90": 98.81599992513657, + "p95": 99.13600236177444, + "p99": 112.06399649381638 + }, + "roundtrip": { + "p50": 176.03200674057007, + "p90": 180.12799322605133, + "p95": 181.31199479103088, + "p99": 202.55999267101288 + }, + "isolatedSum": { + "p50": 193.4399977326393, + "p90": 196.83200120925903, + "p95": 198.97600263357162, + "p99": 219.29599344730377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.47999900579453, + "p90": 118.07999759912491, + "p95": 125.2799928188324, + "p99": 134.65599715709686 + }, + "combine": { + "p50": 112.2559979557991, + "p90": 121.15199863910675, + "p95": 121.47200107574463, + "p99": 132.76800513267517 + }, + "roundtrip": { + "p50": 204.03200387954712, + "p90": 209.21599864959717, + "p95": 212.38400042057037, + "p99": 224.0000069141388 + }, + "isolatedSum": { + "p50": 220.73599696159363, + "p90": 239.23199623823166, + "p95": 246.75199389457703, + "p99": 267.42400228977203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02e00dfe", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_df222060", + "comparisonKey": "97ed86fe35a5b2af", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:47.338563+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 83.10399949550629, + "p90": 88.19200098514557, + "p95": 88.79999816417694, + "p99": 91.64799749851227 + }, + "combine": { + "p50": 73.7600028514862, + "p90": 75.52000135183334, + "p95": 76.22399926185608, + "p99": 85.88799834251404 + }, + "roundtrip": { + "p50": 136.99199259281158, + "p90": 143.0400013923645, + "p95": 143.99999380111694, + "p99": 
148.67199957370758 + }, + "isolatedSum": { + "p50": 156.8640023469925, + "p90": 163.7120023369789, + "p95": 165.02399742603302, + "p99": 177.5359958410263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.4160013794899, + "p90": 82.5280025601387, + "p95": 85.15200018882751, + "p99": 90.33600240945816 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 75.55200159549713, + "p95": 76.25599950551987, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 140.57600498199463, + "p90": 144.16000247001648, + "p95": 145.6640064716339, + "p99": 157.95199573040009 + }, + "isolatedSum": { + "p50": 154.39999848604202, + "p90": 158.08000415563583, + "p95": 161.40799969434738, + "p99": 174.27200078964233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.32000064849854, + "p90": 82.0159986615181, + "p95": 83.80799740552902, + "p99": 100.47999769449234 + }, + "combine": { + "p50": 74.91199672222137, + "p90": 78.33600044250488, + "p95": 84.1279998421669, + "p99": 87.74399757385254 + }, + "roundtrip": { + "p50": 138.36799561977386, + "p90": 144.25599575042725, + "p95": 145.56799829006195, + "p99": 152.0960032939911 + }, + "isolatedSum": { + "p50": 155.2319973707199, + "p90": 160.35199910402298, + "p95": 167.93599724769592, + "p99": 188.22399526834488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.38400113582611, + "p90": 82.75199681520462, + "p95": 83.96799862384796, + "p99": 96.92800045013428 + }, + "combine": { + "p50": 75.48800110816956, + "p90": 84.76799726486206, + "p95": 85.21600067615509, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 137.5039964914322, + "p90": 140.1280015707016, + "p95": 142.56000518798828, + "p99": 153.1199961900711 + }, + "isolatedSum": { + "p50": 155.87200224399567, + "p90": 167.51999408006668, + "p95": 169.18399930000305, + "p99": 193.63199919462204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.18399977684021, + "p90": 83.16799998283386, + "p95": 84.44800227880478, + "p99": 102.46399790048599 + }, + "combine": { + "p50": 75.71200281381607, + "p90": 84.99199897050858, + "p95": 85.63199639320374, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 137.7280056476593, + "p90": 140.6719982624054, + "p95": 142.97600090503693, + "p99": 151.8079936504364 + }, + "isolatedSum": { + "p50": 156.89600259065628, + "p90": 168.15999895334244, + "p95": 170.07999867200851, + "p99": 190.39999693632126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.84800016880035, + "p90": 100.832000374794, + "p95": 101.72799974679947, + "p99": 106.97600245475769 + }, + "combine": { + "p50": 85.69599688053131, + "p90": 86.62399649620056, + "p95": 87.77599781751633, + "p99": 96.83199971914291 + }, + "roundtrip": { + "p50": 
150.91200172901154, + "p90": 177.279993891716, + "p95": 188.73600661754608, + "p99": 211.58400177955627 + }, + "isolatedSum": { + "p50": 184.54399704933167, + "p90": 187.45599687099457, + "p95": 189.5039975643158, + "p99": 203.8080021739006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.34399747848511, + "p90": 112.03200370073318, + "p95": 113.37599903345108, + "p99": 119.58400160074234 + }, + "combine": { + "p50": 98.4639972448349, + "p90": 100.70399940013885, + "p95": 101.59999877214432, + "p99": 112.41599917411804 + }, + "roundtrip": { + "p50": 174.97600615024567, + "p90": 179.07199263572693, + "p95": 182.49599635601044, + "p99": 202.4960070848465 + }, + "isolatedSum": { + "p50": 203.80799472332, + "p90": 212.73600310087204, + "p95": 214.9759978055954, + "p99": 232.00000077486038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.1919994354248, + "p90": 127.55200266838074, + "p95": 131.1360001564026, + "p99": 139.96799290180206 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 123.45600128173828, + "p95": 123.9359974861145, + "p99": 133.95200669765472 + }, + "roundtrip": { + "p50": 211.93599700927734, + "p90": 218.27200055122375, + "p95": 220.38400173187256, + "p99": 228.2560020685196 + }, + "isolatedSum": { + "p50": 246.46399915218353, + "p90": 251.00800395011902, + "p95": 255.0719976425171, + "p99": 273.9199995994568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + 
"fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dd1f72c6", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_58db80e8", + "comparisonKey": "35bdca1918a8990e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:56.324390+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 84.16000008583069, + "p90": 98.81599992513657, + "p95": 103.00800204277039, + "p99": 115.07199704647064 + }, + "combine": { + "p50": 73.91999661922455, + "p90": 76.19199901819229, + "p95": 78.52800190448761, + "p99": 100.16000270843506 + }, + "roundtrip": { + "p50": 137.66400516033173, + "p90": 144.16000247001648, + "p95": 145.1839953660965, + "p99": 148.0640023946762 + }, + "isolatedSum": { + "p50": 158.07999670505524, + "p90": 175.00799894332886, + "p95": 181.536003947258, + "p99": 215.2319997549057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.4160013794899, + "p90": 83.3280012011528, + "p95": 85.40800213813782, + "p99": 95.74399888515472 + }, + "combine": { + "p50": 74.30399954319, + "p90": 76.51200145483017, + "p95": 83.67999643087387, + "p99": 85.02399921417236 + }, + "roundtrip": { + "p50": 141.34399592876434, + "p90": 144.9279934167862, + "p95": 146.01600170135498, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 154.7200009226799, + "p90": 159.84000265598297, + "p95": 169.0879985690117, + "p99": 180.7679980993271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.35200089216232, + "p90": 82.65600353479385, + "p95": 83.99999886751175, + "p99": 98.65599870681763 + }, + "combine": { + "p50": 
74.78400319814682, + "p90": 83.20000022649765, + "p95": 84.54400300979614, + "p99": 87.71199733018875 + }, + "roundtrip": { + "p50": 136.60800457000732, + "p90": 142.39999651908875, + "p95": 143.61600577831268, + "p99": 156.22399747371674 + }, + "isolatedSum": { + "p50": 155.13600409030914, + "p90": 165.8560037612915, + "p95": 168.5440018773079, + "p99": 186.36799603700638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.92799782752991, + "p90": 83.42400193214417, + "p95": 86.17600053548813, + "p99": 117.15199798345566 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 85.05599945783615, + "p95": 85.4400023818016, + "p99": 109.37599837779999 + }, + "roundtrip": { + "p50": 137.53600418567657, + "p90": 141.15199446678162, + "p95": 142.91200041770935, + "p99": 160.12799739837646 + }, + "isolatedSum": { + "p50": 157.85599499940872, + "p90": 168.48000138998032, + "p95": 171.61600291728973, + "p99": 226.52799636125565 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.40800148248672, + "p90": 102.04800218343735, + "p95": 105.79200088977814, + "p99": 110.04800349473953 + }, + "combine": { + "p50": 83.48800241947174, + "p90": 85.37600189447403, + "p95": 85.69599688053131, + "p99": 88.639996945858 + }, + "roundtrip": { + "p50": 138.65600526332855, + "p90": 142.2400027513504, + "p95": 146.97599411010742, + "p99": 183.3599954843521 + }, + "isolatedSum": { + "p50": 164.89600390195847, + "p90": 187.42400407791138, + "p95": 191.48799777030945, + "p99": 
198.68800044059753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.45599734783173, + "p90": 104.032002389431, + "p95": 111.42399907112122, + "p99": 122.079998254776 + }, + "combine": { + "p50": 85.50400286912918, + "p90": 99.55199807882309, + "p95": 109.43999886512756, + "p99": 132.28799402713776 + }, + "roundtrip": { + "p50": 149.85600113868713, + "p90": 156.73600137233734, + "p95": 165.75999557971954, + "p99": 178.17600071430206 + }, + "isolatedSum": { + "p50": 184.9600002169609, + "p90": 203.5840004682541, + "p95": 220.86399793624878, + "p99": 254.36799228191376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.11199653148651, + "p90": 113.43999952077866, + "p95": 114.33599889278412, + "p99": 118.56000125408173 + }, + "combine": { + "p50": 98.9760011434555, + "p90": 102.08000242710114, + "p95": 108.83200168609619, + "p99": 109.98400300741196 + }, + "roundtrip": { + "p50": 175.55199563503265, + "p90": 182.14400112628937, + "p95": 185.37600338459015, + "p99": 202.7519941329956 + }, + "isolatedSum": { + "p50": 209.08799767494202, + "p90": 215.5200019478798, + "p95": 223.1680005788803, + "p99": 228.54400426149368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.90399724245071, + 
"p90": 126.5919953584671, + "p95": 127.55200266838074, + "p99": 133.12000036239624 + }, + "combine": { + "p50": 122.11199849843979, + "p90": 123.19999933242798, + "p95": 123.9359974861145, + "p99": 134.07999277114868 + }, + "roundtrip": { + "p50": 211.04000508785248, + "p90": 215.03999829292297, + "p95": 218.176007270813, + "p99": 245.7599937915802 + }, + "isolatedSum": { + "p50": 246.0159957408905, + "p90": 249.79199469089508, + "p95": 251.48800015449524, + "p99": 267.1999931335449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1db6c67b", + "identity": "b300|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_58db80e8", + "comparisonKey": "72e06d8e8ba4ccac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:32.632015+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.34400099515915, + "p90": 88.79999816417694, + "p95": 91.839998960495, + "p99": 109.95200276374817 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 74.11199808120728, + "p95": 75.29599964618683, + "p99": 85.4720026254654 + }, + "roundtrip": { + "p50": 133.4719955921173, + "p90": 136.9280070066452, + "p95": 141.92000031471252, + "p99": 150.7200002670288 + }, + "isolatedSum": { + "p50": 154.33599799871445, + "p90": 162.91199624538422, + "p95": 167.13599860668182, + "p99": 195.42400538921356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.11999928951263, + "p90": 84.28800106048584, + "p95": 87.20000088214874, + "p99": 98.1760025024414 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 75.9039968252182, + "p95": 76.48000121116638, + "p99": 84.44800227880478 + }, + "roundtrip": { + "p50": 140.9280002117157, + "p90": 143.8080072402954, + "p95": 144.70399916172028, + "p99": 
159.19999778270721 + }, + "isolatedSum": { + "p50": 154.81600165367126, + "p90": 160.19199788570404, + "p95": 163.68000209331512, + "p99": 182.62400478124619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.64000308513641, + "p90": 84.22400057315826, + "p95": 86.40000224113464, + "p99": 96.89600020647049 + }, + "combine": { + "p50": 74.40000027418137, + "p90": 83.64800363779068, + "p95": 84.3840017914772, + "p99": 95.83999961614609 + }, + "roundtrip": { + "p50": 137.15200126171112, + "p90": 143.77599954605103, + "p95": 149.34399724006653, + "p99": 160.5760008096695 + }, + "isolatedSum": { + "p50": 155.04000335931778, + "p90": 167.87200421094894, + "p95": 170.78400403261185, + "p99": 192.73599982261658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 82.11199939250946, + "p90": 111.77600175142288, + "p95": 115.03999680280685, + "p99": 120.38400024175644 + }, + "combine": { + "p50": 75.74400305747986, + "p90": 85.91999858617783, + "p95": 93.28000247478485, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 137.63199746608734, + "p90": 140.86399972438812, + "p95": 144.3520039319992, + "p99": 160.64000129699707 + }, + "isolatedSum": { + "p50": 157.85600244998932, + "p90": 197.6960003376007, + "p95": 208.3199992775917, + "p99": 230.14400154352188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.04799890518188, + "p90": 85.66399663686752, + "p95": 88.60799670219421, + "p99": 97.63199836015701 + }, + "combine": { + "p50": 84.16000008583069, + "p90": 85.4400023818016, + "p95": 85.63199639320374, + "p99": 112.86400258541107 + }, + "roundtrip": { + "p50": 138.8159990310669, + "p90": 144.19199526309967, + "p95": 147.23199605941772, + "p99": 155.16799688339233 + }, + "isolatedSum": { + "p50": 166.20799899101257, + "p90": 171.10399901866913, + "p95": 174.23999309539795, + "p99": 210.49600094556808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.48799759149551, + "p90": 102.08000242710114, + "p95": 104.5759990811348, + "p99": 149.53599870204926 + }, + "combine": { + "p50": 85.53600311279297, + "p90": 86.59200370311737, + "p95": 87.42400258779526, + "p99": 98.1760025024414 + }, + "roundtrip": { + "p50": 150.2400040626526, + "p90": 154.2080044746399, + "p95": 156.2879979610443, + "p99": 169.21600699424744 + }, + "isolatedSum": { + "p50": 185.02400070428848, + "p90": 188.6720061302185, + "p95": 192.00000166893005, + "p99": 247.71200120449066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 111.04000359773636, + "p90": 113.72800171375275, + "p95": 114.72000181674957, + "p99": 125.21600723266602 + }, + "combine": { + "p50": 99.16800260543823, + "p90": 108.09600353240967, + "p95": 108.99200290441513, + "p99": 134.5600038766861 + }, + "roundtrip": { + 
"p50": 178.20799350738525, + "p90": 185.7600063085556, + "p95": 187.391996383667, + "p99": 200.19200444221497 + }, + "isolatedSum": { + "p50": 210.2080062031746, + "p90": 221.82400524616241, + "p95": 223.7120047211647, + "p99": 259.7760111093521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.80799651145935, + "p90": 126.46399438381195, + "p95": 127.68000364303589, + "p99": 139.90400731563568 + }, + "combine": { + "p50": 122.14399874210358, + "p90": 123.58400225639343, + "p95": 124.67200309038162, + "p99": 148.00000190734863 + }, + "roundtrip": { + "p50": 212.19199895858765, + "p90": 221.82400524616241, + "p95": 241.37599766254425, + "p99": 250.07998943328857 + }, + "isolatedSum": { + "p50": 245.95199525356293, + "p90": 250.04799664020538, + "p95": 252.3520067334175, + "p99": 287.9040092229843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e4a4eb89", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "b300_3cb8cb98", + "comparisonKey": "bd911062ff2c0f3e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:10.428066+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.7599967122078, + "p90": 87.16800063848495, + "p95": 87.99999952316284, + "p99": 91.80799871683121 + }, + "combine": { + "p50": 74.40000027418137, + "p90": 76.83199644088745, + "p95": 83.74399691820145, + "p99": 85.31200140714645 + }, + "roundtrip": { + "p50": 140.25600254535675, + "p90": 143.51999759674072, + "p95": 148.47999811172485, + "p99": 167.9680049419403 + }, + "isolatedSum": { + "p50": 156.15999698638916, + "p90": 163.9999970793724, + "p95": 171.7439964413643, + "p99": 177.12000012397766 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.216000020504, + "p90": 88.60799670219421, + "p95": 96.54399752616882, + "p99": 108.44799876213074 + }, + "combine": { + "p50": 75.32799988985062, + "p90": 84.28800106048584, + "p95": 84.70399677753448, + "p99": 87.42400258779526 + }, + "roundtrip": { + "p50": 137.05599308013916, + "p90": 140.4159963130951, + "p95": 142.0159935951233, + "p99": 148.22399616241455 + }, + "isolatedSum": { + "p50": 156.54399991035461, + "p90": 172.89599776268005, + "p95": 181.2479943037033, + "p99": 195.872001349926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.54400235414505, + "p90": 85.11999994516373, + "p95": 86.20800077915192, + "p99": 92.67199784517288 + }, + "combine": { + "p50": 75.42400062084198, + "p90": 84.44800227880478, + "p95": 84.95999872684479, + "p99": 88.0960002541542 + }, + "roundtrip": { + "p50": 137.31199502944946, + "p90": 140.6400054693222, + "p95": 142.81600713729858, + "p99": 160.76800227165222 + }, + "isolatedSum": { + "p50": 155.96800297498703, + "p90": 169.5680022239685, + "p95": 171.1679995059967, + "p99": 180.7679980993271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.57600259780884, + "p90": 83.23200047016144, + "p95": 85.79199761152267, + "p99": 
107.55199939012527 + }, + "combine": { + "p50": 84.3840017914772, + "p90": 85.56800335645676, + "p95": 85.88799834251404, + "p99": 109.0560033917427 + }, + "roundtrip": { + "p50": 137.66400516033173, + "p90": 140.9599930047989, + "p95": 143.61600577831268, + "p99": 155.4879993200302 + }, + "isolatedSum": { + "p50": 164.96000438928604, + "p90": 168.8000038266182, + "p95": 171.6799959540367, + "p99": 216.60800278186798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.37600123882294, + "p90": 83.29600095748901, + "p95": 85.31200140714645, + "p99": 105.56799918413162 + }, + "combine": { + "p50": 85.24800091981888, + "p90": 85.9839990735054, + "p95": 87.67999708652496, + "p99": 110.1439967751503 + }, + "roundtrip": { + "p50": 148.44800531864166, + "p90": 154.81600165367126, + "p95": 155.64799308776855, + "p99": 163.35999965667725 + }, + "isolatedSum": { + "p50": 166.62400215864182, + "p90": 169.28000003099442, + "p95": 172.99199849367142, + "p99": 215.71199595928192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 93.66399794816971, + "p90": 100.35199671983719, + "p95": 104.51199859380722, + "p99": 117.91999638080597 + }, + "combine": { + "p50": 86.56000345945358, + "p90": 89.02399986982346, + "p95": 95.93600034713745, + "p99": 110.17599701881409 + }, + "roundtrip": { + "p50": 160.5760008096695, + "p90": 167.58400201797485, + "p95": 168.73599588871002, + "p99": 180.51199615001678 + }, + "isolatedSum": { + "p50": 180.2240014076233, + "p90": 189.37599658966064, + 
"p95": 200.44799894094467, + "p99": 228.09599339962006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.0880029797554, + "p90": 107.2319969534874, + "p95": 108.67200046777725, + "p99": 114.78400230407715 + }, + "combine": { + "p50": 108.86400192975998, + "p90": 110.33599823713303, + "p95": 110.75200140476227, + "p99": 123.61600250005722 + }, + "roundtrip": { + "p50": 188.6720061302185, + "p90": 192.1599954366684, + "p95": 194.97600197792053, + "p99": 200.25600492954254 + }, + "isolatedSum": { + "p50": 213.95200490951538, + "p90": 217.56799519062042, + "p95": 219.42400187253952, + "p99": 238.40000480413437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.62399625778198, + "p90": 136.76799833774567, + "p95": 137.9839926958084, + "p99": 140.35199582576752 + }, + "combine": { + "p50": 136.9599997997284, + "p90": 146.01600170135498, + "p95": 146.68799936771393, + "p99": 159.45599973201752 + }, + "roundtrip": { + "p50": 249.7600018978119, + "p90": 255.45600056648254, + "p95": 256.48000836372375, + "p99": 267.87200570106506 + }, + "isolatedSum": { + "p50": 267.5839960575104, + "p90": 282.78400003910065, + "p95": 284.67199206352234, + "p99": 299.80799555778503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2833b112", + "identity": 
"b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "b300_0c4d187b", + "comparisonKey": "6e85a1dbe7b65819", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:16.045658+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 82.11199939250946, + "p90": 91.90399944782257, + "p95": 98.84800016880035, + "p99": 106.36799782514572 + }, + "combine": { + "p50": 63.58399987220764, + "p90": 65.21599739789963, + "p95": 66.39999896287918, + "p99": 74.40000027418137 + }, + "roundtrip": { + "p50": 124.95999783277512, + "p90": 132.54399597644806, + "p95": 133.82400572299957, + "p99": 143.96800100803375 + }, + "isolatedSum": { + "p50": 145.6959992647171, + "p90": 157.1199968457222, + "p95": 165.24799913167953, + "p99": 180.7679980993271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.93599772453308, + "p90": 84.73599702119827, + "p95": 87.61599659919739, + "p99": 96.79999947547913 + }, + "combine": { + "p50": 63.19999694824219, + "p90": 71.99999690055847, + "p95": 72.48000055551529, + "p99": 84.79999750852585 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 130.40000200271606, + "p95": 131.16799294948578, + "p99": 135.1040005683899 + }, + "isolatedSum": { + "p50": 143.13599467277527, + "p90": 156.73599392175674, + "p95": 160.09599715471268, + "p99": 181.59999698400497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.71199929714203, + "p90": 101.88800096511841, + "p95": 103.16800326108932, + "p99": 120.28799951076508 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 73.95199686288834, + "p95": 74.33599978685379, + "p99": 76.4480009675026 + }, + 
"roundtrip": { + "p50": 137.02400028705597, + "p90": 140.19200205802917, + "p95": 141.85599982738495, + "p99": 162.7199947834015 + }, + "isolatedSum": { + "p50": 172.89599776268005, + "p90": 175.83999782800674, + "p95": 177.50400304794312, + "p99": 196.73600047826767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 92.92799979448318, + "p90": 98.52799773216248, + "p95": 99.84000027179718, + "p99": 119.4240003824234 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 76.4160007238388, + "p95": 76.99199765920639, + "p99": 97.120001912117 + }, + "roundtrip": { + "p50": 150.4639983177185, + "p90": 155.93600273132324, + "p95": 157.151997089386, + "p99": 216.2880003452301 + }, + "isolatedSum": { + "p50": 167.29599982500076, + "p90": 174.94399845600128, + "p95": 176.83199793100357, + "p99": 216.5440022945404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d6a5e1cb", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "b300_2b2c04f6", + "comparisonKey": "92c62a29530b0fc5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:36.756417+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 87.90399879217148, + "p90": 92.96000003814697, + "p95": 97.59999811649323, + "p99": 104.89600151777267 + }, + "combine": { + "p50": 63.19999694824219, + "p90": 72.1919983625412, + "p95": 72.76800274848938, + "p99": 75.26399940252304 + }, + "roundtrip": { + "p50": 126.20800733566284, + "p90": 133.18400084972382, + "p95": 134.07999277114868, + "p99": 148.54399859905243 + }, + "isolatedSum": { + "p50": 151.10399574041367, + "p90": 165.15199840068817, + "p95": 170.3680008649826, + "p99": 180.16000092029572 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.88799768686295, + "p90": 88.19200098514557, + "p95": 89.24800157546997, + "p99": 93.75999867916107 + }, + "combine": { + "p50": 64.06400352716446, + "p90": 72.86400347948074, + "p95": 73.21599870920181, + "p99": 74.97599720954895 + }, + "roundtrip": { + "p50": 124.9919980764389, + "p90": 128.38399410247803, + "p95": 130.78400492668152, + "p99": 141.92000031471252 + }, + "isolatedSum": { + "p50": 145.9520012140274, + "p90": 161.0560044646263, + "p95": 162.46400028467178, + "p99": 168.73599588871002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.08799904584885, + "p90": 87.00799942016602, + "p95": 89.05600011348724, + "p99": 97.6639986038208 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 73.31199944019318, + "p95": 73.5040009021759, + "p99": 78.94399762153625 + }, + "roundtrip": { + "p50": 126.20800733566284, + "p90": 129.40800189971924, + "p95": 131.23199343681335, + "p99": 151.0079950094223 + }, + "isolatedSum": { + "p50": 152.70400047302246, + "p90": 160.3199988603592, + "p95": 162.56000101566315, + "p99": 176.60799622535706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.99199831485748, + "p90": 83.45600217580795, + "p95": 85.34400165081024, + "p99": 
93.08800101280212 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 73.5040009021759, + "p95": 74.07999783754349, + "p99": 96.79999947547913 + }, + "roundtrip": { + "p50": 126.56000256538391, + "p90": 130.3360015153885, + "p95": 134.46399569511414, + "p99": 144.80000734329224 + }, + "isolatedSum": { + "p50": 153.888002038002, + "p90": 156.96000307798386, + "p95": 159.42399948835373, + "p99": 189.88800048828125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.216000020504, + "p90": 83.5840031504631, + "p95": 85.4720026254654, + "p99": 93.6959981918335 + }, + "combine": { + "p50": 73.02399724721909, + "p90": 73.44000041484833, + "p95": 74.01599735021591, + "p99": 84.89599823951721 + }, + "roundtrip": { + "p50": 128.9920061826706, + "p90": 136.4160031080246, + "p95": 139.20000195503235, + "p99": 150.87999403476715 + }, + "isolatedSum": { + "p50": 154.23999726772308, + "p90": 157.02400356531143, + "p95": 159.4879999756813, + "p99": 178.5919964313507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.14399963617325, + "p90": 84.57600325345993, + "p95": 87.16800063848495, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 73.21599870920181, + "p90": 74.07999783754349, + "p95": 75.00799745321274, + "p99": 79.55200225114822 + }, + "roundtrip": { + "p50": 139.1039937734604, + "p90": 143.5520052909851, + "p95": 144.73600685596466, + "p99": 158.59200060367584 + }, + "isolatedSum": { + "p50": 155.35999834537506, + "p90": 158.65600109100342, + "p95": 
162.1759980916977, + "p99": 190.36800414323807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 85.82399785518646, + "p90": 92.6079973578453, + "p95": 94.52799707651138, + "p99": 109.50399935245514 + }, + "combine": { + "p50": 74.91199672222137, + "p90": 84.1279998421669, + "p95": 84.63999629020691, + "p99": 85.82399785518646 + }, + "roundtrip": { + "p50": 139.00800049304962, + "p90": 142.62400567531586, + "p95": 147.2959965467453, + "p99": 152.99199521541595 + }, + "isolatedSum": { + "p50": 160.73599457740784, + "p90": 176.7359972000122, + "p95": 179.1679933667183, + "p99": 195.3279972076416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.0640019774437, + "p90": 103.10400277376175, + "p95": 104.032002389431, + "p99": 108.51199924945831 + }, + "combine": { + "p50": 88.95999938249588, + "p90": 97.69599884748459, + "p95": 97.98400104045868, + "p99": 101.02400183677673 + }, + "roundtrip": { + "p50": 167.61599481105804, + "p90": 185.12000143527985, + "p95": 196.86399400234222, + "p99": 235.26400327682495 + }, + "isolatedSum": { + "p50": 189.02400135993958, + "p90": 200.80000162124634, + "p95": 202.01600342988968, + "p99": 209.53600108623505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3f5e98ea", + "identity": 
"b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "b300_d9b08302", + "comparisonKey": "cfbe5616162b6fa2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:24.799118+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.7599967122078, + "p90": 88.44800293445587, + "p95": 89.53599631786346, + "p99": 101.95200145244598 + }, + "combine": { + "p50": 73.02399724721909, + "p90": 74.46400076150894, + "p95": 75.6160020828247, + "p99": 86.07999980449677 + }, + "roundtrip": { + "p50": 126.97599828243256, + "p90": 134.68800485134125, + "p95": 135.8720064163208, + "p99": 143.5839980840683 + }, + "isolatedSum": { + "p50": 154.78399395942688, + "p90": 162.9120036959648, + "p95": 165.15199840068817, + "p99": 188.03200125694275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.06399869918823, + "p90": 83.71199667453766, + "p95": 86.27200126647949, + "p99": 95.74399888515472 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 75.6480023264885, + "p95": 76.25599950551987, + "p99": 84.19200032949448 + }, + "roundtrip": { + "p50": 140.6400054693222, + "p90": 143.99999380111694, + "p95": 144.9279934167862, + "p99": 167.58400201797485 + }, + "isolatedSum": { + "p50": 154.01599556207657, + "p90": 159.35999900102615, + "p95": 162.52800077199936, + "p99": 179.9359992146492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.54400235414505, + "p90": 83.16799998283386, + "p95": 85.53600311279297, + "p99": 96.47999703884125 + }, + "combine": { + "p50": 75.32799988985062, + "p90": 84.03199911117554, + "p95": 84.51200276613235, + "p99": 87.5839963555336 + }, + 
"roundtrip": { + "p50": 137.34400272369385, + "p90": 142.71999895572662, + "p95": 144.896000623703, + "p99": 159.19999778270721 + }, + "isolatedSum": { + "p50": 155.87200224399567, + "p90": 167.1999990940094, + "p95": 170.04800587892532, + "p99": 184.06399339437485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.03199845552444, + "p90": 82.30400085449219, + "p95": 84.57600325345993, + "p99": 103.55199873447418 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 87.0399996638298, + "p95": 97.18400239944458, + "p99": 109.69600081443787 + }, + "roundtrip": { + "p50": 136.80000603199005, + "p90": 147.74399995803833, + "p95": 153.47200632095337, + "p99": 162.78399527072906 + }, + "isolatedSum": { + "p50": 154.91199493408203, + "p90": 169.344000518322, + "p95": 181.7600056529045, + "p99": 213.24799954891205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.73599636554718, + "p90": 82.46400207281113, + "p95": 83.67999643087387, + "p99": 87.45600283145905 + }, + "combine": { + "p50": 76.35200023651123, + "p90": 85.4400023818016, + "p95": 85.69599688053131, + "p99": 109.95200276374817 + }, + "roundtrip": { + "p50": 139.20000195503235, + "p90": 146.7200070619583, + "p95": 148.3840048313141, + "p99": 162.81600296497345 + }, + "isolatedSum": { + "p50": 157.0879966020584, + "p90": 167.90400445461273, + "p95": 169.37599331140518, + "p99": 197.40800559520721 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 
9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.2640033364296, + "p90": 102.81600058078766, + "p95": 107.45599865913391, + "p99": 120.25599926710129 + }, + "combine": { + "p50": 85.4720026254654, + "p90": 86.496002972126, + "p95": 87.42400258779526, + "p99": 96.83199971914291 + }, + "roundtrip": { + "p50": 149.3760049343109, + "p90": 151.90400183200836, + "p95": 153.72799336910248, + "p99": 162.49600052833557 + }, + "isolatedSum": { + "p50": 184.736005961895, + "p90": 189.31200355291367, + "p95": 194.88000124692917, + "p99": 217.0879989862442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.07200318574905, + "p90": 112.47999966144562, + "p95": 113.18399757146835, + "p99": 115.64800143241882 + }, + "combine": { + "p50": 99.07200187444687, + "p90": 101.27999633550644, + "p95": 108.73600095510483, + "p99": 111.64800077676773 + }, + "roundtrip": { + "p50": 183.58400464057922, + "p90": 191.39200448989868, + "p95": 192.9599940776825, + "p99": 209.56799387931824 + }, + "isolatedSum": { + "p50": 206.14400506019592, + "p90": 213.75999599695206, + "p95": 221.91999852657318, + "p99": 227.29600220918655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.83999741077423, + "p90": 133.63200426101685, + "p95": 135.16800105571747, + "p99": 146.11199498176575 + }, + "combine": { + 
"p50": 134.3040019273758, + "p90": 135.80800592899323, + "p95": 136.60800457000732, + "p99": 144.67200636863708 + }, + "roundtrip": { + "p50": 236.2239956855774, + "p90": 243.55199933052063, + "p95": 244.7039932012558, + "p99": 248.57600033283234 + }, + "isolatedSum": { + "p50": 262.14399933815, + "p90": 269.44001019001007, + "p95": 271.7760056257248, + "p99": 290.78400135040283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f65ea097", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "b300_ee223ecc", + "comparisonKey": "f93e47a0b80ab727", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:12.733295+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.40800148248672, + "p90": 88.22400122880936, + "p95": 90.17600119113922, + "p99": 105.76000064611435 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 74.72000271081924, + "p95": 76.35200023651123, + "p99": 87.96799927949905 + }, + "roundtrip": { + "p50": 134.39999520778656, + "p90": 140.32000303268433, + "p95": 142.4960047006607, + "p99": 151.0079950094223 + }, + "isolatedSum": { + "p50": 154.78400141000748, + "p90": 162.9440039396286, + "p95": 166.52800142765045, + "p99": 193.7279999256134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.66400343179703, + "p90": 93.82399916648865, + "p95": 102.65599936246872, + "p99": 114.81600254774094 + }, + "combine": { + "p50": 74.11199808120728, + "p90": 76.19199901819229, + "p95": 77.47200131416321, + "p99": 84.63999629020691 + }, + "roundtrip": { + "p50": 138.94400000572205, + "p90": 144.80000734329224, + "p95": 145.91999351978302, + "p99": 
152.8960019350052 + }, + "isolatedSum": { + "p50": 155.7760015130043, + "p90": 170.01599818468094, + "p95": 180.12800067663193, + "p99": 199.45599883794785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.216000020504, + "p90": 84.32000130414963, + "p95": 86.20800077915192, + "p99": 94.14400160312653 + }, + "combine": { + "p50": 74.49600100517273, + "p90": 83.42400193214417, + "p95": 84.22400057315826, + "p99": 85.4720026254654 + }, + "roundtrip": { + "p50": 139.64800536632538, + "p90": 144.73600685596466, + "p95": 145.60000598430634, + "p99": 150.81599354743958 + }, + "isolatedSum": { + "p50": 155.71200102567673, + "p90": 167.7440032362938, + "p95": 170.43200135231018, + "p99": 179.61600422859192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.24800026416779, + "p90": 83.90399813652039, + "p95": 85.50400286912918, + "p99": 94.59199756383896 + }, + "combine": { + "p50": 75.83999633789062, + "p90": 84.95999872684479, + "p95": 85.66399663686752, + "p99": 108.22399705648422 + }, + "roundtrip": { + "p50": 137.7280056476593, + "p90": 140.44800400733948, + "p95": 141.95199310779572, + "p99": 151.13599598407745 + }, + "isolatedSum": { + "p50": 157.0879966020584, + "p90": 168.86399686336517, + "p95": 171.1679995059967, + "p99": 202.81599462032318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.17599987983704, + "p90": 93.53599697351456, + "p95": 96.09600156545639, + "p99": 109.31199789047241 + }, + "combine": { + "p50": 84.1279998421669, + "p90": 85.66399663686752, + "p95": 86.11200004816055, + "p99": 100.38399696350098 + }, + "roundtrip": { + "p50": 139.80799913406372, + "p90": 143.77599954605103, + "p95": 148.41599762439728, + "p99": 162.4000072479248 + }, + "isolatedSum": { + "p50": 166.30399972200394, + "p90": 179.19999361038208, + "p95": 182.20800161361694, + "p99": 209.6959948539734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.13600236177444, + "p90": 101.21600329875946, + "p95": 102.24000364542007, + "p99": 108.35199803113937 + }, + "combine": { + "p50": 85.69599688053131, + "p90": 86.91199868917465, + "p95": 88.60799670219421, + "p99": 110.11199653148651 + }, + "roundtrip": { + "p50": 150.52799880504608, + "p90": 153.21600437164307, + "p95": 154.91199493408203, + "p99": 171.424001455307 + }, + "isolatedSum": { + "p50": 184.83199924230576, + "p90": 188.1280019879341, + "p95": 190.8480003476143, + "p99": 218.46399456262589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.46399921178818, + "p90": 113.47199976444244, + "p95": 114.75200206041336, + "p99": 125.18399953842163 + }, + "combine": { + "p50": 99.16800260543823, + "p90": 105.15200346708298, + "p95": 108.96000266075134, + "p99": 111.16799712181091 + }, + "roundtrip": { + 
"p50": 176.67199671268463, + "p90": 182.68799781799316, + "p95": 185.7919991016388, + "p99": 198.17599654197693 + }, + "isolatedSum": { + "p50": 209.6320018172264, + "p90": 218.62400323152542, + "p95": 223.7120047211647, + "p99": 236.35199666023254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.20799922943115, + "p90": 128.7039965391159, + "p95": 133.44000279903412, + "p99": 147.5200057029724 + }, + "combine": { + "p50": 122.81599640846252, + "p90": 125.50400197505951, + "p95": 133.37600231170654, + "p99": 135.6479972600937 + }, + "roundtrip": { + "p50": 217.056006193161, + "p90": 224.2240011692047, + "p95": 225.72800517082214, + "p99": 239.26399648189545 + }, + "isolatedSum": { + "p50": 245.02399563789368, + "p90": 254.20799851417542, + "p95": 266.81600511074066, + "p99": 283.1680029630661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb9f1628", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "b300_a50cd7ae", + "comparisonKey": "2d6b04373becb329", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:32.032144+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 85.21600067615509, + "p90": 88.8959988951683, + "p95": 90.14400094747543, + "p99": 94.94400024414062 + }, + "combine": { + "p50": 73.11999797821045, + "p90": 74.01599735021591, + "p95": 74.33599978685379, + "p99": 76.51200145483017 + }, + "roundtrip": { + "p50": 127.20000743865967, + "p90": 134.62400436401367, + "p95": 136.60800457000732, + "p99": 146.464005112648 + }, + "isolatedSum": { + "p50": 158.33599865436554, + "p90": 162.91199624538422, + "p95": 164.48000073432922, + "p99": 
171.4560016989708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.05599880218506, + "p90": 84.99199897050858, + "p95": 87.20000088214874, + "p99": 96.41599655151367 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 75.29599964618683, + "p95": 76.64000242948532, + "p99": 85.66399663686752 + }, + "roundtrip": { + "p50": 138.62399756908417, + "p90": 143.90400052070618, + "p95": 145.37599682807922, + "p99": 157.27999806404114 + }, + "isolatedSum": { + "p50": 154.55999970436096, + "p90": 160.2879986166954, + "p95": 163.84000331163406, + "p99": 182.0799931883812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.4720019698143, + "p90": 86.17600053548813, + "p95": 100.09600222110748, + "p99": 108.57599973678589 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 83.20000022649765, + "p95": 83.96799862384796, + "p99": 99.35999661684036 + }, + "roundtrip": { + "p50": 140.32000303268433, + "p90": 144.67200636863708, + "p95": 146.11199498176575, + "p99": 153.1520038843155 + }, + "isolatedSum": { + "p50": 156.09600394964218, + "p90": 169.37600076198578, + "p95": 184.06400084495544, + "p99": 207.93599635362625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.69600367546082, + "p90": 84.3840017914772, + 
"p95": 85.91999858617783, + "p99": 93.98400038480759 + }, + "combine": { + "p50": 75.42400062084198, + "p90": 84.60800349712372, + "p95": 85.37600189447403, + "p99": 88.83199840784073 + }, + "roundtrip": { + "p50": 138.36799561977386, + "p90": 162.75200247764587, + "p95": 181.0240000486374, + "p99": 214.30400013923645 + }, + "isolatedSum": { + "p50": 157.1200042963028, + "p90": 168.99200528860092, + "p95": 171.29600048065186, + "p99": 182.81599879264832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.2720006108284, + "p90": 84.6719965338707, + "p95": 86.97599917650223, + "p99": 104.92800176143646 + }, + "combine": { + "p50": 84.1279998421669, + "p90": 97.37599641084671, + "p95": 102.24000364542007, + "p99": 120.57600170373917 + }, + "roundtrip": { + "p50": 139.42399621009827, + "p90": 152.8639942407608, + "p95": 161.43999993801117, + "p99": 169.3439930677414 + }, + "isolatedSum": { + "p50": 166.4000004529953, + "p90": 182.0479929447174, + "p95": 189.2160028219223, + "p99": 225.50400346517563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.35199671983719, + "p90": 102.1760031580925, + "p95": 103.13600301742554, + "p99": 111.80800199508667 + }, + "combine": { + "p50": 85.53600311279297, + "p90": 86.59200370311737, + "p95": 88.41600269079208, + "p99": 101.47199779748917 + }, + "roundtrip": { + "p50": 149.9519944190979, + "p90": 153.02400290966034, + "p95": 154.6880006790161, + "p99": 166.143998503685 + }, + "isolatedSum": { + "p50": 
185.88799983263016, + "p90": 188.76800686120987, + "p95": 191.55200570821762, + "p99": 213.27999979257584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.5759990811348, + "p90": 111.35999858379364, + "p95": 112.70400136709213, + "p99": 118.30399930477142 + }, + "combine": { + "p50": 99.04000163078308, + "p90": 107.96800255775452, + "p95": 108.8000014424324, + "p99": 112.41599917411804 + }, + "roundtrip": { + "p50": 177.47199535369873, + "p90": 190.33600389957428, + "p95": 198.04799556732178, + "p99": 207.71199464797974 + }, + "isolatedSum": { + "p50": 203.61600071191788, + "p90": 219.32800114154816, + "p95": 221.50400280952454, + "p99": 230.71999847888947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.4800016283989, + "p90": 127.07200646400452, + "p95": 129.12000715732574, + "p99": 135.6160044670105 + }, + "combine": { + "p50": 122.14399874210358, + "p90": 123.1359988451004, + "p95": 124.25599992275238, + "p99": 146.5280055999756 + }, + "roundtrip": { + "p50": 211.5200012922287, + "p90": 215.00800549983978, + "p95": 217.50399470329285, + "p99": 228.92799973487854 + }, + "isolatedSum": { + "p50": 246.62400037050247, + "p90": 250.20800530910492, + "p95": 253.37600708007812, + "p99": 282.1440100669861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + } + ] + }, + { + "id": "cx-67e0f8ae", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_acd57143", + "comparisonKey": "8121b2f6ce30de9a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:12.228816+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 85.66399663686752, + "p90": 87.99999952316284, + "p95": 88.86399865150452, + "p99": 98.9760011434555 + }, + "combine": { + "p50": 72.80000299215317, + "p90": 73.66400212049484, + "p95": 74.49600100517273, + "p99": 77.79199630022049 + }, + "roundtrip": { + "p50": 130.78400492668152, + "p90": 133.82400572299957, + "p95": 136.6720050573349, + "p99": 142.0159935951233 + }, + "isolatedSum": { + "p50": 158.4639996290207, + "p90": 161.66400164365768, + "p95": 163.35999965667725, + "p99": 176.767997443676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 85.4720026254654, + "p90": 93.31200271844864, + "p95": 98.91200065612793, + "p99": 109.95200276374817 + }, + "combine": { + "p50": 73.11999797821045, + "p90": 74.62400197982788, + "p95": 75.3600001335144, + "p99": 79.58400249481201 + }, + "roundtrip": { + "p50": 129.7599971294403, + "p90": 136.4160031080246, + "p95": 138.62399756908417, + "p99": 149.9519944190979 + }, + "isolatedSum": { + "p50": 158.59200060367584, + "p90": 167.93600469827652, + "p95": 174.27200078964233, + "p99": 189.53600525856018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.44000172615051, + "p90": 87.93599903583527, + "p95": 89.9839997291565, + "p99": 105.98400235176086 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 75.71200281381607, + "p95": 76.76800340414047, + "p99": 86.81599795818329 + }, + 
"roundtrip": { + "p50": 136.03200018405914, + "p90": 143.5520052909851, + "p95": 144.6080058813095, + "p99": 147.16799557209015 + }, + "isolatedSum": { + "p50": 155.42399883270264, + "p90": 163.64800184965134, + "p95": 166.75200313329697, + "p99": 192.80000030994415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.28000050783157, + "p90": 97.120001912117, + "p95": 102.84800082445145, + "p99": 112.31999844312668 + }, + "combine": { + "p50": 74.01599735021591, + "p90": 76.22399926185608, + "p95": 83.16799998283386, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 138.46400380134583, + "p90": 144.54400539398193, + "p95": 146.04799449443817, + "p99": 167.71200299263 + }, + "isolatedSum": { + "p50": 155.29599785804749, + "p90": 173.34400117397308, + "p95": 186.0160008072853, + "p99": 210.56000143289566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.34400099515915, + "p90": 83.64800363779068, + "p95": 85.66399663686752, + "p99": 94.68799829483032 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 83.16799998283386, + "p95": 84.09599959850311, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 136.80000603199005, + "p90": 143.0719941854477, + "p95": 144.31999623775482, + "p99": 159.90400314331055 + }, + "isolatedSum": { + "p50": 156.00000321865082, + "p90": 166.81600362062454, + "p95": 169.75999623537064, + "p99": 212.09599822759628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 
6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 93.44000369310379, + "p90": 101.79200023412704, + "p95": 105.59999942779541, + "p99": 173.6000031232834 + }, + "combine": { + "p50": 84.6719965338707, + "p90": 85.60000360012054, + "p95": 85.7279971241951, + "p99": 97.50399738550186 + }, + "roundtrip": { + "p50": 148.83199334144592, + "p90": 155.10399639606476, + "p95": 156.3519984483719, + "p99": 164.5440012216568 + }, + "isolatedSum": { + "p50": 178.1120002269745, + "p90": 187.3920038342476, + "p95": 191.3279965519905, + "p99": 271.10400050878525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.67199850082397, + "p90": 105.3759977221489, + "p95": 109.3439981341362, + "p99": 116.64000153541565 + }, + "combine": { + "p50": 97.88800030946732, + "p90": 99.80800002813339, + "p95": 101.08800232410431, + "p99": 121.72800302505493 + }, + "roundtrip": { + "p50": 178.39999496936798, + "p90": 193.63200664520264, + "p95": 210.01599729061127, + "p99": 235.58400571346283 + }, + "isolatedSum": { + "p50": 194.5599988102913, + "p90": 205.1839977502823, + "p95": 210.4320004582405, + "p99": 238.36800456047058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.01599711179733, + "p90": 120.12799829244614, + "p95": 121.44000083208084, + "p99": 128.38399410247803 + }, + "combine": { + "p50": 
132.7040046453476, + "p90": 134.5279961824417, + "p95": 134.88000631332397, + "p99": 150.2400040626526 + }, + "roundtrip": { + "p50": 227.35999524593353, + "p90": 232.54400491714478, + "p95": 234.8479926586151, + "p99": 247.45599925518036 + }, + "isolatedSum": { + "p50": 250.72000175714493, + "p90": 254.65599447488785, + "p95": 256.3200071454048, + "p99": 278.6239981651306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-53c9732d", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "b300_88389899", + "comparisonKey": "de2d5411e355f23d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:19.206107+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": 
false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 85.56800335645676, + "p90": 92.92799979448318, + "p95": 99.48799759149551, + "p99": 108.35199803113937 + }, + "combine": { + "p50": 64.38399851322174, + "p90": 72.57600128650665, + "p95": 73.18399846553802, + "p99": 75.13599842786789 + }, + "roundtrip": { + "p50": 125.791996717453, + "p90": 132.83200562000275, + "p95": 134.36800241470337, + "p99": 145.05599439144135 + }, + "isolatedSum": { + "p50": 149.9520018696785, + "p90": 165.50400108098984, + "p95": 172.67199605703354, + "p99": 183.48799645900726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.34400099515915, + "p90": 88.19200098514557, + "p95": 89.37600255012512, + "p99": 99.64799880981445 + }, + "combine": { + "p50": 63.77600133419037, + "p90": 72.28799909353256, + "p95": 72.80000299215317, + "p99": 76.38400048017502 + }, + "roundtrip": { + "p50": 128.25599312782288, + "p90": 133.34399461746216, + "p95": 134.783998131752, + "p99": 148.22399616241455 + }, + "isolatedSum": { + "p50": 145.12000232934952, + "p90": 
160.48000007867813, + "p95": 162.1760055422783, + "p99": 176.03199928998947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.31200075149536, + "p90": 88.76799792051315, + "p95": 91.36000275611877, + "p99": 129.92000579833984 + }, + "combine": { + "p50": 63.680000603199005, + "p90": 72.28799909353256, + "p95": 72.86400347948074, + "p99": 76.06399804353714 + }, + "roundtrip": { + "p50": 127.42400169372559, + "p90": 149.59999918937683, + "p95": 154.4640064239502, + "p99": 159.7760021686554 + }, + "isolatedSum": { + "p50": 144.99200135469437, + "p90": 161.05599701404572, + "p95": 164.22400623559952, + "p99": 205.98400384187698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.89599758386612, + "p90": 97.47199714183807, + "p95": 102.78400033712387, + "p99": 112.64000087976456 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 73.47200065851212, + "p95": 74.17599856853485, + "p99": 84.48000252246857 + }, + "roundtrip": { + "p50": 125.72799623012543, + "p90": 129.08799946308136, + "p95": 132.06399977207184, + "p99": 140.8960074186325 + }, + "isolatedSum": { + "p50": 153.27999740839005, + "p90": 170.9439978003502, + "p95": 176.95999890565872, + "p99": 197.12000340223312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": 
{ + "p50": 81.69600367546082, + "p90": 94.59199756383896, + "p95": 99.93600100278854, + "p99": 108.73600095510483 + }, + "combine": { + "p50": 73.08799773454666, + "p90": 74.23999905586243, + "p95": 75.07199794054031, + "p99": 86.40000224113464 + }, + "roundtrip": { + "p50": 134.39999520778656, + "p90": 140.83200693130493, + "p95": 143.23200285434723, + "p99": 147.90399372577667 + }, + "isolatedSum": { + "p50": 154.78400141000748, + "p90": 168.83199661970139, + "p95": 175.00799894332886, + "p99": 195.13600319623947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 91.74399822950363, + "p90": 99.64799880981445, + "p95": 103.35999727249146, + "p99": 118.84800344705582 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 76.9599974155426, + "p95": 84.25600081682205, + "p99": 97.21600264310837 + }, + "roundtrip": { + "p50": 139.23199474811554, + "p90": 147.23199605941772, + "p95": 148.8640010356903, + "p99": 157.31200575828552 + }, + "isolatedSum": { + "p50": 166.3680002093315, + "p90": 176.60799622535706, + "p95": 187.6159980893135, + "p99": 216.06400609016418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.70399940013885, + "p90": 105.53599894046783, + "p95": 110.30399799346924, + "p99": 143.39199662208557 + }, + "combine": { + "p50": 97.28000313043594, + "p90": 98.08000177145004, + "p95": 98.75199943780899, + "p99": 121.8239963054657 + }, + "roundtrip": { + "p50": 173.8239973783493, + "p90": 181.18399381637573, + "p95": 182.3039948940277, + 
"p99": 187.83999979496002 + }, + "isolatedSum": { + "p50": 197.9840025305748, + "p90": 203.61600071191788, + "p95": 209.05599743127823, + "p99": 265.21599292755127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.12799763679504, + "p90": 119.45600062608719, + "p95": 121.37600034475327, + "p99": 125.2480000257492 + }, + "combine": { + "p50": 123.19999933242798, + "p90": 133.02400708198547, + "p95": 133.88800621032715, + "p99": 147.90399372577667 + }, + "roundtrip": { + "p50": 217.53600239753723, + "p90": 224.60800409317017, + "p95": 227.4239957332611, + "p99": 232.7360063791275 + }, + "isolatedSum": { + "p50": 239.32799696922302, + "p90": 252.48000770807266, + "p95": 255.2640065550804, + "p99": 273.1519937515259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-55b7f75b", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "b300_28bc3f15", + "comparisonKey": "cdd4d75f9a857b86", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:07.545944+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 
EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 86.46400272846222, + "p90": 89.40800279378891, + "p95": 91.58399701118469, + "p99": 117.50400066375732 + }, + "combine": { + "p50": 72.92799651622772, + "p90": 73.91999661922455, + "p95": 74.8480036854744, + "p99": 84.19200032949448 + }, + "roundtrip": { + "p50": 131.3920021057129, + "p90": 135.3919953107834, + "p95": 136.99199259281158, + "p99": 154.4959992170334 + }, + "isolatedSum": { + "p50": 159.39199924468994, + "p90": 163.32799941301346, + "p95": 166.4320006966591, + "p99": 201.6960009932518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + 
"combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.11999928951263, + "p90": 86.75199747085571, + "p95": 88.51200342178345, + "p99": 103.26399654150009 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 75.45600086450577, + "p95": 76.22399926185608, + "p99": 88.83199840784073 + }, + "roundtrip": { + "p50": 138.5599970817566, + "p90": 144.31999623775482, + "p95": 145.56799829006195, + "p99": 148.99200201034546 + }, + "isolatedSum": { + "p50": 154.4959992170334, + "p90": 162.20799833536148, + "p95": 164.73600268363953, + "p99": 192.09599494934082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.24800026416779, + "p90": 84.70399677753448, + "p95": 87.52000331878662, + "p99": 99.71199929714203 + }, + "combine": { + "p50": 74.46400076150894, + "p90": 76.89599692821503, + "p95": 83.64800363779068, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 137.5039964914322, + "p90": 143.13599467277527, + "p95": 144.44799721240997, + "p99": 153.6960005760193 + }, + "isolatedSum": { + "p50": 155.71200102567673, + "p90": 161.5999937057495, + "p95": 171.1680069565773, + "p99": 187.1040016412735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.31200075149536, + "p90": 84.86399799585342, + "p95": 87.77599781751633, + "p99": 104.86400127410889 + }, + "combine": { + "p50": 
76.28799974918365, + "p90": 85.02399921417236, + "p95": 85.4400023818016, + "p99": 87.07199990749359 + }, + "roundtrip": { + "p50": 139.615997672081, + "p90": 145.24799585342407, + "p95": 149.56800639629364, + "p99": 167.64800250530243 + }, + "isolatedSum": { + "p50": 157.60000050067902, + "p90": 169.8879972100258, + "p95": 173.21600019931793, + "p99": 191.93600118160248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.53600245714188, + "p90": 84.3840017914772, + "p95": 86.11200004816055, + "p99": 106.20799660682678 + }, + "combine": { + "p50": 84.51200276613235, + "p90": 85.63199639320374, + "p95": 85.85599809885025, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 139.55199718475342, + "p90": 146.68799936771393, + "p95": 149.1840034723282, + "p99": 163.455992937088 + }, + "isolatedSum": { + "p50": 166.04800522327423, + "p90": 170.01599818468094, + "p95": 171.9679981470108, + "p99": 215.96799790859222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.48799759149551, + "p90": 102.1760031580925, + "p95": 103.71199995279312, + "p99": 112.99200356006622 + }, + "combine": { + "p50": 85.75999736785889, + "p90": 86.91199868917465, + "p95": 88.48000317811966, + "p99": 108.83200168609619 + }, + "roundtrip": { + "p50": 151.5199989080429, + "p90": 156.12800419330597, + "p95": 157.85600244998932, + "p99": 166.20799899101257 + }, + "isolatedSum": { + "p50": 185.2479949593544, + "p90": 189.08800184726715, + "p95": 192.19200313091278, + "p99": 
221.82400524616241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 103.2319962978363, + "p90": 108.70400071144104, + "p95": 110.944002866745, + "p99": 123.87199699878693 + }, + "combine": { + "p50": 98.91200065612793, + "p90": 102.27199643850327, + "p95": 108.99200290441513, + "p99": 112.44799941778183 + }, + "roundtrip": { + "p50": 179.1680008172989, + "p90": 184.4799965620041, + "p95": 186.62400543689728, + "p99": 195.77600061893463 + }, + "isolatedSum": { + "p50": 202.14399695396423, + "p90": 210.9759971499443, + "p95": 219.93600577116013, + "p99": 236.31999641656876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.60800260305405, + "p90": 127.6479959487915, + "p95": 130.46400249004364, + "p99": 140.1280015707016 + }, + "combine": { + "p50": 122.04799801111221, + "p90": 122.78400361537933, + "p95": 123.61600250005722, + "p99": 136.99199259281158 + }, + "roundtrip": { + "p50": 212.41599321365356, + "p90": 216.60800278186798, + "p95": 218.46400201320648, + "p99": 234.30399596691132 + }, + "isolatedSum": { + "p50": 246.65600061416626, + "p90": 250.43199956417084, + "p95": 254.08000499010086, + "p99": 277.1199941635132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d755cd64", + "identity": 
"b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "b300_58029714", + "comparisonKey": "a6cafaa270a49270", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:45.555146+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + 
}, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 86.14400029182434, + "p90": 88.51200342178345, + "p95": 89.53599631786346, + "p99": 99.13600236177444 + }, + "combine": { + "p50": 73.11999797821045, + "p90": 74.07999783754349, + "p95": 74.97599720954895, + "p99": 79.48800176382065 + }, + "roundtrip": { + "p50": 127.29600071907043, + "p90": 135.13599336147308, + "p95": 137.2479945421219, + "p99": 152.6080071926117 + }, + "isolatedSum": { + "p50": 159.2639982700348, + "p90": 162.59200125932693, + "p95": 164.51199352741241, + "p99": 178.6240041255951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.51200211048126, + "p90": 86.56000345945358, + "p95": 87.71199733018875, + "p99": 95.39200365543365 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 75.16799867153168, + "p95": 76.22399926185608, + "p99": 87.74399757385254 + }, + "roundtrip": { + "p50": 135.45599579811096, + "p90": 142.2719955444336, + "p95": 143.5839980840683, + "p99": 163.93600404262543 + }, + "isolatedSum": { + "p50": 153.85600179433823, + "p90": 161.72800213098526, + "p95": 163.93599659204483, + "p99": 183.1360012292862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.44800162315369, + "p90": 83.71199667453766, + "p95": 86.20800077915192, + "p99": 120.92799693346024 + }, + "combine": { + "p50": 74.33599978685379, + "p90": 76.48000121116638, + "p95": 83.74399691820145, + "p99": 97.53599762916565 + }, + "roundtrip": { + "p50": 139.74399864673615, + 
"p90": 144.73600685596466, + "p95": 146.11199498176575, + "p99": 153.82400155067444 + }, + "isolatedSum": { + "p50": 154.78400141000748, + "p90": 160.19199788570404, + "p95": 169.95199769735336, + "p99": 218.46399456262589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.31200075149536, + "p90": 90.14400094747543, + "p95": 92.83199906349182, + "p99": 103.55199873447418 + }, + "combine": { + "p50": 74.97599720954895, + "p90": 84.25600081682205, + "p95": 84.79999750852585, + "p99": 101.59999877214432 + }, + "roundtrip": { + "p50": 137.28000223636627, + "p90": 142.5279974937439, + "p95": 144.03200149536133, + "p99": 153.4080058336258 + }, + "isolatedSum": { + "p50": 156.2879979610443, + "p90": 174.40000176429749, + "p95": 177.63199657201767, + "p99": 205.1519975066185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.60000294446945, + "p90": 84.32000130414963, + "p95": 86.20800077915192, + "p99": 104.99200224876404 + }, + "combine": { + "p50": 75.96799731254578, + "p90": 85.15200018882751, + "p95": 85.4720026254654, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 137.92000710964203, + "p90": 140.79999923706055, + "p95": 143.8400000333786, + "p99": 153.6320000886917 + }, + "isolatedSum": { + "p50": 157.56800025701523, + "p90": 169.47200149297714, + "p95": 171.6800034046173, + "p99": 196.86400145292282 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 
124, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.7199991941452, + "p90": 101.85600072145462, + "p95": 102.91200131177902, + "p99": 106.78400099277496 + }, + "combine": { + "p50": 85.4720026254654, + "p90": 86.43200248479843, + "p95": 87.42400258779526, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 149.56800639629364, + "p90": 152.25599706172943, + "p95": 153.9520025253296, + "p99": 165.82399606704712 + }, + "isolatedSum": { + "p50": 184.1920018196106, + "p90": 188.28800320625305, + "p95": 190.33600389957428, + "p99": 202.7520015835762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.53599894046783, + "p90": 111.07199639081955, + "p95": 113.34399878978729, + "p99": 157.6319932937622 + }, + "combine": { + "p50": 98.81599992513657, + "p90": 101.88800096511841, + "p95": 109.27999764680862, + "p99": 137.69599795341492 + }, + "roundtrip": { + "p50": 179.29600179195404, + "p90": 185.63200533390045, + "p95": 187.71199882030487, + "p99": 198.11199605464935 + }, + "isolatedSum": { + "p50": 204.3519988656044, + "p90": 212.95999735593796, + "p95": 222.62399643659592, + "p99": 295.3279912471771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.61600315570831, + "p90": 149.9519944190979, + "p95": 168.5439944267273, + "p99": 218.84800493717194 + }, + "combine": { + "p50": 134.43200290203094, + "p90": 
147.87200093269348, + "p95": 159.8079949617386, + "p99": 183.45600366592407 + }, + "roundtrip": { + "p50": 233.66400599479675, + "p90": 241.11999571323395, + "p95": 242.71999299526215, + "p99": 257.02399015426636 + }, + "isolatedSum": { + "p50": 262.04800605773926, + "p90": 297.8239953517914, + "p95": 328.3519893884659, + "p99": 402.304008603096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-faa8ffdc", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "b300_187551ba", + "comparisonKey": "84ec5d39bd4d43cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:14.065291+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": 
false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 85.24800091981888, + "p90": 88.35200220346451, + "p95": 89.56799656152725, + "p99": 101.88800096511841 + }, + "combine": { + "p50": 73.31199944019318, + "p90": 75.23199915885925, + "p95": 75.80800354480743, + "p99": 84.28800106048584 + }, + "roundtrip": { + "p50": 135.42400300502777, + "p90": 141.76000654697418, + "p95": 143.68000626564026, + "p99": 146.88000082969666 + }, + "isolatedSum": { + "p50": 158.56000036001205, + "p90": 163.58400136232376, + "p95": 165.3760001063347, + "p99": 186.17600202560425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.216000020504, + "p90": 87.90399879217148, + "p95": 90.40000289678574, + "p99": 97.95200079679489 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 76.35200023651123, + "p95": 83.42400193214417, + "p99": 96.63999825716019 + }, + "roundtrip": { + "p50": 141.4719969034195, + "p90": 144.57599818706512, + "p95": 145.7280069589615, + "p99": 154.27200496196747 + }, + "isolatedSum": { + "p50": 
155.45599907636642, + "p90": 164.2559990286827, + "p95": 173.8240048289299, + "p99": 194.59199905395508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.15199953317642, + "p90": 83.55200290679932, + "p95": 85.50400286912918, + "p99": 104.12800312042236 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 83.71199667453766, + "p95": 84.57600325345993, + "p99": 96.83199971914291 + }, + "roundtrip": { + "p50": 137.7280056476593, + "p90": 143.71199905872345, + "p95": 144.67200636863708, + "p99": 150.94399452209473 + }, + "isolatedSum": { + "p50": 155.71200102567673, + "p90": 167.26399958133698, + "p95": 170.0800061225891, + "p99": 200.96000283956528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.44000172615051, + "p90": 83.52000266313553, + "p95": 85.1840004324913, + "p99": 89.79199826717377 + }, + "combine": { + "p50": 77.18399912118912, + "p90": 85.11999994516373, + "p95": 85.53600311279297, + "p99": 87.96799927949905 + }, + "roundtrip": { + "p50": 138.68799805641174, + "p90": 141.6960060596466, + "p95": 143.10400187969208, + "p99": 150.9760022163391 + }, + "isolatedSum": { + "p50": 158.62400084733963, + "p90": 168.64000260829926, + "p95": 170.72000354528427, + "p99": 177.75999754667282 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 82.2720006108284, + "p90": 92.6079973578453, + "p95": 97.69599884748459, + "p99": 166.04800522327423 + }, + "combine": { + "p50": 84.1279998421669, + "p90": 88.3840024471283, + "p95": 96.73599898815155, + "p99": 99.48799759149551 + }, + "roundtrip": { + "p50": 139.0399932861328, + "p90": 148.3200043439865, + "p95": 152.41600573062897, + "p99": 164.12800550460815 + }, + "isolatedSum": { + "p50": 166.4000004529953, + "p90": 180.9919998049736, + "p95": 194.43199783563614, + "p99": 265.53600281476974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.61599856615067, + "p90": 101.50399804115295, + "p95": 102.49599814414978, + "p99": 123.00799787044525 + }, + "combine": { + "p50": 85.50400286912918, + "p90": 86.46400272846222, + "p95": 87.52000331878662, + "p99": 110.46399921178818 + }, + "roundtrip": { + "p50": 149.72800016403198, + "p90": 151.87199413776398, + "p95": 153.18399667739868, + "p99": 155.7759940624237 + }, + "isolatedSum": { + "p50": 185.12000143527985, + "p90": 187.96800076961517, + "p95": 190.0160014629364, + "p99": 233.47199708223343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 109.0880036354065, + "p90": 113.21599781513214, + "p95": 113.88800293207169, + "p99": 115.99999666213989 + }, + "combine": { + "p50": 99.2640033364296, + "p90": 108.19199681282043, + "p95": 109.0880036354065, + "p99": 111.58400028944016 + }, + "roundtrip": { + "p50": 178.27199399471283, + "p90": 
182.43199586868286, + "p95": 184.76800620555878, + "p99": 204.0639966726303 + }, + "isolatedSum": { + "p50": 208.3520069718361, + "p90": 221.40799462795258, + "p95": 222.97600656747818, + "p99": 227.58399695158005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.12799894809723, + "p90": 126.27199292182922, + "p95": 127.13600695133209, + "p99": 129.7599971294403 + }, + "combine": { + "p50": 122.17599898576736, + "p90": 122.75200337171555, + "p95": 123.61600250005722, + "p99": 139.0720009803772 + }, + "roundtrip": { + "p50": 211.16800606250763, + "p90": 215.7759964466095, + "p95": 218.33600103855133, + "p99": 242.71999299526215 + }, + "isolatedSum": { + "p50": 246.3039979338646, + "p90": 249.02399629354477, + "p95": 250.7520094513893, + "p99": 268.8319981098175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da7570d0", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "b300_225a9503", + "comparisonKey": "8cd0b2857a77a271", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:56.311644+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.82399719953537, + "p90": 92.38400310277939, + "p95": 97.50399738550186, + "p99": 118.8800036907196 + }, + "combine": { + "p50": 72.76800274848938, + "p90": 73.72800260782242, + "p95": 74.49600100517273, + "p99": 78.84799689054489 + }, + "roundtrip": { + "p50": 130.49599528312683, + "p90": 134.5919966697693, + "p95": 136.7039978504181, + "p99": 142.81600713729858 + }, + "isolatedSum": { + "p50": 154.59199994802475, + "p90": 166.1120057106018, + "p95": 171.9999983906746, + "p99": 197.7280005812645 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 83.0719992518425, + "p90": 91.39200299978256, + "p95": 98.68799895048141, + "p99": 110.07999628782272 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 74.5920017361641, + "p95": 75.29599964618683, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 132.9600065946579, + "p90": 136.35200262069702, + "p95": 137.7280056476593, + "p99": 142.752006649971 + }, + "isolatedSum": { + "p50": 156.3199982047081, + "p90": 165.98400473594666, + "p95": 173.98399859666824, + "p99": 193.7279999256134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.73599636554718, + "p90": 101.56799852848053, + "p95": 105.59999942779541, + "p99": 111.13599687814713 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 75.74400305747986, + "p95": 76.80000364780426, + "p99": 89.12000060081482 + }, + "roundtrip": { + "p50": 135.71199774742126, + "p90": 142.97600090503693, + "p95": 144.51199769973755, + "p99": 168.47999393939972 + }, + "isolatedSum": { + "p50": 154.4319987297058, + "p90": 177.3120015859604, + "p95": 182.40000307559967, + "p99": 200.25599747896194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.76799660921097, + "p90": 91.26400202512741, + "p95": 104.2879968881607, + "p99": 153.9520025253296 + }, + 
"combine": { + "p50": 74.11199808120728, + "p90": 78.49600166082382, + "p95": 84.51200276613235, + "p99": 97.24800288677216 + }, + "roundtrip": { + "p50": 139.615997672081, + "p90": 145.05599439144135, + "p95": 145.91999351978302, + "p99": 159.90400314331055 + }, + "isolatedSum": { + "p50": 154.87999469041824, + "p90": 169.76000368595123, + "p95": 188.79999965429306, + "p99": 251.20000541210175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.64000308513641, + "p90": 82.78399705886841, + "p95": 84.09599959850311, + "p99": 91.67999774217606 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 83.67999643087387, + "p95": 84.51200276613235, + "p99": 87.67999708652496 + }, + "roundtrip": { + "p50": 136.57599687576294, + "p90": 139.20000195503235, + "p95": 141.31200313568115, + "p99": 149.50400590896606 + }, + "isolatedSum": { + "p50": 155.2640050649643, + "p90": 166.46399348974228, + "p95": 168.60800236463547, + "p99": 179.35999482870102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.04800087213516, + "p90": 102.88000106811523, + "p95": 106.11200332641602, + "p99": 117.40799993276596 + }, + "combine": { + "p50": 84.86399799585342, + "p90": 86.20800077915192, + "p95": 86.65599673986435, + "p99": 98.33600372076035 + }, + "roundtrip": { + "p50": 149.21599626541138, + "p90": 154.81600165367126, + "p95": 155.58399260044098, + "p99": 171.77599668502808 + }, + "isolatedSum": { + "p50": 178.9119988679886, + "p90": 189.08800184726715, + "p95": 
192.76800006628036, + "p99": 215.7440036535263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.02400118112564, + "p90": 103.7760004401207, + "p95": 105.8880016207695, + "p99": 123.00799787044525 + }, + "combine": { + "p50": 98.08000177145004, + "p90": 99.90400075912476, + "p95": 100.99200159311295, + "p99": 115.58400094509125 + }, + "roundtrip": { + "p50": 176.7359972000122, + "p90": 181.0240000486374, + "p95": 183.20000171661377, + "p99": 197.08800315856934 + }, + "isolatedSum": { + "p50": 195.10400295257568, + "p90": 203.68000119924545, + "p95": 206.88000321388245, + "p99": 238.5919988155365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.95199662446976, + "p90": 120.54400146007538, + "p95": 122.40000069141388, + "p99": 128.48000228405 + }, + "combine": { + "p50": 132.9279989004135, + "p90": 134.5919966697693, + "p95": 135.00800728797913, + "p99": 149.4079977273941 + }, + "roundtrip": { + "p50": 227.03999280929565, + "p90": 231.90400004386902, + "p95": 233.43999683856964, + "p99": 236.38400435447693 + }, + "isolatedSum": { + "p50": 250.87999552488327, + "p90": 255.13599812984467, + "p95": 257.408007979393, + "p99": 277.8880000114441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8fdc94b8", + "identity": 
"b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_0818b53b", + "comparisonKey": "d9368de7e3895092", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:24.719830+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": 
"2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 87.16800063848495, + "p90": 88.99199962615967, + "p95": 90.46400338411331, + "p99": 103.2319962978363 + }, + "combine": { + "p50": 72.86400347948074, + "p90": 73.53600114583969, + "p95": 74.11199808120728, + "p99": 77.53600180149078 + }, + "roundtrip": { + "p50": 126.52799487113953, + "p90": 131.32800161838531, + "p95": 134.71999764442444, + "p99": 146.84799313545227 + }, + "isolatedSum": { + "p50": 160.0320041179657, + "p90": 162.52800077199936, + "p95": 164.5760014653206, + "p99": 180.7679980993271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 81.02399855852127, + "p90": 86.65599673986435, + "p95": 87.39200234413147, + "p99": 89.56799656152725 + }, + "combine": { + "p50": 73.63200187683105, + "p90": 75.26399940252304, + "p95": 76.38400048017502, + "p99": 84.79999750852585 + }, + "roundtrip": { + "p50": 140.60799777507782, + "p90": 143.99999380111694, + "p95": 145.1520025730133, + "p99": 157.50400722026825 + }, + "isolatedSum": { + "p50": 154.65600043535233, + "p90": 161.9199961423874, + "p95": 163.7760028243065, + "p99": 174.3679940700531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.44800162315369, + "p90": 83.23200047016144, + "p95": 84.95999872684479, + "p99": 91.32800251245499 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 76.4160007238388, + "p95": 83.48800241947174, + "p99": 
96.70399874448776 + }, + "roundtrip": { + "p50": 139.64800536632538, + "p90": 144.86399292945862, + "p95": 146.27200365066528, + "p99": 160.44799983501434 + }, + "isolatedSum": { + "p50": 154.6880006790161, + "p90": 159.64800119400024, + "p95": 168.44800114631653, + "p99": 188.03200125694275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.70400357246399, + "p90": 82.84799754619598, + "p95": 84.44800227880478, + "p99": 92.86399930715561 + }, + "combine": { + "p50": 74.75200295448303, + "p90": 83.67999643087387, + "p95": 84.89599823951721, + "p99": 97.9200005531311 + }, + "roundtrip": { + "p50": 138.0160003900528, + "p90": 141.4400041103363, + "p95": 143.8719928264618, + "p99": 156.38400614261627 + }, + "isolatedSum": { + "p50": 155.45600652694702, + "p90": 166.52799397706985, + "p95": 169.344000518322, + "p99": 190.7839998602867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.43200182914734, + "p90": 102.27199643850327, + "p95": 107.45599865913391, + "p99": 114.97599631547928 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 85.24800091981888, + "p95": 85.69599688053131, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 138.49599659442902, + "p90": 141.24800264835358, + "p95": 142.62400567531586, + "p99": 152.73599326610565 + }, + "isolatedSum": { + "p50": 159.35999900102615, + "p90": 187.51999735832214, + "p95": 193.15199553966522, + "p99": 211.67999505996704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + 
"combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.67199915647507, + "p90": 102.52799838781357, + "p95": 103.39199751615524, + "p99": 127.07200646400452 + }, + "combine": { + "p50": 85.50400286912918, + "p90": 86.91199868917465, + "p95": 87.74399757385254, + "p99": 108.92800241708755 + }, + "roundtrip": { + "p50": 150.07999539375305, + "p90": 152.96000242233276, + "p95": 154.62400019168854, + "p99": 175.29599368572235 + }, + "isolatedSum": { + "p50": 186.17600202560425, + "p90": 189.43999707698822, + "p95": 191.13599509000778, + "p99": 236.00000888109207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.47999769449234, + "p90": 113.50400000810623, + "p95": 119.10399794578552, + "p99": 136.51199638843536 + }, + "combine": { + "p50": 99.13600236177444, + "p90": 101.79200023412704, + "p95": 109.15199667215347, + "p99": 112.35199868679047 + }, + "roundtrip": { + "p50": 177.98399925231934, + "p90": 183.74399840831757, + "p95": 187.16800212860107, + "p99": 205.6960016489029 + }, + "isolatedSum": { + "p50": 199.61600005626678, + "p90": 215.29600024223328, + "p95": 228.255994617939, + "p99": 248.86399507522583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.89599734544754, + "p90": 127.48800218105316, + "p95": 129.88799810409546, + "p99": 
140.6400054693222 + }, + "combine": { + "p50": 122.36800044775009, + "p90": 123.61600250005722, + "p95": 124.60800260305405, + "p99": 134.65599715709686 + }, + "roundtrip": { + "p50": 213.85599672794342, + "p90": 221.66399657726288, + "p95": 222.6559966802597, + "p99": 228.2560020685196 + }, + "isolatedSum": { + "p50": 247.26399779319763, + "p90": 251.10400468111038, + "p95": 254.4960007071495, + "p99": 275.29600262641907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ba9aa938", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "b300_e8881afb", + "comparisonKey": "68b087e61477830b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:37.087890+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 86.91199868917465, + "p90": 89.28000181913376, + "p95": 90.97599983215332, + "p99": 103.67999970912933 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 73.72800260782242, + "p95": 74.27199929952621, + "p99": 84.51200276613235 + }, + "roundtrip": { + "p50": 127.13600695133209, + "p90": 133.12000036239624, + "p95": 135.48800349235535, + "p99": 145.6640064716339 + }, + "isolatedSum": { + "p50": 159.90399569272995, + "p90": 163.00800442695618, + "p95": 165.24799913167953, + "p99": 188.1920024752617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 83.3280012011528, + "p90": 92.67199784517288, + "p95": 95.58399766683578, + "p99": 103.61599922180176 + }, + "combine": { + "p50": 73.66400212049484, + "p90": 75.80800354480743, + "p95": 76.9599974155426, + "p99": 84.48000252246857 + }, + "roundtrip": { + "p50": 139.615997672081, + "p90": 144.0960019826889, + "p95": 145.63199877738953, + "p99": 
153.79199385643005 + }, + "isolatedSum": { + "p50": 156.99200332164764, + "p90": 168.48000138998032, + "p95": 172.5439950823784, + "p99": 188.09600174427032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.37600123882294, + "p90": 83.61600339412689, + "p95": 86.14400029182434, + "p99": 97.21600264310837 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 83.80799740552902, + "p95": 84.3840017914772, + "p99": 89.66399729251862 + }, + "roundtrip": { + "p50": 139.67999815940857, + "p90": 145.53600549697876, + "p95": 146.68799936771393, + "p99": 155.87200224399567 + }, + "isolatedSum": { + "p50": 156.0320034623146, + "p90": 167.42400079965591, + "p95": 170.52800208330154, + "p99": 186.87999993562698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.95199817419052, + "p90": 96.12800180912018, + "p95": 102.30399668216705, + "p99": 112.73600161075592 + }, + "combine": { + "p50": 75.6480023264885, + "p90": 84.35200154781342, + "p95": 84.79999750852585, + "p99": 108.38399827480316 + }, + "roundtrip": { + "p50": 137.66400516033173, + "p90": 140.70400595664978, + "p95": 142.11200177669525, + "p99": 155.10399639606476 + }, + "isolatedSum": { + "p50": 157.60000050067902, + "p90": 180.4800033569336, + "p95": 187.1039941906929, + "p99": 221.11999988555908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.11199939250946, + "p90": 94.4959968328476, + "p95": 100.28800368309021, + "p99": 109.40799862146378 + }, + "combine": { + "p50": 84.16000008583069, + "p90": 85.53600311279297, + "p95": 85.75999736785889, + "p99": 95.45599669218063 + }, + "roundtrip": { + "p50": 139.16799426078796, + "p90": 142.2719955444336, + "p95": 146.7200070619583, + "p99": 165.12000560760498 + }, + "isolatedSum": { + "p50": 166.27199947834015, + "p90": 180.03199994564056, + "p95": 186.0480010509491, + "p99": 204.8639953136444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.09600222110748, + "p90": 102.20800340175629, + "p95": 103.55199873447418, + "p99": 111.93600296974182 + }, + "combine": { + "p50": 85.56800335645676, + "p90": 101.08800232410431, + "p95": 109.56799983978271, + "p99": 122.23999947309494 + }, + "roundtrip": { + "p50": 150.11200308799744, + "p90": 154.94400262832642, + "p95": 159.39199924468994, + "p99": 169.3120002746582 + }, + "isolatedSum": { + "p50": 185.66400557756424, + "p90": 203.2960057258606, + "p95": 213.1199985742569, + "p99": 234.17600244283676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.03200173377991, + "p90": 122.43200093507767, + "p95": 132.06399977207184, + "p99": 144.73600685596466 + }, + "combine": { + "p50": 100.00000149011612, + "p90": 108.86400192975998, + "p95": 109.37599837779999, + "p99": 112.22399771213531 + }, + 
"roundtrip": { + "p50": 177.824005484581, + "p90": 182.0479929447174, + "p95": 183.67999792099, + "p99": 201.08799636363983 + }, + "isolatedSum": { + "p50": 200.03200322389603, + "p90": 231.29600286483765, + "p95": 241.43999814987183, + "p99": 256.9600045681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.34399330615997, + "p90": 129.72800433635712, + "p95": 131.80799782276154, + "p99": 137.05599308013916 + }, + "combine": { + "p50": 122.3360002040863, + "p90": 123.36000055074692, + "p95": 124.35200065374374, + "p99": 132.9919993877411 + }, + "roundtrip": { + "p50": 214.1759991645813, + "p90": 221.5680032968521, + "p95": 222.97599911689758, + "p99": 263.2000148296356 + }, + "isolatedSum": { + "p50": 247.67999351024628, + "p90": 253.08800488710403, + "p95": 256.1599984765053, + "p99": 270.04799246788025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ba081d72", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_e76916bc", + "comparisonKey": "30a0a09b731140d0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:51.910625+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": 
"b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.60800087451935, + "p90": 79.71200346946716, + "p95": 87.61599659919739, + "p99": 96.3520035147667 + }, + "combine": { + "p50": 73.66400212049484, + "p90": 75.42400062084198, + "p95": 76.4160007238388, + "p99": 84.25600081682205 + }, + "roundtrip": { + "p50": 126.81600451469421, + "p90": 131.8719983100891, + "p95": 133.05599987506866, + "p99": 148.00000190734863 + }, + "isolatedSum": { + "p50": 142.2720029950142, + "p90": 155.13600409030914, + "p95": 164.0319973230362, + "p99": 180.60800433158875 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.18399846553802, + "p90": 88.639996945858, + "p95": 91.10400080680847, + "p99": 98.59199821949005 + }, + "combine": { + "p50": 74.40000027418137, + "p90": 76.60800218582153, + "p95": 83.67999643087387, + "p99": 98.43199700117111 + }, + "roundtrip": { + "p50": 128.12800705432892, + "p90": 132.06399977207184, + "p95": 132.9919993877411, + "p99": 146.2080031633377 + }, + "isolatedSum": { + "p50": 147.5839987397194, + "p90": 165.24799913167953, + "p95": 174.78399723768234, + "p99": 197.02399522066116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 68.09599697589874, + "p90": 70.23999840021133, + "p95": 72.83200323581696, + "p99": 91.839998960495 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 76.57600194215775, + "p95": 83.93599838018417, + "p99": 88.57599645853043 + }, + "roundtrip": { + "p50": 124.32000041007996, + "p90": 129.34400141239166, + "p95": 130.40000200271606, + "p99": 134.43200290203094 + }, + "isolatedSum": { + "p50": 142.46399700641632, + "p90": 146.81600034236908, + "p95": 156.76800161600113, + "p99": 180.41599541902542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 68.60800087451935, + "p90": 87.26400136947632, + "p95": 89.82399851083755, + "p99": 94.94400024414062 
+ }, + "combine": { + "p50": 75.48800110816956, + "p90": 84.79999750852585, + "p95": 85.08799970149994, + "p99": 95.64799815416336 + }, + "roundtrip": { + "p50": 124.7360035777092, + "p90": 127.3919939994812, + "p95": 131.52000308036804, + "p99": 147.64800667762756 + }, + "isolatedSum": { + "p50": 144.0960019826889, + "p90": 172.06399887800217, + "p95": 174.9119982123375, + "p99": 190.59199839830399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 69.023996591568, + "p90": 71.07199728488922, + "p95": 72.48000055551529, + "p99": 91.64799749851227 + }, + "combine": { + "p50": 75.6160020828247, + "p90": 84.86399799585342, + "p95": 85.34400165081024, + "p99": 101.27999633550644 + }, + "roundtrip": { + "p50": 125.98399817943573, + "p90": 130.97600638866425, + "p95": 134.5279961824417, + "p99": 159.61599349975586 + }, + "isolatedSum": { + "p50": 144.6399986743927, + "p90": 155.93599528074265, + "p95": 157.82400220632553, + "p99": 192.9279938340187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 87.36000210046768, + "p90": 91.839998960495, + "p95": 97.18400239944458, + "p99": 108.22399705648422 + }, + "combine": { + "p50": 85.4400023818016, + "p90": 86.59200370311737, + "p95": 87.61599659919739, + "p99": 97.72799909114838 + }, + "roundtrip": { + "p50": 138.11199367046356, + "p90": 140.99200069904327, + "p95": 144.57599818706512, + "p99": 162.01600432395935 + }, + "isolatedSum": { + "p50": 172.8000044822693, + "p90": 178.43200266361237, + "p95": 
184.79999899864197, + "p99": 205.9519961476326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.29600358009338, + "p90": 102.24000364542007, + "p95": 103.13600301742554, + "p99": 115.74400216341019 + }, + "combine": { + "p50": 98.78399968147278, + "p90": 101.50399804115295, + "p95": 108.8000014424324, + "p99": 123.99999797344208 + }, + "roundtrip": { + "p50": 163.93600404262543, + "p90": 171.80800437927246, + "p95": 173.7920045852661, + "p99": 197.11999595165253 + }, + "isolatedSum": { + "p50": 198.08000326156616, + "p90": 203.74400168657303, + "p95": 211.93600445985794, + "p99": 239.74400013685226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.87200248241425, + "p90": 114.72000181674957, + "p95": 115.77600240707397, + "p99": 125.59999525547028 + }, + "combine": { + "p50": 122.43200093507767, + "p90": 145.53600549697876, + "p95": 151.39199793338776, + "p99": 183.9040070772171 + }, + "roundtrip": { + "p50": 199.20000433921814, + "p90": 203.5840004682541, + "p95": 208.44799280166626, + "p99": 224.70399737358093 + }, + "isolatedSum": { + "p50": 234.3040034174919, + "p90": 260.25600731372833, + "p95": 267.16800034046173, + "p99": 309.5040023326874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-52cd7a30", + "identity": 
"b300|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "b300_6fb604e7", + "comparisonKey": "39f8392fb823e1c0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:37.724067+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 
1, + "globalTokens": 8, + "dispatch": { + "p50": 134.88000631332397, + "p90": 139.90400731563568, + "p95": 144.3839967250824, + "p99": 181.7599982023239 + }, + "combine": { + "p50": 57.0559985935688, + "p90": 59.61599946022034, + "p95": 60.575999319553375, + "p99": 75.55200159549713 + }, + "roundtrip": { + "p50": 184.12800133228302, + "p90": 188.9919936656952, + "p95": 191.3280040025711, + "p99": 206.65599405765533 + }, + "isolatedSum": { + "p50": 191.93600490689278, + "p90": 199.52000677585602, + "p95": 204.95999604463577, + "p99": 257.31199979782104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 135.6479972600937, + "p90": 146.55999839305878, + "p95": 154.88000214099884, + "p99": 168.44800114631653 + }, + "combine": { + "p50": 58.62399935722351, + "p90": 60.67200005054474, + "p95": 61.76000088453293, + "p99": 66.39999896287918 + }, + "roundtrip": { + "p50": 183.9359998703003, + "p90": 189.05599415302277, + "p95": 191.3280040025711, + "p99": 205.50400018692017 + }, + "isolatedSum": { + "p50": 194.2719966173172, + "p90": 207.23199844360352, + "p95": 216.64000302553177, + "p99": 234.8480001091957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 135.26399433612823, + "p90": 139.71200585365295, + "p95": 145.47200500965118, + "p99": 157.27999806404114 + }, + "combine": { + "p50": 60.32000109553337, + "p90": 62.81600147485733, + "p95": 64.06400352716446, + "p99": 84.79999750852585 + }, + "roundtrip": { + "p50": 185.66399812698364, + "p90": 189.40800428390503, + "p95": 
192.80000030994415, + "p99": 212.96000480651855 + }, + "isolatedSum": { + "p50": 195.5839954316616, + "p90": 202.52800732851028, + "p95": 209.53600853681564, + "p99": 242.079995572567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 137.2479945421219, + "p90": 142.0159935951233, + "p95": 146.14400267601013, + "p99": 170.81600427627563 + }, + "combine": { + "p50": 62.68800050020218, + "p90": 64.89600241184235, + "p95": 66.04799628257751, + "p99": 84.48000252246857 + }, + "roundtrip": { + "p50": 188.4160041809082, + "p90": 193.02399456501007, + "p95": 196.54400646686554, + "p99": 227.1679937839508 + }, + "isolatedSum": { + "p50": 199.93599504232407, + "p90": 206.91199600696564, + "p95": 212.19199895858765, + "p99": 255.2960067987442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 139.0720009803772, + "p90": 148.99200201034546, + "p95": 157.18400478363037, + "p99": 171.00800573825836 + }, + "combine": { + "p50": 62.880001962184906, + "p90": 64.99200314283371, + "p95": 66.6240006685257, + "p99": 96.57599776983261 + }, + "roundtrip": { + "p50": 190.49599766731262, + "p90": 196.54400646686554, + "p95": 199.072003364563, + "p99": 216.63999557495117 + }, + "isolatedSum": { + "p50": 201.9520029425621, + "p90": 213.98400515317917, + "p95": 223.80800545215607, + "p99": 267.584003508091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 142.17600226402283, + "p90": 146.08000218868256, + "p95": 148.73600006103516, + "p99": 159.10400450229645 + }, + "combine": { + "p50": 65.37599861621857, + "p90": 67.90400296449661, + "p95": 68.86400282382965, + "p99": 94.65599805116653 + }, + "roundtrip": { + "p50": 193.27999651432037, + "p90": 198.55999946594238, + "p95": 202.84800231456757, + "p99": 220.70400416851044 + }, + "isolatedSum": { + "p50": 207.5520008802414, + "p90": 213.98400515317917, + "p95": 217.6000028848648, + "p99": 253.76000255346298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 151.7760008573532, + "p90": 161.40800714492798, + "p95": 169.0240055322647, + "p99": 181.5039962530136 + }, + "combine": { + "p50": 79.32800054550171, + "p90": 81.66400343179703, + "p95": 82.8159973025322, + "p99": 93.37600320577621 + }, + "roundtrip": { + "p50": 216.99200570583344, + "p90": 221.76000475883484, + "p95": 224.83199834823608, + "p99": 274.3679881095886 + }, + "isolatedSum": { + "p50": 231.10400140285492, + "p90": 243.072010576725, + "p95": 251.8400028347969, + "p99": 274.8799994587898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 169.47199404239655, + "p90": 175.135999917984, + "p95": 179.9039989709854, + "p99": 192.9599940776825 + }, + "combine": { + "p50": 94.24000233411789, + "p90": 97.120001912117, + "p95": 98.91200065612793, + "p99": 
122.91199713945389 + }, + "roundtrip": { + "p50": 251.42401456832886, + "p90": 256.19199872016907, + "p95": 261.9520127773285, + "p99": 296.7360019683838 + }, + "isolatedSum": { + "p50": 263.71199637651443, + "p90": 272.256001830101, + "p95": 278.81599962711334, + "p99": 315.8719912171364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fff1d937", + "identity": "b300|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "b300_6fb604e7", + "comparisonKey": "f420c37ce14dd37e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:46.539442+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 136.4160031080246, + "p90": 150.33599734306335, + "p95": 156.22399747371674, + "p99": 206.27200603485107 + }, + "combine": { + "p50": 60.06399914622307, + "p90": 62.07999959588051, + "p95": 62.880001962184906, + "p99": 73.85600358247757 + }, + "roundtrip": { + "p50": 187.16800212860107, + "p90": 191.93600118160248, + "p95": 194.94399428367615, + "p99": 210.78400313854218 + }, + "isolatedSum": { + "p50": 196.48000225424767, + "p90": 212.41599693894386, + "p95": 219.10399943590164, + "p99": 280.12800961732864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 137.37599551677704, + "p90": 142.71999895572662, + "p95": 146.464005112648, + "p99": 167.39200055599213 + }, + "combine": { + "p50": 61.88800185918808, + "p90": 63.61600011587143, + "p95": 64.35199826955795, + "p99": 78.43200117349625 + }, + "roundtrip": { + "p50": 189.5039975643158, + "p90": 195.5839991569519, + "p95": 200.73600113391876, + "p99": 216.41600131988525 + }, + "isolatedSum": { + "p50": 199.26399737596512, + "p90": 206.33599907159805, + "p95": 210.81600338220596, + "p99": 245.82400172948837 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 138.97599279880524, + "p90": 145.31199634075165, + "p95": 147.48799800872803, + "p99": 285.98400950431824 + }, + "combine": { + "p50": 64.44799900054932, + "p90": 66.46399945020676, + "p95": 67.32799857854843, + "p99": 70.97599655389786 + }, + "roundtrip": { + "p50": 193.40799748897552, + "p90": 199.45600628852844, + "p95": 203.19999754428864, + "p99": 220.99199891090393 + }, + "isolatedSum": { + "p50": 203.42399179935455, + "p90": 211.7759957909584, + "p95": 214.81599658727646, + "p99": 356.9600060582161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 140.70400595664978, + "p90": 151.71200037002563, + "p95": 161.50400042533875, + "p99": 184.9920004606247 + }, + "combine": { + "p50": 66.56000018119812, + "p90": 68.96000355482101, + "p95": 69.50400024652481, + "p99": 75.77600330114365 + }, + "roundtrip": { + "p50": 195.19999623298645, + "p90": 201.9519954919815, + "p95": 204.51200008392334, + "p99": 221.37600183486938 + }, + "isolatedSum": { + "p50": 207.2640061378479, + "p90": 220.67200392484665, + "p95": 231.00800067186356, + "p99": 260.76800376176834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 141.34399592876434, + "p90": 151.61600708961487, + "p95": 160.70400178432465, + 
"p99": 323.743999004364 + }, + "combine": { + "p50": 66.17599725723267, + "p90": 68.09599697589874, + "p95": 68.86400282382965, + "p99": 77.44000107049942 + }, + "roundtrip": { + "p50": 197.05599546432495, + "p90": 202.72000133991241, + "p95": 205.56800067424774, + "p99": 220.19200026988983 + }, + "isolatedSum": { + "p50": 207.519993185997, + "p90": 219.7120040655136, + "p95": 229.5680046081543, + "p99": 401.18400007486343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 144.25599575042725, + "p90": 153.4080058336258, + "p95": 166.1120057106018, + "p99": 182.72000551223755 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 72.22399860620499, + "p95": 72.89600372314453, + "p99": 76.22399926185608 + }, + "roundtrip": { + "p50": 200.19200444221497, + "p90": 254.91198897361755, + "p95": 321.4400112628937, + "p99": 468.9919948577881 + }, + "isolatedSum": { + "p50": 214.431993663311, + "p90": 225.63200443983078, + "p95": 239.00800943374634, + "p99": 258.9440047740936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 156.73600137233734, + "p90": 160.73599457740784, + "p95": 163.13600540161133, + "p99": 181.05599284172058 + }, + "combine": { + "p50": 84.73599702119827, + "p90": 86.7839977145195, + "p95": 87.61599659919739, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 228.0000001192093, + "p90": 234.27200317382812, + "p95": 238.49600553512573, + "p99": 262.08001375198364 + }, + "isolatedSum": { + "p50": 241.4719983935356, + "p90": 
247.51999229192734, + "p95": 250.75200200080872, + "p99": 276.8319919705391 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 174.78400468826294, + "p90": 182.78400599956512, + "p95": 189.7599995136261, + "p99": 275.90399980545044 + }, + "combine": { + "p50": 102.84800082445145, + "p90": 105.34399747848511, + "p95": 106.27199709415436, + "p99": 119.55200135707855 + }, + "roundtrip": { + "p50": 264.1279995441437, + "p90": 269.8560059070587, + "p95": 274.59201216697693, + "p99": 357.08799958229065 + }, + "isolatedSum": { + "p50": 277.6320055127144, + "p90": 288.12800347805023, + "p95": 296.03199660778046, + "p99": 395.456001162529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb7f38a0", + "identity": "b300|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_6fb604e7", + "comparisonKey": "33f1f32c014ea4d5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:57.521954+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + 
"routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 135.5839967727661, + "p90": 140.8960074186325, + "p95": 145.37599682807922, + "p99": 161.31199896335602 + }, + "combine": { + "p50": 66.14399701356888, + "p90": 68.06399673223495, + "p95": 68.96000355482101, + "p99": 74.49600100517273 + }, + "roundtrip": { + "p50": 193.79200041294098, + "p90": 203.99999618530273, + "p95": 208.064004778862, + "p99": 220.35199403762817 + }, + "isolatedSum": { + "p50": 201.727993786335, + "p90": 208.96000415086746, + "p95": 214.33600038290024, + "p99": 235.80799996852875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 139.00800049304962, + "p90": 152.28800475597382, + "p95": 162.36799955368042, + "p99": 174.75199699401855 + }, + "combine": { + "p50": 66.3679987192154, + "p90": 68.35199892520905, + "p95": 69.11999732255936, + "p99": 78.52800190448761 + }, + "roundtrip": { + "p50": 194.91200149059296, + "p90": 200.6399929523468, + "p95": 204.28800582885742, + "p99": 218.72000396251678 + }, + "isolatedSum": { + "p50": 205.37599921226501, + "p90": 220.64000368118286, + "p95": 231.48799687623978, + "p99": 253.27999889850616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 138.047993183136, + "p90": 142.5279974937439, + "p95": 146.11199498176575, + "p99": 159.2320054769516 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 70.46400010585785, + "p95": 71.03999704122543, + "p99": 80.1599994301796 + }, + "roundtrip": { + "p50": 195.42400538921356, + "p90": 200.57600736618042, + "p95": 204.83200252056122, + "p99": 219.9999988079071 + }, + "isolatedSum": { + "p50": 206.59199357032776, + "p90": 212.99199759960175, + "p95": 217.15199202299118, + "p99": 239.3920049071312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 139.29599523544312, + "p90": 143.0719941854477, + "p95": 147.0080018043518, + "p99": 231.74400627613068 + }, + "combine": { + "p50": 70.65600156784058, + "p90": 72.57600128650665, + "p95": 73.21599870920181, + "p99": 90.4960036277771 + }, + "roundtrip": { + "p50": 197.11999595165253, + "p90": 
201.21599733829498, + "p95": 203.61599326133728, + "p99": 222.3680019378662 + }, + "isolatedSum": { + "p50": 209.9519968032837, + "p90": 215.64799547195435, + "p95": 220.22400051355362, + "p99": 322.2400099039078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 140.03199338912964, + "p90": 147.93600142002106, + "p95": 151.7760008573532, + "p99": 159.0079963207245 + }, + "combine": { + "p50": 71.00799679756165, + "p90": 72.76800274848938, + "p95": 74.07999783754349, + "p99": 169.40799355506897 + }, + "roundtrip": { + "p50": 198.4959989786148, + "p90": 203.45599949359894, + "p95": 207.8399956226349, + "p99": 217.631995677948 + }, + "isolatedSum": { + "p50": 211.03999018669128, + "p90": 220.70400416851044, + "p95": 225.8559986948967, + "p99": 328.41598987579346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 145.47200500965118, + "p90": 157.56799280643463, + "p95": 167.61599481105804, + "p99": 285.91999411582947 + }, + "combine": { + "p50": 76.48000121116638, + "p90": 85.4400023818016, + "p95": 88.79999816417694, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 207.0080041885376, + "p90": 214.08000588417053, + "p95": 217.0879989862442, + "p99": 235.6799989938736 + }, + "isolatedSum": { + "p50": 221.95200622081757, + "p90": 243.00799518823624, + "p95": 256.415992975235, + "p99": 387.8079950809479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, 
+ "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 156.8640023469925, + "p90": 162.11199760437012, + "p95": 167.04000532627106, + "p99": 187.8719925880432 + }, + "combine": { + "p50": 90.7519981265068, + "p90": 93.05600076913834, + "p95": 93.53599697351456, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 234.0800017118454, + "p90": 239.68000710010529, + "p95": 245.7599937915802, + "p99": 267.67998933792114 + }, + "isolatedSum": { + "p50": 247.6160004734993, + "p90": 255.16799837350845, + "p95": 260.5760022997856, + "p99": 283.83999317884445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 181.05599284172058, + "p90": 187.32799589633942, + "p95": 192.86400079727173, + "p99": 223.29600155353546 + }, + "combine": { + "p50": 109.43999886512756, + "p90": 111.7440015077591, + "p95": 112.44799941778183, + "p99": 121.50400131940842 + }, + "roundtrip": { + "p50": 277.8240144252777, + "p90": 282.3359966278076, + "p95": 284.4479978084564, + "p99": 303.8400113582611 + }, + "isolatedSum": { + "p50": 290.49599170684814, + "p90": 299.0719974040985, + "p95": 305.31200021505356, + "p99": 344.8000028729439 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c1c5290d", + "identity": "b300|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_7a11eaa3", + "comparisonKey": "ca48d5ad253152f1", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T09:00:01.657197+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.75200361013412, + "p90": 82.56000280380249, + "p95": 85.28000116348267, + "p99": 104.60799932479858 + }, + "combine": { + "p50": 70.52800059318542, 
+ "p90": 72.67200201749802, + "p95": 73.27999919652939, + "p99": 81.18399977684021 + }, + "roundtrip": { + "p50": 150.751993060112, + "p90": 154.4959992170334, + "p95": 156.67200088500977, + "p99": 166.84800386428833 + }, + "isolatedSum": { + "p50": 149.28000420331955, + "p90": 155.2320048213005, + "p95": 158.56000036001205, + "p99": 185.7919991016388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 80.09599894285202, + "p90": 95.04000097513199, + "p95": 103.16800326108932, + "p99": 115.00799655914307 + }, + "combine": { + "p50": 70.46400010585785, + "p90": 72.35199958086014, + "p95": 72.9919970035553, + "p99": 82.68799632787704 + }, + "roundtrip": { + "p50": 151.71200037002563, + "p90": 154.33600544929504, + "p95": 156.19200468063354, + "p99": 168.41599345207214 + }, + "isolatedSum": { + "p50": 150.55999904870987, + "p90": 167.39200055599213, + "p95": 176.16000026464462, + "p99": 197.6959928870201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 81.60000294446945, + "p90": 90.33600240945816, + "p95": 94.17600184679031, + "p99": 126.65599584579468 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 74.40000027418137, + "p95": 75.32799988985062, + "p99": 85.69599688053131 + }, + "roundtrip": { + "p50": 153.85599434375763, + "p90": 158.1760048866272, + "p95": 164.32000696659088, + "p99": 183.77600610256195 + }, + "isolatedSum": { + "p50": 153.98400276899338, + "p90": 164.73600268363953, + "p95": 169.50400173664093, + "p99": 212.351992726326 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 81.63200318813324, + "p90": 83.8719978928566, + "p95": 85.11999994516373, + "p99": 106.65600001811981 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 92.8959995508194, + "p95": 99.2640033364296, + "p99": 114.72000181674957 + }, + "roundtrip": { + "p50": 155.35999834537506, + "p90": 169.3120002746582, + "p95": 176.1920005083084, + "p99": 187.48800456523895 + }, + "isolatedSum": { + "p50": 156.19200468063354, + "p90": 176.767997443676, + "p95": 184.38400328159332, + "p99": 221.37600183486938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 84.86399799585342, + "p90": 100.3199964761734, + "p95": 104.89600151777267, + "p99": 116.35199934244156 + }, + "combine": { + "p50": 75.13599842786789, + "p90": 77.47200131416321, + "p95": 78.94399762153625, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 155.64799308776855, + "p90": 158.87999534606934, + "p95": 161.31199896335602, + "p99": 176.15999281406403 + }, + "isolatedSum": { + "p50": 159.9999964237213, + "p90": 177.7919977903366, + "p95": 183.83999913930893, + "p99": 222.27200120687485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 86.62399649620056, + "p90": 93.31200271844864, + "p95": 
97.95200079679489, + "p99": 110.72000116109848 + }, + "combine": { + "p50": 79.83999699354172, + "p90": 81.95199817419052, + "p95": 83.45600217580795, + "p99": 94.43199634552002 + }, + "roundtrip": { + "p50": 166.72000288963318, + "p90": 169.88800466060638, + "p95": 173.50399494171143, + "p99": 190.33600389957428 + }, + "isolatedSum": { + "p50": 166.46399348974228, + "p90": 175.26400089263916, + "p95": 181.40800297260284, + "p99": 205.1519975066185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.58399766683578, + "p90": 103.93600165843964, + "p95": 110.07999628782272, + "p99": 118.43200027942657 + }, + "combine": { + "p50": 95.32800316810608, + "p90": 97.82399982213974, + "p95": 99.80800002813339, + "p99": 121.56800180673599 + }, + "roundtrip": { + "p50": 195.77600061893463, + "p90": 198.65599274635315, + "p95": 200.3840059041977, + "p99": 218.6560034751892 + }, + "isolatedSum": { + "p50": 190.91200083494186, + "p90": 201.76000148057938, + "p95": 209.88799631595612, + "p99": 240.00000208616257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.98400235176086, + "p90": 108.86400192975998, + "p95": 109.95200276374817, + "p99": 118.68800222873688 + }, + "combine": { + "p50": 115.80800265073776, + "p90": 118.27199906110764, + "p95": 119.71200257539749, + "p99": 140.19200205802917 + }, + "roundtrip": { + "p50": 242.94400215148926, + "p90": 246.5279996395111, + "p95": 250.17601251602173, + "p99": 276.7679989337921 + }, + "isolatedSum": { + "p50": 
221.79200500249863, + "p90": 227.13600099086761, + "p95": 229.66400533914566, + "p99": 258.88000428676605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8966740d", + "identity": "b300|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_6fb604e7", + "comparisonKey": "42b1599e0e77d529", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:14.882200+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 140.32000303268433, + "p90": 145.21600306034088, + "p95": 148.25600385665894, + "p99": 163.16799819469452 + }, + "combine": { + "p50": 70.30399888753891, + "p90": 72.57600128650665, + "p95": 73.2479989528656, + "p99": 76.80000364780426 + }, + "roundtrip": { + "p50": 198.7839937210083, + "p90": 208.28799903392792, + "p95": 211.19999885559082, + "p99": 266.975998878479 + }, + "isolatedSum": { + "p50": 210.62400192022324, + "p90": 217.79200434684753, + "p95": 221.50400280952454, + "p99": 239.96800184249878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 139.20000195503235, + "p90": 143.45599710941315, + "p95": 145.28000354766846, + "p99": 156.47999942302704 + }, + "combine": { + "p50": 70.14399766921997, + "p90": 71.3919997215271, + "p95": 72.48000055551529, + "p99": 78.91199737787247 + }, + "roundtrip": { + "p50": 200.83199441432953, + "p90": 205.9199959039688, + "p95": 207.7759951353073, + "p99": 218.52800250053406 + }, + "isolatedSum": { + "p50": 209.34399962425232, + "p90": 214.84799683094025, + "p95": 217.76000410318375, + "p99": 235.3919968008995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 141.95199310779572, + "p90": 146.7839926481247, + "p95": 150.33599734306335, + "p99": 177.85599827766418 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 74.17599856853485, + "p95": 75.07199794054031, + "p99": 81.79199695587158 + }, + "roundtrip": { + "p50": 203.80799472332, + "p90": 207.8399956226349, + "p95": 209.9200040102005, + "p99": 225.75999796390533 + }, + "isolatedSum": { + "p50": 214.33599293231964, + "p90": 220.95999121665955, + "p95": 225.40799528360367, + "p99": 259.64799523353577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 140.79999923706055, + "p90": 144.80000734329224, + "p95": 146.68799936771393, + "p99": 157.95199573040009 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 76.28799974918365, + "p95": 77.05599814653397, + "p99": 90.59199690818787 + }, + "roundtrip": { + "p50": 203.77600193023682, + "p90": 209.05600488185883, + "p95": 210.9760046005249, + "p99": 223.10400009155273 + }, + "isolatedSum": { + "p50": 214.30400013923645, + "p90": 221.0880070924759, + "p95": 223.7439975142479, + "p99": 248.54399263858795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 143.00799369812012, + "p90": 146.81600034236908, + "p95": 148.5760062932968, + "p99": 157.75999426841736 + }, + "combine": { + "p50": 74.78400319814682, + "p90": 76.7040029168129, + "p95": 77.08799839019775, + "p99": 87.3280018568039 + }, + "roundtrip": { + "p50": 
204.92799580097198, + "p90": 210.52800118923187, + "p95": 213.85599672794342, + "p99": 227.7120053768158 + }, + "isolatedSum": { + "p50": 217.79199689626694, + "p90": 223.52000325918198, + "p95": 225.66400468349457, + "p99": 245.08799612522125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 149.79200065135956, + "p90": 153.72799336910248, + "p95": 155.74400126934052, + "p99": 164.8000031709671 + }, + "combine": { + "p50": 79.6160027384758, + "p90": 81.66400343179703, + "p95": 82.68799632787704, + "p99": 89.56799656152725 + }, + "roundtrip": { + "p50": 216.09599888324738, + "p90": 220.99199891090393, + "p95": 224.09600019454956, + "p99": 258.7200105190277 + }, + "isolatedSum": { + "p50": 229.40800338983536, + "p90": 235.3919968008995, + "p95": 238.43199759721756, + "p99": 254.36799973249435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 165.82399606704712, + "p90": 268.5759961605072, + "p95": 274.59201216697693, + "p99": 304.03199791908264 + }, + "combine": { + "p50": 95.16800194978714, + "p90": 97.69599884748459, + "p95": 99.2640033364296, + "p99": 129.7920048236847 + }, + "roundtrip": { + "p50": 245.2480047941208, + "p90": 249.79199469089508, + "p95": 252.73600220680237, + "p99": 276.8639922142029 + }, + "isolatedSum": { + "p50": 260.99199801683426, + "p90": 366.2719950079918, + "p95": 373.8560155034065, + "p99": 433.82400274276733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + 
"fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 187.1040016412735, + "p90": 190.75199961662292, + "p95": 192.9599940776825, + "p99": 205.4080069065094 + }, + "combine": { + "p50": 115.39199948310852, + "p90": 117.88800358772278, + "p95": 119.00799721479416, + "p99": 130.62399625778198 + }, + "roundtrip": { + "p50": 289.92000222206116, + "p90": 295.4240143299103, + "p95": 300.86401104927063, + "p99": 357.37600922584534 + }, + "isolatedSum": { + "p50": 302.496001124382, + "p90": 308.6400032043457, + "p95": 311.96799129247665, + "p99": 336.0320031642914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b5777ae4", + "identity": "b300|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "b300_6fb604e7", + "comparisonKey": "792d4ed03c2b0795", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:23.805742+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 139.55199718475342, + "p90": 153.56799960136414, + "p95": 166.07999801635742, + "p99": 182.23999440670013 + }, + "combine": { + "p50": 66.72000139951706, + "p90": 68.54400038719177, + "p95": 69.2799985408783, + "p99": 87.42400258779526 + }, + "roundtrip": { + "p50": 196.4160054922104, + "p90": 202.33599841594696, + "p95": 207.87200331687927, + "p99": 230.3999960422516 + }, + "isolatedSum": { + "p50": 206.27199858427048, + "p90": 222.1119999885559, + "p95": 235.35999655723572, + "p99": 269.6639969944954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 137.2479945421219, + "p90": 141.9840008020401, + "p95": 144.67200636863708, + "p99": 164.92800414562225 + }, + 
"combine": { + "p50": 69.2799985408783, + "p90": 71.23199850320816, + "p95": 72.31999933719635, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 196.48000597953796, + "p90": 200.99200308322906, + "p95": 202.65600085258484, + "p99": 215.80800414085388 + }, + "isolatedSum": { + "p50": 206.52799308300018, + "p90": 213.21599930524826, + "p95": 216.99200570583344, + "p99": 252.51200050115585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 139.23199474811554, + "p90": 154.1759967803955, + "p95": 164.57599401474, + "p99": 184.60799753665924 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 72.80000299215317, + "p95": 73.5040009021759, + "p99": 92.16000139713287 + }, + "roundtrip": { + "p50": 200.70399343967438, + "p90": 205.59999346733093, + "p95": 208.639994263649, + "p99": 224.99200701713562 + }, + "isolatedSum": { + "p50": 209.75999534130096, + "p90": 226.97599977254868, + "p95": 238.0799949169159, + "p99": 276.7679989337921 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 141.56800508499146, + "p90": 158.59200060367584, + "p95": 169.95200514793396, + "p99": 194.36800479888916 + }, + "combine": { + "p50": 73.63200187683105, + "p90": 75.77600330114365, + "p95": 76.76800340414047, + "p99": 83.39200168848038 + }, + "roundtrip": { + "p50": 202.72000133991241, + "p90": 210.24000644683838, + "p95": 213.18399906158447, + "p99": 220.96000611782074 + }, + "isolatedSum": { + "p50": 215.2000069618225, + "p90": 234.3680039048195, + "p95": 246.72000855207443, + 
"p99": 277.76000648736954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 144.25599575042725, + "p90": 162.6880019903183, + "p95": 171.87200486660004, + "p99": 233.47200453281403 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 76.67200267314911, + "p95": 78.62400263547897, + "p99": 105.47199845314026 + }, + "roundtrip": { + "p50": 204.48000729084015, + "p90": 210.87999641895294, + "p95": 213.85599672794342, + "p99": 234.55999791622162 + }, + "isolatedSum": { + "p50": 218.91199797391891, + "p90": 239.3600046634674, + "p95": 250.496007502079, + "p99": 338.9440029859543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 146.5280055999756, + "p90": 150.751993060112, + "p95": 153.79199385643005, + "p99": 167.9999977350235 + }, + "combine": { + "p50": 79.42400127649307, + "p90": 81.60000294446945, + "p95": 82.56000280380249, + "p99": 95.32800316810608 + }, + "roundtrip": { + "p50": 214.65599536895752, + "p90": 221.88800573349, + "p95": 224.5119959115982, + "p99": 260.80000400543213 + }, + "isolatedSum": { + "p50": 225.95200687646866, + "p90": 232.35199600458145, + "p95": 236.35199666023254, + "p99": 263.3280009031296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 161.69600188732147, + 
"p90": 165.6000018119812, + "p95": 169.18399930000305, + "p99": 190.49599766731262 + }, + "combine": { + "p50": 95.51999717950821, + "p90": 97.85600006580353, + "p95": 98.68799895048141, + "p99": 109.0880036354065 + }, + "roundtrip": { + "p50": 243.9039945602417, + "p90": 249.24799799919128, + "p95": 254.20799851417542, + "p99": 328.5439908504486 + }, + "isolatedSum": { + "p50": 257.2159990668297, + "p90": 263.45600187778473, + "p95": 267.87199825048447, + "p99": 299.5840013027191 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 185.7919991016388, + "p90": 190.5599981546402, + "p95": 193.27999651432037, + "p99": 206.94400370121002 + }, + "combine": { + "p50": 116.41599982976913, + "p90": 130.0799995660782, + "p95": 138.40000331401825, + "p99": 155.20000457763672 + }, + "roundtrip": { + "p50": 289.7599935531616, + "p90": 294.68798637390137, + "p95": 299.00801181793213, + "p99": 319.07200813293457 + }, + "isolatedSum": { + "p50": 302.20799893140793, + "p90": 320.6399977207184, + "p95": 331.6799998283386, + "p99": 362.14400827884674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a26d819", + "identity": "b300|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "b300_29dcc6db", + "comparisonKey": "db38f65d89ab743e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:08.320961+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + 
"backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.12799721956253, + "p90": 80.6720033288002, + "p95": 85.15200018882751, + "p99": 95.8079993724823 + }, + "combine": { + "p50": 70.27199864387512, + "p90": 72.35199958086014, + "p95": 73.08799773454666, + "p99": 84.48000252246857 + }, + "roundtrip": { + "p50": 139.52000439167023, + "p90": 
142.62400567531586, + "p95": 143.93599331378937, + "p99": 159.5200002193451 + }, + "isolatedSum": { + "p50": 138.39999586343765, + "p90": 153.02400290966034, + "p95": 158.23999792337418, + "p99": 180.28800189495087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 67.55200028419495, + "p90": 69.50400024652481, + "p95": 70.43199986219406, + "p99": 78.62400263547897 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 72.38399982452393, + "p95": 72.80000299215317, + "p99": 74.23999905586243 + }, + "roundtrip": { + "p50": 139.48799669742584, + "p90": 143.0400013923645, + "p95": 145.31199634075165, + "p99": 161.72799468040466 + }, + "isolatedSum": { + "p50": 137.88799941539764, + "p90": 141.88800007104874, + "p95": 143.23200285434723, + "p99": 152.8640016913414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.3359991312027, + "p90": 80.1599994301796, + "p95": 84.51200276613235, + "p99": 93.18400174379349 + }, + "combine": { + "p50": 72.25599884986877, + "p90": 74.20799881219864, + "p95": 74.91199672222137, + "p99": 77.82399654388428 + }, + "roundtrip": { + "p50": 141.50400459766388, + "p90": 146.43199741840363, + "p95": 151.71200037002563, + "p99": 171.1679995059967 + }, + "isolatedSum": { + "p50": 142.59199798107147, + "p90": 154.36799824237823, + "p95": 159.42399948835373, + "p99": 171.00799828767776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.7920024394989, + "p90": 75.55200159549713, + "p95": 82.46400207281113, + "p99": 96.16000205278397 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 76.4160007238388, + "p95": 77.63200253248215, + "p99": 114.1119971871376 + }, + "roundtrip": { + "p50": 142.65599846839905, + "p90": 145.4399973154068, + "p95": 146.65600657463074, + "p99": 158.27199816703796 + }, + "isolatedSum": { + "p50": 144.03200149536133, + "p90": 151.96800231933594, + "p95": 160.09600460529327, + "p99": 210.27199923992157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.26399874687195, + "p90": 73.47200065851212, + "p95": 74.94399696588516, + "p99": 94.17600184679031 + }, + "combine": { + "p50": 74.75200295448303, + "p90": 76.83199644088745, + "p95": 77.31200009584427, + "p99": 103.13600301742554 + }, + "roundtrip": { + "p50": 143.327996134758, + "p90": 146.08000218868256, + "p95": 147.45600521564484, + "p99": 156.25600516796112 + }, + "isolatedSum": { + "p50": 146.01600170135498, + "p90": 150.30399709939957, + "p95": 152.25599706172943, + "p99": 197.31200486421585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.01599735021591, + "p90": 75.87199658155441, + "p95": 77.40800082683563, + "p99": 96.16000205278397 + }, + "combine": { + "p50": 80.57600259780884, + "p90": 90.87999910116196, + "p95": 93.75999867916107, + 
"p99": 106.1440035700798 + }, + "roundtrip": { + "p50": 154.52800691127777, + "p90": 156.80000185966492, + "p95": 158.24000537395477, + "p99": 171.90399765968323 + }, + "isolatedSum": { + "p50": 154.59199994802475, + "p90": 166.75199568271637, + "p95": 171.1679995059967, + "p99": 202.30400562286377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.67999643087387, + "p90": 86.01599931716919, + "p95": 87.55200356245041, + "p99": 110.6560006737709 + }, + "combine": { + "p50": 94.94400024414062, + "p90": 97.34400361776352, + "p95": 98.39999675750732, + "p99": 115.39199948310852 + }, + "roundtrip": { + "p50": 183.96799266338348, + "p90": 186.75200641155243, + "p95": 187.83999979496002, + "p99": 202.30400562286377 + }, + "isolatedSum": { + "p50": 178.6239966750145, + "p90": 183.3600029349327, + "p95": 185.95200031995773, + "p99": 226.04800015687943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.98400038480759, + "p90": 96.22400254011154, + "p95": 97.28000313043594, + "p99": 105.15200346708298 + }, + "combine": { + "p50": 115.32799899578094, + "p90": 118.17599833011627, + "p95": 119.90399658679962, + "p99": 142.43200421333313 + }, + "roundtrip": { + "p50": 232.2559952735901, + "p90": 236.06400191783905, + "p95": 239.04000222682953, + "p99": 257.24801421165466 + }, + "isolatedSum": { + "p50": 209.31199938058853, + "p90": 214.4000008702278, + "p95": 217.18399971723557, + "p99": 247.5840076804161 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bfa96b1e", + "identity": "b300|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_58db80e8", + "comparisonKey": "5e2adb9d8f6cec4d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:17.479256+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.67200046777725, + "p90": 113.95200341939926, + "p95": 117.85600334405899, + "p99": 130.14400005340576 + }, + "combine": { + "p50": 98.43199700117111, + "p90": 100.60799866914749, + "p95": 102.14400291442871, + "p99": 132.22399353981018 + }, + "roundtrip": { + "p50": 183.67999792099, + "p90": 190.88000059127808, + "p95": 192.86400079727173, + "p99": 208.67200195789337 + }, + "isolatedSum": { + "p50": 207.10399746894836, + "p90": 214.56000208854675, + "p95": 220.0000062584877, + "p99": 262.36799359321594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 131.32800161838531, + "p90": 137.53600418567657, + "p95": 140.6719982624054, + "p99": 153.76000106334686 + }, + "combine": { + "p50": 135.00800728797913, + "p90": 137.15200126171112, + "p95": 138.65600526332855, + "p99": 161.28000617027283 + }, + "roundtrip": { + "p50": 248.44799935817719, + "p90": 255.0080120563507, + "p95": 257.2160065174103, + "p99": 274.56000447273254 + }, + "isolatedSum": { + "p50": 266.33600890636444, + "p90": 274.6880054473877, + "p95": 279.32800352573395, + "p99": 315.0400072336197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
191.42399728298187, + "p90": 199.20000433921814, + "p95": 201.63199305534363, + "p99": 224.03199970722198 + }, + "combine": { + "p50": 198.97599518299103, + "p90": 208.064004778862, + "p95": 208.76799523830414, + "p99": 232.09600150585175 + }, + "roundtrip": { + "p50": 366.239994764328, + "p90": 373.76001477241516, + "p95": 376.76799297332764, + "p99": 391.55200123786926 + }, + "isolatedSum": { + "p50": 390.3999924659729, + "p90": 407.26400911808014, + "p95": 410.39998829364777, + "p99": 456.12800121307373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 301.7280101776123, + "p90": 312.4159872531891, + "p95": 326.24000310897827, + "p99": 375.90399384498596 + }, + "combine": { + "p50": 393.92000436782837, + "p90": 404.9279987812042, + "p95": 406.0159921646118, + "p99": 416.73600673675537 + }, + "roundtrip": { + "p50": 617.5680160522461, + "p90": 629.7280192375183, + "p95": 633.5359811782837, + "p99": 646.5920209884644 + }, + "isolatedSum": { + "p50": 695.6480145454407, + "p90": 717.3439860343933, + "p95": 732.2559952735901, + "p99": 792.6400005817413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 536.6079807281494, + "p90": 551.6160130500793, + "p95": 555.9359788894653, + "p99": 694.9759721755981 + }, + "combine": { + "p50": 762.0480060577393, + "p90": 773.0879783630371, + "p95": 774.1439938545227, + "p99": 810.9760284423828 + }, + "roundtrip": { + "p50": 1268.671989440918, + "p90": 1278.7200212478638, + "p95": 
1283.5839986801147, + "p99": 1337.4719619750977 + }, + "isolatedSum": { + "p50": 1298.6559867858887, + "p90": 1324.7039914131165, + "p95": 1330.079972743988, + "p99": 1505.952000617981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 987.5519871711731, + "p90": 998.8800287246704, + "p95": 1001.3439655303955, + "p99": 1045.3439950942993 + }, + "combine": { + "p50": 1449.0879774093628, + "p90": 1460.8960151672363, + "p95": 1462.1440172195435, + "p99": 1477.9839515686035 + }, + "roundtrip": { + "p50": 2397.887945175171, + "p90": 2410.752058029175, + "p95": 2418.3359146118164, + "p99": 2433.8560104370117 + }, + "isolatedSum": { + "p50": 2436.639964580536, + "p90": 2459.7760438919067, + "p95": 2463.487982749939, + "p99": 2523.327946662903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-12e34b2c", + "identity": "b300|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_58db80e8", + "comparisonKey": "88f16c6492e0d1c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:28.300678+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + 
"epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.53599894046783, + "p90": 107.84000158309937, + "p95": 109.63200032711029, + "p99": 119.90399658679962 + }, + "combine": { + "p50": 109.8880022764206, + "p90": 110.91200262308121, + "p95": 111.68000102043152, + "p99": 123.16799908876419 + }, + "roundtrip": { + "p50": 190.3039962053299, + "p90": 198.2080042362213, + "p95": 199.8080015182495, + "p99": 211.776003241539 + }, + "isolatedSum": { + "p50": 215.42400121688843, + "p90": 218.75200420618057, + "p95": 221.3120013475418, + "p99": 243.0719956755638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + 
"combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.90399372577667, + "p90": 152.25599706172943, + "p95": 153.79199385643005, + "p99": 167.90400445461273 + }, + "combine": { + "p50": 146.5280055999756, + "p90": 147.8399932384491, + "p95": 148.76799285411835, + "p99": 184.89600718021393 + }, + "roundtrip": { + "p50": 265.28000831604004, + "p90": 269.0559923648834, + "p95": 270.59200406074524, + "p99": 294.5919930934906 + }, + "isolatedSum": { + "p50": 294.43199932575226, + "p90": 300.0959903001785, + "p95": 302.5599867105484, + "p99": 352.80001163482666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.79200041294098, + "p90": 202.94399559497833, + "p95": 207.10399746894836, + "p99": 281.98400139808655 + }, + "combine": { + "p50": 218.78400444984436, + "p90": 220.99199891090393, + "p95": 222.9440063238144, + "p99": 283.1360101699829 + }, + "roundtrip": { + "p50": 388.3199989795685, + "p90": 392.5760090351105, + "p95": 393.92000436782837, + "p99": 407.74399042129517 + }, + "isolatedSum": { + "p50": 412.57600486278534, + "p90": 423.93599450588226, + "p95": 430.04800379276276, + "p99": 565.1200115680695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.73600459098816, + "p90": 321.6319978237152, + "p95": 324.9279856681824, + 
"p99": 395.4879939556122 + }, + "combine": { + "p50": 442.6240026950836, + "p90": 453.37599515914917, + "p95": 453.8240134716034, + "p99": 466.5600061416626 + }, + "roundtrip": { + "p50": 706.3360214233398, + "p90": 714.7520184516907, + "p95": 716.9600129127502, + "p99": 725.2799868583679 + }, + "isolatedSum": { + "p50": 755.3600072860718, + "p90": 775.0079929828644, + "p95": 778.7519991397858, + "p99": 862.0480000972748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 553.3760190010071, + "p90": 562.9760026931763, + "p95": 565.343976020813, + "p99": 606.3039898872375 + }, + "combine": { + "p50": 786.1760258674622, + "p90": 797.7280020713806, + "p95": 798.4960079193115, + "p99": 811.2639784812927 + }, + "roundtrip": { + "p50": 1314.0480518341064, + "p90": 1325.0880241394043, + "p95": 1328.544020652771, + "p99": 1348.479986190796 + }, + "isolatedSum": { + "p50": 1339.5520448684692, + "p90": 1360.7040047645569, + "p95": 1363.8399839401245, + "p99": 1417.5679683685303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1024.7039794921875, + "p90": 1034.9440574645996, + "p95": 1038.1120443344116, + "p99": 1079.6159505844116 + }, + "combine": { + "p50": 1485.6959581375122, + "p90": 1497.663974761963, + "p95": 1498.7519979476929, + "p99": 1547.8399991989136 + }, + "roundtrip": { + "p50": 2470.4320430755615, + "p90": 2481.920003890991, + "p95": 2487.391948699951, + "p99": 2500.1280307769775 + }, + "isolatedSum": { + "p50": 
2510.3999376296997, + "p90": 2532.6080322265625, + "p95": 2536.8640422821045, + "p99": 2627.455949783325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8418647e", + "identity": "b300|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_58db80e8", + "comparisonKey": "186990c134376026", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:41.849016+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": 
null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.64000022411346, + "p90": 120.15999853610992, + "p95": 126.17599964141846, + "p99": 133.53599607944489 + }, + "combine": { + "p50": 113.08799684047699, + "p90": 123.45600128173828, + "p95": 133.69600474834442, + "p99": 145.37599682807922 + }, + "roundtrip": { + "p50": 204.19199764728546, + "p90": 208.92800390720367, + "p95": 211.29600703716278, + "p99": 222.4320024251938 + }, + "isolatedSum": { + "p50": 221.72799706459045, + "p90": 243.6159998178482, + "p95": 259.8720043897629, + "p99": 278.9119929075241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.96800100803375, + "p90": 151.8079936504364, + "p95": 158.84800255298615, + "p99": 175.04000663757324 + }, + "combine": { + "p50": 157.8879952430725, + "p90": 159.2639982700348, + "p95": 160.41600704193115, + "p99": 182.3039948940277 + }, + "roundtrip": { + "p50": 277.44001150131226, + "p90": 283.29598903656006, + "p95": 285.2480113506317, + "p99": 297.37600684165955 + }, + "isolatedSum": { + "p50": 301.85599625110626, + "p90": 311.0719919204712, + "p95": 319.2640095949173, + "p99": 357.34400153160095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.7759951353073, + "p90": 215.64799547195435, + "p95": 218.59200298786163, + "p99": 228.86399924755096 + }, + "combine": { + "p50": 249.31199848651886, + "p90": 257.63198733329773, + "p95": 259.39199328422546, + "p99": 297.4720001220703 + }, + "roundtrip": { + "p50": 416.79999232292175, + "p90": 428.384006023407, + "p95": 445.69599628448486, + "p99": 496.8000054359436 + }, + "isolatedSum": { + "p50": 457.0879936218262, + "p90": 473.2799828052521, + "p95": 477.9839962720871, + "p99": 526.3359993696213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 333.0560028553009, + "p90": 337.50399947166443, + "p95": 339.07198905944824, + "p99": 379.7439932823181 + }, + "combine": { + "p50": 452.63999700546265, + "p90": 454.5919895172119, + "p95": 457.69599080085754, + "p99": 483.3599925041199 + }, + "roundtrip": { + "p50": 758.5279941558838, + "p90": 765.28000831604, + "p95": 767.7760124206543, + "p99": 779.8399925231934 + }, + "isolatedSum": { + "p50": 785.6959998607635, + "p90": 792.0959889888763, + "p95": 796.7679798603058, + "p99": 863.103985786438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 586.3680243492126, + "p90": 591.5520191192627, + "p95": 595.2000021934509, + "p99": 654.8799872398376 + }, + "combine": { + "p50": 809.0879917144775, + "p90": 811.8720054626465, + "p95": 
814.2079710960388, + "p99": 824.5120048522949 + }, + "roundtrip": { + "p50": 1365.9839630126953, + "p90": 1374.2079734802246, + "p95": 1378.656029701233, + "p99": 1393.3759927749634 + }, + "isolatedSum": { + "p50": 1395.4560160636902, + "p90": 1403.4240245819092, + "p95": 1409.4079732894897, + "p99": 1479.3919920921326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1073.3439922332764, + "p90": 1080.672025680542, + "p95": 1084.9920511245728, + "p99": 1133.1199407577515 + }, + "combine": { + "p50": 1510.3360414505005, + "p90": 1521.7280387878418, + "p95": 1523.103952407837, + "p99": 1558.40003490448 + }, + "roundtrip": { + "p50": 2557.408094406128, + "p90": 2569.0560340881348, + "p95": 2572.5760459899902, + "p99": 2583.6799144744873 + }, + "isolatedSum": { + "p50": 2583.680033683777, + "p90": 2602.400064468384, + "p95": 2608.0960035324097, + "p99": 2691.5199756622314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9c5385b3", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_df222060", + "comparisonKey": "6e0e03618d466091", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:23.520422+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.32000041007996, + "p90": 127.23200023174286, + "p95": 128.7360042333603, + "p99": 139.39200341701508 + }, + "combine": { + "p50": 122.23999947309494, + "p90": 123.1359988451004, + "p95": 124.28800016641617, + "p99": 134.20799374580383 + }, + "roundtrip": { + "p50": 211.4879935979843, + "p90": 216.22399985790253, + "p95": 219.42399442195892, + "p99": 247.29600548744202 + }, + "isolatedSum": { + "p50": 246.5599998831749, + "p90": 
250.36799907684326, + "p95": 253.02400439977646, + "p99": 273.5999971628189 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.31200575828552, + "p90": 161.79199516773224, + "p95": 162.7199947834015, + "p99": 170.78399658203125 + }, + "combine": { + "p50": 160.25599837303162, + "p90": 171.39199376106262, + "p95": 174.8799979686737, + "p99": 196.1279958486557 + }, + "roundtrip": { + "p50": 288.1920039653778, + "p90": 295.2960133552551, + "p95": 299.74400997161865, + "p99": 310.9759986400604 + }, + "isolatedSum": { + "p50": 317.56800413131714, + "p90": 333.18398892879486, + "p95": 337.5999927520752, + "p99": 366.91199243068695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 222.27199375629425, + "p90": 225.24799406528473, + "p95": 226.68799757957458, + "p99": 240.4160052537918 + }, + "combine": { + "p50": 271.87201380729675, + "p90": 281.9199860095978, + "p95": 283.7440073490143, + "p99": 331.2320113182068 + }, + "roundtrip": { + "p50": 462.75201439857483, + "p90": 468.9599871635437, + "p95": 471.42401337623596, + "p99": 483.7760031223297 + }, + "isolatedSum": { + "p50": 494.144007563591, + "p90": 507.1679800748825, + "p95": 510.43200492858887, + "p99": 571.6480165719986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 342.1120047569275, + "p90": 348.9280045032501, + "p95": 353.11999917030334, + "p99": 390.49598574638367 + }, + "combine": { + "p50": 465.8240079879761, + "p90": 468.8960015773773, + "p95": 478.11201214790344, + "p99": 490.4640018939972 + }, + "roundtrip": { + "p50": 782.8159928321838, + "p90": 788.703978061676, + "p95": 791.808009147644, + "p99": 801.3120293617249 + }, + "isolatedSum": { + "p50": 807.9360127449036, + "p90": 817.8240060806274, + "p95": 831.2320113182068, + "p99": 880.9599876403809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 592.0000076293945, + "p90": 600.928008556366, + "p95": 604.7999858856201, + "p99": 677.0880222320557 + }, + "combine": { + "p50": 826.4639973640442, + "p90": 835.1680040359497, + "p95": 836.7360234260559, + "p99": 858.240008354187 + }, + "roundtrip": { + "p50": 1396.064043045044, + "p90": 1405.6639671325684, + "p95": 1410.9439849853516, + "p99": 1469.472050666809 + }, + "isolatedSum": { + "p50": 1418.4640049934387, + "p90": 1436.0960125923157, + "p95": 1441.536009311676, + "p99": 1535.3280305862427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1091.871976852417, + "p90": 1098.5599756240845, + "p95": 1105.6640148162842, + "p99": 1117.408037185669 + }, + "combine": { + "p50": 1538.9440059661865, + "p90": 1550.5919456481934, + "p95": 1559.4559907913208, + "p99": 1574.7519731521606 + }, + "roundtrip": { + 
"p50": 2607.935905456543, + "p90": 2620.1279163360596, + "p95": 2623.744010925293, + "p99": 2639.552116394043 + }, + "isolatedSum": { + "p50": 2630.8159828186035, + "p90": 2649.151921272278, + "p95": 2665.120005607605, + "p99": 2692.1600103378296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3b00916b", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_58db80e8", + "comparisonKey": "e7ef44ee57e51cb5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:32.089344+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.80799651145935, + "p90": 126.62400305271149, + "p95": 127.77599692344666, + "p99": 139.29599523544312 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 123.52000176906586, + "p95": 124.76799637079239, + "p99": 148.28799664974213 + }, + "roundtrip": { + "p50": 211.42399311065674, + "p90": 215.55200219154358, + "p95": 217.1200066804886, + "p99": 228.19200158119202 + }, + "isolatedSum": { + "p50": 246.07999622821808, + "p90": 250.14400482177734, + "p95": 252.54399329423904, + "p99": 287.58399188518524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.98400342464447, + "p90": 162.36799955368042, + "p95": 163.7759953737259, + "p99": 172.7679967880249 + }, + "combine": { + "p50": 160.44799983501434, + "p90": 170.46399414539337, + "p95": 171.32799327373505, + "p99": 183.52000415325165 + }, + "roundtrip": { + "p50": 287.55199909210205, + "p90": 295.52000761032104, + "p95": 298.40001463890076, + "p99": 309.85599756240845 + }, + "isolatedSum": { + "p50": 318.4320032596588, + "p90": 332.8319936990738, + "p95": 335.10398864746094, + "p99": 356.28800094127655 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 222.4320024251938, + "p90": 226.04799270629883, + "p95": 228.7359982728958, + "p99": 247.9040026664734 + }, + "combine": { + "p50": 271.4880108833313, + "p90": 281.3119888305664, + "p95": 282.04798698425293, + "p99": 296.1919903755188 + }, + "roundtrip": { + "p50": 462.0159864425659, + "p90": 467.74399280548096, + "p95": 469.85599398612976, + "p99": 479.2320132255554 + }, + "isolatedSum": { + "p50": 493.9200133085251, + "p90": 507.35998153686523, + "p95": 510.78398525714874, + "p99": 544.0959930419922 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 344.35200691223145, + "p90": 349.63199496269226, + "p95": 353.7600040435791, + "p99": 390.81600308418274 + }, + "combine": { + "p50": 465.85598587989807, + "p90": 468.7359929084778, + "p95": 477.31199860572815, + "p99": 490.6559884548187 + }, + "roundtrip": { + "p50": 783.3279967308044, + "p90": 795.6799864768982, + "p95": 804.4160008430481, + "p99": 823.360025882721 + }, + "isolatedSum": { + "p50": 810.2079927921295, + "p90": 818.36798787117, + "p95": 831.0720026493073, + "p99": 881.4719915390015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 590.7520055770874, + "p90": 597.6319909095764, 
+ "p95": 601.0559797286987, + "p99": 620.8320260047913 + }, + "combine": { + "p50": 826.1759877204895, + "p90": 835.6159925460815, + "p95": 839.4560217857361, + "p99": 858.7520122528076 + }, + "roundtrip": { + "p50": 1396.064043045044, + "p90": 1407.6800346374512, + "p95": 1415.8400297164917, + "p99": 1507.2640180587769 + }, + "isolatedSum": { + "p50": 1416.927993297577, + "p90": 1433.247983455658, + "p95": 1440.5120015144348, + "p99": 1479.5840382575989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1092.8959846496582, + "p90": 1099.9679565429688, + "p95": 1103.808045387268, + "p99": 1172.8639602661133 + }, + "combine": { + "p50": 1538.7840270996094, + "p90": 1548.7040281295776, + "p95": 1558.6559772491455, + "p99": 1589.3440246582031 + }, + "roundtrip": { + "p50": 2607.5520515441895, + "p90": 2618.97611618042, + "p95": 2624.7360706329346, + "p99": 2643.0399417877197 + }, + "isolatedSum": { + "p50": 2631.6800117492676, + "p90": 2648.6719846725464, + "p95": 2662.4640226364136, + "p99": 2762.2079849243164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4717c5f8", + "identity": "b300|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_58db80e8", + "comparisonKey": "e25a99ae58e3f66f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:08.742356+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": 
"uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.31200051307678, + "p90": 127.93600559234619, + "p95": 129.50399518013, + "p99": 154.40000593662262 + }, + "combine": { + "p50": 122.30399996042252, + "p90": 123.58400225639343, + "p95": 124.76799637079239, + "p99": 134.8160058259964 + }, + "roundtrip": { + "p50": 212.67199516296387, + "p90": 218.84800493717194, + "p95": 
222.20799326896667, + "p99": 247.0400035381317 + }, + "isolatedSum": { + "p50": 247.6160004734993, + "p90": 251.52000784873962, + "p95": 254.2719915509224, + "p99": 289.216011762619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.3759994506836, + "p90": 164.44799304008484, + "p95": 165.53600132465363, + "p99": 169.21600699424744 + }, + "combine": { + "p50": 160.76800227165222, + "p90": 170.30400037765503, + "p95": 170.9119975566864, + "p99": 187.80800700187683 + }, + "roundtrip": { + "p50": 291.23198986053467, + "p90": 299.9359965324402, + "p95": 304.60798740386963, + "p99": 325.9519934654236 + }, + "isolatedSum": { + "p50": 322.1440017223358, + "p90": 334.75199341773987, + "p95": 336.44799888134, + "p99": 357.02401399612427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 224.16000068187714, + "p90": 227.23199427127838, + "p95": 229.66399788856506, + "p99": 245.7599937915802 + }, + "combine": { + "p50": 271.7120051383972, + "p90": 281.72799944877625, + "p95": 282.55999088287354, + "p99": 317.0880079269409 + }, + "roundtrip": { + "p50": 466.1119878292084, + "p90": 475.3600060939789, + "p95": 477.4720072746277, + "p99": 490.30399322509766 + }, + "isolatedSum": { + "p50": 495.87200582027435, + "p90": 508.9599937200546, + "p95": 512.2239887714386, + "p99": 562.8480017185211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 
2754, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 341.47199988365173, + "p90": 348.80000352859497, + "p95": 351.1359989643097, + "p99": 389.69600200653076 + }, + "combine": { + "p50": 467.20001101493835, + "p90": 477.88798809051514, + "p95": 479.13599014282227, + "p99": 495.9999918937683 + }, + "roundtrip": { + "p50": 785.6640219688416, + "p90": 793.8879728317261, + "p95": 796.9920039176941, + "p99": 812.3199939727783 + }, + "isolatedSum": { + "p50": 808.6720108985901, + "p90": 826.6879916191101, + "p95": 830.271989107132, + "p99": 885.6959939002991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 588.9279842376709, + "p90": 594.048023223877, + "p95": 605.023980140686, + "p99": 702.1759748458862 + }, + "combine": { + "p50": 822.3680257797241, + "p90": 833.728015422821, + "p95": 835.0399732589722, + "p99": 884.4159841537476 + }, + "roundtrip": { + "p50": 1382.1439743041992, + "p90": 1392.9280042648315, + "p95": 1400.2879858016968, + "p99": 1416.2240028381348 + }, + "isolatedSum": { + "p50": 1411.296010017395, + "p90": 1427.776038646698, + "p95": 1440.0639533996582, + "p99": 1586.5919589996338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1080.191969871521, + "p90": 1089.3440246582031, + "p95": 1097.0239639282227, + "p99": 1115.4240369796753 + }, + "combine": { + "p50": 1523.2959985733032, + "p90": 
1535.040020942688, + "p95": 1537.2480154037476, + "p99": 1559.8399639129639 + }, + "roundtrip": { + "p50": 2582.751989364624, + "p90": 2594.815969467163, + "p95": 2601.439952850342, + "p99": 2622.8160858154297 + }, + "isolatedSum": { + "p50": 2603.487968444824, + "p90": 2624.384045600891, + "p95": 2634.27197933197, + "p99": 2675.264000892639 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-468277ce", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "b300_3cb8cb98", + "comparisonKey": "04a6b843867ff03e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:48.899817+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { 
+ "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.7599971294403, + "p90": 135.3279948234558, + "p95": 137.34400272369385, + "p99": 153.50399911403656 + }, + "combine": { + "p50": 137.34400272369385, + "p90": 146.2080031633377, + "p95": 146.65600657463074, + "p99": 195.77600061893463 + }, + "roundtrip": { + "p50": 248.44799935817719, + "p90": 254.20799851417542, + "p95": 255.8079957962036, + "p99": 273.27999472618103 + }, + "isolatedSum": { + "p50": 267.10399985313416, + "p90": 281.5359979867935, + "p95": 284.0000092983246, + "p99": 349.2799997329712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 170.04799842834473, + "p90": 177.63200402259827, + "p95": 180.00000715255737, + "p99": 192.51200556755066 + }, + "combine": { + "p50": 208.3519995212555, + "p90": 219.29599344730377, + "p95": 220.70400416851044, + "p99": 234.97599363327026 + }, + "roundtrip": { + "p50": 357.85600543022156, + "p90": 365.85599184036255, + "p95": 368.25600266456604, + "p99": 376.0960102081299 + }, + "isolatedSum": { + "p50": 378.3999979496002, + "p90": 
396.92799746990204, + "p95": 400.7040113210678, + "p99": 427.4879992008209 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 252.8960108757019, + "p90": 256.9279968738556, + "p95": 259.99999046325684, + "p99": 303.3919930458069 + }, + "combine": { + "p50": 346.49598598480225, + "p90": 355.23200035095215, + "p95": 356.1280071735382, + "p99": 369.82399225234985 + }, + "roundtrip": { + "p50": 572.3199844360352, + "p90": 581.1200141906738, + "p95": 586.1759781837463, + "p99": 597.7280139923096 + }, + "isolatedSum": { + "p50": 599.3919968605042, + "p90": 612.1599972248077, + "p95": 616.127997636795, + "p99": 673.2159852981567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 426.144003868103, + "p90": 430.7839870452881, + "p95": 435.39199233055115, + "p99": 450.8480131626129 + }, + "combine": { + "p50": 602.0479798316956, + "p90": 612.8000020980835, + "p95": 615.1040196418762, + "p99": 675.4559874534607 + }, + "roundtrip": { + "p50": 1013.3440494537354, + "p90": 1020.1280117034912, + "p95": 1024.7360467910767, + "p99": 1037.2480154037476 + }, + "isolatedSum": { + "p50": 1028.1919836997986, + "p90": 1043.5839891433716, + "p95": 1050.4960119724274, + "p99": 1126.3040006160736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
16384, + "dispatch": { + "p50": 781.5679907798767, + "p90": 791.3280129432678, + "p95": 799.0080118179321, + "p99": 917.4079895019531 + }, + "combine": { + "p50": 1121.3120222091675, + "p90": 1132.6080560684204, + "p95": 1142.016053199768, + "p99": 1166.5279865264893 + }, + "roundtrip": { + "p50": 1883.936047554016, + "p90": 1896.5120315551758, + "p95": 1902.7199745178223, + "p99": 1990.4639720916748 + }, + "isolatedSum": { + "p50": 1902.8800129890442, + "p90": 1923.9360690116882, + "p95": 1941.0240650177002, + "p99": 2083.9359760284424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1484.320044517517, + "p90": 1499.7119903564453, + "p95": 1502.8480291366577, + "p99": 1532.9920053482056 + }, + "combine": { + "p50": 2149.535894393921, + "p90": 2161.8878841400146, + "p95": 2165.40789604187, + "p99": 2185.3439807891846 + }, + "roundtrip": { + "p50": 3612.6720905303955, + "p90": 3631.4239501953125, + "p95": 3638.495922088623, + "p99": 3774.048089981079 + }, + "isolatedSum": { + "p50": 3633.855938911438, + "p90": 3661.59987449646, + "p95": 3668.255925178528, + "p99": 3718.33598613739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9dbbda54", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "b300_0c4d187b", + "comparisonKey": "d773775d35c5f2c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:43.671145+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.47199648618698, + "p90": 97.6639986038208, + "p95": 99.58399832248688, + "p99": 163.96799683570862 + }, + "combine": { + "p50": 75.39200037717819, + "p90": 85.69599688053131, + "p95": 
88.51200342178345, + "p99": 124.41600114107132 + }, + "roundtrip": { + "p50": 151.58399939537048, + "p90": 155.58399260044098, + "p95": 156.80000185966492, + "p99": 173.15199971199036 + }, + "isolatedSum": { + "p50": 168.86399686336517, + "p90": 183.3599954843521, + "p95": 188.09600174427032, + "p99": 288.38399797677994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 124.15999919176102, + "p90": 126.46399438381195, + "p95": 128.7360042333603, + "p99": 160.89600324630737 + }, + "combine": { + "p50": 123.3920007944107, + "p90": 125.59999525547028, + "p95": 127.16799974441528, + "p99": 146.27200365066528 + }, + "roundtrip": { + "p50": 212.89600431919098, + "p90": 220.2560007572174, + "p95": 222.04799950122833, + "p99": 227.1679937839508 + }, + "isolatedSum": { + "p50": 247.55199998617172, + "p90": 252.06398963928223, + "p95": 255.90400397777557, + "p99": 307.16800689697266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 218.07999908924103, + "p90": 220.60799598693848, + "p95": 221.95200622081757, + "p99": 268.095999956131 + }, + "combine": { + "p50": 268.7999904155731, + "p90": 269.82399821281433, + "p95": 270.4640030860901, + "p99": 307.16800689697266 + }, + "roundtrip": { + "p50": 469.6959853172302, + "p90": 473.1520116329193, + "p95": 474.17598962783813, + "p99": 500.5760192871094 + }, + "isolatedSum": { + "p50": 486.87998950481415, + "p90": 490.4319941997528, + "p95": 492.41600930690765, + "p99": 575.2640068531036 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1c25525", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "b300_2b2c04f6", + "comparisonKey": "ea8122d6025712b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:08.734228+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.62399846315384, + "p90": 108.19199681282043, + "p95": 111.26399785280228, + "p99": 127.74400413036346 + }, + "combine": { + "p50": 95.8079993724823, + "p90": 97.79199957847595, + "p95": 98.01600128412247, + "p99": 100.00000149011612 + }, + "roundtrip": { + "p50": 167.64800250530243, + "p90": 174.52800273895264, + "p95": 176.12800002098083, + "p99": 178.9119988679886 + }, + "isolatedSum": { + "p50": 194.43199783563614, + "p90": 205.9839963912964, + "p95": 209.27999913692474, + "p99": 227.74400562047958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 120.44800072908401, + "p90": 125.5359947681427, + "p95": 127.29600071907043, + "p99": 143.8080072402954 + }, + "combine": { + "p50": 111.93600296974182, + "p90": 120.31999975442886, + "p95": 121.31199985742569, + "p99": 134.62400436401367 + }, + "roundtrip": { + "p50": 209.82399582862854, + "p90": 216.70399606227875, + "p95": 218.1120067834854, + "p99": 222.08000719547272 + }, + "isolatedSum": { + "p50": 232.38400369882584, + "p90": 245.85599452257156, + "p95": 248.60800057649612, + "p99": 278.4320116043091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 148.47999811172485, + "p90": 155.2319973707199, + "p95": 159.04000401496887, + "p99": 165.8560037612915 + }, + "combine": { + "p50": 157.24800527095795, + "p90": 159.36000645160675, + "p95": 159.90400314331055, + "p99": 171.23199999332428 + }, + "roundtrip": { + "p50": 278.81601452827454, + "p90": 286.49601340293884, + "p95": 289.15199637413025, + "p99": 300.1919984817505 + }, + "isolatedSum": { + "p50": 305.7280033826828, + "p90": 314.59200382232666, + "p95": 318.9440071582794, + "p99": 337.0880037546158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 220.06399929523468, + "p90": 224.70399737358093, + "p95": 226.04799270629883, + "p99": 240.25599658489227 + }, + "combine": { + "p50": 269.1200077533722, + "p90": 270.52798867225647, + "p95": 271.232008934021, + "p99": 285.12001037597656 + }, + "roundtrip": { + "p50": 462.8159999847412, + "p90": 468.86399388313293, + "p95": 471.42401337623596, + "p99": 491.61601066589355 + }, + "isolatedSum": { + "p50": 489.1840070486069, + "p90": 495.2319860458374, + "p95": 497.2800016403198, + "p99": 525.3760069608688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 377.50399112701416, + "p90": 384.223997592926, + "p95": 386.0799968242645, + "p99": 436.12799048423767 + }, + "combine": { + "p50": 467.1359956264496, + "p90": 470.2720046043396, + "p95": 478.11201214790344, + "p99": 505.6319832801819 + }, + "roundtrip": { + "p50": 
817.5680041313171, + "p90": 827.1999955177307, + "p95": 831.2000036239624, + "p99": 844.8960185050964 + }, + "isolatedSum": { + "p50": 844.6399867534637, + "p90": 854.4960021972656, + "p95": 864.192008972168, + "p99": 941.7599737644196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 683.1039786338806, + "p90": 694.6560144424438, + "p95": 698.6560225486755, + "p99": 739.8719787597656 + }, + "combine": { + "p50": 835.9360098838806, + "p90": 845.2159762382507, + "p95": 847.0720052719116, + "p99": 860.4480028152466 + }, + "roundtrip": { + "p50": 1496.1600303649902, + "p90": 1507.2319507598877, + "p95": 1511.8399858474731, + "p99": 1535.1359844207764 + }, + "isolatedSum": { + "p50": 1519.0399885177612, + "p90": 1539.8719906806946, + "p95": 1545.7280278205872, + "p99": 1600.3199815750122 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-45063ece", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "b300_d9b08302", + "comparisonKey": "7842ee8cc1c6c93d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:03.261461+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.9039978981018, + "p90": 134.91199910640717, + "p95": 136.3839954137802, + "p99": 141.08799397945404 + }, + "combine": { + "p50": 134.5279961824417, + "p90": 135.8720064163208, + "p95": 136.09600067138672, + "p99": 138.20800185203552 + }, + "roundtrip": { + "p50": 237.59999871253967, + "p90": 243.29599738121033, + "p95": 244.57600712776184, + "p99": 248.25599789619446 + }, + "isolatedSum": { + "p50": 262.4319940805435, + "p90": 270.78400552272797, + "p95": 272.47999608516693, + "p99": 279.29599583148956 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 165.47200083732605, + "p90": 168.32000017166138, + "p95": 171.48800194263458, + "p99": 177.05599963665009 + }, + "combine": { + "p50": 194.04800236225128, + "p90": 196.03200256824493, + "p95": 196.6720074415207, + "p99": 199.0399956703186 + }, + "roundtrip": { + "p50": 335.3919982910156, + "p90": 343.51998567581177, + "p95": 345.44000029563904, + "p99": 351.29600763320923 + }, + "isolatedSum": { + "p50": 359.52000319957733, + "p90": 364.3520027399063, + "p95": 368.1600093841553, + "p99": 376.0959953069687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.96800184249878, + "p90": 248.1279969215393, + "p95": 250.0160038471222, + "p99": 255.42399287223816 + }, + "combine": { + "p50": 343.4560000896454, + "p90": 345.0239896774292, + "p95": 346.2719917297363, + "p99": 355.26400804519653 + }, + "roundtrip": { + "p50": 556.6719770431519, + "p90": 563.1679892539978, + "p95": 565.0879740715027, + "p99": 574.8479962348938 + }, + "isolatedSum": { + "p50": 583.4240019321442, + "p90": 593.1519865989685, + "p95": 596.2879955768585, + "p99": 610.6880009174347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 395.1680064201355, + "p90": 399.04001355171204, + 
"p95": 400.12800693511963, + "p99": 403.5840034484863 + }, + "combine": { + "p50": 588.8640284538269, + "p90": 590.3040170669556, + "p95": 591.264009475708, + "p99": 602.3679971694946 + }, + "roundtrip": { + "p50": 956.60799741745, + "p90": 964.1280174255371, + "p95": 966.3680195808411, + "p99": 975.2960205078125 + }, + "isolatedSum": { + "p50": 984.0320348739624, + "p90": 989.3440306186676, + "p95": 991.3920164108276, + "p99": 1005.952000617981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 711.1679911613464, + "p90": 715.8079743385315, + "p95": 717.631995677948, + "p99": 737.824022769928 + }, + "combine": { + "p50": 1093.440055847168, + "p90": 1095.6159830093384, + "p95": 1097.823977470398, + "p99": 1117.184042930603 + }, + "roundtrip": { + "p50": 1775.712013244629, + "p90": 1782.1760177612305, + "p95": 1784.6720218658447, + "p99": 1800.1279830932617 + }, + "isolatedSum": { + "p50": 1804.6080470085144, + "p90": 1811.4239573478699, + "p95": 1815.455973148346, + "p99": 1855.008065700531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1390.2720212936401, + "p90": 1405.4720401763916, + "p95": 1407.8400135040283, + "p99": 1417.9519414901733 + }, + "combine": { + "p50": 2088.576078414917, + "p90": 2090.240001678467, + "p95": 2092.384099960327, + "p99": 2113.055944442749 + }, + "roundtrip": { + "p50": 3455.3918838500977, + "p90": 3471.2319374084473, + "p95": 3477.247953414917, + "p99": 3488.447904586792 + }, + 
"isolatedSum": { + "p50": 3478.848099708557, + "p90": 3495.7120418548584, + "p95": 3500.2241134643555, + "p99": 3531.0078859329224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bc8ea45", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "b300_ee223ecc", + "comparisonKey": "8c24a01cb59b79c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:49.040092+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.95200538635254, + "p90": 128.51199507713318, + "p95": 129.95199859142303, + "p99": 139.71200585365295 + }, + "combine": { + "p50": 123.19999933242798, + "p90": 133.7279975414276, + "p95": 134.8160058259964, + "p99": 146.84799313545227 + }, + "roundtrip": { + "p50": 217.92000532150269, + "p90": 226.6560047864914, + "p95": 232.12799429893494, + "p99": 239.87199366092682 + }, + "isolatedSum": { + "p50": 249.15200471878052, + "p90": 262.2399926185608, + "p95": 264.76800441741943, + "p99": 286.5599989891052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.02400422096252, + "p90": 164.41600024700165, + "p95": 165.6000018119812, + "p99": 179.00800704956055 + }, + "combine": { + "p50": 162.1759980916977, + "p90": 171.48800194263458, + "p95": 172.19200730323792, + "p99": 175.3920018672943 + }, + "roundtrip": { + "p50": 297.12000489234924, + "p90": 305.5360019207001, + "p95": 307.6480031013489, + "p99": 318.84801387786865 + }, + "isolatedSum": { + "p50": 323.2000023126602, + "p90": 335.90400218963623, + "p95": 337.7920091152191, + "p99": 354.40000891685486 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 224.09600019454956, + "p90": 227.1679937839508, + "p95": 228.2239943742752, + "p99": 235.6480062007904 + }, + "combine": { + "p50": 271.7759907245636, + "p90": 282.04798698425293, + "p95": 282.3359966278076, + "p99": 293.4400141239166 + }, + "roundtrip": { + "p50": 463.03999423980713, + "p90": 469.760000705719, + "p95": 472.4160134792328, + "p99": 485.53600907325745 + }, + "isolatedSum": { + "p50": 495.87199091911316, + "p90": 509.21598076820374, + "p95": 510.5599910020828, + "p99": 529.088020324707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 340.1919901371002, + "p90": 347.7120101451874, + "p95": 349.0239977836609, + "p99": 360.1599931716919 + }, + "combine": { + "p50": 469.9839949607849, + "p90": 478.5279929637909, + "p95": 479.5840084552765, + "p99": 491.5199875831604 + }, + "roundtrip": { + "p50": 787.6160144805908, + "p90": 795.3919768333435, + "p95": 799.9680042266846, + "p99": 807.4880242347717 + }, + "isolatedSum": { + "p50": 810.1759850978851, + "p90": 826.2400031089783, + "p95": 828.6080062389374, + "p99": 851.6799807548523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 588.9599919319153, + "p90": 593.0560231208801, + 
"p95": 595.3919887542725, + "p99": 604.1920185089111 + }, + "combine": { + "p50": 824.895977973938, + "p90": 835.0399732589722, + "p95": 835.7120156288147, + "p99": 847.4879860877991 + }, + "roundtrip": { + "p50": 1391.711950302124, + "p90": 1399.7440338134766, + "p95": 1406.6239595413208, + "p99": 1417.0880317687988 + }, + "isolatedSum": { + "p50": 1413.8559699058533, + "p90": 1428.0959963798523, + "p95": 1431.1040043830872, + "p99": 1451.6800045967102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1083.7759971618652, + "p90": 1088.3840322494507, + "p95": 1093.951940536499, + "p99": 1109.1519594192505 + }, + "combine": { + "p50": 1534.9760055541992, + "p90": 1546.239972114563, + "p95": 1547.5200414657593, + "p99": 1559.2960119247437 + }, + "roundtrip": { + "p50": 2589.3120765686035, + "p90": 2600.6720066070557, + "p95": 2607.136011123657, + "p99": 2618.8480854034424 + }, + "isolatedSum": { + "p50": 2618.7520027160645, + "p90": 2634.6240043640137, + "p95": 2641.4719820022583, + "p99": 2668.447971343994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3dd2f6e2", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "b300_a50cd7ae", + "comparisonKey": "8824149a80666a54", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:08.440778+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + 
"backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.34399330615997, + "p90": 127.61600315570831, + "p95": 128.54400277137756, + "p99": 131.84000551700592 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 123.32800030708313, + "p95": 124.09599870443344, + "p99": 145.9839940071106 + }, + 
"roundtrip": { + "p50": 212.19199895858765, + "p90": 215.45599400997162, + "p95": 216.95999801158905, + "p99": 222.81600534915924 + }, + "isolatedSum": { + "p50": 247.6159930229187, + "p90": 250.94400346279144, + "p95": 252.640001475811, + "p99": 277.8239995241165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.84000265598297, + "p90": 164.67200219631195, + "p95": 166.6879951953888, + "p99": 223.7440049648285 + }, + "combine": { + "p50": 160.76800227165222, + "p90": 170.46399414539337, + "p95": 171.55200242996216, + "p99": 186.27199530601501 + }, + "roundtrip": { + "p50": 289.15199637413025, + "p90": 297.66398668289185, + "p95": 300.7360100746155, + "p99": 311.90401315689087 + }, + "isolatedSum": { + "p50": 320.6080049276352, + "p90": 335.1359963417053, + "p95": 338.23999762535095, + "p99": 410.0160002708435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 224.03199970722198, + "p90": 227.7120053768158, + "p95": 232.31999576091766, + "p99": 252.0959973335266 + }, + "combine": { + "p50": 281.0559868812561, + "p90": 283.1040024757385, + "p95": 285.3440046310425, + "p99": 369.24800276756287 + }, + "roundtrip": { + "p50": 458.20799469947815, + "p90": 466.0159945487976, + "p95": 468.1600034236908, + "p99": 478.5279929637909 + }, + "isolatedSum": { + "p50": 505.0879865884781, + "p90": 510.8160078525543, + "p95": 517.6640003919601, + "p99": 621.3440001010895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + 
"combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 339.136004447937, + "p90": 347.55200147628784, + "p95": 351.4559864997864, + "p99": 436.47998571395874 + }, + "combine": { + "p50": 469.6640074253082, + "p90": 478.5279929637909, + "p95": 480.4159998893738, + "p99": 504.92799282073975 + }, + "roundtrip": { + "p50": 784.6400141716003, + "p90": 790.336012840271, + "p95": 793.2159900665283, + "p99": 806.335985660553 + }, + "isolatedSum": { + "p50": 808.8000118732452, + "p90": 826.0799944400787, + "p95": 831.8719863891602, + "p99": 941.4079785346985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 588.9279842376709, + "p90": 592.9279923439026, + "p95": 595.9039926528931, + "p99": 603.4560203552246 + }, + "combine": { + "p50": 820.9599852561951, + "p90": 823.6799836158752, + "p95": 824.9599933624268, + "p99": 837.8239870071411 + }, + "roundtrip": { + "p50": 1380.9280395507812, + "p90": 1387.5199556350708, + "p95": 1392.2879695892334, + "p99": 1424.6400594711304 + }, + "isolatedSum": { + "p50": 1409.887969493866, + "p90": 1416.6079759597778, + "p95": 1420.8639860153198, + "p99": 1441.2800073623657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1085.9520435333252, + "p90": 1091.1040306091309, + "p95": 1098.912000656128, + 
"p99": 1284.2559814453125 + }, + "combine": { + "p50": 1523.6480236053467, + "p90": 1535.264015197754, + "p95": 1536.9600057601929, + "p99": 1574.4320154190063 + }, + "roundtrip": { + "p50": 2583.199977874756, + "p90": 2595.9999561309814, + "p95": 2601.439952850342, + "p99": 2610.8479499816895 + }, + "isolatedSum": { + "p50": 2609.600067138672, + "p90": 2626.3680458068848, + "p95": 2635.872006416321, + "p99": 2858.687996864319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1cc0a141", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_acd57143", + "comparisonKey": "2e681dcee959a2ba", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:49.803383+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.95199662446976, + "p90": 120.51200121641159, + "p95": 122.46400117874146, + "p99": 128.25599312782288 + }, + "combine": { + "p50": 127.77599692344666, + "p90": 134.5279961824417, + "p95": 135.3919953107834, + "p99": 158.07999670505524 + }, + "roundtrip": { + "p50": 228.0000001192093, + "p90": 233.18399488925934, + "p95": 235.26400327682495, + "p99": 251.39200687408447 + }, + "isolatedSum": { + "p50": 245.7279935479164, + "p90": 255.0399973988533, + "p95": 257.85599648952484, + "p99": 286.3359898328781 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 154.91199493408203, + "p90": 157.95199573040009, + "p95": 161.05599701404572, + "p99": 183.6480051279068 + }, + "combine": { + "p50": 183.74399840831757, + "p90": 187.29600310325623, + "p95": 194.94399428367615, + "p99": 243.93600225448608 + }, + "roundtrip": { + "p50": 316.5760040283203, + "p90": 322.7199912071228, + "p95": 
325.79201459884644, + "p99": 338.3040130138397 + }, + "isolatedSum": { + "p50": 338.6559933423996, + "p90": 345.2479988336563, + "p95": 355.99999129772186, + "p99": 427.5840073823929 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 227.77600586414337, + "p90": 235.87200045585632, + "p95": 237.7600073814392, + "p99": 243.0720031261444 + }, + "combine": { + "p50": 331.2000036239624, + "p90": 334.30400490760803, + "p95": 342.72000193595886, + "p99": 395.9360122680664 + }, + "roundtrip": { + "p50": 537.3439788818359, + "p90": 542.7520275115967, + "p95": 544.1280007362366, + "p99": 550.5920052528381 + }, + "isolatedSum": { + "p50": 558.9760094881058, + "p90": 570.1760053634644, + "p95": 580.4800093173981, + "p99": 639.0080153942108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 371.7760145664215, + "p90": 375.93600153923035, + "p95": 377.920001745224, + "p99": 448.60801100730896 + }, + "combine": { + "p50": 589.8240208625793, + "p90": 599.9360084533691, + "p95": 601.0879874229431, + "p99": 624.351978302002 + }, + "roundtrip": { + "p50": 933.5359930992126, + "p90": 940.0960206985474, + "p95": 943.1040287017822, + "p99": 954.8159837722778 + }, + "isolatedSum": { + "p50": 961.6000354290009, + "p90": 975.8720099925995, + "p95": 979.0079891681671, + "p99": 1072.959989309311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 
8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 674.0800142288208, + "p90": 684.6399903297424, + "p95": 691.0399794578552, + "p99": 777.9840230941772 + }, + "combine": { + "p50": 1072.0319747924805, + "p90": 1080.672025680542, + "p95": 1081.984043121338, + "p99": 1154.304027557373 + }, + "roundtrip": { + "p50": 1726.6240119934082, + "p90": 1735.8399629592896, + "p95": 1742.400050163269, + "p99": 1825.5360126495361 + }, + "isolatedSum": { + "p50": 1746.1119890213013, + "p90": 1765.3120160102844, + "p95": 1773.0240225791931, + "p99": 1932.2880506515503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1309.5359802246094, + "p90": 1321.7600584030151, + "p95": 1330.3680419921875, + "p99": 1369.3439960479736 + }, + "combine": { + "p50": 2051.584005355835, + "p90": 2055.0079345703125, + "p95": 2063.5199546813965, + "p99": 2127.8719902038574 + }, + "roundtrip": { + "p50": 3346.9440937042236, + "p90": 3357.4399948120117, + "p95": 3361.7920875549316, + "p99": 3380.8000087738037 + }, + "isolatedSum": { + "p50": 3361.1199855804443, + "p90": 3376.7679929733276, + "p95": 3393.887996673584, + "p99": 3497.215986251831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-deffdeef", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "b300_88389899", + "comparisonKey": 
"c476015c973029dd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:56.368866+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.7120019197464, + "p90": 120.80000340938568, + "p95": 122.40000069141388, + 
"p99": 128.28800082206726 + }, + "combine": { + "p50": 123.32800030708313, + "p90": 133.37600231170654, + "p95": 133.7919980287552, + "p99": 135.83999872207642 + }, + "roundtrip": { + "p50": 218.23999285697937, + "p90": 226.33600234985352, + "p95": 228.89600694179535, + "p99": 237.18400299549103 + }, + "isolatedSum": { + "p50": 239.04000222682953, + "p90": 254.17600572109222, + "p95": 256.19199872016907, + "p99": 264.1279995441437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.74400675296783, + "p90": 151.32799744606018, + "p95": 152.99199521541595, + "p99": 163.32800686359406 + }, + "combine": { + "p50": 173.18400740623474, + "p90": 183.26400220394135, + "p95": 183.77600610256195, + "p99": 206.88000321388245 + }, + "roundtrip": { + "p50": 301.91999673843384, + "p90": 308.4160089492798, + "p95": 310.33599376678467, + "p99": 324.3519961833954 + }, + "isolatedSum": { + "p50": 316.9280141592026, + "p90": 334.5919996500015, + "p95": 336.7680013179779, + "p99": 370.2080100774765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.9759964942932, + "p90": 212.64000236988068, + "p95": 214.1759991645813, + "p99": 266.78401231765747 + }, + "combine": { + "p50": 317.6319897174835, + "p90": 320.576012134552, + "p95": 330.3360044956207, + "p99": 358.14398527145386 + }, + "roundtrip": { + "p50": 501.50400400161743, + "p90": 508.7360143661499, + "p95": 511.03997230529785, + "p99": 521.5680003166199 + }, + "isolatedSum": { + "p50": 
524.6079862117767, + "p90": 533.2160145044327, + "p95": 544.512003660202, + "p99": 624.9279975891113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 333.18400382995605, + "p90": 344.543993473053, + "p95": 347.6479947566986, + "p99": 374.30399656295776 + }, + "combine": { + "p50": 591.9679999351501, + "p90": 601.535975933075, + "p95": 605.1520109176636, + "p99": 650.7200002670288 + }, + "roundtrip": { + "p50": 903.3600091934204, + "p90": 909.0560078620911, + "p95": 911.7760062217712, + "p99": 930.0479888916016 + }, + "isolatedSum": { + "p50": 925.1520037651062, + "p90": 946.0799694061279, + "p95": 952.8000056743622, + "p99": 1025.0239968299866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 606.1760187149048, + "p90": 618.3040142059326, + "p95": 642.1120166778564, + "p99": 707.7119946479797 + }, + "combine": { + "p50": 1107.0079803466797, + "p90": 1117.4720525741577, + "p95": 1129.0559768676758, + "p99": 1181.823968887329 + }, + "roundtrip": { + "p50": 1653.8560390472412, + "p90": 1664.1600131988525, + "p95": 1667.6160097122192, + "p99": 1720.7039594650269 + }, + "isolatedSum": { + "p50": 1713.1839990615845, + "p90": 1735.7760667800903, + "p95": 1771.1679935455322, + "p99": 1889.5359635353088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1186.0159635543823, + "p90": 1198.8799571990967, + "p95": 1208.8639736175537, + "p99": 1256.608009338379 + }, + "combine": { + "p50": 2075.615882873535, + "p90": 2078.7200927734375, + "p95": 2087.775945663452, + "p99": 2137.183904647827 + }, + "roundtrip": { + "p50": 3201.9519805908203, + "p90": 3212.160110473633, + "p95": 3217.5040245056152, + "p99": 3236.3200187683105 + }, + "isolatedSum": { + "p50": 3261.6318464279175, + "p90": 3277.600049972534, + "p95": 3296.639919281006, + "p99": 3393.791913986206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f9f6d8b2", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "b300_28bc3f15", + "comparisonKey": "ec90df40591dcfa3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:44.012413+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.4800016283989, + "p90": 127.26399302482605, + "p95": 128.12800705432892, + "p99": 131.23199343681335 + }, + "combine": { + "p50": 122.52800166606903, + "p90": 123.6800029873848, + "p95": 124.25599992275238, + "p99": 133.08799266815186 + }, + "roundtrip": { + "p50": 214.01600539684296, + "p90": 217.40800142288208, + "p95": 219.42399442195892, + "p99": 229.50400412082672 + }, + "isolatedSum": { + "p50": 247.00800329446793, + "p90": 250.94399601221085, + "p95": 252.3840069770813, + "p99": 264.3199861049652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.8079949617386, + "p90": 163.68000209331512, + "p95": 164.89599645137787, + "p99": 
170.17599940299988 + }, + "combine": { + "p50": 160.41600704193115, + "p90": 169.8240041732788, + "p95": 170.97599804401398, + "p99": 172.57599532604218 + }, + "roundtrip": { + "p50": 288.9600098133087, + "p90": 297.1520125865936, + "p95": 299.71200227737427, + "p99": 312.19199299812317 + }, + "isolatedSum": { + "p50": 320.22400200366974, + "p90": 333.50400626659393, + "p95": 335.87199449539185, + "p99": 342.75199472904205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 223.58399629592896, + "p90": 226.9439995288849, + "p95": 228.09599339962006, + "p99": 246.0159957408905 + }, + "combine": { + "p50": 281.95199370384216, + "p90": 284.09600257873535, + "p95": 286.01598739624023, + "p99": 297.4720001220703 + }, + "roundtrip": { + "p50": 465.37598967552185, + "p90": 471.3920056819916, + "p95": 474.94399547576904, + "p99": 485.82398891448975 + }, + "isolatedSum": { + "p50": 505.5359899997711, + "p90": 511.04000210762024, + "p95": 514.1119807958603, + "p99": 543.4879958629608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 347.9680120944977, + "p90": 351.6159951686859, + "p95": 353.34399342536926, + "p99": 384.3199908733368 + }, + "combine": { + "p50": 477.56800055503845, + "p90": 480.0640046596527, + "p95": 489.21599984169006, + "p99": 491.5519952774048 + }, + "roundtrip": { + "p50": 792.6080226898193, + "p90": 800.6399869918823, + "p95": 801.9840121269226, + "p99": 811.3600015640259 + }, + "isolatedSum": { + "p50": 
825.5360126495361, + "p90": 831.6799998283386, + "p95": 842.5599932670593, + "p99": 875.8719861507416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 598.8159775733948, + "p90": 605.0559878349304, + "p95": 607.1680188179016, + "p99": 616.8000102043152 + }, + "combine": { + "p50": 837.4720215797424, + "p90": 847.104012966156, + "p95": 848.4479784965515, + "p99": 863.2640242576599 + }, + "roundtrip": { + "p50": 1411.4880561828613, + "p90": 1419.1679954528809, + "p95": 1424.2240190505981, + "p99": 1442.463994026184 + }, + "isolatedSum": { + "p50": 1436.2879991531372, + "p90": 1452.1600008010864, + "p95": 1455.6159973144531, + "p99": 1480.064034461975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1108.06405544281, + "p90": 1114.0799522399902, + "p95": 1118.3680295944214, + "p99": 1128.864049911499 + }, + "combine": { + "p50": 1576.3839483261108, + "p90": 1585.9839916229248, + "p95": 1588.479995727539, + "p99": 1608.512043952942 + }, + "roundtrip": { + "p50": 2664.9279594421387, + "p90": 2673.311948776245, + "p95": 2676.4159202575684, + "p99": 2688.3840560913086 + }, + "isolatedSum": { + "p50": 2684.448003768921, + "p90": 2700.063943862915, + "p95": 2706.8480253219604, + "p99": 2737.376093864441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1ae4daa5", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "b300_58029714", + "comparisonKey": "4cc61ff34d59aa6d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:52.117015+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + 
"createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.91199779510498, + "p90": 129.98400628566742, + "p95": 131.1040073633194, + "p99": 138.65600526332855 + }, + "combine": { + "p50": 133.7919980287552, + "p90": 134.91199910640717, + "p95": 135.45599579811096, + "p99": 145.02400159835815 + }, + "roundtrip": { + "p50": 232.92799293994904, + "p90": 240.7039999961853, + "p95": 243.3599978685379, + "p99": 249.24799799919128 + }, + "isolatedSum": { + "p50": 260.70399582386017, + "p90": 264.8960053920746, + "p95": 266.56000316143036, + "p99": 283.6800068616867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.60800170898438, + "p90": 168.57600212097168, + "p95": 172.63999581336975, + "p99": 175.35999417304993 + }, + "combine": { + "p50": 187.04000115394592, + "p90": 196.06399536132812, + "p95": 196.6720074415207, + "p99": 209.53600108623505 + }, + "roundtrip": { + "p50": 325.6320059299469, + "p90": 331.712007522583, + "p95": 333.3120048046112, + "p99": 345.37601470947266 + }, + "isolatedSum": { + "p50": 351.6480028629303, + "p90": 364.6399974822998, + "p95": 369.31200325489044, + "p99": 384.895995259285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.7920001745224, + "p90": 241.72799289226532, + "p95": 244.25600469112396, + "p99": 269.0240144729614 + }, + "combine": { + "p50": 332.5760066509247, + 
"p90": 342.78398752212524, + "p95": 343.4560000896454, + "p99": 369.34399604797363 + }, + "roundtrip": { + "p50": 543.0399775505066, + "p90": 550.8800148963928, + "p95": 552.7039766311646, + "p99": 561.2480044364929 + }, + "isolatedSum": { + "p50": 570.3680068254471, + "p90": 584.5119804143906, + "p95": 587.7120047807693, + "p99": 638.3680105209351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.8080086708069, + "p90": 386.9119882583618, + "p95": 389.3119990825653, + "p99": 435.4560077190399 + }, + "combine": { + "p50": 577.6320099830627, + "p90": 588.3200168609619, + "p95": 588.9599919319153, + "p99": 602.4320125579834 + }, + "roundtrip": { + "p50": 942.3360228538513, + "p90": 948.2240080833435, + "p95": 950.3999948501587, + "p99": 959.551990032196 + }, + "isolatedSum": { + "p50": 957.4400186538696, + "p90": 975.2320051193237, + "p95": 978.2719910144806, + "p99": 1037.8880202770233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 682.1439862251282, + "p90": 691.3279891014099, + "p95": 695.9360241889954, + "p99": 745.8879947662354 + }, + "combine": { + "p50": 1057.088017463684, + "p90": 1067.8720474243164, + "p95": 1068.608045578003, + "p99": 1079.6799659729004 + }, + "roundtrip": { + "p50": 1716.8320417404175, + "p90": 1724.0639925003052, + "p95": 1727.2000312805176, + "p99": 1740.6079769134521 + }, + "isolatedSum": { + "p50": 1739.2320036888123, + "p90": 1759.2000365257263, + "p95": 1764.5440697669983, + "p99": 
1825.5679607391357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1337.2479677200317, + "p90": 1353.4079790115356, + "p95": 1357.8879833221436, + "p99": 1376.3840198516846 + }, + "combine": { + "p50": 2026.2401103973389, + "p90": 2029.4079780578613, + "p95": 2037.856101989746, + "p99": 2051.487922668457 + }, + "roundtrip": { + "p50": 3344.575881958008, + "p90": 3362.46395111084, + "p95": 3368.4799671173096, + "p99": 3393.280029296875 + }, + "isolatedSum": { + "p50": 3363.4880781173706, + "p90": 3382.815957069397, + "p95": 3395.7440853118896, + "p99": 3427.8719425201416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-442a3923", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "b300_187551ba", + "comparisonKey": "2701653f3d330ad8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:27.942242+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.79999661445618, + "p90": 128.03199887275696, + "p95": 129.4720023870468, + "p99": 151.74399316310883 + }, + "combine": { + "p50": 122.56000190973282, + "p90": 123.90399724245071, + "p95": 125.37600100040436, + "p99": 140.03199338912964 + }, + "roundtrip": { + "p50": 213.98399770259857, + "p90": 221.11999988555908, + "p95": 225.72800517082214, + "p99": 254.7520101070404 + }, + "isolatedSum": { + "p50": 247.359998524189, + "p90": 251.93599611520767, + "p95": 254.84800338745117, + "p99": 291.77598655223846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 4, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 160.3199988603592, + "p90": 163.16799819469452, + "p95": 164.44799304008484, + "p99": 168.44800114631653 + }, + "combine": { + "p50": 161.5999937057495, + "p90": 170.46399414539337, + "p95": 170.8800047636032, + "p99": 172.63999581336975 + }, + "roundtrip": { + "p50": 290.2719974517822, + "p90": 298.3039915561676, + "p95": 303.1359910964966, + "p99": 311.93599104881287 + }, + "isolatedSum": { + "p50": 321.9199925661087, + "p90": 333.6319923400879, + "p95": 335.32799780368805, + "p99": 341.0879969596863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 221.37600183486938, + "p90": 225.75999796390533, + "p95": 226.55999660491943, + "p99": 232.38399624824524 + }, + "combine": { + "p50": 272.8640139102936, + "p90": 282.04798698425293, + "p95": 282.30398893356323, + "p99": 294.20799016952515 + }, + "roundtrip": { + "p50": 462.6240134239197, + "p90": 470.0480103492737, + "p95": 471.8720018863678, + "p99": 482.1760058403015 + }, + "isolatedSum": { + "p50": 494.24001574516296, + "p90": 507.80798494815826, + "p95": 508.86398553848267, + "p99": 526.5919864177704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 341.6000008583069, + "p90": 348.83201122283936, + "p95": 350.20801424980164, + "p99": 362.62398958206177 + }, + "combine": { + "p50": 467.8719937801361, + "p90": 478.14399003982544, + "p95": 
478.7519872188568, + "p99": 490.30399322509766 + }, + "roundtrip": { + "p50": 787.0399951934814, + "p90": 793.3120131492615, + "p95": 797.6319789886475, + "p99": 810.6240034103394 + }, + "isolatedSum": { + "p50": 809.471994638443, + "p90": 826.9760012626648, + "p95": 828.9600014686584, + "p99": 852.9279828071594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.1919779777527, + "p90": 592.3839807510376, + "p95": 594.7520136833191, + "p99": 618.9119815826416 + }, + "combine": { + "p50": 822.7519989013672, + "p90": 833.9200019836426, + "p95": 835.3599905967712, + "p99": 858.2720160484314 + }, + "roundtrip": { + "p50": 1387.1999979019165, + "p90": 1399.232029914856, + "p95": 1405.2480459213257, + "p99": 1430.1120042800903 + }, + "isolatedSum": { + "p50": 1406.9439768791199, + "p90": 1426.3039827346802, + "p95": 1430.1120042800903, + "p99": 1477.183997631073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1087.2000455856323, + "p90": 1096.2560176849365, + "p95": 1099.2319583892822, + "p99": 1121.1520433425903 + }, + "combine": { + "p50": 1538.0799770355225, + "p90": 1548.8959550857544, + "p95": 1559.2000484466553, + "p99": 1572.3199844360352 + }, + "roundtrip": { + "p50": 2605.4720878601074, + "p90": 2618.0479526519775, + "p95": 2627.3279190063477, + "p99": 2644.320011138916 + }, + "isolatedSum": { + "p50": 2625.280022621155, + "p90": 2645.151972770691, + "p95": 2658.4320068359375, + "p99": 2693.4720277786255 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f8a8d8b3", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "b300_225a9503", + "comparisonKey": "45b1a9adcb0eaeb8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:02.938590+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.53600090742111, + "p90": 120.28799951076508, + "p95": 122.079998254776, + "p99": 133.02400708198547 + }, + "combine": { + "p50": 133.2480013370514, + "p90": 134.5919966697693, + "p95": 135.29600203037262, + "p99": 159.32799875736237 + }, + "roundtrip": { + "p50": 228.19200158119202, + "p90": 233.69599878787994, + "p95": 235.1360023021698, + "p99": 244.06400322914124 + }, + "isolatedSum": { + "p50": 250.7840022444725, + "p90": 254.87999618053436, + "p95": 257.3760002851486, + "p99": 292.35200583934784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.32800555229187, + "p90": 160.5439931154251, + "p95": 164.000004529953, + "p99": 176.41599476337433 + }, + "combine": { + "p50": 183.9359998703003, + "p90": 193.85600090026855, + "p95": 195.2960044145584, + "p99": 221.72799706459045 + }, + "roundtrip": { + "p50": 316.73601269721985, + "p90": 321.53600454330444, + "p95": 323.64800572395325, + "p99": 331.84000849723816 + }, + "isolatedSum": { + "p50": 339.26400542259216, + "p90": 354.39999401569366, + "p95": 359.2960089445114, + "p99": 398.1439918279648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 227.26400196552277, + "p90": 235.55199801921844, + "p95": 237.15199530124664, + "p99": 262.5280022621155 + }, + "combine": { + "p50": 331.32800459861755, + "p90": 337.2800052165985, + "p95": 343.29599142074585, + "p99": 356.3840091228485 + }, + "roundtrip": { + "p50": 537.663996219635, + "p90": 542.1760082244873, + "p95": 543.7120199203491, + "p99": 553.5359978675842 + }, + "isolatedSum": { + "p50": 558.5920065641403, + "p90": 572.832003235817, + "p95": 580.4479867219925, + "p99": 618.912011384964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 371.19999527931213, + "p90": 376.1279881000519, + "p95": 384.12800431251526, + "p99": 469.6640074253082 + }, + "combine": { + "p50": 589.6959900856018, + "p90": 600.0000238418579, + "p95": 600.928008556366, + "p99": 629.7600269317627 + }, + "roundtrip": { + "p50": 933.2799911499023, + "p90": 939.8080110549927, + "p95": 943.0080056190491, + "p99": 956.7999839782715 + }, + "isolatedSum": { + "p50": 960.8959853649139, + "p90": 976.1280119419098, + "p95": 985.0560128688812, + "p99": 1099.424034357071 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 674.1440296173096, + "p90": 685.4400038719177, + "p95": 690.1440024375916, + "p99": 735.3919744491577 + }, + "combine": { + "p50": 1071.7439651489258, + "p90": 1080.672025680542, + "p95": 1081.663966178894, + "p99": 1106.9120168685913 + }, + "roundtrip": { + 
"p50": 1727.1039485931396, + "p90": 1736.9920015335083, + "p95": 1740.2880191802979, + "p99": 1761.4400386810303 + }, + "isolatedSum": { + "p50": 1745.8879947662354, + "p90": 1766.1120295524597, + "p95": 1771.8079686164856, + "p99": 1842.303991317749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1309.0239763259888, + "p90": 1320.736050605774, + "p95": 1322.9440450668335, + "p99": 1364.799976348877 + }, + "combine": { + "p50": 2051.935911178589, + "p90": 2055.840015411377, + "p95": 2063.29607963562, + "p99": 2086.7838859558105 + }, + "roundtrip": { + "p50": 3346.656084060669, + "p90": 3358.367919921875, + "p95": 3363.039970397949, + "p99": 3387.903928756714 + }, + "isolatedSum": { + "p50": 3360.9598875045776, + "p90": 3376.576066017151, + "p95": 3386.2401247024536, + "p99": 3451.5838623046875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7874bbcf", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_0818b53b", + "comparisonKey": "cf52e526fc82b58b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:38.971102+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.2480000257492, + "p90": 127.29600071907043, + "p95": 128.22400033473969, + "p99": 132.57600367069244 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 123.36000055074692, + "p95": 124.35200065374374, + "p99": 137.66400516033173 + }, + "roundtrip": { + "p50": 213.05599808692932, + "p90": 219.200000166893, + "p95": 221.18400037288666, + "p99": 234.65600609779358 + }, + "isolatedSum": { + "p50": 247.51999974250793, + "p90": 
250.65600126981735, + "p95": 252.57600098848343, + "p99": 270.24000883102417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.83999454975128, + "p90": 162.84799575805664, + "p95": 163.93600404262543, + "p99": 175.26400089263916 + }, + "combine": { + "p50": 160.288006067276, + "p90": 170.3680008649826, + "p95": 171.26399278640747, + "p99": 184.51200425624847 + }, + "roundtrip": { + "p50": 288.5119915008545, + "p90": 295.3599989414215, + "p95": 297.4720001220703, + "p99": 316.76799058914185 + }, + "isolatedSum": { + "p50": 316.1280006170273, + "p90": 333.21599662303925, + "p95": 335.1999968290329, + "p99": 359.77600514888763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 223.00800681114197, + "p90": 226.33600234985352, + "p95": 228.19200158119202, + "p99": 251.80798768997192 + }, + "combine": { + "p50": 280.4799973964691, + "p90": 282.6560139656067, + "p95": 284.2240035533905, + "p99": 331.07200264930725 + }, + "roundtrip": { + "p50": 463.45600485801697, + "p90": 475.45599937438965, + "p95": 480.1279902458191, + "p99": 492.12801456451416 + }, + "isolatedSum": { + "p50": 503.4880042076111, + "p90": 508.9920163154602, + "p95": 512.4160051345825, + "p99": 582.8799903392792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 330.59200644493103, + "p90": 337.95198798179626, + "p95": 342.0799970626831, + "p99": 392.2240138053894 + }, + "combine": { + "p50": 465.503990650177, + "p90": 468.3839976787567, + "p95": 477.4079918861389, + "p99": 494.4320023059845 + }, + "roundtrip": { + "p50": 776.3199806213379, + "p90": 783.9679718017578, + "p95": 787.1360182762146, + "p99": 796.6079711914062 + }, + "isolatedSum": { + "p50": 796.095997095108, + "p90": 806.335985660553, + "p95": 819.487988948822, + "p99": 886.6560161113739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 592.9279923439026, + "p90": 602.0479798316956, + "p95": 604.1600108146667, + "p99": 618.2079911231995 + }, + "combine": { + "p50": 836.6079926490784, + "p90": 847.1360206604004, + "p95": 848.1600284576416, + "p99": 860.7680201530457 + }, + "roundtrip": { + "p50": 1409.567952156067, + "p90": 1417.4400568008423, + "p95": 1422.0800399780273, + "p99": 1443.3599710464478 + }, + "isolatedSum": { + "p50": 1429.535984992981, + "p90": 1449.184000492096, + "p95": 1452.3200392723083, + "p99": 1478.9760112762451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1087.6480340957642, + "p90": 1097.2800254821777, + "p95": 1099.6479988098145, + "p99": 1125.8879899978638 + }, + "combine": { + "p50": 1546.7519760131836, + "p90": 1550.1439571380615, + "p95": 1558.9439868927002, + "p99": 1571.5199708938599 + }, + "roundtrip": { 
+ "p50": 2606.719970703125, + "p90": 2616.7678833007812, + "p95": 2620.9919452667236, + "p99": 2632.607936859131 + }, + "isolatedSum": { + "p50": 2634.4000101089478, + "p90": 2647.4239826202393, + "p95": 2658.5919857025146, + "p99": 2697.4079608917236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-41af014f", + "identity": "b300|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "b300_e8881afb", + "comparisonKey": "3c538bd5cf73c55f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:13.225307+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + 
}, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.12799894809723, + "p90": 127.10399925708771, + "p95": 128.12800705432892, + "p99": 143.8400000333786 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 123.36000055074692, + "p95": 123.71200323104858, + "p99": 133.7919980287552 + }, + "roundtrip": { + "p50": 212.8639966249466, + "p90": 217.72800385951996, + "p95": 219.90400552749634, + "p99": 236.2239956855774 + }, + "isolatedSum": { + "p50": 246.39999866485596, + "p90": 250.46399980783463, + "p95": 251.8400102853775, + "p99": 277.6319980621338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.2320054769516, + "p90": 163.29599916934967, + "p95": 164.2879992723465, + "p99": 178.3680021762848 + }, + "combine": { + "p50": 160.3199988603592, + "p90": 170.20800709724426, + "p95": 170.8800047636032, + "p99": 184.03199315071106 + }, + "roundtrip": { + "p50": 289.66400027275085, + "p90": 296.7360019683838, + "p95": 298.3680069446564, + "p99": 310.14400720596313 + }, + "isolatedSum": { + "p50": 319.5520043373108, + "p90": 333.50400626659393, + "p95": 335.1680040359497, + "p99": 
362.39999532699585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 223.07200729846954, + "p90": 226.20800137519836, + "p95": 227.4560034275055, + "p99": 241.98399484157562 + }, + "combine": { + "p50": 274.1119861602783, + "p90": 282.368004322052, + "p95": 283.1999957561493, + "p99": 296.4160144329071 + }, + "roundtrip": { + "p50": 463.51999044418335, + "p90": 470.0480103492737, + "p95": 472.06398844718933, + "p99": 480.9280037879944 + }, + "isolatedSum": { + "p50": 497.18399345874786, + "p90": 508.57600569725037, + "p95": 510.6559991836548, + "p99": 538.4000092744827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 330.59200644493103, + "p90": 337.47199177742004, + "p95": 340.06398916244507, + "p99": 353.5040020942688 + }, + "combine": { + "p50": 465.5359983444214, + "p90": 468.32001209259033, + "p95": 477.24801301956177, + "p99": 504.41598892211914 + }, + "roundtrip": { + "p50": 775.8079767227173, + "p90": 783.4240198135376, + "p95": 787.775993347168, + "p99": 802.2400140762329 + }, + "isolatedSum": { + "p50": 796.1280047893524, + "p90": 805.7920038700104, + "p95": 817.3120021820068, + "p99": 857.9199910163879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + 
"p50": 593.1519865989685, + "p90": 602.1119952201843, + "p95": 604.1920185089111, + "p99": 617.247998714447 + }, + "combine": { + "p50": 836.6720080375671, + "p90": 847.1999764442444, + "p95": 848.3200073242188, + "p99": 859.4880104064941 + }, + "roundtrip": { + "p50": 1408.7040424346924, + "p90": 1416.2880182266235, + "p95": 1421.4080572128296, + "p99": 1434.0800046920776 + }, + "isolatedSum": { + "p50": 1429.8239946365356, + "p90": 1449.3119716644287, + "p95": 1452.5120258331299, + "p99": 1476.7360091209412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1088.320016860962, + "p90": 1098.9439487457275, + "p95": 1101.6960144042969, + "p99": 1111.5200519561768 + }, + "combine": { + "p50": 1546.9759702682495, + "p90": 1550.3040552139282, + "p95": 1559.4559907913208, + "p99": 1604.6080589294434 + }, + "roundtrip": { + "p50": 2607.6159477233887, + "p90": 2617.1839237213135, + "p95": 2620.192050933838, + "p99": 2626.8160343170166 + }, + "isolatedSum": { + "p50": 2635.2959871292114, + "p90": 2649.2480039596558, + "p95": 2661.1520051956177, + "p99": 2716.12811088562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a0ac1627", + "identity": "b300|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_e76916bc", + "comparisonKey": "7e1afd937c2c9b0f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:27.886754+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.0000034570694, + "p90": 114.59200084209442, + "p95": 115.80800265073776, + "p99": 175.9359985589981 + }, + "combine": { + "p50": 122.40000069141388, + "p90": 134.46399569511414, + "p95": 136.86400651931763, + "p99": 
147.96799421310425 + }, + "roundtrip": { + "p50": 199.8399943113327, + "p90": 202.97600328922272, + "p95": 205.24799823760986, + "p99": 223.77599775791168 + }, + "isolatedSum": { + "p50": 234.40000414848328, + "p90": 249.05599653720856, + "p95": 252.6720091700554, + "p99": 323.90399277210236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 148.00000190734863, + "p90": 151.0400027036667, + "p95": 151.90400183200836, + "p99": 165.3120070695877 + }, + "combine": { + "p50": 160.22400557994843, + "p90": 170.04799842834473, + "p95": 171.07200622558594, + "p99": 183.55199694633484 + }, + "roundtrip": { + "p50": 275.1680016517639, + "p90": 284.2560112476349, + "p95": 286.1439883708954, + "p99": 301.82400345802307 + }, + "isolatedSum": { + "p50": 308.22400748729706, + "p90": 321.0880011320114, + "p95": 322.9760080575943, + "p99": 348.86400401592255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 210.84800362586975, + "p90": 213.98399770259857, + "p95": 216.0319983959198, + "p99": 236.7040067911148 + }, + "combine": { + "p50": 270.9760069847107, + "p90": 281.21599555015564, + "p95": 282.1759879589081, + "p99": 334.49599146842957 + }, + "roundtrip": { + "p50": 451.84001326560974, + "p90": 457.0240080356598, + "p95": 459.1360092163086, + "p99": 471.1039960384369 + }, + "isolatedSum": { + "p50": 481.82401061058044, + "p90": 495.1999932527542, + "p95": 498.2079863548279, + "p99": 571.1999982595444 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 334.9759876728058, + "p90": 338.49599957466125, + "p95": 340.03201127052307, + "p99": 409.9839925765991 + }, + "combine": { + "p50": 465.8240079879761, + "p90": 469.02400255203247, + "p95": 477.53599286079407, + "p99": 489.76001143455505 + }, + "roundtrip": { + "p50": 771.8080282211304, + "p90": 778.2719731330872, + "p95": 783.1360101699829, + "p99": 790.336012840271 + }, + "isolatedSum": { + "p50": 800.7999956607819, + "p90": 807.5200021266937, + "p95": 817.5680041313171, + "p99": 899.7440040111542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 573.1840133666992, + "p90": 582.5920104980469, + "p95": 585.6000185012817, + "p99": 672.6080179214478 + }, + "combine": { + "p50": 826.8799781799316, + "p90": 835.968017578125, + "p95": 838.9760255813599, + "p99": 871.2319731712341 + }, + "roundtrip": { + "p50": 1378.8800239562988, + "p90": 1390.239953994751, + "p95": 1395.2959775924683, + "p99": 1407.2959423065186 + }, + "isolatedSum": { + "p50": 1400.0639915466309, + "p90": 1418.5600280761719, + "p95": 1424.5760440826416, + "p99": 1543.8399910926819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1058.9120388031006, + "p90": 
1064.0640258789062, + "p95": 1065.951943397522, + "p99": 1081.4080238342285 + }, + "combine": { + "p50": 1538.7519598007202, + "p90": 1550.5919456481934, + "p95": 1560.1919889450073, + "p99": 1621.1199760437012 + }, + "roundtrip": { + "p50": 2572.4799633026123, + "p90": 2584.3520164489746, + "p95": 2591.2320613861084, + "p99": 2605.3121089935303 + }, + "isolatedSum": { + "p50": 2597.663998603821, + "p90": 2614.6559715270996, + "p95": 2626.1439323425293, + "p99": 2702.5279998779297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ad9400cd", + "identity": "b300|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "b300_6fb604e7", + "comparisonKey": "0a01670b3772aaaf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:14.819807+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 168.7999963760376, + "p90": 199.64799284934998, + "p95": 227.13600099086761, + "p99": 312.8319978713989 + }, + "combine": { + "p50": 94.7519987821579, + "p90": 97.4079966545105, + "p95": 98.81599992513657, + "p99": 112.47999966144562 + }, + "roundtrip": { + "p50": 250.2720057964325, + "p90": 254.68799471855164, + "p95": 258.14399123191833, + "p99": 278.6880135536194 + }, + "isolatedSum": { + "p50": 263.5519951581955, + "p90": 297.0559895038605, + "p95": 325.9520009160042, + "p99": 425.31199753284454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 209.79200303554535, + "p90": 215.7759964466095, + "p95": 221.5680032968521, + "p99": 272.2879946231842 + }, + "combine": { + "p50": 130.72000443935394, + "p90": 143.00799369812012, + "p95": 149.08799529075623, + "p99": 172.54400253295898 + }, + "roundtrip": { + "p50": 326.911985874176, + "p90": 332.35201239585876, + "p95": 
336.06401085853577, + "p99": 354.43198680877686 + }, + "isolatedSum": { + "p50": 340.5120074748993, + "p90": 358.7839901447296, + "p95": 370.65599858760834, + "p99": 444.8319971561432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 289.63199257850647, + "p90": 306.8479895591736, + "p95": 311.8720054626465, + "p99": 364.47998881340027 + }, + "combine": { + "p50": 197.76000082492828, + "p90": 203.10400426387787, + "p95": 206.7520022392273, + "p99": 239.07199501991272 + }, + "roundtrip": { + "p50": 470.36799788475037, + "p90": 476.25601291656494, + "p95": 481.82401061058044, + "p99": 494.84801292419434 + }, + "isolatedSum": { + "p50": 487.39199340343475, + "p90": 509.95199382305145, + "p95": 518.6240077018738, + "p99": 603.551983833313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 478.2400131225586, + "p90": 485.8880043029785, + "p95": 490.1440143585205, + "p99": 559.008002281189 + }, + "combine": { + "p50": 384.19198989868164, + "p90": 392.5440013408661, + "p95": 396.60799503326416, + "p99": 419.1359877586365 + }, + "roundtrip": { + "p50": 858.5919737815857, + "p90": 866.4640188217163, + "p95": 870.6880211830139, + "p99": 884.5120072364807 + }, + "isolatedSum": { + "p50": 862.4320030212402, + "p90": 878.4320056438446, + "p95": 886.7520093917847, + "p99": 978.1439900398254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 
5558, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 857.4720025062561, + "p90": 871.8720078468323, + "p95": 877.7920007705688, + "p99": 969.2479968070984 + }, + "combine": { + "p50": 759.0399980545044, + "p90": 764.8000121116638, + "p95": 768.9599990844727, + "p99": 785.6000065803528 + }, + "roundtrip": { + "p50": 1584.5760107040405, + "p90": 1596.1600542068481, + "p95": 1604.1280031204224, + "p99": 1695.072054862976 + }, + "isolatedSum": { + "p50": 1616.5120005607605, + "p90": 1636.672019958496, + "p95": 1646.7519998550415, + "p99": 1754.8480033874512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1622.0159530639648, + "p90": 1635.424017906189, + "p95": 1643.2640552520752, + "p99": 1688.5119676589966 + }, + "combine": { + "p50": 1444.8000192642212, + "p90": 1452.9600143432617, + "p95": 1457.0560455322266, + "p99": 1474.7519493103027 + }, + "roundtrip": { + "p50": 3032.128095626831, + "p90": 3048.3200550079346, + "p95": 3054.624080657959, + "p99": 3133.375883102417 + }, + "isolatedSum": { + "p50": 3066.815972328186, + "p90": 3088.3840322494507, + "p95": 3100.3201007843018, + "p99": 3163.2639169692993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d1d4b495", + "identity": "b300|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "b300_6fb604e7", + "comparisonKey": 
"eb23866fcc99986d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:24.327371+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 175.9680062532425, + "p90": 181.85600638389587, + "p95": 183.3599954843521, + "p99": 196.4160054922104 
+ }, + "combine": { + "p50": 103.04000228643417, + "p90": 105.43999820947647, + "p95": 106.01600259542465, + "p99": 118.01599711179733 + }, + "roundtrip": { + "p50": 265.4080092906952, + "p90": 271.13598585128784, + "p95": 274.399995803833, + "p99": 290.23998975753784 + }, + "isolatedSum": { + "p50": 279.00800853967667, + "p90": 287.29600459337234, + "p95": 289.37599807977676, + "p99": 314.4320026040077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 221.18400037288666, + "p90": 227.743998169899, + "p95": 231.55200481414795, + "p99": 254.01601195335388 + }, + "combine": { + "p50": 138.8159990310669, + "p90": 142.4960047006607, + "p95": 144.896000623703, + "p99": 170.3680008649826 + }, + "roundtrip": { + "p50": 347.9999899864197, + "p90": 354.559987783432, + "p95": 360.25598645210266, + "p99": 373.24801087379456 + }, + "isolatedSum": { + "p50": 359.99999940395355, + "p90": 370.2400028705597, + "p95": 376.44800543785095, + "p99": 424.3840128183365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 315.2959942817688, + "p90": 320.76799869537354, + "p95": 323.5200047492981, + "p99": 373.53599071502686 + }, + "combine": { + "p50": 212.51200139522552, + "p90": 217.18400716781616, + "p95": 220.67199647426605, + "p99": 259.16799902915955 + }, + "roundtrip": { + "p50": 535.6159806251526, + "p90": 543.4880256652832, + "p95": 545.4720258712769, + "p99": 562.5600218772888 + }, + "isolatedSum": { + "p50": 527.8079956769943, + "p90": 
537.9520058631897, + "p95": 544.1920012235641, + "p99": 632.7039897441864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 529.375970363617, + "p90": 537.4720096588135, + "p95": 548.6400127410889, + "p99": 613.3120059967041 + }, + "combine": { + "p50": 438.27199935913086, + "p90": 443.77601146698, + "p95": 445.72800397872925, + "p99": 460.4479968547821 + }, + "roundtrip": { + "p50": 940.1919841766357, + "p90": 948.2240080833435, + "p95": 951.3919949531555, + "p99": 977.6639938354492 + }, + "isolatedSum": { + "p50": 967.6479697227478, + "p90": 981.2480211257935, + "p95": 994.3680167198181, + "p99": 1073.7600028514862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 955.0079703330994, + "p90": 965.3120040893555, + "p95": 971.3280200958252, + "p99": 1031.0720205307007 + }, + "combine": { + "p50": 784.7679853439331, + "p90": 792.9919958114624, + "p95": 798.8160252571106, + "p99": 823.9359855651855 + }, + "roundtrip": { + "p50": 1715.391993522644, + "p90": 1728.6399602890015, + "p95": 1733.9199781417847, + "p99": 1776.1280536651611 + }, + "isolatedSum": { + "p50": 1739.7759556770325, + "p90": 1758.3039999008179, + "p95": 1770.1440453529358, + "p99": 1855.0080060958862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1837.5999927520752, + "p90": 1852.6719808578491, + "p95": 1859.1680526733398, + "p99": 1889.9840116500854 + }, + "combine": { + "p50": 1479.423999786377, + "p90": 1488.54398727417, + "p95": 1496.575951576233, + "p99": 1547.327995300293 + }, + "roundtrip": { + "p50": 3287.9679203033447, + "p90": 3302.1440505981445, + "p95": 3307.8079223632812, + "p99": 3317.984104156494 + }, + "isolatedSum": { + "p50": 3317.023992538452, + "p90": 3341.215968132019, + "p95": 3355.7440042495728, + "p99": 3437.3120069503784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d559ee9d", + "identity": "b300|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_6fb604e7", + "comparisonKey": "97511fa9aeb8e024", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:37.512918+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 180.86400628089905, + "p90": 188.06399405002594, + "p95": 194.07999515533447, + "p99": 210.78400313854218 + }, + "combine": { + "p50": 109.53599959611893, + "p90": 112.12799698114395, + "p95": 113.40799927711487, + "p99": 125.2480000257492 + }, + "roundtrip": { + "p50": 279.07198667526245, + "p90": 283.55199098587036, + "p95": 286.20800375938416, + "p99": 303.71201038360596 + }, + "isolatedSum": { + "p50": 290.400005877018, + "p90": 300.1919910311699, + "p95": 307.48799443244934, + "p99": 336.0320031642914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 233.3119958639145, + "p90": 238.0480021238327, + "p95": 241.88800156116486, + "p99": 257.63198733329773 + }, + "combine": { + "p50": 149.4079977273941, + "p90": 152.8639942407608, + "p95": 153.9520025253296, + 
"p99": 163.83999586105347 + }, + "roundtrip": { + "p50": 376.0960102081299, + "p90": 381.21598958969116, + "p95": 384.2880129814148, + "p99": 399.9359905719757 + }, + "isolatedSum": { + "p50": 382.7199935913086, + "p90": 390.9119963645935, + "p95": 395.84000408649445, + "p99": 421.4719831943512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 356.1280071735382, + "p90": 361.56800389289856, + "p95": 364.54400420188904, + "p99": 428.79998683929443 + }, + "combine": { + "p50": 245.4719990491867, + "p90": 250.88000297546387, + "p95": 253.66398692131042, + "p99": 312.9599988460541 + }, + "roundtrip": { + "p50": 600.1279950141907, + "p90": 611.8080019950867, + "p95": 623.2320070266724, + "p99": 647.8400230407715 + }, + "isolatedSum": { + "p50": 601.6000062227249, + "p90": 612.4480068683624, + "p95": 618.2079911231995, + "p99": 741.7599856853485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 608.6400151252747, + "p90": 616.2559986114502, + "p95": 620.8639740943909, + "p99": 678.5920262336731 + }, + "combine": { + "p50": 444.4800019264221, + "p90": 448.63998889923096, + "p95": 450.23998618125916, + "p99": 467.5840139389038 + }, + "roundtrip": { + "p50": 1033.4080457687378, + "p90": 1042.207956314087, + "p95": 1046.94402217865, + "p99": 1057.2160482406616 + }, + "isolatedSum": { + "p50": 1053.1200170516968, + "p90": 1064.8959875106812, + "p95": 1071.10396027565, + "p99": 1146.176040172577 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1116.4480447769165, + "p90": 1123.968005180359, + "p95": 1128.7039518356323, + "p99": 1285.6639623641968 + }, + "combine": { + "p50": 803.48801612854, + "p90": 809.3760013580322, + "p95": 811.743974685669, + "p99": 832.1920037269592 + }, + "roundtrip": { + "p50": 1895.5199718475342, + "p90": 1904.736042022705, + "p95": 1909.6319675445557, + "p99": 1937.3760223388672 + }, + "isolatedSum": { + "p50": 1919.9360609054565, + "p90": 1933.344006538391, + "p95": 1940.4479265213013, + "p99": 2117.855966091156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2123.487949371338, + "p90": 2131.488084793091, + "p95": 2135.90407371521, + "p99": 2163.0399227142334 + }, + "combine": { + "p50": 1505.344033241272, + "p90": 1513.63205909729, + "p95": 1516.8639421463013, + "p99": 1536.0000133514404 + }, + "roundtrip": { + "p50": 3608.63995552063, + "p90": 3620.0320720672607, + "p95": 3625.119924545288, + "p99": 3643.615961074829 + }, + "isolatedSum": { + "p50": 3628.83198261261, + "p90": 3645.120143890381, + "p95": 3652.7680158615112, + "p99": 3699.039936065674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fd6111ca", + "identity": 
"b300|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_7a11eaa3", + "comparisonKey": "83647e27807ca331", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:39.988509+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.69600015878677, + "p90": 108.22399705648422, + "p95": 109.0880036354065, + "p99": 118.9119964838028 + }, + "combine": { + "p50": 115.26399850845337, + "p90": 118.04799735546112, + "p95": 119.35999989509583, + "p99": 134.5279961824417 + }, + "roundtrip": { + "p50": 243.26400458812714, + "p90": 246.91200256347656, + "p95": 249.4720071554184, + "p99": 272.99201488494873 + }, + "isolatedSum": { + "p50": 220.95999866724014, + "p90": 226.27199441194534, + "p95": 228.44800353050232, + "p99": 253.4399926662445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 138.0160003900528, + "p90": 145.28000354766846, + "p95": 149.56800639629364, + "p99": 159.4880074262619 + }, + "combine": { + "p50": 161.69600188732147, + "p90": 166.6560024023056, + "p95": 168.73599588871002, + "p99": 177.279993891716 + }, + "roundtrip": { + "p50": 349.5039939880371, + "p90": 356.8960130214691, + "p95": 369.8880076408386, + "p99": 383.7760090827942 + }, + "isolatedSum": { + "p50": 299.71200227737427, + "p90": 311.93600594997406, + "p95": 318.30400228500366, + "p99": 336.7680013179779 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 197.02400267124176, + "p90": 201.50400698184967, + "p95": 203.64800095558167, + "p99": 224.41600263118744 + }, + "combine": { + "p50": 280.5759906768799, + "p90": 287.55199909210205, + "p95": 293.66400837898254, + "p99": 367.8719997406006 + }, + "roundtrip": { + 
"p50": 575.0719904899597, + "p90": 581.3760161399841, + "p95": 584.447979927063, + "p99": 591.2320017814636 + }, + "isolatedSum": { + "p50": 477.59999334812164, + "p90": 489.0560060739517, + "p95": 497.3120093345642, + "p99": 592.288002371788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.9599988460541, + "p90": 319.7120130062103, + "p95": 323.93598556518555, + "p99": 349.2799997329712 + }, + "combine": { + "p50": 461.12000942230225, + "p90": 465.88799357414246, + "p95": 468.35198998451233, + "p99": 487.4880015850067 + }, + "roundtrip": { + "p50": 997.0239996910095, + "p90": 1005.8879852294922, + "p95": 1011.9999647140503, + "p99": 1033.471941947937 + }, + "isolatedSum": { + "p50": 774.0800082683563, + "p90": 785.6000065803528, + "p95": 792.2879755496979, + "p99": 836.7680013179779 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 556.7359924316406, + "p90": 564.0959739685059, + "p95": 569.3439841270447, + "p99": 635.487973690033 + }, + "combine": { + "p50": 825.6000280380249, + "p90": 832.863986492157, + "p95": 836.9600176811218, + "p99": 855.2960157394409 + }, + "roundtrip": { + "p50": 1842.4960374832153, + "p90": 1853.119969367981, + "p95": 1859.2640161514282, + "p99": 2011.5199089050293 + }, + "isolatedSum": { + "p50": 1382.3360204696655, + "p90": 1396.9599604606628, + "p95": 1406.3040018081665, + "p99": 1490.7839894294739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 
1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1023.7760543823242, + "p90": 1032.7359437942505, + "p95": 1036.352038383484, + "p99": 1058.751940727234 + }, + "combine": { + "p50": 1537.11998462677, + "p90": 1546.7519760131836, + "p95": 1551.2319803237915, + "p99": 1567.5519704818726 + }, + "roundtrip": { + "p50": 3500.5440711975098, + "p90": 3511.45601272583, + "p95": 3516.8960094451904, + "p99": 3528.5439491271973 + }, + "isolatedSum": { + "p50": 2560.8960390090942, + "p90": 2579.487919807434, + "p95": 2587.5840187072754, + "p99": 2626.3039112091064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-404a22ca", + "identity": "b300|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_6fb604e7", + "comparisonKey": "3eb0bbdbc12fabc5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:54.489051+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": 
"fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 186.3040030002594, + "p90": 190.49599766731262, + "p95": 193.53599846363068, + "p99": 209.18400585651398 + }, + "combine": { + "p50": 115.23199826478958, + "p90": 117.88800358772278, + "p95": 119.48800086975098, + "p99": 139.13600146770477 + }, + "roundtrip": { + "p50": 290.3999984264374, + "p90": 295.1680123806, + "p95": 298.0799973011017, + "p99": 310.65601110458374 + }, + "isolatedSum": { + "p50": 301.536001265049, + "p90": 308.3840012550354, + "p95": 313.02399933338165, + "p99": 348.32000732421875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 240.60800671577454, + "p90": 245.1840043067932, + "p95": 
247.9040026664734, + "p99": 257.9199969768524 + }, + "combine": { + "p50": 158.9439958333969, + "p90": 162.75200247764587, + "p95": 165.0560051202774, + "p99": 189.31199610233307 + }, + "roundtrip": { + "p50": 395.80801129341125, + "p90": 401.8239974975586, + "p95": 404.7679901123047, + "p99": 421.63199186325073 + }, + "isolatedSum": { + "p50": 399.55200254917145, + "p90": 407.9360067844391, + "p95": 412.9600077867508, + "p99": 447.2319930791855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 380.0959885120392, + "p90": 385.24800539016724, + "p95": 387.7759873867035, + "p99": 414.11200165748596 + }, + "combine": { + "p50": 276.41600370407104, + "p90": 284.09600257873535, + "p95": 291.6159927845001, + "p99": 311.0400140285492 + }, + "roundtrip": { + "p50": 637.3760104179382, + "p90": 644.7039842605591, + "p95": 651.4880061149597, + "p99": 700.767993927002 + }, + "isolatedSum": { + "p50": 656.5119922161102, + "p90": 669.3440079689026, + "p95": 679.3919801712036, + "p99": 725.1520156860352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 650.592029094696, + "p90": 657.7280163764954, + "p95": 662.4640226364136, + "p99": 695.8400011062622 + }, + "combine": { + "p50": 460.4479968547821, + "p90": 465.9520089626312, + "p95": 470.0799882411957, + "p99": 481.5039932727814 + }, + "roundtrip": { + "p50": 1089.5359516143799, + "p90": 1097.7920293807983, + "p95": 1103.2960414886475, + "p99": 1114.5919561386108 + }, + "isolatedSum": { + 
"p50": 1111.0400259494781, + "p90": 1123.6800253391266, + "p95": 1132.5440108776093, + "p99": 1177.3439943790436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1205.4719924926758, + "p90": 1212.5439643859863, + "p95": 1217.2479629516602, + "p99": 1225.6959676742554 + }, + "combine": { + "p50": 825.7279992103577, + "p90": 832.9280018806458, + "p95": 836.8639945983887, + "p99": 848.800003528595 + }, + "roundtrip": { + "p50": 2008.9600086212158, + "p90": 2019.2320346832275, + "p95": 2023.4880447387695, + "p99": 2034.3360900878906 + }, + "isolatedSum": { + "p50": 2031.1999917030334, + "p90": 2045.471966266632, + "p95": 2054.111957550049, + "p99": 2074.4959712028503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2293.1840419769287, + "p90": 2304.03208732605, + "p95": 2308.6719512939453, + "p99": 2326.94411277771 + }, + "combine": { + "p50": 1536.736011505127, + "p90": 1546.1119413375854, + "p95": 1550.3360033035278, + "p99": 1576.5119791030884 + }, + "roundtrip": { + "p50": 3813.663959503174, + "p90": 3824.76806640625, + "p95": 3830.5280208587646, + "p99": 3852.735996246338 + }, + "isolatedSum": { + "p50": 3829.9200534820557, + "p90": 3850.1440286636353, + "p95": 3859.007954597473, + "p99": 3903.4560918807983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-edfe832d", + "identity": "b300|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "b300_6fb604e7", + "comparisonKey": "e6e64d76073ed9ec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:03.993862+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", 
+ "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 190.46400487422943, + "p90": 194.240003824234, + "p95": 196.22400403022766, + "p99": 209.56799387931824 + }, + "combine": { + "p50": 116.2559986114502, + "p90": 118.97599697113037, + "p95": 120.44800072908401, + "p99": 144.896000623703 + }, + "roundtrip": { + "p50": 294.624000787735, + "p90": 299.23200607299805, + "p95": 301.0239899158478, + "p99": 309.88800525665283 + }, + "isolatedSum": { + "p50": 306.7200034856796, + "p90": 313.2160007953644, + "p95": 316.6720047593117, + "p99": 354.46399450302124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 244.22399699687958, + "p90": 252.00000405311584, + "p95": 260.0319981575012, + "p99": 278.9439857006073 + }, + "combine": { + "p50": 161.18399798870087, + "p90": 165.98400473594666, + "p95": 169.0240055322647, + "p99": 213.6320024728775 + }, + "roundtrip": { + "p50": 398.6560106277466, + "p90": 403.9680063724518, + "p95": 407.8719913959503, + "p99": 427.839994430542 + }, + "isolatedSum": { + "p50": 405.40799498558044, + "p90": 417.9840087890625, + "p95": 429.05600368976593, + "p99": 492.5759881734848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 378.62399220466614, + "p90": 382.6560080051422, + "p95": 384.89601016044617, + "p99": 405.63198924064636 + }, + "combine": { + "p50": 273.6000120639801, + "p90": 277.536004781723, + "p95": 
279.9679934978485, + "p99": 292.1279966831207 + }, + "roundtrip": { + "p50": 636.5439891815186, + "p90": 643.1999802589417, + "p95": 646.8160152435303, + "p99": 665.0559902191162 + }, + "isolatedSum": { + "p50": 652.2240042686462, + "p90": 660.1920127868652, + "p95": 664.8640036582947, + "p99": 697.7599859237671 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 650.592029094696, + "p90": 658.4960222244263, + "p95": 664.031982421875, + "p99": 727.295994758606 + }, + "combine": { + "p50": 465.8240079879761, + "p90": 472.4479913711548, + "p95": 477.4720072746277, + "p99": 492.8640127182007 + }, + "roundtrip": { + "p50": 1095.6480503082275, + "p90": 1105.3760051727295, + "p95": 1110.3999614715576, + "p99": 1127.519965171814 + }, + "isolatedSum": { + "p50": 1116.4160370826721, + "p90": 1130.944013595581, + "p95": 1141.5039896965027, + "p99": 1220.1600074768066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1193.6960220336914, + "p90": 1201.5680074691772, + "p95": 1207.9999446868896, + "p99": 1270.5600261688232 + }, + "combine": { + "p50": 816.7679905891418, + "p90": 824.288010597229, + "p95": 828.2240033149719, + "p99": 842.2719836235046 + }, + "roundtrip": { + "p50": 1991.5200471878052, + "p90": 2003.904104232788, + "p95": 2011.9678974151611, + "p99": 2260.1280212402344 + }, + "isolatedSum": { + "p50": 2010.4640126228333, + "p90": 2025.8560180664062, + "p95": 2036.2239480018616, + "p99": 2112.832009792328 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2286.2401008605957, + "p90": 2295.520067214966, + "p95": 2300.1599311828613, + "p99": 2315.4239654541016 + }, + "combine": { + "p50": 1520.8959579467773, + "p90": 1530.6559801101685, + "p95": 1535.0719690322876, + "p99": 1549.407958984375 + }, + "roundtrip": { + "p50": 3788.6719703674316, + "p90": 3803.6160469055176, + "p95": 3810.784101486206, + "p99": 3902.1120071411133 + }, + "isolatedSum": { + "p50": 3807.136058807373, + "p90": 3826.1760473251343, + "p95": 3835.231900215149, + "p99": 3864.8319244384766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-284561e6", + "identity": "b300|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "b300_29dcc6db", + "comparisonKey": "8592c064d8d5dd0c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:45.535027+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "b300-nv_12", + "sku": "b300", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "b300-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "B300 EP8 · uccl · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + 
"routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1351, + "configuredUnits": 20, + "deviceUnits": 148, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577794209", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577794209", + "createdAt": "2026-07-02T08:53:49Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.7519987821579, + "p90": 97.6639986038208, + "p95": 99.0080013871193, + "p99": 137.2479945421219 + }, + "combine": { + "p50": 115.7120019197464, + "p90": 118.43200027942657, + "p95": 120.03199756145477, + "p99": 133.82400572299957 + }, + "roundtrip": { + "p50": 232.41600394248962, + "p90": 236.38400435447693, + "p95": 238.71999979019165, + "p99": 269.0559923648834 + }, + "isolatedSum": { + "p50": 210.4640007019043, + "p90": 216.09599888324738, + "p95": 219.03999894857407, + "p99": 271.07200026512146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 126.56000256538391, + "p90": 130.62399625778198, + "p95": 134.65599715709686, + "p99": 157.1200042963028 + }, + "combine": { + "p50": 161.0880047082901, + "p90": 166.49599373340607, + "p95": 168.57600212097168, + "p99": 179.967999458313 + }, + "roundtrip": { + "p50": 338.6879861354828, + "p90": 343.8720107078552, + "p95": 348.03199768066406, + "p99": 391.1359906196594 + }, + "isolatedSum": { + "p50": 287.648007273674, + "p90": 297.11998999118805, + "p95": 303.23199927806854, + "p99": 337.0880037546158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 185.18400192260742, + "p90": 189.91999328136444, + "p95": 192.51200556755066, + "p99": 201.85600221157074 + }, + "combine": { + "p50": 279.83999252319336, + "p90": 284.35200452804565, + "p95": 286.4319980144501, + "p99": 310.68798899650574 + }, + "roundtrip": { + "p50": 564.5760297775269, + "p90": 571.7759728431702, + "p95": 574.400007724762, + "p99": 586.080014705658 + }, + "isolatedSum": { + "p50": 465.0239944458008, + "p90": 474.2719978094101, + "p95": 478.94400358200073, + "p99": 512.5439912080765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 301.7919957637787, + "p90": 307.8719973564148, + "p95": 312.3840093612671, + "p99": 370.7520067691803 + }, + "combine": { + "p50": 461.34400367736816, + "p90": 466.1119878292084, + "p95": 470.2720046043396, + "p99": 480.4159998893738 + }, + "roundtrip": { + "p50": 
987.9999756813049, + "p90": 997.1519708633423, + "p95": 1003.2320022583008, + "p99": 1013.5999917984009 + }, + "isolatedSum": { + "p50": 763.1359994411469, + "p90": 773.9839851856232, + "p95": 782.6560139656067, + "p99": 851.1680066585541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 537.7600193023682, + "p90": 545.4080104827881, + "p95": 548.9280223846436, + "p99": 622.1759915351868 + }, + "combine": { + "p50": 826.0160088539124, + "p90": 833.0879807472229, + "p95": 837.440013885498, + "p99": 853.056013584137 + }, + "roundtrip": { + "p50": 1824.0959644317627, + "p90": 1833.5039615631104, + "p95": 1837.440013885498, + "p99": 1848.8320112228394 + }, + "isolatedSum": { + "p50": 1363.7760281562805, + "p90": 1378.495991230011, + "p95": 1386.3680362701416, + "p99": 1475.2320051193237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 990.4320240020752, + "p90": 1001.4400482177734, + "p95": 1006.2719583511353, + "p99": 1026.9440412521362 + }, + "combine": { + "p50": 1537.1840000152588, + "p90": 1548.192024230957, + "p95": 1554.7840595245361, + "p99": 1560.479998588562 + }, + "roundtrip": { + "p50": 3467.360019683838, + "p90": 3483.2639694213867, + "p95": 3489.9840354919434, + "p99": 3558.784008026123 + }, + "isolatedSum": { + "p50": 2527.616024017334, + "p90": 2549.6320724487305, + "p95": 2561.0560178756714, + "p99": 2587.4240398406982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + 
"combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d21f2836", + "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||6c4175e2b7b86cb", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "1c88d04356617519", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:42.534590+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6c4175e2b7b86cb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + 
"run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 88.128000497818, + "p90": 97.9200005531311, + "p95": 101.72799974679947, + "p99": 111.35999858379364 + }, + "combine": { + "p50": 63.19999694824219, + "p90": 69.2799985408783, + "p95": 70.68800181150436, + "p99": 78.27199995517731 + }, + "roundtrip": { + "p50": 129.7920048236847, + "p90": 139.3280029296875, + "p95": 142.94399321079254, + "p99": 152.67199277877808 + }, + "isolatedSum": { + "p50": 151.32799744606018, + "p90": 167.1999990940094, + "p95": 172.41600155830383, + "p99": 189.63199853897095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 131072, + "combineLogicalBytes": 131072, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 88.16000074148178, + "p90": 97.18400239944458, + "p95": 101.69599950313568, + "p99": 108.92800241708755 + }, + "combine": { + "p50": 65.37599861621857, + "p90": 70.04799693822861, + "p95": 71.32799923419952, + "p99": 74.36800003051758 + }, + "roundtrip": { + "p50": 130.3039938211441, + "p90": 139.8400068283081, + "p95": 143.00799369812012, + "p99": 152.16000378131866 + }, + "isolatedSum": { + "p50": 153.53599935770035, + "p90": 167.2319993376732, + "p95": 173.0239987373352, + "p99": 183.29600244760513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 245760, + "combineLogicalBytes": 245760, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 88.54400366544724, + "p90": 98.39999675750732, + "p95": 102.36799716949463, + "p99": 
110.04800349473953 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 70.39999961853027, + "p95": 72.4480003118515, + "p99": 76.19199901819229 + }, + "roundtrip": { + "p50": 131.9359987974167, + "p90": 142.36800372600555, + "p95": 146.30399644374847, + "p99": 154.7520011663437 + }, + "isolatedSum": { + "p50": 154.78400141000748, + "p90": 168.7999963760376, + "p95": 174.81599748134613, + "p99": 186.24000251293182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 491520, + "combineLogicalBytes": 491520, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 88.67199718952179, + "p90": 97.34400361776352, + "p95": 101.08800232410431, + "p99": 109.66400057077408 + }, + "combine": { + "p50": 67.74400174617767, + "p90": 72.28799909353256, + "p95": 73.7600028514862, + "p99": 81.50400221347809 + }, + "roundtrip": { + "p50": 133.85599851608276, + "p90": 143.0400013923645, + "p95": 146.59200608730316, + "p99": 153.18399667739868 + }, + "isolatedSum": { + "p50": 156.41599893569946, + "p90": 169.63200271129608, + "p95": 174.84800517559052, + "p99": 191.16800278425217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 958464, + "combineLogicalBytes": 958464, + "fanoutMean": 3.65625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 88.95999938249588, + "p90": 98.94400089979172, + "p95": 103.2319962978363, + "p99": 108.47999900579453 + }, + "combine": { + "p50": 67.87200272083282, + "p90": 72.41600006818771, + "p95": 74.01599735021591, + "p99": 80.83199709653854 + }, + "roundtrip": { + "p50": 133.95200669765472, + "p90": 142.84799993038177, + "p95": 146.65600657463074, + "p99": 153.60000729560852 + }, + "isolatedSum": { + "p50": 156.8320021033287, + "p90": 171.36000096797943, + 
"p95": 177.24799364805222, + "p99": 189.31199610233307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1884160, + "combineLogicalBytes": 1884160, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 89.91999924182892, + "p90": 99.55199807882309, + "p95": 102.65599936246872, + "p99": 113.98400366306305 + }, + "combine": { + "p50": 67.74400174617767, + "p90": 72.4480003118515, + "p95": 73.95199686288834, + "p99": 79.83999699354172 + }, + "roundtrip": { + "p50": 135.55200397968292, + "p90": 144.76799964904785, + "p95": 148.3840048313141, + "p99": 155.39200603961945 + }, + "isolatedSum": { + "p50": 157.6640009880066, + "p90": 171.9999983906746, + "p95": 176.60799622535706, + "p99": 193.82400065660477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3768320, + "combineLogicalBytes": 3768320, + "fanoutMean": 3.59375, + "recvTokensMax": 121, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 99.84000027179718, + "p90": 106.6880002617836, + "p95": 110.52799969911575, + "p99": 116.15999788045883 + }, + "combine": { + "p50": 71.23199850320816, + "p90": 75.87199658155441, + "p95": 78.59200239181519, + "p99": 84.70399677753448 + }, + "roundtrip": { + "p50": 143.16800236701965, + "p90": 151.2639969587326, + "p95": 155.03999590873718, + "p99": 165.95199704170227 + }, + "isolatedSum": { + "p50": 171.07199877500534, + "p90": 182.559996843338, + "p95": 189.12000209093094, + "p99": 200.86399465799332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7585792, + "combineLogicalBytes": 7585792, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + 
"p50": 111.455999314785, + "p90": 119.32799965143204, + "p95": 122.20799922943115, + "p99": 131.32800161838531 + }, + "combine": { + "p50": 81.24800026416779, + "p90": 86.04799956083298, + "p95": 87.45600283145905, + "p99": 92.16000139713287 + }, + "roundtrip": { + "p50": 166.01599752902985, + "p90": 174.30399358272552, + "p95": 177.18400061130524, + "p99": 185.08799374103546 + }, + "isolatedSum": { + "p50": 192.7039995789528, + "p90": 205.37599921226501, + "p95": 209.6640020608902, + "p99": 223.4880030155182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 15294464, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2c825d77", + "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||3bda3dd7d4e88bf", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "3e04a6a0a6c2c447", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:33.577458+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3bda3dd7d4e88bf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 89.05600011348724, + "p90": 99.67999905347824, + "p95": 104.06400263309479, + "p99": 112.83200234174728 + }, + "combine": { + "p50": 65.11999666690826, + "p90": 71.87200337648392, + "p95": 73.5040009021759, + "p99": 80.19199967384338 + }, + "roundtrip": { + "p50": 134.14399325847626, + "p90": 142.81600713729858, + "p95": 146.4959979057312, + "p99": 153.08800339698792 + }, + "isolatedSum": { + "p50": 154.1759967803955, + "p90": 171.55200242996216, + "p95": 177.5680035352707, + "p99": 193.02400201559067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 153600, + "combineLogicalBytes": 153600, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 89.9839997291565, + "p90": 100.0640019774437, + "p95": 103.7760004401207, + "p99": 116.70400202274323 + }, + "combine": { + "p50": 70.27199864387512, + "p90": 73.85600358247757, + "p95": 75.58400183916092, + "p99": 83.96799862384796 + }, + "roundtrip": { + "p50": 135.903999209404, + "p90": 
145.56799829006195, + "p95": 149.88799393177032, + "p99": 157.98400342464447 + }, + "isolatedSum": { + "p50": 160.25599837303162, + "p90": 173.92000555992126, + "p95": 179.36000227928162, + "p99": 200.6720006465912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 307200, + "combineLogicalBytes": 307200, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 90.36800265312195, + "p90": 101.69599950313568, + "p95": 106.49599879980087, + "p99": 114.49600011110306 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 73.53600114583969, + "p95": 75.23199915885925, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 136.06399297714233, + "p90": 145.50399780273438, + "p95": 151.48800611495972, + "p99": 160.47999262809753 + }, + "isolatedSum": { + "p50": 160.89600324630737, + "p90": 175.23200064897537, + "p95": 181.72799795866013, + "p99": 194.91200149059296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 604160, + "combineLogicalBytes": 604160, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 90.7839983701706, + "p90": 101.24800354242325, + "p95": 105.53599894046783, + "p99": 114.88000303506851 + }, + "combine": { + "p50": 70.62400132417679, + "p90": 74.01599735021591, + "p95": 75.58400183916092, + "p99": 84.16000008583069 + }, + "roundtrip": { + "p50": 138.5280042886734, + "p90": 147.96799421310425, + "p95": 152.8320014476776, + "p99": 157.6319932937622 + }, + "isolatedSum": { + "p50": 161.40799969434738, + "p90": 175.26400089263916, + "p95": 181.12000077962875, + "p99": 199.0400031208992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1208320, + "combineLogicalBytes": 1208320, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 90.20800143480301, + "p90": 99.0080013871193, + "p95": 102.11200267076492, + "p99": 110.17599701881409 + }, + "combine": { + "p50": 70.592001080513, + "p90": 73.88799637556076, + "p95": 75.07199794054031, + "p99": 81.95199817419052 + }, + "roundtrip": { + "p50": 139.52000439167023, + "p90": 149.56800639629364, + "p95": 154.52800691127777, + "p99": 161.43999993801117 + }, + "isolatedSum": { + "p50": 160.800002515316, + "p90": 172.89599776268005, + "p95": 177.18400061130524, + "p99": 192.1279951930046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2447360, + "combineLogicalBytes": 2447360, + "fanoutMean": 3.734375, + "recvTokensMax": 62, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 91.07200056314468, + "p90": 100.99200159311295, + "p95": 104.22399640083313, + "p99": 110.07999628782272 + }, + "combine": { + "p50": 70.75200229883194, + "p90": 74.14399832487106, + "p95": 75.42400062084198, + "p99": 82.24000036716461 + }, + "roundtrip": { + "p50": 141.95199310779572, + "p90": 152.28800475597382, + "p95": 155.61600029468536, + "p99": 162.4639928340912 + }, + "isolatedSum": { + "p50": 161.82400286197662, + "p90": 175.135999917984, + "p95": 179.6479970216751, + "p99": 192.31999665498734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4792320, + "combineLogicalBytes": 4792320, + "fanoutMean": 3.65625, + "recvTokensMax": 122, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 102.65599936246872, + "p90": 110.46399921178818, + "p95": 115.42399972677231, + "p99": 122.17599898576736 + }, + "combine": { + "p50": 74.46400076150894, + "p90": 79.23199981451035, + "p95": 
83.26400071382523, + "p99": 88.60799670219421 + }, + "roundtrip": { + "p50": 149.79200065135956, + "p90": 156.80000185966492, + "p95": 159.90400314331055, + "p99": 165.8879965543747 + }, + "isolatedSum": { + "p50": 177.12000012397766, + "p90": 189.69599902629852, + "p95": 198.68800044059753, + "p99": 210.78399568796158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9523200, + "combineLogicalBytes": 9523200, + "fanoutMean": 3.6328125, + "recvTokensMax": 242, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 112.86400258541107, + "p90": 120.06399780511856, + "p95": 123.80799651145935, + "p99": 129.95199859142303 + }, + "combine": { + "p50": 86.27200126647949, + "p90": 93.21600198745728, + "p95": 95.61599791049957, + "p99": 98.9760011434555 + }, + "roundtrip": { + "p50": 172.57599532604218, + "p90": 179.4240027666092, + "p95": 181.7920058965683, + "p99": 190.23999571800232 + }, + "isolatedSum": { + "p50": 199.13600385189056, + "p90": 213.27999979257584, + "p95": 219.42399442195892, + "p99": 228.92799973487854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19097600, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b156a70a", + "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "7992eabfd5e5436d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:25.183965+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 91.10400080680847, + "p90": 105.40799796581268, + "p95": 115.48800021409988, + "p99": 153.34400534629822 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 109.50399935245514, + "p95": 114.1119971871376, + "p99": 146.7839926481247 + }, + "roundtrip": { + "p50": 139.80799913406372, + "p90": 161.24799847602844, + "p95": 180.67200481891632, + "p99": 190.3039962053299 + }, + "isolatedSum": { + "p50": 162.30399906635284, + "p90": 214.91199731826782, + "p95": 229.5999974012375, + "p99": 
300.1279979944229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 184320, + "combineLogicalBytes": 184320, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 91.26400202512741, + "p90": 103.93600165843964, + "p95": 111.1999973654747, + "p99": 148.41599762439728 + }, + "combine": { + "p50": 71.1359977722168, + "p90": 90.4960036277771, + "p95": 109.47199910879135, + "p99": 135.1040005683899 + }, + "roundtrip": { + "p50": 140.54399728775024, + "p90": 154.52800691127777, + "p95": 161.6639941930771, + "p99": 186.39999628067017 + }, + "isolatedSum": { + "p50": 162.3999997973442, + "p90": 194.43200528621674, + "p95": 220.67199647426605, + "p99": 283.51999819278717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368640, + "combineLogicalBytes": 368640, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 101.88800096511841, + "p90": 147.64800667762756, + "p95": 151.74399316310883, + "p99": 164.70399498939514 + }, + "combine": { + "p50": 72.09599763154984, + "p90": 99.5199978351593, + "p95": 110.6560006737709, + "p99": 133.7919980287552 + }, + "roundtrip": { + "p50": 150.2400040626526, + "p90": 187.58399784564972, + "p95": 194.46399807929993, + "p99": 205.9520035982132 + }, + "isolatedSum": { + "p50": 173.98399859666824, + "p90": 247.16800451278687, + "p95": 262.39999383687973, + "p99": 298.49599301815033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 712704, + "combineLogicalBytes": 712704, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 93.37600320577621, + "p90": 139.1039937734604, + "p95": 
150.04800260066986, + "p99": 163.39200735092163 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 110.91200262308121, + "p95": 122.23999947309494, + "p99": 145.31199634075165 + }, + "roundtrip": { + "p50": 144.99199390411377, + "p90": 181.66400492191315, + "p95": 195.10400295257568, + "p99": 214.33599293231964 + }, + "isolatedSum": { + "p50": 167.10400581359863, + "p90": 250.0159963965416, + "p95": 272.2880020737648, + "p99": 308.7040036916733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1400832, + "combineLogicalBytes": 1400832, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 94.14400160312653, + "p90": 131.071999669075, + "p95": 148.25600385665894, + "p99": 158.720001578331 + }, + "combine": { + "p50": 74.27199929952621, + "p90": 109.15199667215347, + "p95": 115.61600118875504, + "p99": 134.8479986190796 + }, + "roundtrip": { + "p50": 147.39200472831726, + "p90": 184.06400084495544, + "p95": 192.19200313091278, + "p99": 212.41599321365356 + }, + "isolatedSum": { + "p50": 168.41600090265274, + "p90": 240.22399634122849, + "p95": 263.87200504541397, + "p99": 293.5680001974106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2826240, + "combineLogicalBytes": 2826240, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 102.11200267076492, + "p90": 148.22399616241455, + "p95": 152.41600573062897, + "p99": 166.6879951953888 + }, + "combine": { + "p50": 74.43200051784515, + "p90": 108.76800119876862, + "p95": 119.74400281906128, + "p99": 139.80799913406372 + }, + "roundtrip": { + "p50": 146.55999839305878, + "p90": 170.78399658203125, + "p95": 185.63200533390045, + "p99": 211.35999262332916 + }, + "isolatedSum": { + "p50": 
176.54400318861008, + "p90": 256.99199736118317, + "p95": 272.16000854969025, + "p99": 306.4959943294525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5750784, + "combineLogicalBytes": 5750784, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 110.944002866745, + "p90": 148.03199470043182, + "p95": 153.76000106334686, + "p99": 167.61599481105804 + }, + "combine": { + "p50": 88.51200342178345, + "p90": 124.57600235939026, + "p95": 142.87999272346497, + "p99": 148.47999811172485 + }, + "roundtrip": { + "p50": 170.33599317073822, + "p90": 197.66399264335632, + "p95": 204.3199986219406, + "p99": 222.56000339984894 + }, + "isolatedSum": { + "p50": 199.45600628852844, + "p90": 272.6079970598221, + "p95": 296.63999378681183, + "p99": 316.0959929227829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11587584, + "combineLogicalBytes": 11587584, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 128.48000228405, + "p90": 155.87200224399567, + "p95": 164.63999450206757, + "p99": 184.1599941253662 + }, + "combine": { + "p50": 102.78400033712387, + "p90": 149.9200016260147, + "p95": 156.54399991035461, + "p99": 161.15200519561768 + }, + "roundtrip": { + "p50": 191.77600741386414, + "p90": 228.2239943742752, + "p95": 236.60799860954285, + "p99": 247.871994972229 + }, + "isolatedSum": { + "p50": 231.26400262117386, + "p90": 305.7920038700104, + "p95": 321.1839944124222, + "p99": 345.3119993209839 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22941696, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, 
+ { + "id": "cx-91d99255", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_abf250ec", + "comparisonKey": "cd89142ce006ddb2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:14.853303+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 87.67999708652496, + "p90": 99.20000284910202, + "p95": 104.16000336408615, + "p99": 111.87200248241425 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 73.7600028514862, + "p95": 77.40800082683563, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 139.29599523544312, + "p90": 148.6400067806244, + "p95": 153.1199961900711, + "p99": 161.28000617027283 + }, + "isolatedSum": { + "p50": 157.69599378108978, + "p90": 172.96000570058823, + "p95": 181.56800419092178, + "p99": 199.90400224924088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 87.87199854850769, + "p90": 99.55199807882309, + "p95": 103.64799946546555, + "p99": 113.15199732780457 + }, + "combine": { + "p50": 71.35999947786331, + "p90": 75.03999769687653, + "p95": 77.79199630022049, + "p99": 82.04799890518188 + }, + "roundtrip": { + "p50": 141.27999544143677, + "p90": 152.0960032939911, + "p95": 155.7759940624237, + "p99": 165.69599509239197 + }, + "isolatedSum": { + "p50": 159.231998026371, + "p90": 174.59199577569962, + "p95": 181.43999576568604, + "p99": 195.19999623298645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 89.34400230646133, + "p90": 99.61599856615067, + "p95": 103.7760004401207, + "p99": 113.8560026884079 + }, + "combine": { + "p50": 71.9040036201477, + "p90": 75.6480023264885, + "p95": 79.74400371313095, + "p99": 88.128000497818 + }, + "roundtrip": 
{ + "p50": 142.68800616264343, + "p90": 152.25599706172943, + "p95": 155.90399503707886, + "p99": 163.03999722003937 + }, + "isolatedSum": { + "p50": 161.24800592660904, + "p90": 175.26400089263916, + "p95": 183.52000415325165, + "p99": 201.9840031862259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 88.51200342178345, + "p90": 98.65599870681763, + "p95": 102.1760031580925, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 74.78400319814682, + "p90": 82.17599987983704, + "p95": 83.96799862384796, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 143.61600577831268, + "p90": 153.02400290966034, + "p95": 156.2879979610443, + "p99": 161.5999937057495 + }, + "isolatedSum": { + "p50": 163.29600661993027, + "p90": 180.83199858665466, + "p95": 186.14400178194046, + "p99": 202.39999890327454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 89.91999924182892, + "p90": 101.05600208044052, + "p95": 103.93600165843964, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 74.75200295448303, + "p90": 82.20800012350082, + "p95": 83.80799740552902, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 144.03200149536133, + "p90": 153.9199948310852, + "p95": 157.0879966020584, + "p99": 168.16000640392303 + }, + "isolatedSum": { + "p50": 164.67200219631195, + "p90": 183.26400220394135, + "p95": 187.74399906396866, + "p99": 199.5519995689392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + 
"fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.12800115346909, + "p90": 102.94400155544281, + "p95": 105.66399991512299, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 83.64800363779068, + "p95": 84.54400300979614, + "p99": 91.26400202512741 + }, + "roundtrip": { + "p50": 146.27200365066528, + "p90": 155.10399639606476, + "p95": 160.64000129699707, + "p99": 169.53599452972412 + }, + "isolatedSum": { + "p50": 171.29600048065186, + "p90": 186.5920051932335, + "p95": 190.20800292491913, + "p99": 203.45599949359894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 102.30399668216705, + "p90": 111.10399663448334, + "p95": 114.3679991364479, + "p99": 123.16799908876419 + }, + "combine": { + "p50": 82.43200182914734, + "p90": 86.14400029182434, + "p95": 87.90399879217148, + "p99": 95.61599791049957 + }, + "roundtrip": { + "p50": 159.07199680805206, + "p90": 166.81599617004395, + "p95": 170.0800061225891, + "p99": 174.14399981498718 + }, + "isolatedSum": { + "p50": 184.7359985113144, + "p90": 197.24799692630768, + "p95": 202.27199792861938, + "p99": 218.78399699926376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.27999895811081, + "p90": 126.39999389648438, + "p95": 129.63199615478516, + "p99": 135.80800592899323 + }, + "combine": { + "p50": 
98.78399968147278, + "p90": 106.11200332641602, + "p95": 107.58399963378906, + "p99": 110.75200140476227 + }, + "roundtrip": { + "p50": 190.33600389957428, + "p90": 197.63199985027313, + "p95": 200.6080001592636, + "p99": 207.13600516319275 + }, + "isolatedSum": { + "p50": 216.0639986395836, + "p90": 232.5119972229004, + "p95": 237.21599578857422, + "p99": 246.5600073337555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-68a413bd", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "27bf22d33b31607a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:01.655564+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 87.61599659919739, + "p90": 98.04800152778625, + "p95": 102.1760031580925, + "p99": 109.31199789047241 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 74.30399954319, + "p95": 75.58400183916092, + "p99": 79.99999821186066 + }, + "roundtrip": { + "p50": 138.62399756908417, + "p90": 148.03199470043182, + "p95": 152.0639955997467, + "p99": 159.0079963207245 + }, + "isolatedSum": { + "p50": 158.1439971923828, + "p90": 172.35200107097626, + "p95": 177.76000499725342, + "p99": 189.31199610233307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 87.39200234413147, + "p90": 98.01600128412247, + "p95": 102.08000242710114, + "p99": 108.22399705648422 + }, + "combine": { + "p50": 71.80800288915634, + "p90": 75.3600001335144, + "p95": 79.26400005817413, + "p99": 88.06400001049042 + }, + "roundtrip": { + "p50": 139.90400731563568, + "p90": 149.31200444698334, + "p95": 152.28800475597382, + "p99": 160.5760008096695 + }, + "isolatedSum": { + "p50": 159.2000052332878, + "p90": 
173.37600141763687, + "p95": 181.34400248527527, + "p99": 196.28799706697464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 88.25600147247314, + "p90": 99.07200187444687, + "p95": 102.68799960613251, + "p99": 110.78400164842606 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 76.4160007238388, + "p95": 78.72000336647034, + "p99": 83.74399691820145 + }, + "roundtrip": { + "p50": 140.86399972438812, + "p90": 151.5199989080429, + "p95": 153.6320000886917, + "p99": 159.93599593639374 + }, + "isolatedSum": { + "p50": 160.96000373363495, + "p90": 175.48800259828568, + "p95": 181.40800297260284, + "p99": 194.5279985666275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 88.128000497818, + "p90": 98.49599748849869, + "p95": 102.24000364542007, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 81.37600123882294, + "p95": 83.96799862384796, + "p99": 91.20000153779984 + }, + "roundtrip": { + "p50": 143.5839980840683, + "p90": 152.6080071926117, + "p95": 156.12800419330597, + "p99": 164.51199352741241 + }, + "isolatedSum": { + "p50": 162.75200247764587, + "p90": 179.87199872732162, + "p95": 186.20800226926804, + "p99": 199.35999810695648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + 
"p50": 89.1840010881424, + "p90": 98.30400347709656, + "p95": 101.40799731016159, + "p99": 112.70400136709213 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 82.24000036716461, + "p95": 83.77599716186523, + "p99": 86.97599917650223 + }, + "roundtrip": { + "p50": 143.61600577831268, + "p90": 153.05599570274353, + "p95": 157.56799280643463, + "p99": 165.18400609493256 + }, + "isolatedSum": { + "p50": 164.19199854135513, + "p90": 180.54400384426117, + "p95": 185.18399447202682, + "p99": 199.68000054359436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 90.94399958848953, + "p90": 100.00000149011612, + "p95": 103.32799702882767, + "p99": 109.72800105810165 + }, + "combine": { + "p50": 77.44000107049942, + "p90": 84.51200276613235, + "p95": 86.30400151014328, + "p99": 95.32800316810608 + }, + "roundtrip": { + "p50": 145.53600549697876, + "p90": 154.14400398731232, + "p95": 158.04800391197205, + "p99": 164.0319973230362 + }, + "isolatedSum": { + "p50": 168.38400065898895, + "p90": 184.51200425624847, + "p95": 189.63199853897095, + "p99": 205.05600422620773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 101.82400047779083, + "p90": 110.6560006737709, + "p95": 112.5119999051094, + "p99": 121.69600278139114 + }, + "combine": { + "p50": 83.03999900817871, + "p90": 86.56000345945358, + "p95": 88.03199976682663, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 159.36000645160675, + "p90": 166.36799275875092, + "p95": 168.5439944267273, + 
"p99": 173.5360026359558 + }, + "isolatedSum": { + "p50": 184.86399948596954, + "p90": 197.2160041332245, + "p95": 200.54399967193604, + "p99": 213.6320024728775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 116.5120005607605, + "p90": 124.22399967908859, + "p95": 126.65599584579468, + "p99": 131.77600502967834 + }, + "combine": { + "p50": 98.9760011434555, + "p90": 105.27999699115753, + "p95": 107.51999914646149, + "p99": 112.47999966144562 + }, + "roundtrip": { + "p50": 190.62399864196777, + "p90": 197.66399264335632, + "p95": 201.02399587631226, + "p99": 207.2959989309311 + }, + "isolatedSum": { + "p50": 215.488001704216, + "p90": 229.50399667024612, + "p95": 234.17599499225616, + "p99": 244.25600469112396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-23458efa", + "identity": "gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||7c1cc7238ca9a52", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "c3d62a34966099cb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:51.789369+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · 
bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "7c1cc7238ca9a52", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 91.42400324344635, + "p90": 101.02400183677673, + "p95": 104.63999956846237, + "p99": 113.11999708414078 + }, + "combine": { + "p50": 70.11199742555618, + "p90": 73.44000041484833, + "p95": 74.62400197982788, + "p99": 80.25600016117096 + }, + "roundtrip": { + "p50": 141.63200557231903, + "p90": 150.4960060119629, + "p95": 154.4640064239502, + "p99": 160.19199788570404 + }, + "isolatedSum": { + "p50": 161.53600066900253, + "p90": 174.46400225162506, + "p95": 179.26400154829025, + "p99": 193.37599724531174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + 
"recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 92.19200164079666, + "p90": 101.6319990158081, + "p95": 105.79200088977814, + "p99": 112.35199868679047 + }, + "combine": { + "p50": 71.84000313282013, + "p90": 74.68800246715546, + "p95": 76.35200023651123, + "p99": 82.5280025601387 + }, + "roundtrip": { + "p50": 143.99999380111694, + "p90": 153.6639928817749, + "p95": 157.151997089386, + "p99": 164.48000073432922 + }, + "isolatedSum": { + "p50": 164.0320047736168, + "p90": 176.32000148296356, + "p95": 182.14400112628937, + "p99": 194.88000124692917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 92.16000139713287, + "p90": 103.7760004401207, + "p95": 108.03200304508209, + "p99": 117.34399944543839 + }, + "combine": { + "p50": 72.57600128650665, + "p90": 75.87199658155441, + "p95": 77.72800326347351, + "p99": 85.1840004324913 + }, + "roundtrip": { + "p50": 144.57599818706512, + "p90": 154.40000593662262, + "p95": 158.1439971923828, + "p99": 164.8000031709671 + }, + "isolatedSum": { + "p50": 164.73600268363953, + "p90": 179.6479970216751, + "p95": 185.7600063085556, + "p99": 202.5279998779297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 860160, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 92.32000261545181, + "p90": 102.33599692583084, + "p95": 106.49599879980087, + "p99": 115.42399972677231 + }, + "combine": { + "p50": 74.68800246715546, + "p90": 82.14399963617325, + "p95": 
83.48800241947174, + "p99": 87.8399983048439 + }, + "roundtrip": { + "p50": 146.5280055999756, + "p90": 154.4959992170334, + "p95": 158.6879938840866, + "p99": 166.6879951953888 + }, + "isolatedSum": { + "p50": 167.00800508260727, + "p90": 184.4799965620041, + "p95": 189.9840012192726, + "p99": 203.2639980316162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 30, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 92.67199784517288, + "p90": 102.94400155544281, + "p95": 106.20799660682678, + "p99": 114.04799669981003 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 81.727996468544, + "p95": 82.97599852085114, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 146.5280055999756, + "p90": 155.8080017566681, + "p95": 158.81599485874176, + "p99": 164.70399498939514 + }, + "isolatedSum": { + "p50": 166.9119969010353, + "p90": 184.67199802398682, + "p95": 189.18399512767792, + "p99": 200.8959949016571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 59, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 93.6959981918335, + "p90": 102.49599814414978, + "p95": 104.63999956846237, + "p99": 113.8560026884079 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 83.83999764919281, + "p95": 84.79999750852585, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 147.5519984960556, + "p90": 155.5519998073578, + "p95": 158.49600732326508, + "p99": 164.38399255275726 + }, + "isolatedSum": { + "p50": 170.9439978003502, + "p90": 186.3359957933426, + "p95": 189.43999707698822, + "p99": 201.664000749588 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 6623232, + "combineLogicalBytes": 6623232, + "fanoutMean": 3.609375, + "recvTokensMax": 117, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 104.032002389431, + "p90": 112.06399649381638, + "p95": 115.32799899578094, + "p99": 136.57599687576294 + }, + "combine": { + "p50": 82.49600231647491, + "p90": 86.2400010228157, + "p95": 88.70399743318558, + "p99": 96.09600156545639 + }, + "roundtrip": { + "p50": 160.73599457740784, + "p90": 167.39200055599213, + "p95": 169.76000368595123, + "p99": 175.4560023546219 + }, + "isolatedSum": { + "p50": 186.52800470590591, + "p90": 198.30399751663208, + "p95": 204.03199642896652, + "p99": 232.67199844121933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13189120, + "combineLogicalBytes": 13189120, + "fanoutMean": 3.59375, + "recvTokensMax": 234, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.45600062608719, + "p90": 128.76799702644348, + "p95": 131.84000551700592, + "p99": 136.28800213336945 + }, + "combine": { + "p50": 98.52799773216248, + "p90": 104.25599664449692, + "p95": 106.20799660682678, + "p99": 109.21599715948105 + }, + "roundtrip": { + "p50": 192.99200177192688, + "p90": 200.54399967193604, + "p95": 203.5199999809265, + "p99": 208.95999670028687 + }, + "isolatedSum": { + "p50": 217.98399835824966, + "p90": 233.0239936709404, + "p95": 238.0480021238327, + "p99": 245.5039992928505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26621952, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-808bb60e", + "identity": 
"gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|decode|normal|none|none|0|tuned||bb358a3c2e68578", + "colorKey": "gb300_76c0d0f4", + "comparisonKey": "199169db31d7bd55", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:48.097521+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "bb358a3c2e68578", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 87.3280018568039, + "p90": 95.51999717950821, + "p95": 101.31199657917023, + "p99": 113.21599781513214 + }, + "combine": { + "p50": 69.66400146484375, + "p90": 72.92799651622772, + "p95": 75.13599842786789, + "p99": 82.43200182914734 + }, + "roundtrip": { + "p50": 137.82399892807007, + "p90": 147.13600277900696, + "p95": 152.0960032939911, + "p99": 159.96800363063812 + }, + "isolatedSum": { + "p50": 156.99200332164764, + "p90": 168.44799369573593, + "p95": 176.44799500703812, + "p99": 195.64799964427948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 87.45600283145905, + "p90": 97.59999811649323, + "p95": 101.43999755382538, + "p99": 107.35999792814255 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 76.22399926185608, + "p95": 79.55200225114822, + "p99": 84.44800227880478 + }, + "roundtrip": { + "p50": 139.39200341701508, + "p90": 149.9519944190979, + "p95": 154.88000214099884, + "p99": 164.32000696659088 + }, + "isolatedSum": { + "p50": 158.65600109100342, + "p90": 173.8239973783493, + "p95": 180.9919998049736, + "p99": 191.80800020694733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 4, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 89.02399986982346, + "p90": 98.43199700117111, + "p95": 103.26399654150009, + "p99": 111.61600053310394 + }, + "combine": { + "p50": 71.6480016708374, + "p90": 76.38400048017502, + "p95": 79.77599650621414, + "p99": 85.60000360012054 + }, + "roundtrip": 
{ + "p50": 141.08799397945404, + "p90": 151.13599598407745, + "p95": 155.5200070142746, + "p99": 161.98399662971497 + }, + "isolatedSum": { + "p50": 160.67200154066086, + "p90": 174.81599748134613, + "p95": 183.03999304771423, + "p99": 197.2160041332245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 4, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 88.06400001049042, + "p90": 97.9200005531311, + "p95": 101.69599950313568, + "p99": 109.0880036354065 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 82.91199803352356, + "p95": 85.21600067615509, + "p99": 334.3679904937744 + }, + "roundtrip": { + "p50": 142.5279974937439, + "p90": 152.19199657440186, + "p95": 157.05600380897522, + "p99": 164.8319959640503 + }, + "isolatedSum": { + "p50": 163.87200355529785, + "p90": 180.83199858665466, + "p95": 186.91200017929077, + "p99": 443.4559941291809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 4, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 88.70399743318558, + "p90": 98.75199943780899, + "p95": 102.14400291442871, + "p99": 108.60799998044968 + }, + "combine": { + "p50": 75.99999755620956, + "p90": 82.43200182914734, + "p95": 83.83999764919281, + "p99": 90.4960036277771 + }, + "roundtrip": { + "p50": 143.26399564743042, + "p90": 152.6080071926117, + "p95": 155.2640050649643, + "p99": 163.7440025806427 + }, + "isolatedSum": { + "p50": 164.70399498939514, + "p90": 181.18400126695633, + "p95": 185.98400056362152, + "p99": 199.10400360822678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 4, + 
"recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 91.90399944782257, + "p90": 100.5759984254837, + "p95": 103.71199995279312, + "p99": 110.23999750614166 + }, + "combine": { + "p50": 80.48000186681747, + "p90": 84.32000130414963, + "p95": 86.43200248479843, + "p99": 93.79199892282486 + }, + "roundtrip": { + "p50": 144.86399292945862, + "p90": 155.71199357509613, + "p95": 159.61599349975586, + "p99": 165.56799411773682 + }, + "isolatedSum": { + "p50": 172.38400131464005, + "p90": 184.89599972963333, + "p95": 190.14400243759155, + "p99": 204.03199642896652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 4, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 102.08000242710114, + "p90": 111.26399785280228, + "p95": 114.17599767446518, + "p99": 123.00799787044525 + }, + "combine": { + "p50": 82.5280025601387, + "p90": 85.88799834251404, + "p95": 87.2960016131401, + "p99": 92.96000003814697 + }, + "roundtrip": { + "p50": 160.863995552063, + "p90": 168.83200407028198, + "p95": 172.09599912166595, + "p99": 181.2479943037033 + }, + "isolatedSum": { + "p50": 184.60800498723984, + "p90": 197.15199619531631, + "p95": 201.47199928760529, + "p99": 215.96799790859222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 4, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.58400160074234, + "p90": 127.6479959487915, + "p95": 129.7599971294403, + "p99": 135.6160044670105 + }, + "combine": { + "p50": 101.05600208044052, + "p90": 107.61599987745285, + 
"p95": 108.83200168609619, + "p99": 117.27999895811081 + }, + "roundtrip": { + "p50": 192.3840045928955, + "p90": 199.16799664497375, + "p95": 201.24800503253937, + "p99": 211.64800226688385 + }, + "isolatedSum": { + "p50": 220.64000368118286, + "p90": 235.26399582624435, + "p95": 238.5919988155365, + "p99": 252.8960034251213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ec4c96e", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|decode|normal|none|none|0|tuned||c9bbf5a132d7fdf", + "colorKey": "gb300_2da51caf", + "comparisonKey": "b7b7884473e62204", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:10.408504+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9bbf5a132d7fdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 82.62400329113007, + "p90": 91.20000153779984, + "p95": 96.22400254011154, + "p99": 102.08000242710114 + }, + "combine": { + "p50": 62.04799935221672, + "p90": 70.36799937486649, + "p95": 71.74400240182877, + "p99": 80.89599758386612 + }, + "roundtrip": { + "p50": 128.00000607967377, + "p90": 136.28800213336945, + "p95": 140.73599874973297, + "p99": 148.22399616241455 + }, + "isolatedSum": { + "p50": 144.6720026433468, + "p90": 161.56800091266632, + "p95": 167.9680049419403, + "p99": 182.97600001096725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 57344, + "combineLogicalBytes": 57344, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 85.66399663686752, + "p90": 95.0080007314682, + "p95": 99.04000163078308, + "p99": 106.175996363163 + }, + "combine": { + "p50": 57.50399827957153, + "p90": 60.83200126886368, + "p95": 62.272001057863235, + "p99": 70.27199864387512 + }, + "roundtrip": { + "p50": 123.87199699878693, + "p90": 133.34399461746216, + "p95": 137.82399892807007, + "p99": 144.57599818706512 + }, + "isolatedSum": { + "p50": 143.16799491643906, + "p90": 
155.84000200033188, + "p95": 161.31200268864632, + "p99": 176.44799500703812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 87.99999952316284, + "p90": 97.120001912117, + "p95": 102.11200267076492, + "p99": 111.455999314785 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 72.54400104284286, + "p95": 73.98399710655212, + "p99": 82.30400085449219 + }, + "roundtrip": { + "p50": 135.80800592899323, + "p90": 146.01600170135498, + "p95": 149.88799393177032, + "p99": 157.05600380897522 + }, + "isolatedSum": { + "p50": 157.4079990386963, + "p90": 169.66400295495987, + "p95": 176.09599977731705, + "p99": 193.7600001692772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 97.47199714183807, + "p90": 103.64799946546555, + "p95": 108.51199924945831, + "p99": 115.32799899578094 + }, + "combine": { + "p50": 76.1599987745285, + "p90": 83.61600339412689, + "p95": 84.73599702119827, + "p99": 92.44800359010696 + }, + "roundtrip": { + "p50": 147.71200716495514, + "p90": 154.08000349998474, + "p95": 157.79200196266174, + "p99": 166.01599752902985 + }, + "isolatedSum": { + "p50": 173.63199591636658, + "p90": 187.26400285959244, + "p95": 193.24799627065659, + "p99": 207.7760025858879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2f171ac5", + "identity": 
"gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|decode|normal|none|none|0|tuned||4dc6cbd03327f4e", + "colorKey": "gb300_22122c9a", + "comparisonKey": "3d526978606d8abc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:01.095202+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "4dc6cbd03327f4e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 91.80799871683121, + "p90": 103.45599800348282, + "p95": 106.59199953079224, + "p99": 116.2559986114502 + }, + "combine": { + "p50": 67.55200028419495, + "p90": 72.64000177383423, + "p95": 73.98399710655212, + "p99": 80.51200211048126 + }, + "roundtrip": { + "p50": 136.6720050573349, + "p90": 145.7280069589615, + "p95": 149.53599870204926, + "p99": 157.4079990386963 + }, + "isolatedSum": { + "p50": 159.35999900102615, + "p90": 176.09599977731705, + "p95": 180.57599663734436, + "p99": 196.76800072193146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 93.08800101280212, + "p90": 103.96800190210342, + "p95": 108.5439994931221, + "p99": 123.19999933242798 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 73.85600358247757, + "p95": 75.6160020828247, + "p99": 81.28000050783157 + }, + "roundtrip": { + "p50": 140.3840035200119, + "p90": 150.4639983177185, + "p95": 155.07200360298157, + "p99": 162.88000345230103 + }, + "isolatedSum": { + "p50": 163.42400014400482, + "p90": 177.824005484581, + "p95": 184.1600015759468, + "p99": 204.47999984025955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 92.76799857616425, + "p90": 103.07200253009796, + "p95": 107.4879989027977, + "p99": 114.30399864912033 + }, + "combine": { + "p50": 70.94399631023407, + "p90": 74.14399832487106, + "p95": 75.83999633789062, + "p99": 83.5840031504631 + }, + "roundtrip": { + 
"p50": 142.56000518798828, + "p90": 152.25599706172943, + "p95": 155.71199357509613, + "p99": 161.0880047082901 + }, + "isolatedSum": { + "p50": 163.71199488639832, + "p90": 177.21600085496902, + "p95": 183.32799524068832, + "p99": 197.88800179958344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 92.32000261545181, + "p90": 103.10400277376175, + "p95": 107.87200182676315, + "p99": 115.58400094509125 + }, + "combine": { + "p50": 71.10399752855301, + "p90": 74.40000027418137, + "p95": 76.92799717187881, + "p99": 85.60000360012054 + }, + "roundtrip": { + "p50": 143.99999380111694, + "p90": 154.11199629306793, + "p95": 159.45599973201752, + "p99": 167.77600347995758 + }, + "isolatedSum": { + "p50": 163.42400014400482, + "p90": 177.50400304794312, + "p95": 184.79999899864197, + "p99": 201.1840045452118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 93.05600076913834, + "p90": 102.78400033712387, + "p95": 106.75200074911118, + "p99": 112.67200112342834 + }, + "combine": { + "p50": 70.94399631023407, + "p90": 74.01599735021591, + "p95": 75.74400305747986, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 142.07999408245087, + "p90": 151.2320041656494, + "p95": 154.84799444675446, + "p99": 163.55200111865997 + }, + "isolatedSum": { + "p50": 163.9999970793724, + "p90": 176.79999768733978, + "p95": 182.49600380659103, + "p99": 196.60799950361252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + 
"recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.25600212812424, + "p90": 103.16800326108932, + "p95": 107.51999914646149, + "p99": 113.82400244474411 + }, + "combine": { + "p50": 71.68000191450119, + "p90": 75.68000257015228, + "p95": 77.85599678754807, + "p99": 86.40000224113464 + }, + "roundtrip": { + "p50": 144.6080058813095, + "p90": 154.65599298477173, + "p95": 157.6319932937622, + "p99": 166.30400717258453 + }, + "isolatedSum": { + "p50": 163.93600404262543, + "p90": 178.8480058312416, + "p95": 185.37599593400955, + "p99": 200.22400468587875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2752512, + "combineLogicalBytes": 2752512, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 94.17600184679031, + "p90": 103.87200117111206, + "p95": 107.39199817180634, + "p99": 115.32799899578094 + }, + "combine": { + "p50": 73.40800017118454, + "p90": 78.17599922418594, + "p95": 81.82399719953537, + "p99": 86.17600053548813 + }, + "roundtrip": { + "p50": 147.2640037536621, + "p90": 157.21599757671356, + "p95": 161.76000237464905, + "p99": 171.4559942483902 + }, + "isolatedSum": { + "p50": 167.58400201797485, + "p90": 182.048000395298, + "p95": 189.2159953713417, + "p99": 201.50399953126907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5505024, + "combineLogicalBytes": 5505024, + "fanoutMean": 1.5, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 104.89600151777267, + "p90": 113.18399757146835, + "p95": 115.61600118875504, + "p99": 126.78399682044983 + }, + "combine": { + "p50": 88.16000074148178, + "p90": 95.8079993724823, + "p95": 
97.43999689817429, + "p99": 102.55999863147736 + }, + "roundtrip": { + "p50": 167.58400201797485, + "p90": 175.32800137996674, + "p95": 177.18400061130524, + "p99": 186.20799481868744 + }, + "isolatedSum": { + "p50": 193.05600225925446, + "p90": 208.99199694395065, + "p95": 213.05599808692932, + "p99": 229.34399545192719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fcd76f50", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|decode|normal|none|none|0|tuned||0d921f8a9d2cb27", + "colorKey": "gb300_7e1244f6", + "comparisonKey": "a4a0d9c660bec30e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:14.293263+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "0d921f8a9d2cb27", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 86.04799956083298, + "p90": 95.58399766683578, + "p95": 100.35199671983719, + "p99": 106.11200332641602 + }, + "combine": { + "p50": 69.21599805355072, + "p90": 72.86400347948074, + "p95": 75.00799745321274, + "p99": 81.37600123882294 + }, + "roundtrip": { + "p50": 137.1839940547943, + "p90": 146.65600657463074, + "p95": 151.2639969587326, + "p99": 159.4880074262619 + }, + "isolatedSum": { + "p50": 155.2639976143837, + "p90": 168.44800114631653, + "p95": 175.35999417304993, + "p99": 187.48800456523895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 200704, + "combineLogicalBytes": 200704, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 86.17600053548813, + "p90": 95.67999839782715, + "p95": 100.60799866914749, + "p99": 109.47199910879135 + }, + "combine": { + "p50": 70.592001080513, + "p90": 74.40000027418137, + "p95": 77.91999727487564, + "p99": 82.75199681520462 + }, + "roundtrip": { + "p50": 138.2399946451187, + "p90": 149.1200029850006, + "p95": 152.44799852371216, + "p99": 159.90400314331055 + }, + "isolatedSum": { + "p50": 156.76800161600113, + "p90": 170.07999867200851, + "p95": 
178.52799594402313, + "p99": 192.22399592399597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 86.94399893283844, + "p90": 96.57599776983261, + "p95": 100.832000374794, + "p99": 109.76000130176544 + }, + "combine": { + "p50": 71.52000069618225, + "p90": 77.11999863386154, + "p95": 79.93599772453308, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 140.03199338912964, + "p90": 149.3760049343109, + "p95": 153.28000485897064, + "p99": 162.432000041008 + }, + "isolatedSum": { + "p50": 158.4639996290207, + "p90": 173.69599640369415, + "p95": 180.7679980993271, + "p99": 195.20000368356705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 87.07199990749359, + "p90": 95.64799815416336, + "p95": 99.32799637317657, + "p99": 105.8880016207695 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 81.216000020504, + "p95": 82.62400329113007, + "p99": 88.48000317811966 + }, + "roundtrip": { + "p50": 141.9840008020401, + "p90": 151.93599462509155, + "p95": 155.2319973707199, + "p99": 164.32000696659088 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 176.86399817466736, + "p95": 181.95199966430664, + "p99": 194.36800479888916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 87.96799927949905, + "p90": 
98.14400225877762, + "p95": 101.3759970664978, + "p99": 108.57599973678589 + }, + "combine": { + "p50": 75.48800110816956, + "p90": 82.04799890518188, + "p95": 83.29600095748901, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 141.40799641609192, + "p90": 150.43200552463531, + "p95": 152.6080071926117, + "p99": 158.24000537395477 + }, + "isolatedSum": { + "p50": 163.4560003876686, + "p90": 180.1920011639595, + "p95": 184.67199802398682, + "p99": 197.56799936294556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3282944, + "combineLogicalBytes": 3282944, + "fanoutMean": 3.578125, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 90.01599997282028, + "p90": 97.98400104045868, + "p95": 100.54399818181992, + "p99": 111.90400272607803 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 83.52000266313553, + "p95": 85.02399921417236, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 144.25599575042725, + "p90": 152.70400047302246, + "p95": 157.0879966020584, + "p99": 164.48000073432922 + }, + "isolatedSum": { + "p50": 168.7680035829544, + "p90": 181.5040037035942, + "p95": 185.56799739599228, + "p99": 202.17600464820862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6694912, + "combineLogicalBytes": 6694912, + "fanoutMean": 3.6484375, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 100.03200173377991, + "p90": 108.0000028014183, + "p95": 111.26399785280228, + "p99": 119.93599683046341 + }, + "combine": { + "p50": 82.17599987983704, + "p90": 85.40800213813782, + "p95": 86.87999844551086, + "p99": 89.9839997291565 + }, + "roundtrip": { + "p50": 158.78400206565857, + "p90": 166.07999801635742, + "p95": 169.76000368595123, + "p99": 175.35999417304993 + }, + 
"isolatedSum": { + "p50": 182.20800161361694, + "p90": 193.40800493955612, + "p95": 198.14399629831314, + "p99": 209.9199965596199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13318144, + "combineLogicalBytes": 13318144, + "fanoutMean": 3.62890625, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.63200163841248, + "p90": 124.41600114107132, + "p95": 128.31999361515045, + "p99": 133.85599851608276 + }, + "combine": { + "p50": 98.59199821949005, + "p90": 105.8880016207695, + "p95": 107.68000036478043, + "p99": 111.58400028944016 + }, + "roundtrip": { + "p50": 191.45600497722626, + "p90": 198.94400238990784, + "p95": 201.08799636363983, + "p99": 206.7839950323105 + }, + "isolatedSum": { + "p50": 216.22399985790253, + "p90": 230.30400276184082, + "p95": 235.99999397993088, + "p99": 245.43999880552292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5b63fb61", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|decode|normal|none|none|0|tuned||cc5ad1cb2e95ef6", + "colorKey": "gb300_822be538", + "comparisonKey": "345d438e7d409493", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:41.186548+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · 
hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cc5ad1cb2e95ef6", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.3408203125, + "eplbImbalanceAfter": 1.000390625, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 90.87999910116196, + "p90": 101.02400183677673, + "p95": 106.04800283908844, + "p99": 114.01599645614624 + }, + "combine": { + "p50": 70.65600156784058, + "p90": 73.82400333881378, + "p95": 75.9039968252182, + "p99": 83.48800241947174 + }, + "roundtrip": { + "p50": 141.12000167369843, + "p90": 150.81599354743958, + "p95": 154.33600544929504, + "p99": 160.67199409008026 + }, + "isolatedSum": { + "p50": 161.53600066900253, + "p90": 174.84800517559052, + "p95": 181.95199966430664, + "p99": 197.50399887561798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, 
+ "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 91.58399701118469, + "p90": 101.02400183677673, + "p95": 106.49599879980087, + "p99": 113.98400366306305 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 75.13599842786789, + "p95": 77.40800082683563, + "p99": 83.36000144481659 + }, + "roundtrip": { + "p50": 143.10400187969208, + "p90": 153.72799336910248, + "p95": 157.50400722026825, + "p99": 165.6319946050644 + }, + "isolatedSum": { + "p50": 163.64799439907074, + "p90": 176.16000026464462, + "p95": 183.9039996266365, + "p99": 197.34400510787964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 91.77599847316742, + "p90": 102.59199887514114, + "p95": 106.78400099277496, + "p99": 111.00800335407257 + }, + "combine": { + "p50": 72.60800153017044, + "p90": 76.9599974155426, + "p95": 81.50400221347809, + "p99": 85.28000116348267 + }, + "roundtrip": { + "p50": 144.9279934167862, + "p90": 153.18399667739868, + "p95": 155.90399503707886, + "p99": 161.5999937057495 + }, + "isolatedSum": { + "p50": 164.38400000333786, + "p90": 179.55199629068375, + "p95": 188.28800320625305, + "p99": 196.28800451755524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 860160, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 91.71199798583984, + "p90": 101.6319990158081, + "p95": 106.1440035700798, + "p99": 114.27199840545654 + }, + "combine": { + "p50": 
75.6160020828247, + "p90": 83.61600339412689, + "p95": 84.70399677753448, + "p99": 92.03200042247772 + }, + "roundtrip": { + "p50": 146.88000082969666, + "p90": 156.0640037059784, + "p95": 159.743994474411, + "p99": 164.06400501728058 + }, + "isolatedSum": { + "p50": 167.32800006866455, + "p90": 185.248002409935, + "p95": 190.8480003476143, + "p99": 206.30399882793427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1705984, + "combineLogicalBytes": 1705984, + "fanoutMean": 3.71875, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 92.57599711418152, + "p90": 102.04800218343735, + "p95": 106.36799782514572, + "p99": 115.13599753379822 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 82.5280025601387, + "p95": 84.19200032949448, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 146.14400267601013, + "p90": 154.6880006790161, + "p95": 157.60000050067902, + "p99": 165.47200083732605 + }, + "isolatedSum": { + "p50": 167.4559935927391, + "p90": 184.57600474357605, + "p95": 190.5599981546402, + "p99": 202.04799622297287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3411968, + "combineLogicalBytes": 3411968, + "fanoutMean": 3.71875, + "recvTokensMax": 62, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.28800237178802, + "p90": 101.18400305509567, + "p95": 104.2879968881607, + "p99": 115.10399729013443 + }, + "combine": { + "p50": 80.6720033288002, + "p90": 85.08799970149994, + "p95": 86.40000224113464, + "p99": 94.14400160312653 + }, + "roundtrip": { + "p50": 147.87200093269348, + "p90": 155.96799552440643, + "p95": 158.87999534606934, + "p99": 164.2879992723465 + }, + "isolatedSum": { + "p50": 172.96000570058823, + "p90": 186.2720027565956, + "p95": 190.68799912929535, + "p99": 
209.24799889326096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6680576, + "combineLogicalBytes": 6680576, + "fanoutMean": 3.640625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 104.70400005578995, + "p90": 112.12799698114395, + "p95": 116.64000153541565, + "p99": 126.68800354003906 + }, + "combine": { + "p50": 83.29600095748901, + "p90": 86.7839977145195, + "p95": 88.60799670219421, + "p99": 94.43199634552002 + }, + "roundtrip": { + "p50": 160.99199652671814, + "p90": 168.06399822235107, + "p95": 171.424001455307, + "p99": 180.25599420070648 + }, + "isolatedSum": { + "p50": 188.00000101327896, + "p90": 198.91199469566345, + "p95": 205.24799823760986, + "p99": 221.11999988555908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13432832, + "combineLogicalBytes": 13432832, + "fanoutMean": 3.66015625, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.04799735546112, + "p90": 126.52799487113953, + "p95": 129.34400141239166, + "p99": 135.00800728797913 + }, + "combine": { + "p50": 98.7199991941452, + "p90": 106.55999928712845, + "p95": 108.47999900579453, + "p99": 112.70400136709213 + }, + "roundtrip": { + "p50": 191.26400351524353, + "p90": 198.55999946594238, + "p95": 203.16800475120544, + "p99": 212.3199999332428 + }, + "isolatedSum": { + "p50": 216.76799654960632, + "p90": 233.08799415826797, + "p95": 237.8240004181862, + "p99": 247.71200865507126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26464256, + "combineLogicalBytes": 26464256, + "fanoutMean": 3.60546875, + "recvTokensMax": 471, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ac208fdf", + "identity": 
"gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|decode|normal|none|none|0|tuned||c186e8c8d66ece3", + "colorKey": "gb300_75bb6e82", + "comparisonKey": "f2d0c8e30ec14eac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:38.070484+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c186e8c8d66ece3", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.091796875, + "eplbImbalanceAfter": 1.00146484375, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + 
"sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 90.43200314044952, + "p90": 100.22400319576263, + "p95": 103.32799702882767, + "p99": 109.47199910879135 + }, + "combine": { + "p50": 70.11199742555618, + "p90": 73.37599992752075, + "p95": 74.72000271081924, + "p99": 80.73599636554718 + }, + "roundtrip": { + "p50": 140.51200449466705, + "p90": 149.82399344444275, + "p95": 153.9199948310852, + "p99": 159.93599593639374 + }, + "isolatedSum": { + "p50": 160.5440005660057, + "p90": 173.6000031232834, + "p95": 178.0479997396469, + "p99": 190.20799547433853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 89.91999924182892, + "p90": 100.60799866914749, + "p95": 104.76800054311752, + "p99": 114.14399743080139 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 74.68800246715546, + "p95": 76.57600194215775, + "p99": 83.26400071382523 + }, + "roundtrip": { + "p50": 141.37600362300873, + "p90": 151.45599842071533, + "p95": 156.031996011734, + "p99": 166.04800522327423 + }, + "isolatedSum": { + "p50": 161.0879972577095, + "p90": 175.29600113630295, + "p95": 181.34400248527527, + "p99": 197.40799814462662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 90.43200314044952, + "p90": 100.28800368309021, + "p95": 103.26399654150009, + "p99": 111.87200248241425 + }, + "combine": { + "p50": 72.22399860620499, + "p90": 75.3600001335144, + "p95": 77.56800204515457, + "p99": 84.25600081682205 + }, + 
"roundtrip": { + "p50": 143.13599467277527, + "p90": 152.96000242233276, + "p95": 156.3519984483719, + "p99": 162.59199380874634 + }, + "isolatedSum": { + "p50": 162.6560017466545, + "p90": 175.64800381660461, + "p95": 180.83199858665466, + "p99": 196.1280032992363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 745472, + "combineLogicalBytes": 745472, + "fanoutMean": 3.25, + "recvTokensMax": 15, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 89.02399986982346, + "p90": 98.43199700117111, + "p95": 102.30399668216705, + "p99": 108.92800241708755 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 81.95199817419052, + "p95": 83.5840031504631, + "p99": 90.4960036277771 + }, + "roundtrip": { + "p50": 144.0960019826889, + "p90": 154.33600544929504, + "p95": 157.4079990386963, + "p99": 162.20800578594208 + }, + "isolatedSum": { + "p50": 163.58400136232376, + "p90": 180.38399517536163, + "p95": 185.88799983263016, + "p99": 199.42400604486465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 30, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 91.80799871683121, + "p90": 103.55199873447418, + "p95": 107.58399963378906, + "p99": 128.28800082206726 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 82.14399963617325, + "p95": 84.70399677753448, + "p99": 98.91200065612793 + }, + "roundtrip": { + "p50": 145.56799829006195, + "p90": 156.00000321865082, + "p95": 160.12799739837646, + "p99": 168.70400309562683 + }, + "isolatedSum": { + "p50": 166.04799777269363, + "p90": 185.69599837064743, + "p95": 192.28799641132355, + "p99": 227.2000014781952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3225600, + "combineLogicalBytes": 3225600, + 
"fanoutMean": 3.515625, + "recvTokensMax": 60, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.83199906349182, + "p90": 102.75200009346008, + "p95": 105.56799918413162, + "p99": 111.29599809646606 + }, + "combine": { + "p50": 77.2159993648529, + "p90": 83.67999643087387, + "p95": 85.1840004324913, + "p99": 93.66399794816971 + }, + "roundtrip": { + "p50": 146.7519998550415, + "p90": 155.2319973707199, + "p95": 158.91200304031372, + "p99": 167.84000396728516 + }, + "isolatedSum": { + "p50": 170.04799842834473, + "p90": 186.43199652433395, + "p95": 190.75199961662292, + "p99": 204.95999604463577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6522880, + "combineLogicalBytes": 6522880, + "fanoutMean": 3.5546875, + "recvTokensMax": 118, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 103.2319962978363, + "p90": 112.47999966144562, + "p95": 116.5120005607605, + "p99": 125.18399953842163 + }, + "combine": { + "p50": 83.00799876451492, + "p90": 86.36800199747086, + "p95": 87.8399983048439, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 160.64000129699707, + "p90": 168.38400065898895, + "p95": 172.0000058412552, + "p99": 178.14399302005768 + }, + "isolatedSum": { + "p50": 186.23999506235123, + "p90": 198.84800165891647, + "p95": 204.3519988656044, + "p99": 216.12799912691116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13160448, + "combineLogicalBytes": 13160448, + "fanoutMean": 3.5859375, + "recvTokensMax": 238, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.15199798345566, + "p90": 126.11199915409088, + "p95": 128.4160017967224, + "p99": 135.96799969673157 + }, + "combine": { + "p50": 
98.62399846315384, + "p90": 105.76000064611435, + "p95": 107.13600367307663, + "p99": 114.84800279140472 + }, + "roundtrip": { + "p50": 190.49599766731262, + "p90": 198.43199849128723, + "p95": 201.12000405788422, + "p99": 208.22399854660034 + }, + "isolatedSum": { + "p50": 215.7759964466095, + "p90": 231.87199980020523, + "p95": 235.55200546979904, + "p99": 250.8160024881363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26406912, + "combineLogicalBytes": 26406912, + "fanoutMean": 3.59765625, + "recvTokensMax": 474, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-23daac82", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_4b5c9507", + "comparisonKey": "f5b51006e1952b16", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:28.771811+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 92.0960009098053, + "p90": 132.1280002593994, + "p95": 148.5760062932968, + "p99": 161.79199516773224 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 79.23199981451035, + "p95": 82.75199681520462, + "p99": 93.72799843549728 + }, + "roundtrip": { + "p50": 141.02399349212646, + "p90": 158.36800634860992, + "p95": 169.11999881267548, + "p99": 197.50399887561798 + }, + "isolatedSum": { + "p50": 162.01600432395935, + "p90": 211.36000007390976, + "p95": 231.32800310850143, + "p99": 255.51999360322952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 93.66399794816971, + "p90": 147.71200716495514, + "p95": 151.99999511241913, + "p99": 161.76000237464905 + }, + "combine": { + "p50": 70.88000327348709, + "p90": 91.77599847316742, + "p95": 110.52799969911575, + "p99": 144.99199390411377 + }, + "roundtrip": { + "p50": 145.1520025730133, + "p90": 178.71999740600586, + "p95": 188.4479969739914, + "p99": 202.91200280189514 + }, + "isolatedSum": { + "p50": 164.5440012216568, + "p90": 
239.48800563812256, + "p95": 262.5279948115349, + "p99": 306.7519962787628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 93.66399794816971, + "p90": 146.33600413799286, + "p95": 151.42400562763214, + "p99": 165.12000560760498 + }, + "combine": { + "p50": 71.6480016708374, + "p90": 96.99200093746185, + "p95": 116.41599982976913, + "p99": 136.6720050573349 + }, + "roundtrip": { + "p50": 140.22399485111237, + "p90": 159.32799875736237, + "p95": 180.51199615001678, + "p99": 197.31199741363525 + }, + "isolatedSum": { + "p50": 165.3119996190071, + "p90": 243.3280050754547, + "p95": 267.8400054574013, + "p99": 301.7920106649399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 101.24800354242325, + "p90": 149.24800395965576, + "p95": 152.70400047302246, + "p99": 162.6559942960739 + }, + "combine": { + "p50": 78.20799946784973, + "p90": 117.18399822711945, + "p95": 124.38400089740753, + "p99": 153.1520038843155 + }, + "roundtrip": { + "p50": 145.60000598430634, + "p90": 179.36000227928162, + "p95": 187.19999492168427, + "p99": 204.67199385166168 + }, + "isolatedSum": { + "p50": 179.45600301027298, + "p90": 266.4320021867752, + "p95": 277.08800137043, + "p99": 315.8079981803894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + 
"p50": 94.87999975681305, + "p90": 130.49599528312683, + "p95": 146.88000082969666, + "p99": 159.13599729537964 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 97.4079966545105, + "p95": 109.98400300741196, + "p99": 127.36000120639801 + }, + "roundtrip": { + "p50": 146.81600034236908, + "p90": 185.47199666500092, + "p95": 195.26399672031403, + "p99": 206.2080055475235 + }, + "isolatedSum": { + "p50": 175.99999904632568, + "p90": 227.90399193763733, + "p95": 256.8640038371086, + "p99": 286.49599850177765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 95.29600292444229, + "p90": 143.51999759674072, + "p95": 151.99999511241913, + "p99": 160.22400557994843 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 119.35999989509583, + "p95": 129.40800189971924, + "p99": 153.43999862670898 + }, + "roundtrip": { + "p50": 148.28799664974213, + "p90": 181.34400248527527, + "p95": 188.960000872612, + "p99": 210.33599972724915 + }, + "isolatedSum": { + "p50": 179.77600544691086, + "p90": 262.87999749183655, + "p95": 281.40799701213837, + "p99": 313.6640042066574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 101.72799974679947, + "p90": 145.4399973154068, + "p95": 151.87199413776398, + "p99": 161.40800714492798 + }, + "combine": { + "p50": 83.99999886751175, + "p90": 120.92799693346024, + "p95": 135.3279948234558, + "p99": 148.12800288200378 + }, + "roundtrip": { + "p50": 159.5200002193451, + "p90": 194.59199905395508, + "p95": 207.71199464797974, + 
"p99": 225.95199942588806 + }, + "isolatedSum": { + "p50": 185.72799861431122, + "p90": 266.36799424886703, + "p95": 287.1999889612198, + "p99": 309.53601002693176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.19999867677689, + "p90": 147.5840061903, + "p95": 157.05600380897522, + "p99": 180.4800033569336 + }, + "combine": { + "p50": 102.04800218343735, + "p90": 149.50400590896606, + "p95": 155.20000457763672, + "p99": 160.38399934768677 + }, + "roundtrip": { + "p50": 190.3039962053299, + "p90": 227.87199914455414, + "p95": 235.4239970445633, + "p99": 250.0160038471222 + }, + "isolatedSum": { + "p50": 221.24800086021423, + "p90": 297.08801209926605, + "p95": 312.25600838661194, + "p99": 340.86400270462036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b241479b", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|decode|normal|none|none|0|tuned||3f8ffeba9f65629", + "colorKey": "gb300_bc29f115", + "comparisonKey": "55b3ee6f87226376", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:51.627711+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · 
deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3f8ffeba9f65629", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 91.61599725484848, + "p90": 102.14400291442871, + "p95": 106.175996363163, + "p99": 115.03999680280685 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 73.79200309515, + "p95": 75.6160020828247, + "p99": 83.03999900817871 + }, + "roundtrip": { + "p50": 139.52000439167023, + "p90": 151.07199549674988, + "p95": 155.5200070142746, + "p99": 163.68000209331512 + }, + "isolatedSum": { + "p50": 161.6639941930771, + "p90": 175.9360060095787, + "p95": 181.7919984459877, + "p99": 198.07999581098557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 71680, + "combineLogicalBytes": 71680, + 
"fanoutMean": 1.25, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 92.22400188446045, + "p90": 103.04000228643417, + "p95": 107.35999792814255, + "p99": 113.98400366306305 + }, + "combine": { + "p50": 70.56000083684921, + "p90": 74.27199929952621, + "p95": 76.64000242948532, + "p99": 83.5840031504631 + }, + "roundtrip": { + "p50": 140.83200693130493, + "p90": 150.30400454998016, + "p95": 155.32800555229187, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 162.78400272130966, + "p90": 177.3120015859604, + "p95": 184.00000035762787, + "p99": 197.56800681352615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 1.375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 94.30400282144547, + "p90": 105.66399991512299, + "p95": 109.3439981341362, + "p99": 116.86400324106216 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 74.33599978685379, + "p95": 76.09599828720093, + "p99": 83.96799862384796 + }, + "roundtrip": { + "p50": 143.10400187969208, + "p90": 154.27200496196747, + "p95": 158.720001578331, + "p99": 166.143998503685 + }, + "isolatedSum": { + "p50": 165.02400487661362, + "p90": 179.99999970197678, + "p95": 185.43999642133713, + "p99": 200.83200186491013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 286720, + "combineLogicalBytes": 286720, + "fanoutMean": 1.25, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 94.62399780750275, + "p90": 104.09600287675858, + "p95": 107.90400207042694, + "p99": 120.03199756145477 + }, + "combine": { + "p50": 71.1359977722168, + "p90": 
74.91199672222137, + "p95": 77.18399912118912, + "p99": 86.30400151014328 + }, + "roundtrip": { + "p50": 144.19199526309967, + "p90": 155.4879993200302, + "p95": 159.90400314331055, + "p99": 173.24799299240112 + }, + "isolatedSum": { + "p50": 165.75999557971954, + "p90": 179.00799959897995, + "p95": 185.08800119161606, + "p99": 206.33599907159805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 1.21875, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 94.65599805116653, + "p90": 105.95200210809708, + "p95": 109.02400314807892, + "p99": 116.2559986114502 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 75.48800110816956, + "p95": 79.32800054550171, + "p99": 84.63999629020691 + }, + "roundtrip": { + "p50": 146.01600170135498, + "p90": 156.25600516796112, + "p95": 160.16000509262085, + "p99": 168.89600455760956 + }, + "isolatedSum": { + "p50": 166.1119982600212, + "p90": 181.44000321626663, + "p95": 188.35200369358063, + "p99": 200.8959949016571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1161216, + "combineLogicalBytes": 1161216, + "fanoutMean": 1.265625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 95.48799693584442, + "p90": 107.35999792814255, + "p95": 110.49599945545197, + "p99": 117.08799749612808 + }, + "combine": { + "p50": 73.15199822187424, + "p90": 77.37600058317184, + "p95": 80.60800284147263, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 146.5280055999756, + "p90": 157.0879966020584, + "p95": 159.71200168132782, + "p99": 166.97600483894348 + }, + "isolatedSum": { + "p50": 168.63999515771866, + "p90": 184.7359985113144, + "p95": 191.1040022969246, + "p99": 202.5279998779297 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2279424, + "combineLogicalBytes": 2279424, + "fanoutMean": 1.2421875, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 100.09600222110748, + "p90": 110.75200140476227, + "p95": 113.63200098276138, + "p99": 121.08799815177917 + }, + "combine": { + "p50": 83.0719992518425, + "p90": 86.62399649620056, + "p95": 88.06400001049042, + "p99": 97.88800030946732 + }, + "roundtrip": { + "p50": 155.13600409030914, + "p90": 163.90399634838104, + "p95": 169.95200514793396, + "p99": 176.15999281406403 + }, + "isolatedSum": { + "p50": 183.16800147294998, + "p90": 197.37599790096283, + "p95": 201.6960009932518, + "p99": 218.9759984612465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4587520, + "combineLogicalBytes": 4587520, + "fanoutMean": 1.25, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 111.16799712181091, + "p90": 120.95999717712402, + "p95": 124.57600235939026, + "p99": 130.048006772995 + }, + "combine": { + "p50": 96.19200229644775, + "p90": 99.80800002813339, + "p95": 101.53599828481674, + "p99": 111.35999858379364 + }, + "roundtrip": { + "p50": 181.8239986896515, + "p90": 189.60000574588776, + "p95": 191.67999923229218, + "p99": 196.57599925994873 + }, + "isolatedSum": { + "p50": 207.35999941825867, + "p90": 220.76799720525742, + "p95": 226.112000644207, + "p99": 241.40800535678864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aafb63b6", + "identity": 
"gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|decode|normal|none|none|0|tuned||e9a6e5febe08793", + "colorKey": "gb300_fd039f89", + "comparisonKey": "c2780dc4de2c6420", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:17.557032+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e9a6e5febe08793", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86328125, + "eplbImbalanceAfter": 1.0003348214285714, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 85.88799834251404, + "p90": 95.32800316810608, + "p95": 99.32799637317657, + "p99": 108.12799632549286 + }, + "combine": { + "p50": 67.77600198984146, + "p90": 71.99999690055847, + "p95": 73.2479989528656, + "p99": 78.33600044250488 + }, + "roundtrip": { + "p50": 134.75200533866882, + "p90": 143.42400431632996, + "p95": 147.67999947071075, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 153.6640003323555, + "p90": 167.32800006866455, + "p95": 172.57599532604218, + "p99": 186.46399676799774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 85.50400286912918, + "p90": 96.89600020647049, + "p95": 101.72799974679947, + "p99": 107.90400207042694 + }, + "combine": { + "p50": 70.78400254249573, + "p90": 73.66400212049484, + "p95": 76.54400169849396, + "p99": 84.51200276613235 + }, + "roundtrip": { + "p50": 138.08000087738037, + "p90": 147.90399372577667, + "p95": 152.8960019350052, + "p99": 160.35200655460358 + }, + "isolatedSum": { + "p50": 156.2880054116249, + "p90": 170.56000232696533, + "p95": 178.27200144529343, + "p99": 192.4160048365593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 86.71999722719193, + "p90": 98.24000298976898, + "p95": 101.95200145244598, + "p99": 108.0000028014183 + }, + "combine": { + "p50": 71.58400118350983, + "p90": 75.42400062084198, + "p95": 79.8719972372055, + "p99": 
90.27200192213058 + }, + "roundtrip": { + "p50": 139.45600390434265, + "p90": 148.70400726795197, + "p95": 152.8320014476776, + "p99": 158.4320068359375 + }, + "isolatedSum": { + "p50": 158.30399841070175, + "p90": 173.66400361061096, + "p95": 181.8239986896515, + "p99": 198.2720047235489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 788480, + "combineLogicalBytes": 788480, + "fanoutMean": 3.4375, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 86.20800077915192, + "p90": 97.08800166845322, + "p95": 102.33599692583084, + "p99": 115.74400216341019 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 81.02399855852127, + "p95": 83.13599973917007, + "p99": 89.4400030374527 + }, + "roundtrip": { + "p50": 141.50400459766388, + "p90": 150.56000649929047, + "p95": 153.76000106334686, + "p99": 163.10399770736694 + }, + "isolatedSum": { + "p50": 159.71200168132782, + "p90": 178.1120002269745, + "p95": 185.47199666500092, + "p99": 205.18400520086288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 86.43200248479843, + "p90": 95.58399766683578, + "p95": 99.93600100278854, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 74.01599735021591, + "p90": 81.56800270080566, + "p95": 83.45600217580795, + "p99": 91.23200178146362 + }, + "roundtrip": { + "p50": 142.0159935951233, + "p90": 151.42400562763214, + "p95": 154.91199493408203, + "p99": 160.44799983501434 + }, + "isolatedSum": { + "p50": 160.44799983501434, + "p90": 177.15200036764145, + "p95": 183.3920031785965, + "p99": 200.6080001592636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3196928, + 
"combineLogicalBytes": 3196928, + "fanoutMean": 3.484375, + "recvTokensMax": 59, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 89.08800035715103, + "p90": 98.78399968147278, + "p95": 102.9760017991066, + "p99": 112.92800307273865 + }, + "combine": { + "p50": 75.29599964618683, + "p90": 83.03999900817871, + "p95": 84.25600081682205, + "p99": 92.92799979448318 + }, + "roundtrip": { + "p50": 144.44799721240997, + "p90": 154.6880006790161, + "p95": 158.78400206565857, + "p99": 165.3759926557541 + }, + "isolatedSum": { + "p50": 164.38400000333786, + "p90": 181.8239986896515, + "p95": 187.23200261592865, + "p99": 205.85600286722183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6408192, + "combineLogicalBytes": 6408192, + "fanoutMean": 3.4921875, + "recvTokensMax": 114, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 100.09600222110748, + "p90": 108.03200304508209, + "p95": 112.44799941778183, + "p99": 124.12799894809723 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 85.15200018882751, + "p95": 86.91199868917465, + "p99": 93.47199648618698 + }, + "roundtrip": { + "p50": 155.29599785804749, + "p90": 163.42400014400482, + "p95": 165.56799411773682, + "p99": 171.51999473571777 + }, + "isolatedSum": { + "p50": 181.7600056529045, + "p90": 193.1840032339096, + "p95": 199.35999810695648, + "p99": 217.5999954342842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12773376, + "combineLogicalBytes": 12773376, + "fanoutMean": 3.48046875, + "recvTokensMax": 226, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 113.63200098276138, + "p90": 122.3360002040863, + "p95": 125.15200674533844, + "p99": 135.77599823474884 + }, + 
"combine": { + "p50": 97.24800288677216, + "p90": 105.12000322341919, + "p95": 107.39199817180634, + "p99": 112.09599673748016 + }, + "roundtrip": { + "p50": 187.51999735832214, + "p90": 195.00799477100372, + "p95": 198.36799800395966, + "p99": 203.45599949359894 + }, + "isolatedSum": { + "p50": 210.88000386953354, + "p90": 227.4560034275055, + "p95": 232.54400491714478, + "p99": 247.871994972229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25661440, + "combineLogicalBytes": 25661440, + "fanoutMean": 3.49609375, + "recvTokensMax": 454, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-68d2f78a", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|decode|normal|none|none|0|tuned||e596902aaaeb56c", + "colorKey": "gb300_82cf5a40", + "comparisonKey": "bb457cdbbe6d8bed", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:45.043656+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": 
false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e596902aaaeb56c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 89.24800157546997, + "p90": 99.80800002813339, + "p95": 103.80800068378448, + "p99": 114.78400230407715 + }, + "combine": { + "p50": 69.60000097751617, + "p90": 73.47200065851212, + "p95": 75.45600086450577, + "p99": 85.1840004324913 + }, + "roundtrip": { + "p50": 139.64800536632538, + "p90": 148.70400726795197, + "p95": 152.51199901103973, + "p99": 159.36000645160675 + }, + "isolatedSum": { + "p50": 158.84800255298615, + "p90": 173.2800006866455, + "p95": 179.26400154829025, + "p99": 199.96800273656845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 89.50400352478027, + "p90": 100.99200159311295, + "p95": 104.16000336408615, + "p99": 111.42399907112122 + }, + "combine": { + "p50": 70.78400254249573, + "p90": 74.36800003051758, + "p95": 76.7040029168129, + "p99": 82.2720006108284 + }, + "roundtrip": { + "p50": 141.7279988527298, + "p90": 151.42400562763214, + "p95": 155.8080017566681, + "p99": 163.10399770736694 + }, + "isolatedSum": { + 
"p50": 160.288006067276, + "p90": 175.36000162363052, + "p95": 180.86400628089905, + "p99": 193.69599968194962 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 387072, + "combineLogicalBytes": 387072, + "fanoutMean": 3.375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 89.40800279378891, + "p90": 100.12800246477127, + "p95": 103.04000228643417, + "p99": 115.26399850845337 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 75.19999891519547, + "p95": 77.66400277614594, + "p99": 82.71999657154083 + }, + "roundtrip": { + "p50": 142.14399456977844, + "p90": 152.8960019350052, + "p95": 157.3439985513687, + "p99": 166.01599752902985 + }, + "isolatedSum": { + "p50": 160.5760008096695, + "p90": 175.32800137996674, + "p95": 180.7040050625801, + "p99": 197.9839950799942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 731136, + "combineLogicalBytes": 731136, + "fanoutMean": 3.1875, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 89.21600133180618, + "p90": 99.55199807882309, + "p95": 103.93600165843964, + "p99": 109.50399935245514 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 81.56800270080566, + "p95": 83.55200290679932, + "p99": 89.79199826717377 + }, + "roundtrip": { + "p50": 144.28800344467163, + "p90": 154.4640064239502, + "p95": 158.75199437141418, + "p99": 169.18399930000305 + }, + "isolatedSum": { + "p50": 163.1999984383583, + "p90": 181.12000077962875, + "p95": 187.48800456523895, + "p99": 199.2959976196289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1519616, + "combineLogicalBytes": 1519616, + "fanoutMean": 3.3125, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 64, + "dispatch": { + "p50": 90.55999666452408, + "p90": 101.18400305509567, + "p95": 104.12800312042236, + "p99": 111.7440015077591 + }, + "combine": { + "p50": 74.40000027418137, + "p90": 81.98399841785431, + "p95": 83.52000266313553, + "p99": 93.63199770450592 + }, + "roundtrip": { + "p50": 143.71199905872345, + "p90": 153.21600437164307, + "p95": 156.8640023469925, + "p99": 168.70400309562683 + }, + "isolatedSum": { + "p50": 164.95999693870544, + "p90": 183.16800147294998, + "p95": 187.6480057835579, + "p99": 205.37599921226501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3082240, + "combineLogicalBytes": 3082240, + "fanoutMean": 3.359375, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 91.61599725484848, + "p90": 101.56799852848053, + "p95": 105.6319996714592, + "p99": 112.57600039243698 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 83.36000144481659, + "p95": 84.25600081682205, + "p99": 93.24800223112106 + }, + "roundtrip": { + "p50": 145.60000598430634, + "p90": 155.20000457763672, + "p95": 159.39199924468994, + "p99": 163.7440025806427 + }, + "isolatedSum": { + "p50": 169.40799355506897, + "p90": 184.92799997329712, + "p95": 189.88800048828125, + "p99": 205.82400262355804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6121472, + "combineLogicalBytes": 6121472, + "fanoutMean": 3.3359375, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 101.69599950313568, + "p90": 110.17599701881409, + "p95": 114.23999816179276, + "p99": 126.27199292182922 + }, + "combine": { + "p50": 82.24000036716461, + "p90": 85.85599809885025, + "p95": 87.23200112581253, + "p99": 95.04000097513199 + }, + "roundtrip": { + "p50": 159.19999778270721, + "p90": 
167.00799763202667, + "p95": 169.79199647903442, + "p99": 175.26400089263916 + }, + "isolatedSum": { + "p50": 183.9359998703003, + "p90": 196.03199511766434, + "p95": 201.47199928760529, + "p99": 221.3119938969612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12271616, + "combineLogicalBytes": 12271616, + "fanoutMean": 3.34375, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.11999773979187, + "p90": 124.7360035777092, + "p95": 127.83999741077423, + "p99": 135.29600203037262 + }, + "combine": { + "p50": 97.6639986038208, + "p90": 104.80000078678131, + "p95": 106.72000050544739, + "p99": 110.944002866745 + }, + "roundtrip": { + "p50": 191.103994846344, + "p90": 198.62399995326996, + "p95": 203.16800475120544, + "p99": 212.3199999332428 + }, + "isolatedSum": { + "p50": 214.78399634361267, + "p90": 229.5360043644905, + "p95": 234.55999791622162, + "p99": 246.24000489711761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 507, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9f7e27b1", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|decode|normal|none|none|0|tuned||194008255dcd869", + "colorKey": "gb300_77edcf0e", + "comparisonKey": "7a29f1725c491dcd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:53.797316+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + 
"worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "194008255dcd869", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.865234375, + "eplbImbalanceAfter": 1.0003580729166668, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 92.57599711418152, + "p90": 102.91200131177902, + "p95": 109.11999642848969, + "p99": 120.19199877977371 + }, + "combine": { + "p50": 67.19999760389328, + "p90": 71.80800288915634, + "p95": 72.80000299215317, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 146.33600413799286, + "p95": 150.30400454998016, + "p99": 156.15999698638916 + }, + "isolatedSum": { + "p50": 159.7759947180748, + "p90": 174.72000420093536, + "p95": 181.91999942064285, + "p99": 198.14399629831314 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 92.38400310277939, + "p90": 102.59199887514114, + "p95": 105.59999942779541, + "p99": 112.83200234174728 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 74.36800003051758, + "p95": 76.28799974918365, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 143.0719941854477, + "p90": 152.8960019350052, + "p95": 157.18400478363037, + "p99": 163.7440025806427 + }, + "isolatedSum": { + "p50": 163.4560003876686, + "p90": 176.95999890565872, + "p95": 181.88799917697906, + "p99": 196.48000597953796 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 3, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 91.39200299978256, + "p90": 100.51199793815613, + "p95": 105.0880029797554, + "p99": 113.0559965968132 + }, + "combine": { + "p50": 72.48000055551529, + "p90": 75.48800110816956, + "p95": 77.53600180149078, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 145.05599439144135, + "p90": 153.53600680828094, + "p95": 156.95999562740326, + "p99": 165.8560037612915 + }, + "isolatedSum": { + "p50": 163.87200355529785, + "p90": 175.99999904632568, + "p95": 182.62400478124619, + "p99": 198.4959989786148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 774144, + "combineLogicalBytes": 774144, + "fanoutMean": 3.375, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 92.3520028591156, + "p90": 101.98400169610977, + "p95": 106.39999806880951, + "p99": 
115.80800265073776 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 82.49600231647491, + "p95": 83.93599838018417, + "p99": 88.41600269079208 + }, + "roundtrip": { + "p50": 147.20000326633453, + "p90": 156.54399991035461, + "p95": 159.2639982700348, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 167.36000031232834, + "p90": 184.4800040125847, + "p95": 190.33599644899368, + "p99": 204.22400534152985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 31, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 92.70399808883667, + "p90": 102.04800218343735, + "p95": 106.1440035700798, + "p99": 112.47999966144562 + }, + "combine": { + "p50": 74.81600344181061, + "p90": 82.59200304746628, + "p95": 83.96799862384796, + "p99": 87.45600283145905 + }, + "roundtrip": { + "p50": 147.10399508476257, + "p90": 154.7520011663437, + "p95": 157.50400722026825, + "p99": 165.40800034999847 + }, + "isolatedSum": { + "p50": 167.52000153064728, + "p90": 184.64000523090363, + "p95": 190.11200219392776, + "p99": 199.93600249290466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3268608, + "combineLogicalBytes": 3268608, + "fanoutMean": 3.5625, + "recvTokensMax": 60, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 93.34400296211243, + "p90": 101.98400169610977, + "p95": 105.0880029797554, + "p99": 110.78400164842606 + }, + "combine": { + "p50": 78.20799946784973, + "p90": 83.8719978928566, + "p95": 85.34400165081024, + "p99": 89.31200206279755 + }, + "roundtrip": { + "p50": 148.41599762439728, + "p90": 155.71199357509613, + "p95": 158.720001578331, + "p99": 168.73599588871002 + }, + "isolatedSum": { + "p50": 171.55200242996216, + "p90": 185.85599958896637, 
+ "p95": 190.43200463056564, + "p99": 200.0960037112236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6393856, + "combineLogicalBytes": 6393856, + "fanoutMean": 3.484375, + "recvTokensMax": 115, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 104.3199971318245, + "p90": 111.455999314785, + "p95": 116.28799885511398, + "p99": 122.65600264072418 + }, + "combine": { + "p50": 83.29600095748901, + "p90": 86.65599673986435, + "p95": 87.90399879217148, + "p99": 94.71999853849411 + }, + "roundtrip": { + "p50": 161.43999993801117, + "p90": 169.53599452972412, + "p95": 172.38399386405945, + "p99": 183.45600366592407 + }, + "isolatedSum": { + "p50": 187.6159980893135, + "p90": 198.11199605464935, + "p95": 204.19199764728546, + "p99": 217.3760011792183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13045760, + "combineLogicalBytes": 13045760, + "fanoutMean": 3.5546875, + "recvTokensMax": 234, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.79200285673141, + "p90": 126.30400061607361, + "p95": 129.95199859142303, + "p99": 138.46400380134583 + }, + "combine": { + "p50": 98.59199821949005, + "p90": 104.89600151777267, + "p95": 107.10400342941284, + "p99": 111.455999314785 + }, + "roundtrip": { + "p50": 191.71200692653656, + "p90": 199.2959976196289, + "p95": 202.43200659751892, + "p99": 208.38400721549988 + }, + "isolatedSum": { + "p50": 216.38400107622147, + "p90": 231.20000213384628, + "p95": 237.05600202083588, + "p99": 249.92000311613083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26263552, + "combineLogicalBytes": 26263552, + "fanoutMean": 3.578125, + "recvTokensMax": 469, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-599d7174", + "identity": 
"gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_566ad107", + "comparisonKey": "49fb6f194dce971d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:31.195561+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 90.2400016784668, + "p90": 101.43999755382538, + "p95": 106.59199953079224, + "p99": 283.84000062942505 + }, + "combine": { + "p50": 70.46400010585785, + "p90": 73.72800260782242, + "p95": 74.78400319814682, + "p99": 84.3840017914772 + }, + "roundtrip": { + "p50": 140.25600254535675, + "p90": 148.83199334144592, + "p95": 152.96000242233276, + "p99": 159.87199544906616 + }, + "isolatedSum": { + "p50": 160.70400178432465, + "p90": 175.1680001616478, + "p95": 181.37600272893906, + "p99": 368.22400242090225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 90.7519981265068, + "p90": 101.56799852848053, + "p95": 106.01600259542465, + "p99": 114.30399864912033 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 74.78400319814682, + "p95": 76.1599987745285, + "p99": 83.03999900817871 + }, + "roundtrip": { + "p50": 142.62400567531586, + "p90": 154.2080044746399, + "p95": 159.16800498962402, + "p99": 170.97599804401398 + }, + "isolatedSum": { + "p50": 161.9199961423874, + "p90": 176.35200172662735, + "p95": 182.17600136995316, + "p99": 197.34399765729904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 91.23200178146362, + "p90": 101.6639992594719, + "p95": 107.39199817180634, + "p99": 118.367999792099 + }, + "combine": { + "p50": 71.9359964132309, + "p90": 75.16799867153168, + "p95": 77.47200131416321, + "p99": 84.60800349712372 + }, + 
"roundtrip": { + "p50": 143.10400187969208, + "p90": 153.85599434375763, + "p95": 157.3439985513687, + "p99": 166.75199568271637 + }, + "isolatedSum": { + "p50": 163.16799819469452, + "p90": 176.83199793100357, + "p95": 184.86399948596954, + "p99": 202.97600328922272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 90.68799763917923, + "p90": 100.3199964761734, + "p95": 105.53599894046783, + "p99": 427.42401361465454 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 80.1599994301796, + "p95": 83.23200047016144, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 146.01600170135498, + "p90": 155.29599785804749, + "p95": 158.65600109100342, + "p99": 167.26399958133698 + }, + "isolatedSum": { + "p50": 163.93599659204483, + "p90": 180.479995906353, + "p95": 188.76799941062927, + "p99": 514.2720118165016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 90.43200314044952, + "p90": 101.15200281143188, + "p95": 105.12000322341919, + "p99": 112.31999844312668 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 82.91199803352356, + "p95": 84.09599959850311, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 146.40000462532043, + "p90": 156.031996011734, + "p95": 159.32799875736237, + "p99": 164.95999693870544 + }, + "isolatedSum": { + "p50": 165.44000059366226, + "p90": 184.06400084495544, + "p95": 189.2160028219223, + "p99": 204.19199764728546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, 
+ "fanoutMean": 2.625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.19200164079666, + "p90": 101.69599950313568, + "p95": 105.50399869680405, + "p99": 111.39199882745743 + }, + "combine": { + "p50": 76.86399668455124, + "p90": 83.83999764919281, + "p95": 84.73599702119827, + "p99": 91.39200299978256 + }, + "roundtrip": { + "p50": 147.2959965467453, + "p90": 156.12800419330597, + "p95": 159.10400450229645, + "p99": 167.00799763202667 + }, + "isolatedSum": { + "p50": 169.0559983253479, + "p90": 185.5359971523285, + "p95": 190.23999571800232, + "p99": 202.78400182724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 101.9200012087822, + "p90": 109.43999886512756, + "p95": 114.04799669981003, + "p99": 121.98399752378464 + }, + "combine": { + "p50": 83.13599973917007, + "p90": 86.56000345945358, + "p95": 87.67999708652496, + "p99": 92.8959995508194 + }, + "roundtrip": { + "p50": 157.60000050067902, + "p90": 165.50399363040924, + "p95": 169.5680022239685, + "p99": 175.7120043039322 + }, + "isolatedSum": { + "p50": 185.05600094795227, + "p90": 196.00000232458115, + "p95": 201.727993786335, + "p99": 214.87999707460403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 114.68800157308578, + "p90": 123.55200201272964, + "p95": 126.3359934091568, + "p99": 129.40800189971924 + }, + "combine": { + "p50": 96.73599898815155, 
+ "p90": 100.47999769449234, + "p95": 103.71199995279312, + "p99": 109.92000252008438 + }, + "roundtrip": { + "p50": 188.83199989795685, + "p90": 196.16000354290009, + "p95": 199.0080028772354, + "p99": 202.94399559497833 + }, + "isolatedSum": { + "p50": 211.42400056123734, + "p90": 224.03199970722198, + "p95": 230.04799336194992, + "p99": 239.32800441980362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-93650bf6", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_7a5ea657", + "comparisonKey": "e20510ffd3dea1d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:40.199843+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": 
false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 94.04800087213516, + "p90": 103.35999727249146, + "p95": 107.29599744081497, + "p99": 117.08799749612808 + }, + "combine": { + "p50": 71.03999704122543, + "p90": 75.07199794054031, + "p95": 78.015998005867, + "p99": 85.88799834251404 + }, + "roundtrip": { + "p50": 144.16000247001648, + "p90": 153.72799336910248, + "p95": 157.05600380897522, + "p99": 164.09599781036377 + }, + "isolatedSum": { + "p50": 165.0879979133606, + "p90": 178.43199521303177, + "p95": 185.31199544668198, + "p99": 202.97599583864212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 94.46399658918381, + "p90": 104.92800176143646, + "p95": 109.50399935245514, + "p99": 116.83200299739838 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 76.25599950551987, + "p95": 79.26400005817413, + "p99": 82.8159973025322 + }, + "roundtrip": { + "p50": 145.82400023937225, + "p90": 153.1199961900711, + "p95": 156.25600516796112, + "p99": 164.32000696659088 + 
}, + "isolatedSum": { + "p50": 166.87999665737152, + "p90": 181.18400126695633, + "p95": 188.76799941062927, + "p99": 199.64800029993057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 94.59199756383896, + "p90": 104.60799932479858, + "p95": 108.47999900579453, + "p99": 113.82400244474411 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 79.0719985961914, + "p95": 81.727996468544, + "p99": 87.20000088214874 + }, + "roundtrip": { + "p50": 149.27999675273895, + "p90": 158.01599621772766, + "p95": 160.76800227165222, + "p99": 167.61599481105804 + }, + "isolatedSum": { + "p50": 168.09599846601486, + "p90": 183.67999792099, + "p95": 190.20799547433853, + "p99": 201.02400332689285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 15, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 95.20000219345093, + "p90": 105.47199845314026, + "p95": 107.744000852108, + "p99": 114.3999993801117 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 83.3280012011528, + "p95": 85.15200018882751, + "p99": 92.0960009098053 + }, + "roundtrip": { + "p50": 150.56000649929047, + "p90": 159.2639982700348, + "p95": 162.27200627326965, + "p99": 169.5999950170517 + }, + "isolatedSum": { + "p50": 172.12799936532974, + "p90": 188.79999965429306, + "p95": 192.89600104093552, + "p99": 206.496000289917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 30, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 95.71199864149094, + "p90": 106.1440035700798, + "p95": 109.3439981341362, + "p99": 116.48000031709671 + }, + "combine": { + "p50": 75.96799731254578, + "p90": 82.5280025601387, + "p95": 84.41600203514099, + "p99": 88.79999816417694 + }, + "roundtrip": { + "p50": 149.1519957780838, + "p90": 158.27199816703796, + "p95": 161.47199273109436, + "p99": 168.83200407028198 + }, + "isolatedSum": { + "p50": 171.6799959540367, + "p90": 188.6720061302185, + "p95": 193.7600001692772, + "p99": 205.27999848127365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 57, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 96.3200032711029, + "p90": 106.4319983124733, + "p95": 109.47199910879135, + "p99": 119.23199892044067 + }, + "combine": { + "p50": 79.3600007891655, + "p90": 84.44800227880478, + "p95": 85.21600067615509, + "p99": 94.40000355243683 + }, + "roundtrip": { + "p50": 150.81599354743958, + "p90": 159.96800363063812, + "p95": 162.9759967327118, + "p99": 170.30400037765503 + }, + "isolatedSum": { + "p50": 175.6800040602684, + "p90": 190.88000059127808, + "p95": 194.68799978494644, + "p99": 213.6320024728775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 115, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 107.13600367307663, + "p90": 115.07199704647064, + "p95": 118.43200027942657, + "p99": 127.36000120639801 + }, + "combine": { + "p50": 83.45600217580795, + "p90": 87.8399983048439, + "p95": 90.08000046014786, + "p99": 97.56799787282944 + }, + "roundtrip": { + "p50": 162.56000101566315, + "p90": 
170.68800330162048, + "p95": 173.75999689102173, + "p99": 182.3039948940277 + }, + "isolatedSum": { + "p50": 190.59200584888458, + "p90": 202.91199535131454, + "p95": 208.51200073957443, + "p99": 224.92799907922745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 232, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 120.35199999809265, + "p90": 128.1919926404953, + "p95": 131.00799918174744, + "p99": 138.62399756908417 + }, + "combine": { + "p50": 99.67999905347824, + "p90": 106.01600259542465, + "p95": 107.71200060844421, + "p99": 115.23199826478958 + }, + "roundtrip": { + "p50": 192.89599359035492, + "p90": 201.53599977493286, + "p95": 205.05599677562714, + "p99": 212.22400665283203 + }, + "isolatedSum": { + "p50": 220.0319990515709, + "p90": 234.20799523591995, + "p95": 238.71999979019165, + "p99": 253.85599583387375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 462, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a16ed676", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_f7e2f257", + "comparisonKey": "0c26ad3e2abbf968", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:22.028028+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + 
"worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 93.34400296211243, + "p90": 103.71199995279312, + "p95": 107.39199817180634, + "p99": 118.04799735546112 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 74.72000271081924, + "p95": 76.35200023651123, + "p99": 82.97599852085114 + }, + "roundtrip": { + "p50": 142.36800372600555, + "p90": 153.08800339698792, + "p95": 156.99200332164764, + "p99": 167.9680049419403 + }, + "isolatedSum": { + "p50": 164.06400501728058, + "p90": 178.43200266361237, + "p95": 183.74399840831757, + "p99": 201.02399587631226 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 92.92799979448318, + "p90": 102.20800340175629, + "p95": 106.20799660682678, + "p99": 116.19199812412262 + }, + "combine": { + "p50": 72.1919983625412, + "p90": 76.09599828720093, + "p95": 78.3040001988411, + "p99": 83.90399813652039 + }, + "roundtrip": { + "p50": 144.19199526309967, + "p90": 154.2080044746399, + "p95": 158.36800634860992, + "p99": 167.00799763202667 + }, + "isolatedSum": { + "p50": 165.11999815702438, + "p90": 178.30400168895721, + "p95": 184.51199680566788, + "p99": 200.095996260643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 94.08000111579895, + "p90": 104.12800312042236, + "p95": 108.86400192975998, + "p99": 118.56000125408173 + }, + "combine": { + "p50": 73.27999919652939, + "p90": 76.19199901819229, + "p95": 77.47200131416321, + "p99": 82.36800134181976 + }, + "roundtrip": { + "p50": 145.63199877738953, + "p90": 155.16799688339233, + "p95": 159.93599593639374, + "p99": 167.04000532627106 + }, + "isolatedSum": { + "p50": 167.36000031232834, + "p90": 180.32000213861465, + "p95": 186.3360032439232, + "p99": 200.9280025959015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 15, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 94.78399902582169, + "p90": 104.86400127410889, + "p95": 108.44799876213074, + 
"p99": 121.37600034475327 + }, + "combine": { + "p50": 75.42400062084198, + "p90": 82.94399827718735, + "p95": 85.21600067615509, + "p99": 91.16800129413605 + }, + "roundtrip": { + "p50": 148.19200336933136, + "p90": 156.54399991035461, + "p95": 159.8079949617386, + "p99": 167.13599860668182 + }, + "isolatedSum": { + "p50": 170.20799964666367, + "p90": 187.80799955129623, + "p95": 193.66399943828583, + "p99": 212.5440016388893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 30, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 92.19200164079666, + "p90": 100.76799988746643, + "p95": 105.31199723482132, + "p99": 112.5440001487732 + }, + "combine": { + "p50": 75.16799867153168, + "p90": 82.04799890518188, + "p95": 84.22400057315826, + "p99": 91.13600105047226 + }, + "roundtrip": { + "p50": 147.45600521564484, + "p90": 156.00000321865082, + "p95": 159.2320054769516, + "p99": 166.46400094032288 + }, + "isolatedSum": { + "p50": 167.36000031232834, + "p90": 182.81599879264832, + "p95": 189.53599780797958, + "p99": 203.68000119924545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 57, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 96.09600156545639, + "p90": 105.18400371074677, + "p95": 107.77600109577179, + "p99": 114.3679991364479 + }, + "combine": { + "p50": 77.05599814653397, + "p90": 84.51200276613235, + "p95": 86.27200126647949, + "p99": 90.36800265312195 + }, + "roundtrip": { + "p50": 149.82399344444275, + "p90": 157.85600244998932, + "p95": 160.16000509262085, + "p99": 168.16000640392303 + }, + "isolatedSum": { + "p50": 173.15199971199036, + "p90": 
189.69600647687912, + "p95": 194.04800236225128, + "p99": 204.73600178956985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 115, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 105.53599894046783, + "p90": 112.31999844312668, + "p95": 115.99999666213989, + "p99": 126.3359934091568 + }, + "combine": { + "p50": 84.19200032949448, + "p90": 87.90399879217148, + "p95": 89.63199704885483, + "p99": 100.5759984254837 + }, + "roundtrip": { + "p50": 161.69600188732147, + "p90": 167.9680049419403, + "p95": 170.9440052509308, + "p99": 179.87200617790222 + }, + "isolatedSum": { + "p50": 189.7279992699623, + "p90": 200.22399723529816, + "p95": 205.63199371099472, + "p99": 226.9119918346405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 232, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.81600320339203, + "p90": 126.8479973077774, + "p95": 130.87999820709229, + "p99": 136.4160031080246 + }, + "combine": { + "p50": 99.0080013871193, + "p90": 103.45599800348282, + "p95": 107.35999792814255, + "p99": 111.90400272607803 + }, + "roundtrip": { + "p50": 191.74399971961975, + "p90": 199.26400482654572, + "p95": 202.39999890327454, + "p99": 208.8319957256317 + }, + "isolatedSum": { + "p50": 217.82400459051132, + "p90": 230.30399531126022, + "p95": 238.23999613523483, + "p99": 248.32000583410263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 462, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bde48e41", 
+ "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_960df5b0", + "comparisonKey": "da4fd3097f81f9b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:38.354717+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 70.81600278615952, + "p90": 79.68000322580338, + "p95": 82.75199681520462, + "p99": 90.65599739551544 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 73.69600236415863, + "p95": 75.23199915885925, + "p99": 81.4720019698143 + }, + "roundtrip": { + "p50": 122.20799922943115, + "p90": 129.4720023870468, + "p95": 133.7919980287552, + "p99": 140.47999680042267 + }, + "isolatedSum": { + "p50": 141.1520019173622, + "p90": 153.376005589962, + "p95": 157.98399597406387, + "p99": 172.12799936532974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 70.97599655389786, + "p90": 79.77599650621414, + "p95": 83.23200047016144, + "p99": 88.25600147247314 + }, + "combine": { + "p50": 72.15999811887741, + "p90": 75.32799988985062, + "p95": 77.05599814653397, + "p99": 86.43200248479843 + }, + "roundtrip": { + "p50": 123.19999933242798, + "p90": 132.57600367069244, + "p95": 136.54400408267975, + "p99": 143.61600577831268 + }, + "isolatedSum": { + "p50": 143.13599467277527, + "p90": 155.10399639606476, + "p95": 160.2879986166954, + "p99": 174.68800395727158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 70.94399631023407, + "p90": 79.19999957084656, + "p95": 83.29600095748901, + "p99": 88.73599767684937 + }, + "combine": { + "p50": 72.54400104284286, + "p90": 75.45600086450577, + "p95": 76.64000242948532, + "p99": 84.19200032949448 + }, + "roundtrip": { 
+ "p50": 124.4800016283989, + "p90": 131.3920021057129, + "p95": 135.71199774742126, + "p99": 144.1279947757721 + }, + "isolatedSum": { + "p50": 143.48799735307693, + "p90": 154.65600043535233, + "p95": 159.93600338697433, + "p99": 172.92799800634384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 71.80800288915634, + "p90": 80.12799918651581, + "p95": 82.84799754619598, + "p99": 88.67199718952179 + }, + "combine": { + "p50": 74.46400076150894, + "p90": 82.40000158548355, + "p95": 83.83999764919281, + "p99": 88.48000317811966 + }, + "roundtrip": { + "p50": 128.25599312782288, + "p90": 137.1839940547943, + "p95": 141.6960060596466, + "p99": 487.87200450897217 + }, + "isolatedSum": { + "p50": 146.27200365066528, + "p90": 162.52800077199936, + "p95": 166.6879951953888, + "p99": 177.15200036764145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 72.83200323581696, + "p90": 81.7599967122078, + "p95": 85.56800335645676, + "p99": 93.75999867916107 + }, + "combine": { + "p50": 74.30399954319, + "p90": 81.727996468544, + "p95": 83.42400193214417, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 128.76799702644348, + "p90": 135.903999209404, + "p95": 138.97599279880524, + "p99": 146.40000462532043 + }, + "isolatedSum": { + "p50": 147.13600277900696, + "p90": 163.4879931807518, + "p95": 168.99200528860092, + "p99": 183.29599499702454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + 
"recvTokensMax": 58, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 77.63200253248215, + "p90": 83.67999643087387, + "p95": 86.43200248479843, + "p99": 95.04000097513199 + }, + "combine": { + "p50": 76.06399804353714, + "p90": 84.19200032949448, + "p95": 85.75999736785889, + "p99": 89.56799656152725 + }, + "roundtrip": { + "p50": 131.67999684810638, + "p90": 138.20800185203552, + "p95": 142.4960047006607, + "p99": 148.19200336933136 + }, + "isolatedSum": { + "p50": 153.6960005760193, + "p90": 167.87199676036835, + "p95": 172.19199985265732, + "p99": 184.60799753665924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 85.24800091981888, + "p90": 91.90399944782257, + "p95": 94.27200257778168, + "p99": 101.40799731016159 + }, + "combine": { + "p50": 82.94399827718735, + "p90": 86.30400151014328, + "p95": 87.90399879217148, + "p99": 94.7519987821579 + }, + "roundtrip": { + "p50": 144.3520039319992, + "p90": 150.87999403476715, + "p95": 153.4080058336258, + "p99": 159.87199544906616 + }, + "isolatedSum": { + "p50": 168.19199919700623, + "p90": 178.20800095796585, + "p95": 182.17600136995316, + "p99": 196.1599960923195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 103.32799702882767, + "p90": 110.23999750614166, + "p95": 113.0559965968132, + "p99": 122.40000069141388 + }, + "combine": { + "p50": 98.52799773216248, + "p90": 
104.80000078678131, + "p95": 107.4879989027977, + "p99": 111.87200248241425 + }, + "roundtrip": { + "p50": 174.112007021904, + "p90": 181.7920058965683, + "p95": 184.12800133228302, + "p99": 189.7280067205429 + }, + "isolatedSum": { + "p50": 201.85599476099014, + "p90": 215.03999829292297, + "p95": 220.5439954996109, + "p99": 234.27200317382812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60952a36", + "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_9dd2501d", + "comparisonKey": "f3cf905a7c031b25", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:58.352336+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + 
"nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 42.87999868392944, + "p90": 51.61599814891815, + "p95": 52.639998495578766, + "p99": 55.615998804569244 + }, + "combine": { + "p50": 37.28000074625015, + "p90": 43.455999344587326, + "p95": 45.823998749256134, + "p99": 48.54400083422661 + }, + "roundtrip": { + "p50": 62.84800171852112, + "p90": 67.00800359249115, + "p95": 68.25599819421768, + "p99": 72.35199958086014 + }, + "isolatedSum": { + "p50": 80.1599994301796, + "p90": 95.07199749350548, + "p95": 98.4639972448349, + "p99": 104.15999963879585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 42.87999868392944, + "p90": 51.42400041222572, + "p95": 52.15999856591225, + "p99": 56.063998490571976 + }, + "combine": { + "p50": 37.151999771595, + "p90": 43.23200136423111, + "p95": 45.24800181388855, + "p99": 49.82399940490723 + }, + "roundtrip": { + "p50": 63.74400109052658, + "p90": 67.61600077152252, + "p95": 68.31999868154526, + "p99": 72.9919970035553 + }, + "isolatedSum": { + "p50": 80.03199845552444, + "p90": 94.65600177645683, + "p95": 97.4080003798008, + 
"p99": 105.8879978954792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 42.97599941492081, + "p90": 51.58400163054466, + "p95": 52.44800075888634, + "p99": 56.63999915122986 + }, + "combine": { + "p50": 37.151999771595, + "p90": 40.95999896526337, + "p95": 45.152001082897186, + "p99": 48.287998884916306 + }, + "roundtrip": { + "p50": 64.09599632024765, + "p90": 68.12799721956253, + "p95": 70.8480030298233, + "p99": 76.9599974155426 + }, + "isolatedSum": { + "p50": 80.12799918651581, + "p90": 92.54400059580803, + "p95": 97.60000184178352, + "p99": 104.92799803614616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 43.87199878692627, + "p90": 52.06400156021118, + "p95": 52.73599922657013, + "p99": 59.4559982419014 + }, + "combine": { + "p50": 36.67199984192848, + "p90": 40.64000025391579, + "p95": 44.99199986457825, + "p99": 47.359999269247055 + }, + "roundtrip": { + "p50": 64.70400094985962, + "p90": 67.80800223350525, + "p95": 69.60000097751617, + "p99": 76.4480009675026 + }, + "isolatedSum": { + "p50": 80.54399862885475, + "p90": 92.70400181412697, + "p95": 97.72799909114838, + "p99": 106.81599751114845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 45.40799930691719, + "p90": 52.86400020122528, + "p95": 
53.727999329566956, + "p99": 56.703999638557434 + }, + "combine": { + "p50": 36.99199855327606, + "p90": 40.95999896526337, + "p95": 45.3759990632534, + "p99": 48.448000103235245 + }, + "roundtrip": { + "p50": 65.76000154018402, + "p90": 69.34399902820587, + "p95": 71.71200215816498, + "p99": 78.43200117349625 + }, + "isolatedSum": { + "p50": 82.39999786019325, + "p90": 93.82399916648865, + "p95": 99.10399839282036, + "p99": 105.15199974179268 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 44.83199864625931, + "p90": 52.480001002550125, + "p95": 53.568001836538315, + "p99": 55.84000051021576 + }, + "combine": { + "p50": 40.22400081157684, + "p90": 48.576001077890396, + "p95": 59.007998555898666, + "p99": 63.80800157785416 + }, + "roundtrip": { + "p50": 67.26399809122086, + "p90": 73.91999661922455, + "p95": 75.83999633789062, + "p99": 80.03199845552444 + }, + "isolatedSum": { + "p50": 85.05599945783615, + "p90": 101.05600208044052, + "p95": 112.57600039243698, + "p99": 119.64800208806992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 49.92000013589859, + "p90": 54.016001522541046, + "p95": 55.03999814391136, + "p99": 59.20000001788139 + }, + "combine": { + "p50": 48.0320006608963, + "p90": 50.04800111055374, + "p95": 50.912000238895416, + "p99": 54.23999950289726 + }, + "roundtrip": { + "p50": 78.75200361013412, + "p90": 83.67999643087387, + "p95": 86.65599673986435, + "p99": 89.82399851083755 + }, + "isolatedSum": { + "p50": 97.95200079679489, + 
"p90": 104.06400263309479, + "p95": 105.95199838280678, + "p99": 113.43999952077866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 59.20000001788139, + "p90": 65.8240020275116, + "p95": 67.23199784755707, + "p99": 70.49600034952164 + }, + "combine": { + "p50": 58.88000130653381, + "p90": 61.184000223875046, + "p95": 62.49599903821945, + "p99": 67.71200150251389 + }, + "roundtrip": { + "p50": 95.90400010347366, + "p90": 102.08000242710114, + "p95": 103.39199751615524, + "p99": 107.96800255775452 + }, + "isolatedSum": { + "p50": 118.0800013244152, + "p90": 127.00800225138664, + "p95": 129.72799688577652, + "p99": 138.20800185203552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4ce085c6", + "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_fcaebdfd", + "comparisonKey": "f8e04ccf9b668ecf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:10.612948+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 42.24000126123428, + "p90": 50.84799975156784, + "p95": 52.000001072883606, + "p99": 54.30399999022484 + }, + "combine": { + "p50": 37.28000074625015, + "p90": 44.83199864625931, + "p95": 46.08000069856644, + "p99": 49.56800118088722 + }, + "roundtrip": { + "p50": 63.231997191905975, + "p90": 66.68800115585327, + "p95": 67.80800223350525, + "p99": 71.9040036201477 + }, + "isolatedSum": { + "p50": 79.52000200748444, + "p90": 95.67999839782715, + "p95": 98.08000177145004, + "p99": 103.87200117111206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 42.208001017570496, + "p90": 50.23999884724617, + "p95": 51.231998950242996, + "p99": 54.655998945236206 + }, + "combine": { + "p50": 36.73600032925606, + "p90": 42.62400045990944, + "p95": 45.40799930691719, + "p99": 47.488000243902206 + }, + "roundtrip": { + "p50": 63.26399743556976, + "p90": 67.07199662923813, + "p95": 68.60800087451935, + "p99": 74.8480036854744 + }, + "isolatedSum": { + "p50": 78.94400134682655, + "p90": 92.86399930715561, + "p95": 96.63999825716019, + "p99": 102.14399918913841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 43.58400031924248, + "p90": 51.93600058555603, + "p95": 53.279999643564224, + "p99": 60.03199890255928 + }, + "combine": { + "p50": 36.80000081658363, + "p90": 39.903998374938965, + "p95": 42.49599948525429, + "p99": 47.648001462221146 + }, + "roundtrip": { + "p50": 64.15999680757523, + "p90": 67.96800345182419, + "p95": 69.05599683523178, + "p99": 76.86399668455124 + }, + "isolatedSum": { + "p50": 80.38400113582611, + "p90": 91.839998960495, + "p95": 95.77599912881851, + "p99": 107.68000036478043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 44.03200000524521, + "p90": 52.15999856591225, + "p95": 52.960000932216644, + "p99": 60.864001512527466 + }, + "combine": { + "p50": 36.768000572919846, + "p90": 43.87199878692627, + "p95": 45.504000037908554, + "p99": 47.520000487565994 + }, + "roundtrip": { + "p50": 64.25599753856659, + 
"p90": 67.35999882221222, + "p95": 68.89600306749344, + "p99": 75.6480023264885 + }, + "isolatedSum": { + "p50": 80.80000057816505, + "p90": 96.03199735283852, + "p95": 98.4640009701252, + "p99": 108.38400200009346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 44.83199864625931, + "p90": 52.51200124621391, + "p95": 53.37600037455559, + "p99": 60.19200012087822 + }, + "combine": { + "p50": 36.639999598264694, + "p90": 40.511999279260635, + "p95": 44.92799937725067, + "p99": 48.51200059056282 + }, + "roundtrip": { + "p50": 65.76000154018402, + "p90": 68.92800331115723, + "p95": 70.62400132417679, + "p99": 76.25599950551987 + }, + "isolatedSum": { + "p50": 81.471998244524, + "p90": 93.02400052547455, + "p95": 98.30399975180626, + "p99": 108.70400071144104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 46.9760000705719, + "p90": 52.70399898290634, + "p95": 53.82400006055832, + "p99": 61.95199862122536 + }, + "combine": { + "p50": 38.94399851560593, + "p90": 46.879999339580536, + "p95": 47.968000173568726, + "p99": 50.144001841545105 + }, + "roundtrip": { + "p50": 66.91200286149979, + "p90": 71.10399752855301, + "p95": 74.07999783754349, + "p99": 77.85599678754807 + }, + "isolatedSum": { + "p50": 85.91999858617783, + "p90": 99.58399832248688, + "p95": 101.79200023412704, + "p99": 112.09600046277046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 51.00800096988678, + "p90": 54.17599901556969, + "p95": 55.615998804569244, + "p99": 61.85600161552429 + }, + "combine": { + "p50": 47.42399975657463, + "p90": 49.92000013589859, + "p95": 51.13599821925163, + "p99": 58.400001376867294 + }, + "roundtrip": { + "p50": 78.33600044250488, + "p90": 81.727996468544, + "p95": 84.19200032949448, + "p99": 88.639996945858 + }, + "isolatedSum": { + "p50": 98.43200072646141, + "p90": 104.09599915146828, + "p95": 106.75199702382088, + "p99": 120.25600299239159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 62.752000987529755, + "p90": 65.50399959087372, + "p95": 66.56000018119812, + "p99": 75.03999769687653 + }, + "combine": { + "p50": 58.49599838256836, + "p90": 60.5119988322258, + "p95": 61.503998935222626, + "p99": 64.03200328350067 + }, + "roundtrip": { + "p50": 95.74399888515472, + "p90": 102.30399668216705, + "p95": 103.55199873447418, + "p99": 105.6319996714592 + }, + "isolatedSum": { + "p50": 121.24799937009811, + "p90": 126.01599842309952, + "p95": 128.06399911642075, + "p99": 139.0720009803772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ec7f40cf", + "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||6c4175e2b7b86cb", + "colorKey": "gb300_759e5033", + "comparisonKey": "13fe8a6a3a3971eb", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T09:06:17.832107+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6c4175e2b7b86cb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 330.3680121898651, + "p90": 346.0800051689148, + "p95": 350.14399886131287, + "p99": 362.11198568344116 + }, + "combine": { + "p50": 
57.920001447200775, + "p90": 61.3120011985302, + "p95": 62.97600269317627, + "p99": 68.57600063085556 + }, + "roundtrip": { + "p50": 367.93598532676697, + "p90": 383.4559917449951, + "p95": 387.2320055961609, + "p99": 393.8240110874176 + }, + "isolatedSum": { + "p50": 388.2880136370659, + "p90": 407.392006367445, + "p95": 413.12000155448914, + "p99": 430.6879863142967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 65536, + "combineLogicalBytes": 131072, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 330.78399300575256, + "p90": 347.3280072212219, + "p95": 352.4160087108612, + "p99": 374.08000230789185 + }, + "combine": { + "p50": 58.43200162053108, + "p90": 61.824001371860504, + "p95": 63.45599889755249, + "p99": 73.18399846553802 + }, + "roundtrip": { + "p50": 369.79201436042786, + "p90": 386.01601123809814, + "p95": 390.52799344062805, + "p99": 399.3920087814331 + }, + "isolatedSum": { + "p50": 389.21599462628365, + "p90": 409.1520085930824, + "p95": 415.8720076084137, + "p99": 447.26400077342987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 122880, + "combineLogicalBytes": 245760, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 332.92800188064575, + "p90": 351.83998942375183, + "p95": 356.7039966583252, + "p99": 365.1840090751648 + }, + "combine": { + "p50": 59.55199897289276, + "p90": 63.13599646091461, + "p95": 64.60800021886826, + "p99": 72.92799651622772 + }, + "roundtrip": { + "p50": 371.0080087184906, + "p90": 388.5439932346344, + "p95": 392.09601283073425, + "p99": 413.2800102233887 + }, + "isolatedSum": { + "p50": 392.4800008535385, + "p90": 414.97598588466644, + "p95": 421.31199687719345, + "p99": 438.1120055913925 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 245760, + "combineLogicalBytes": 491520, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 331.64799213409424, + "p90": 349.92000460624695, + "p95": 353.59999537467957, + "p99": 365.08798599243164 + }, + "combine": { + "p50": 60.80000102519989, + "p90": 64.35199826955795, + "p95": 67.74400174617767, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 369.9199855327606, + "p90": 384.92798805236816, + "p95": 389.0239894390106, + "p99": 404.4159948825836 + }, + "isolatedSum": { + "p50": 392.4479931592941, + "p90": 414.2720028758049, + "p95": 421.34399712085724, + "p99": 443.7439888715744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 479232, + "combineLogicalBytes": 958464, + "fanoutMean": 3.65625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 330.27198910713196, + "p90": 349.1840064525604, + "p95": 355.103999376297, + "p99": 368.0959939956665 + }, + "combine": { + "p50": 60.736000537872314, + "p90": 64.57599997520447, + "p95": 66.81600213050842, + "p99": 74.87999647855759 + }, + "roundtrip": { + "p50": 370.59199810028076, + "p90": 386.04798913002014, + "p95": 388.41599225997925, + "p99": 407.3919951915741 + }, + "isolatedSum": { + "p50": 391.0079896450043, + "p90": 413.7600064277649, + "p95": 421.9200015068054, + "p99": 442.9759904742241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 942080, + "combineLogicalBytes": 1884160, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 332.12798833847046, + "p90": 348.80000352859497, + "p95": 
353.1840145587921, + "p99": 361.1519932746887 + }, + "combine": { + "p50": 61.69600039720535, + "p90": 65.2799978852272, + "p95": 68.03199648857117, + "p99": 76.7040029168129 + }, + "roundtrip": { + "p50": 372.0960021018982, + "p90": 387.7120018005371, + "p95": 392.2559916973114, + "p99": 401.12000703811646 + }, + "isolatedSum": { + "p50": 393.8239887356758, + "p90": 414.0800014138222, + "p95": 421.2160110473633, + "p99": 437.8559961915016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1884160, + "combineLogicalBytes": 3768320, + "fanoutMean": 3.59375, + "recvTokensMax": 121, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 336.41600608825684, + "p90": 356.1280071735382, + "p95": 360.6080114841461, + "p99": 379.07201051712036 + }, + "combine": { + "p50": 65.5359998345375, + "p90": 69.34399902820587, + "p95": 71.29599899053574, + "p99": 79.45600152015686 + }, + "roundtrip": { + "p50": 375.93600153923035, + "p90": 391.61598682403564, + "p95": 396.06401324272156, + "p99": 406.0159921646118 + }, + "isolatedSum": { + "p50": 401.95200592279434, + "p90": 425.4720062017441, + "p95": 431.90401047468185, + "p99": 458.5280120372772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3792896, + "combineLogicalBytes": 7585792, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 334.3679904937744, + "p90": 351.8719971179962, + "p95": 356.03201389312744, + "p99": 365.9839928150177 + }, + "combine": { + "p50": 77.02399790287018, + "p90": 80.64000308513641, + "p95": 82.36800134181976, + "p99": 90.97599983215332 + }, + "roundtrip": { + "p50": 385.8239948749542, + "p90": 403.51998805999756, + "p95": 406.43200278282166, + "p99": 413.37600350379944 + }, + "isolatedSum": { + "p50": 411.3919883966446, + 
"p90": 432.51200020313263, + "p95": 438.4000152349472, + "p99": 456.959992647171 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7647232, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-298cf1e0", + "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||3bda3dd7d4e88bf", + "colorKey": "gb300_759e5033", + "comparisonKey": "1dc9dc2a7d2bf3a4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:07.973319+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3bda3dd7d4e88bf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 341.37600660324097, + "p90": 359.9039912223816, + "p95": 366.3040101528168, + "p99": 378.84798645973206 + }, + "combine": { + "p50": 61.59999966621399, + "p90": 65.2799978852272, + "p95": 66.94400310516357, + "p99": 73.44000041484833 + }, + "roundtrip": { + "p50": 382.4000060558319, + "p90": 398.0480134487152, + "p95": 403.29599380493164, + "p99": 413.4719967842102 + }, + "isolatedSum": { + "p50": 402.97600626945496, + "p90": 425.1839891076088, + "p95": 433.24801325798035, + "p99": 452.2879868745804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76800, + "combineLogicalBytes": 153600, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 342.24000573158264, + "p90": 362.2080087661743, + "p95": 369.4080114364624, + "p99": 394.75199580192566 + }, + "combine": { + "p50": 63.45599889755249, + "p90": 66.75200164318085, + "p95": 67.96800345182419, + "p99": 73.44000041484833 + }, + "roundtrip": { + "p50": 384.19198989868164, + "p90": 400.160014629364, + "p95": 404.2240083217621, + "p99": 411.327987909317 + }, + "isolatedSum": { + "p50": 405.69600462913513, + "p90": 428.96001040935516, + "p95": 437.3760148882866, + "p99": 468.191996216774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 153600, + "combineLogicalBytes": 307200, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 342.6559865474701, + "p90": 362.9760146141052, + "p95": 367.35999584198, + "p99": 378.04800271987915 + }, + "combine": { + "p50": 63.551999628543854, + "p90": 67.23199784755707, + "p95": 68.28799843788147, + "p99": 72.67200201749802 + }, + "roundtrip": { + "p50": 385.888010263443, + "p90": 403.23200821876526, + "p95": 408.4799885749817, + "p99": 421.60001397132874 + }, + "isolatedSum": { + "p50": 406.20798617601395, + "p90": 430.2080124616623, + "p95": 435.64799427986145, + "p99": 450.72000473737717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 302080, + "combineLogicalBytes": 604160, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 344.4480001926422, + "p90": 364.8639917373657, + "p95": 370.88000774383545, + "p99": 384.0000033378601 + }, + "combine": { + "p50": 65.63200056552887, + "p90": 69.43999975919724, + "p95": 70.592001080513, + "p99": 75.48800110816956 + }, + "roundtrip": { + "p50": 384.44799184799194, + "p90": 403.3600091934204, + "p95": 407.51999616622925, + "p99": 421.34401202201843 + }, + "isolatedSum": { + "p50": 410.0800007581711, + "p90": 434.30399149656296, + "p95": 441.47200882434845, + "p99": 459.48800444602966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 604160, + "combineLogicalBytes": 1208320, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 344.0319895744324, + "p90": 364.5760118961334, + "p95": 370.1440095901489, + "p99": 379.7439932823181 + }, + "combine": { + "p50": 66.04799628257751, + "p90": 69.47200000286102, + "p95": 71.45600020885468, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 385.0559890270233, + "p90": 
400.92799067497253, + "p95": 405.2160084247589, + "p99": 419.3280041217804 + }, + "isolatedSum": { + "p50": 410.0799858570099, + "p90": 434.04801189899445, + "p95": 441.6000097990036, + "p99": 458.3999961614609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1223680, + "combineLogicalBytes": 2447360, + "fanoutMean": 3.734375, + "recvTokensMax": 62, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 342.0799970626831, + "p90": 362.0480000972748, + "p95": 369.50400471687317, + "p99": 378.84798645973206 + }, + "combine": { + "p50": 67.16799736022949, + "p90": 71.00799679756165, + "p95": 72.35199958086014, + "p99": 77.18399912118912 + }, + "roundtrip": { + "p50": 383.0080032348633, + "p90": 399.4880020618439, + "p95": 403.4560024738312, + "p99": 411.3599956035614 + }, + "isolatedSum": { + "p50": 409.2479944229126, + "p90": 433.0559968948364, + "p95": 441.8560042977333, + "p99": 456.0319855809212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2396160, + "combineLogicalBytes": 4792320, + "fanoutMean": 3.65625, + "recvTokensMax": 122, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 347.104012966156, + "p90": 367.16800928115845, + "p95": 373.4720051288605, + "p99": 385.0559890270233 + }, + "combine": { + "p50": 71.35999947786331, + "p90": 74.65600222349167, + "p95": 76.54400169849396, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 390.49598574638367, + "p90": 407.039999961853, + "p95": 410.8799993991852, + "p99": 421.1840033531189 + }, + "isolatedSum": { + "p50": 418.4640124440193, + "p90": 441.8240115046501, + "p95": 450.01600682735443, + "p99": 466.97598695755005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4761600, + "combineLogicalBytes": 9523200, + "fanoutMean": 3.6328125, + "recvTokensMax": 242, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 343.6479866504669, + "p90": 364.9919927120209, + "p95": 371.0399866104126, + "p99": 382.9439878463745 + }, + "combine": { + "p50": 83.36000144481659, + "p90": 87.26400136947632, + "p95": 89.40800279378891, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 403.84000539779663, + "p90": 419.1359877586365, + "p95": 424.0959882736206, + "p99": 431.71200156211853 + }, + "isolatedSum": { + "p50": 427.0079880952835, + "p90": 452.2559940814972, + "p95": 460.4479894042015, + "p99": 478.719986975193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9548800, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ec972a8f", + "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_759e5033", + "comparisonKey": "821dc13b97d194cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:58.779561+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, 
+ "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 330.9119939804077, + "p90": 348.89599680900574, + "p95": 354.43198680877686, + "p99": 367.39200353622437 + }, + "combine": { + "p50": 61.216000467538834, + "p90": 64.96000289916992, + "p95": 66.97600334882736, + "p99": 73.7600028514862 + }, + "roundtrip": { + "p50": 369.56799030303955, + "p90": 384.0639889240265, + "p95": 387.7440094947815, + "p99": 396.12799882888794 + }, + "isolatedSum": { + "p50": 392.12799444794655, + "p90": 413.85599970817566, + "p95": 421.4079901576042, + "p99": 441.15200638771057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 92160, + "combineLogicalBytes": 184320, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 331.2320113182068, + "p90": 352.06401348114014, + "p95": 357.31199383735657, + "p99": 364.5760118961334 + }, + "combine": { + "p50": 62.912002205848694, + "p90": 
65.95200300216675, + "p95": 67.29599833488464, + "p99": 72.22399860620499 + }, + "roundtrip": { + "p50": 372.6719915866852, + "p90": 388.0319893360138, + "p95": 392.15999841690063, + "p99": 400.83199739456177 + }, + "isolatedSum": { + "p50": 394.1440135240555, + "p90": 418.0160164833069, + "p95": 424.6079921722412, + "p99": 436.8000105023384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 184320, + "combineLogicalBytes": 368640, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 333.75999331474304, + "p90": 352.86399722099304, + "p95": 357.9519987106323, + "p99": 364.9919927120209 + }, + "combine": { + "p50": 64.09599632024765, + "p90": 67.61600077152252, + "p95": 69.18399780988693, + "p99": 76.83199644088745 + }, + "roundtrip": { + "p50": 372.3199963569641, + "p90": 388.0639970302582, + "p95": 392.0319974422455, + "p99": 398.17601442337036 + }, + "isolatedSum": { + "p50": 397.8559896349907, + "p90": 420.47999799251556, + "p95": 427.13599652051926, + "p99": 441.8239891529083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 356352, + "combineLogicalBytes": 712704, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 331.4560055732727, + "p90": 349.0239977836609, + "p95": 353.59999537467957, + "p99": 361.5359961986542 + }, + "combine": { + "p50": 65.92000275850296, + "p90": 69.63200122117996, + "p95": 71.55200093984604, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 374.7200071811676, + "p90": 391.1359906196594, + "p95": 394.75199580192566, + "p99": 404.00001406669617 + }, + "isolatedSum": { + "p50": 397.37600833177567, + "p90": 418.65599900484085, + "p95": 425.1519963145256, + "p99": 441.3439929485321 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 700416, + "combineLogicalBytes": 1400832, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 334.81600880622864, + "p90": 354.0480136871338, + "p95": 359.74401235580444, + "p99": 373.24801087379456 + }, + "combine": { + "p50": 66.52799993753433, + "p90": 70.62400132417679, + "p95": 72.22399860620499, + "p99": 78.015998005867 + }, + "roundtrip": { + "p50": 375.5840063095093, + "p90": 390.6239867210388, + "p95": 394.6560025215149, + "p99": 403.6479890346527 + }, + "isolatedSum": { + "p50": 401.34400874376297, + "p90": 424.6720150113106, + "p95": 431.96801096200943, + "p99": 451.26400887966156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1413120, + "combineLogicalBytes": 2826240, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 333.5359990596771, + "p90": 353.66401076316833, + "p95": 359.48801040649414, + "p99": 378.7840008735657 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 71.71200215816498, + "p95": 73.2479989528656, + "p99": 76.80000364780426 + }, + "roundtrip": { + "p50": 375.5840063095093, + "p90": 391.29599928855896, + "p95": 395.26399970054626, + "p99": 400.67198872566223 + }, + "isolatedSum": { + "p50": 401.5040025115013, + "p90": 425.3760129213333, + "p95": 432.73600935935974, + "p99": 455.58400452136993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2875392, + "combineLogicalBytes": 5750784, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 335.4560136795044, + "p90": 355.26400804519653, + "p95": 359.23200845718384, + "p99": 
369.6959912776947 + }, + "combine": { + "p50": 73.05599749088287, + "p90": 76.67200267314911, + "p95": 78.20799946784973, + "p99": 84.99199897050858 + }, + "roundtrip": { + "p50": 382.7199935913086, + "p90": 398.5919952392578, + "p95": 403.48801016807556, + "p99": 429.6639859676361 + }, + "isolatedSum": { + "p50": 408.51201117038727, + "p90": 431.93601071834564, + "p95": 437.44000792503357, + "p99": 454.6879902482033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5793792, + "combineLogicalBytes": 11587584, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 333.95200967788696, + "p90": 353.37600111961365, + "p95": 358.46400260925293, + "p99": 368.3519959449768 + }, + "combine": { + "p50": 88.128000497818, + "p90": 92.22400188446045, + "p95": 94.08000111579895, + "p99": 98.78399968147278 + }, + "roundtrip": { + "p50": 396.7039883136749, + "p90": 412.31998801231384, + "p95": 417.56799817085266, + "p99": 426.40000581741333 + }, + "isolatedSum": { + "p50": 422.08001017570496, + "p90": 445.6000030040741, + "p95": 452.5440037250519, + "p99": 467.1359956264496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11470848, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a46c6ce", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_5fe8d497", + "comparisonKey": "8906d25cba9a5e7f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:27.018361+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": 
"tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 91.10400080680847, + "p90": 101.69599950313568, + "p95": 105.31199723482132, + "p99": 112.76800185441971 + }, + "combine": { + "p50": 63.391998410224915, + "p90": 66.84800237417221, + "p95": 68.54400038719177, + "p99": 73.88799637556076 + }, + "roundtrip": { + "p50": 220.2879935503006, + "p90": 237.92000114917755, + "p95": 243.45600605010986, + "p99": 263.5839879512787 + }, + 
"isolatedSum": { + "p50": 154.4959992170334, + "p90": 168.5440018773079, + "p95": 173.8559976220131, + "p99": 186.65599822998047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 91.90399944782257, + "p90": 103.67999970912933, + "p95": 107.90400207042694, + "p99": 114.78400230407715 + }, + "combine": { + "p50": 65.47199934720993, + "p90": 68.9919963479042, + "p95": 70.8480030298233, + "p99": 77.05599814653397 + }, + "roundtrip": { + "p50": 221.3120013475418, + "p90": 238.91200125217438, + "p95": 245.92000246047974, + "p99": 254.2400062084198 + }, + "isolatedSum": { + "p50": 157.3759987950325, + "p90": 172.67199605703354, + "p95": 178.75200510025024, + "p99": 191.84000045061111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 92.76799857616425, + "p90": 103.90400141477585, + "p95": 109.11999642848969, + "p99": 120.54400146007538 + }, + "combine": { + "p50": 67.03999638557434, + "p90": 70.592001080513, + "p95": 72.1919983625412, + "p99": 79.55200225114822 + }, + "roundtrip": { + "p50": 222.01600670814514, + "p90": 239.07199501991272, + "p95": 243.6479926109314, + "p99": 252.57599353790283 + }, + "isolatedSum": { + "p50": 159.8079949617386, + "p90": 174.49600249528885, + "p95": 181.31199479103088, + "p99": 200.0960037112236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
8, + "globalTokens": 32, + "dispatch": { + "p50": 92.0960009098053, + "p90": 104.16000336408615, + "p95": 108.15999656915665, + "p99": 120.35199999809265 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 73.05599749088287, + "p95": 76.03199779987335, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 225.18399357795715, + "p90": 241.5360063314438, + "p95": 248.31999838352203, + "p99": 258.04799795150757 + }, + "isolatedSum": { + "p50": 161.0879972577095, + "p90": 177.21600085496902, + "p95": 184.19199436903, + "p99": 204.00000363588333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 91.39200299978256, + "p90": 103.55199873447418, + "p95": 107.90400207042694, + "p99": 115.61600118875504 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 72.86400347948074, + "p95": 74.17599856853485, + "p99": 81.98399841785431 + }, + "roundtrip": { + "p50": 224.2559939622879, + "p90": 242.14400351047516, + "p95": 247.3279982805252, + "p99": 257.31199979782104 + }, + "isolatedSum": { + "p50": 160.76800227165222, + "p90": 176.41600221395493, + "p95": 182.0800006389618, + "p99": 197.59999960660934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.25600212812424, + "p90": 103.00800204277039, + "p95": 106.72000050544739, + "p99": 114.04799669981003 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 74.30399954319, + "p95": 75.99999755620956, + "p99": 85.82399785518646 + }, + "roundtrip": { + "p50": 227.29599475860596, + "p90": 242.8479939699173, + 
"p95": 246.14399671554565, + "p99": 255.3279995918274 + }, + "isolatedSum": { + "p50": 163.16800564527512, + "p90": 177.3120015859604, + "p95": 182.71999806165695, + "p99": 199.8719945549965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 97.02400118112564, + "p90": 106.59199953079224, + "p95": 109.79200154542923, + "p99": 117.76000261306763 + }, + "combine": { + "p50": 76.03199779987335, + "p90": 79.68000322580338, + "p95": 81.28000050783157, + "p99": 88.25600147247314 + }, + "roundtrip": { + "p50": 233.63199830055237, + "p90": 249.2160052061081, + "p95": 255.10400533676147, + "p99": 264.73599672317505 + }, + "isolatedSum": { + "p50": 173.055998980999, + "p90": 186.2720027565956, + "p95": 191.0720020532608, + "p99": 206.01600408554077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 108.35199803113937, + "p90": 114.97599631547928, + "p95": 119.87199634313583, + "p99": 127.42400169372559 + }, + "combine": { + "p50": 93.66399794816971, + "p90": 96.99200093746185, + "p95": 99.07200187444687, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 249.5039999485016, + "p90": 266.33599400520325, + "p95": 271.1679935455322, + "p99": 279.1360020637512 + }, + "isolatedSum": { + "p50": 202.01599597930908, + "p90": 211.96799725294113, + "p95": 218.9439982175827, + "p99": 233.40800404548645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aa1d124e", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_759e5033", + "comparisonKey": "ed8275005fb2edc2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:18.899119+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 344.7679877281189, + "p90": 364.73599076271057, + "p95": 370.91198563575745, + "p99": 390.24001359939575 + }, + "combine": { + "p50": 66.81600213050842, + "p90": 70.94399631023407, + "p95": 73.60000163316727, + "p99": 84.76799726486206 + }, + "roundtrip": { + "p50": 387.1999979019165, + "p90": 406.14399313926697, + "p95": 411.74399852752686, + "p99": 419.5840060710907 + }, + "isolatedSum": { + "p50": 411.5839898586273, + "p90": 435.67998707294464, + "p95": 444.5119872689247, + "p99": 475.0080108642578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 347.135990858078, + "p90": 367.8080141544342, + "p95": 373.1519877910614, + "p99": 382.2399973869324 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 76.89599692821503, + "p95": 80.44800162315369, + "p99": 84.6719965338707 + }, + "roundtrip": { + "p50": 389.1200125217438, + "p90": 407.4240028858185, + "p95": 410.94401478767395, + "p99": 422.39999771118164 + }, + "isolatedSum": { + "p50": 415.6159907579422, + "p90": 444.70401108264923, + "p95": 453.5999894142151, + "p99": 466.91199392080307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 347.1679985523224, + "p90": 375.0079870223999, + "p95": 389.2480134963989, + "p99": 418.39998960494995 + }, + "combine": { + "p50": 
68.80000233650208, + "p90": 89.40800279378891, + "p95": 108.2879975438118, + "p99": 119.9679970741272 + }, + "roundtrip": { + "p50": 390.3999924659729, + "p90": 411.296010017395, + "p95": 423.64799976348877, + "p99": 451.32800936698914 + }, + "isolatedSum": { + "p50": 415.96800088882446, + "p90": 464.4159898161888, + "p95": 497.5360110402107, + "p99": 538.3679866790771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 351.4240086078644, + "p90": 378.464013338089, + "p95": 392.5760090351105, + "p99": 414.68799114227295 + }, + "combine": { + "p50": 71.68000191450119, + "p90": 84.73599702119827, + "p95": 100.19200295209885, + "p99": 118.84800344705582 + }, + "roundtrip": { + "p50": 399.4880020618439, + "p90": 421.9839870929718, + "p95": 439.7760033607483, + "p99": 454.75199818611145 + }, + "isolatedSum": { + "p50": 423.10401052236557, + "p90": 463.20001035928726, + "p95": 492.7680119872093, + "p99": 533.5359945893288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 348.06400537490845, + "p90": 372.0319867134094, + "p95": 382.01600313186646, + "p99": 416.83200001716614 + }, + "combine": { + "p50": 72.25599884986877, + "p90": 107.39199817180634, + "p95": 116.73600226640701, + "p99": 140.6719982624054 + }, + "roundtrip": { + "p50": 396.92801237106323, + "p90": 429.56799268722534, + "p95": 442.9759979248047, + "p99": 463.99998664855957 + }, + "isolatedSum": { + "p50": 420.3200042247772, + "p90": 479.42398488521576, + "p95": 498.75200539827347, + "p99": 557.5039982795715 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 350.3040075302124, + "p90": 370.94399333000183, + "p95": 375.99998712539673, + "p99": 396.60799503326416 + }, + "combine": { + "p50": 72.64000177383423, + "p90": 76.1599987745285, + "p95": 78.23999971151352, + "p99": 90.01599997282028 + }, + "roundtrip": { + "p50": 395.7119882106781, + "p90": 414.0479862689972, + "p95": 417.85600781440735, + "p99": 434.30399894714355 + }, + "isolatedSum": { + "p50": 422.94400930404663, + "p90": 447.10399210453033, + "p95": 454.23998683691025, + "p99": 486.62399500608444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 351.29600763320923, + "p90": 377.375990152359, + "p95": 387.4880075454712, + "p99": 413.34399580955505 + }, + "combine": { + "p50": 78.36800068616867, + "p90": 85.60000360012054, + "p95": 89.66399729251862, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 403.6160111427307, + "p90": 429.6959936618805, + "p95": 447.6799964904785, + "p99": 468.9599871635437 + }, + "isolatedSum": { + "p50": 429.6640083193779, + "p90": 462.97599375247955, + "p95": 477.1520048379898, + "p99": 515.807993710041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 350.7840037345886, + "p90": 376.1279881000519, + 
"p95": 389.47200775146484, + "p99": 419.71200704574585 + }, + "combine": { + "p50": 96.3520035147667, + "p90": 144.6399986743927, + "p95": 150.62400698661804, + "p99": 158.01599621772766 + }, + "roundtrip": { + "p50": 418.0479943752289, + "p90": 436.67200207710266, + "p95": 441.8880045413971, + "p99": 461.1839950084686 + }, + "isolatedSum": { + "p50": 447.1360072493553, + "p90": 520.7679867744446, + "p95": 540.0960147380829, + "p99": 577.7280032634735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9d890cd1", + "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||7c1cc7238ca9a52", + "colorKey": "gb300_759e5033", + "comparisonKey": "75dce58a45693e88", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:25.129991+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", 
+ "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "7c1cc7238ca9a52", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 345.5359935760498, + "p90": 363.8719916343689, + "p95": 370.36800384521484, + "p99": 396.5759873390198 + }, + "combine": { + "p50": 65.43999910354614, + "p90": 68.9919963479042, + "p95": 70.592001080513, + "p99": 79.00799810886383 + }, + "roundtrip": { + "p50": 383.4240138530731, + "p90": 399.32799339294434, + "p95": 401.7600119113922, + "p99": 410.20798683166504 + }, + "isolatedSum": { + "p50": 410.97599267959595, + "p90": 432.8639879822731, + "p95": 440.96000492572784, + "p99": 475.5839854478836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 344.92799639701843, + "p90": 364.25599455833435, + "p95": 370.2079951763153, + "p99": 400.12800693511963 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 70.62400132417679, + "p95": 72.54400104284286, + "p99": 79.68000322580338 + }, + "roundtrip": { + "p50": 385.6320083141327, + "p90": 400.9920060634613, + "p95": 404.32000160217285, + "p99": 411.8080139160156 + }, + 
"isolatedSum": { + "p50": 412.2239947319031, + "p90": 434.87999588251114, + "p95": 442.7519962191582, + "p99": 479.808010160923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 344.9600040912628, + "p90": 364.22398686408997, + "p95": 370.0160086154938, + "p99": 380.3200125694275 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 72.54400104284286, + "p95": 74.75200295448303, + "p99": 80.83199709653854 + }, + "roundtrip": { + "p50": 388.12801241874695, + "p90": 403.872013092041, + "p95": 408.54400396347046, + "p99": 415.9359931945801 + }, + "isolatedSum": { + "p50": 413.440003991127, + "p90": 436.76798790693283, + "p95": 444.7680115699768, + "p99": 461.15200966596603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 346.49598598480225, + "p90": 366.8479919433594, + "p95": 374.55999851226807, + "p99": 385.98400354385376 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 74.75200295448303, + "p95": 75.83999633789062, + "p99": 84.51200276613235 + }, + "roundtrip": { + "p50": 389.72800970077515, + "p90": 406.94400668144226, + "p95": 410.7840061187744, + "p99": 419.0079867839813 + }, + "isolatedSum": { + "p50": 417.6959842443466, + "p90": 441.5999948978424, + "p95": 450.3999948501587, + "p99": 470.4960063099861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 824320, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 30, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
16, + "globalTokens": 64, + "dispatch": { + "p50": 345.7599878311157, + "p90": 365.05600810050964, + "p95": 371.5839982032776, + "p99": 383.2640051841736 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 74.14399832487106, + "p95": 75.93599706888199, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 390.3999924659729, + "p90": 405.1840007305145, + "p95": 410.4959964752197, + "p99": 416.9920086860657 + }, + "isolatedSum": { + "p50": 416.4799898862839, + "p90": 439.2000064253807, + "p95": 447.5199952721596, + "p99": 465.1840031147003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619968, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 59, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 346.3680148124695, + "p90": 365.63199758529663, + "p95": 371.45599722862244, + "p99": 383.32799077033997 + }, + "combine": { + "p50": 72.73600250482559, + "p90": 76.57600194215775, + "p95": 77.91999727487564, + "p99": 83.00799876451492 + }, + "roundtrip": { + "p50": 390.1439905166626, + "p90": 407.00799226760864, + "p95": 410.3359878063202, + "p99": 418.17599534988403 + }, + "isolatedSum": { + "p50": 419.1040173172951, + "p90": 442.2079995274544, + "p95": 449.3759945034981, + "p99": 466.3359895348549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3311616, + "combineLogicalBytes": 6623232, + "fanoutMean": 3.609375, + "recvTokensMax": 117, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 349.3120074272156, + "p90": 370.65601348876953, + "p95": 376.76799297332764, + "p99": 386.24000549316406 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 81.88799768686295, + "p95": 83.5840031504631, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 398.9439904689789, + "p90": 416.3520038127899, + 
"p95": 420.6399917602539, + "p99": 431.8079948425293 + }, + "isolatedSum": { + "p50": 427.71200835704803, + "p90": 452.5440111756325, + "p95": 460.35199612379074, + "p99": 475.5840077996254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6594560, + "combineLogicalBytes": 13189120, + "fanoutMean": 3.59375, + "recvTokensMax": 234, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 349.4400084018707, + "p90": 367.90400743484497, + "p95": 374.6879994869232, + "p99": 395.904004573822 + }, + "combine": { + "p50": 94.55999732017517, + "p90": 98.43199700117111, + "p95": 99.87200051546097, + "p99": 107.90400207042694 + }, + "roundtrip": { + "p50": 416.0960018634796, + "p90": 431.8400025367737, + "p95": 436.0640048980713, + "p99": 447.7440118789673 + }, + "isolatedSum": { + "p50": 444.0000057220459, + "p90": 466.3360044360161, + "p95": 474.5600000023842, + "p99": 503.80800664424896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13310976, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ae321662", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_3c645a5f", + "comparisonKey": "e15d23e435b72754", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:51.648526+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + 
"label": "GB300 EP4 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 73.60000163316727, + "p90": 82.49600231647491, + "p95": 86.56000345945358, + "p99": 100.44799745082855 + }, + "combine": { + "p50": 65.05600363016129, + "p90": 68.60800087451935, + "p95": 70.81600278615952, + "p99": 74.68800246715546 + }, + "roundtrip": { + "p50": 197.1839964389801, + "p90": 210.52800118923187, + "p95": 214.33599293231964, + "p99": 221.5999960899353 + }, + "isolatedSum": { + "p50": 138.65600526332855, + "p90": 151.10400319099426, + "p95": 157.3760062456131, + "p99": 175.135999917984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, 
+ "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 72.76800274848938, + "p90": 81.727996468544, + "p95": 85.9839990735054, + "p99": 93.72799843549728 + }, + "combine": { + "p50": 66.30399823188782, + "p90": 70.27199864387512, + "p95": 71.84000313282013, + "p99": 76.57600194215775 + }, + "roundtrip": { + "p50": 198.65599274635315, + "p90": 211.64800226688385, + "p95": 216.0000056028366, + "p99": 223.55200350284576 + }, + "isolatedSum": { + "p50": 139.0720009803772, + "p90": 151.99999511241913, + "p95": 157.82400220632553, + "p99": 170.30400037765503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 74.23999905586243, + "p90": 83.52000266313553, + "p95": 87.74399757385254, + "p99": 92.79999881982803 + }, + "combine": { + "p50": 66.94400310516357, + "p90": 70.68800181150436, + "p95": 72.48000055551529, + "p99": 82.62400329113007 + }, + "roundtrip": { + "p50": 200.32000541687012, + "p90": 212.79999613761902, + "p95": 217.21599996089935, + "p99": 224.5440036058426 + }, + "isolatedSum": { + "p50": 141.184002161026, + "p90": 154.2080044746399, + "p95": 160.22399812936783, + "p99": 175.4240021109581 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 73.27999919652939, + "p90": 81.4720019698143, + "p95": 84.83199775218964, + "p99": 90.87999910116196 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 73.88799637556076, + "p95": 
76.28799974918365, + "p99": 83.52000266313553 + }, + "roundtrip": { + "p50": 201.75999402999878, + "p90": 214.36800062656403, + "p95": 219.7760045528412, + "p99": 225.75999796390533 + }, + "isolatedSum": { + "p50": 143.45599710941315, + "p90": 155.35999834537506, + "p95": 161.1199975013733, + "p99": 174.40000176429749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 73.95199686288834, + "p90": 81.91999793052673, + "p95": 84.60800349712372, + "p99": 91.839998960495 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 73.11999797821045, + "p95": 74.65600222349167, + "p99": 77.31200009584427 + }, + "roundtrip": { + "p50": 202.91200280189514, + "p90": 215.13600647449493, + "p95": 219.9680060148239, + "p99": 226.23999416828156 + }, + "isolatedSum": { + "p50": 143.96799355745316, + "p90": 155.03999590873718, + "p95": 159.2640057206154, + "p99": 169.15199905633926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 74.5600014925003, + "p90": 82.24000036716461, + "p95": 86.94399893283844, + "p99": 92.54399687051773 + }, + "combine": { + "p50": 71.6480016708374, + "p90": 74.94399696588516, + "p95": 76.80000364780426, + "p99": 84.70399677753448 + }, + "roundtrip": { + "p50": 205.76000213623047, + "p90": 217.72800385951996, + "p95": 223.07200729846954, + "p99": 232.31999576091766 + }, + "isolatedSum": { + "p50": 146.2080031633377, + "p90": 157.18399733304977, + "p95": 163.7440025806427, + "p99": 177.24799364805222 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 82.43200182914734, + "p90": 87.74399757385254, + "p95": 92.28800237178802, + "p99": 101.47199779748917 + }, + "combine": { + "p50": 76.7040029168129, + "p90": 79.96799796819687, + "p95": 81.91999793052673, + "p99": 92.8959995508194 + }, + "roundtrip": { + "p50": 212.47999370098114, + "p90": 225.72800517082214, + "p95": 230.5919975042343, + "p99": 236.735999584198 + }, + "isolatedSum": { + "p50": 159.13600474596024, + "p90": 167.7119955420494, + "p95": 174.20800030231476, + "p99": 194.36799734830856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 94.55999732017517, + "p90": 99.48799759149551, + "p95": 103.61599922180176, + "p99": 111.55200004577637 + }, + "combine": { + "p50": 94.59199756383896, + "p90": 98.36799651384354, + "p95": 99.80800002813339, + "p99": 105.53599894046783 + }, + "roundtrip": { + "p50": 226.75199806690216, + "p90": 237.5359982252121, + "p95": 242.01600253582, + "p99": 252.8960108757019 + }, + "isolatedSum": { + "p50": 189.15199488401413, + "p90": 197.85599410533905, + "p95": 203.42399924993515, + "p99": 217.0879989862442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c1af2079", + "identity": 
"gb300|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_9f7d75b6", + "comparisonKey": "c3cd200560f80390", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:40.769192+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 44.319998472929, + "p90": 51.58400163054466, + "p95": 55.84000051021576, + "p99": 61.91999837756157 + }, + "combine": { + "p50": 57.472001761198044, + "p90": 69.72800195217133, + "p95": 72.22399860620499, + "p99": 75.96799731254578 + }, + "roundtrip": { + "p50": 1526.6560316085815, + "p90": 1538.2399559020996, + "p95": 1540.511965751648, + "p99": 1545.7279682159424 + }, + "isolatedSum": { + "p50": 101.79200023412704, + "p90": 121.31200358271599, + "p95": 128.06399911642075, + "p99": 137.88799569010735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 43.616000562906265, + "p90": 49.18399825692177, + "p95": 52.76799947023392, + "p99": 56.671999394893646 + }, + "combine": { + "p50": 58.43200162053108, + "p90": 66.59200042486191, + "p95": 68.7360018491745, + "p99": 73.44000041484833 + }, + "roundtrip": { + "p50": 1528.1920433044434, + "p90": 1537.1520519256592, + "p95": 1540.2239561080933, + "p99": 1548.7680435180664 + }, + "isolatedSum": { + "p50": 102.04800218343735, + "p90": 115.77599868178368, + "p95": 121.50400131940842, + "p99": 130.11199980974197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 43.99999976158142, + "p90": 49.8879998922348, + "p95": 52.960000932216644, + "p99": 60.54399907588959 + }, + "combine": { + "p50": 56.384000927209854, + "p90": 69.56800073385239, + "p95": 71.87200337648392, + "p99": 77.72800326347351 + }, + "roundtrip": { + "p50": 1526.5920162200928, + "p90": 
1540.544033050537, + "p95": 1543.0400371551514, + "p99": 1548.799991607666 + }, + "isolatedSum": { + "p50": 100.38400068879128, + "p90": 119.45600062608719, + "p95": 124.83200430870056, + "p99": 138.2720023393631 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 43.935999274253845, + "p90": 49.50400069355965, + "p95": 52.86400020122528, + "p99": 59.74400043487549 + }, + "combine": { + "p50": 58.04799869656563, + "p90": 68.89600306749344, + "p95": 70.88000327348709, + "p99": 76.12799853086472 + }, + "roundtrip": { + "p50": 1529.2479991912842, + "p90": 1538.9440059661865, + "p95": 1542.4959659576416, + "p99": 1547.808051109314 + }, + "isolatedSum": { + "p50": 101.98399797081947, + "p90": 118.40000376105309, + "p95": 123.74400347471237, + "p99": 135.8719989657402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 43.83999854326248, + "p90": 49.8879998922348, + "p95": 54.1439987719059, + "p99": 59.039998799562454 + }, + "combine": { + "p50": 61.40799820423126, + "p90": 73.91999661922455, + "p95": 76.54400169849396, + "p99": 82.17599987983704 + }, + "roundtrip": { + "p50": 1531.8080186843872, + "p90": 1543.8400506973267, + "p95": 1546.94402217865, + "p99": 1553.4080266952515 + }, + "isolatedSum": { + "p50": 105.24799674749374, + "p90": 123.80799651145935, + "p95": 130.68800047039986, + "p99": 141.2159986793995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 44.60800066590309, + "p90": 49.40799996256828, + "p95": 52.70399898290634, + "p99": 58.46399813890457 + }, + "combine": { + "p50": 58.49599838256836, + "p90": 69.88800317049026, + "p95": 72.25599884986877, + "p99": 77.44000107049942 + }, + "roundtrip": { + "p50": 1530.400037765503, + "p90": 1542.688012123108, + "p95": 1544.9919700622559, + "p99": 1550.8480072021484 + }, + "isolatedSum": { + "p50": 103.10399904847145, + "p90": 119.29600313305855, + "p95": 124.95999783277512, + "p99": 135.903999209404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 46.879999339580536, + "p90": 51.872000098228455, + "p95": 55.48800155520439, + "p99": 62.144000083208084 + }, + "combine": { + "p50": 62.6240000128746, + "p90": 73.91999661922455, + "p95": 77.08799839019775, + "p99": 80.6720033288002 + }, + "roundtrip": { + "p50": 1535.3920459747314, + "p90": 1546.9119548797607, + "p95": 1549.183964729309, + "p99": 1555.2639961242676 + }, + "isolatedSum": { + "p50": 109.50399935245514, + "p90": 125.791996717453, + "p95": 132.57599994540215, + "p99": 142.81600341200829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 52.191998809576035, + "p90": 55.87200075387955, + "p95": 57.66399949789047, + "p99": 64.83200192451477 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 79.68000322580338, + "p95": 
82.24000036716461, + "p99": 86.30400151014328 + }, + "roundtrip": { + "p50": 1547.6160049438477, + "p90": 1556.0640096664429, + "p95": 1557.7600002288818, + "p99": 1562.3040199279785 + }, + "isolatedSum": { + "p50": 121.5679980814457, + "p90": 135.55200397968292, + "p95": 139.90399986505508, + "p99": 151.13600343465805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1795db59", + "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_5b7384da", + "comparisonKey": "1a75b01976159e63", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:10.960619+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 
+ }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 42.33599826693535, + "p90": 47.648001462221146, + "p95": 50.75199902057648, + "p99": 56.86400085687637 + }, + "combine": { + "p50": 55.615998804569244, + "p90": 68.15999746322632, + "p95": 70.36799937486649, + "p99": 75.9039968252182 + }, + "roundtrip": { + "p50": 1524.0960121154785, + "p90": 1534.2400074005127, + "p95": 1536.736011505127, + "p99": 1539.8720502853394 + }, + "isolatedSum": { + "p50": 97.9519970715046, + "p90": 115.80799892544746, + "p95": 121.11999839544296, + "p99": 132.76799768209457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 42.527999728918076, + "p90": 47.45600000023842, + "p95": 51.10400170087814, + "p99": 56.89600110054016 + }, + "combine": { + "p50": 55.135998874902725, + "p90": 65.5680000782013, + "p95": 68.06399673223495, + "p99": 74.52800124883652 + }, + "roundtrip": { + "p50": 1527.9680490493774, + "p90": 1544.2880392074585, + "p95": 1555.616021156311, + "p99": 1574.8159885406494 + }, + "isolatedSum": { + "p50": 97.6639986038208, + "p90": 113.02400007843971, + "p95": 119.1679984331131, + "p99": 131.42400234937668 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 42.55999997258186, + "p90": 48.38399961590767, + "p95": 51.77599936723709, + "p99": 56.51199817657471 + }, + "combine": { + "p50": 55.135998874902725, + "p90": 71.99999690055847, + "p95": 75.45600086450577, + "p99": 81.31200075149536 + }, + "roundtrip": { + "p50": 1524.4159698486328, + "p90": 1535.7120037078857, + "p95": 1538.6559963226318, + "p99": 1542.9439544677734 + }, + "isolatedSum": { + "p50": 97.69599884748459, + "p90": 120.38399651646614, + "p95": 127.23200023174286, + "p99": 137.82399892807007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 43.83999854326248, + "p90": 60.99199876189232, + "p95": 65.50399959087372, + "p99": 91.839998960495 + }, + "combine": { + "p50": 57.151999324560165, + "p90": 67.00800359249115, + "p95": 70.81600278615952, + "p99": 78.04799824953079 + }, + "roundtrip": { + "p50": 1530.4640531539917, + "p90": 1550.112009048462, + "p95": 1562.9119873046875, + "p99": 1573.6000537872314 + }, + "isolatedSum": { + "p50": 100.99199786782265, + "p90": 128.00000235438347, + "p95": 136.32000237703323, + "p99": 169.8879972100258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 42.91199892759323, + "p90": 48.0320006608963, + "p95": 51.711998879909515, + 
"p99": 57.88800120353699 + }, + "combine": { + "p50": 57.72799998521805, + "p90": 70.27199864387512, + "p95": 73.2479989528656, + "p99": 80.25600016117096 + }, + "roundtrip": { + "p50": 1527.6479721069336, + "p90": 1539.5519733428955, + "p95": 1542.4959659576416, + "p99": 1548.9280223846436 + }, + "isolatedSum": { + "p50": 100.63999891281128, + "p90": 118.30399930477142, + "p95": 124.95999783277512, + "p99": 138.14400136470795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 44.79999840259552, + "p90": 65.8240020275116, + "p95": 83.67999643087387, + "p99": 105.43999820947647 + }, + "combine": { + "p50": 57.151999324560165, + "p90": 69.40799951553345, + "p95": 72.57600128650665, + "p99": 77.08799839019775 + }, + "roundtrip": { + "p50": 1529.8240184783936, + "p90": 1546.1119413375854, + "p95": 1554.144024848938, + "p99": 1583.2959413528442 + }, + "isolatedSum": { + "p50": 101.95199772715569, + "p90": 135.23200154304504, + "p95": 156.25599771738052, + "p99": 182.52799659967422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 45.56800052523613, + "p90": 49.02400076389313, + "p95": 52.83199995756149, + "p99": 61.503998935222626 + }, + "combine": { + "p50": 59.7120001912117, + "p90": 72.86400347948074, + "p95": 76.31999999284744, + "p99": 84.06399935483932 + }, + "roundtrip": { + "p50": 1532.2879552841187, + "p90": 1542.5280332565308, + "p95": 1546.4320182800293, + "p99": 1550.0160455703735 + }, + "isolatedSum": { + "p50": 105.28000071644783, + "p90": 
121.88800424337387, + "p95": 129.15199995040894, + "p99": 145.56799829006195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 52.57600173354149, + "p90": 69.43999975919724, + "p95": 96.76799923181534, + "p99": 104.8320010304451 + }, + "combine": { + "p50": 69.47200000286102, + "p90": 80.38400113582611, + "p95": 83.29600095748901, + "p99": 87.0399996638298 + }, + "roundtrip": { + "p50": 1545.9200143814087, + "p90": 1553.887963294983, + "p95": 1556.7359924316406, + "p99": 1562.559962272644 + }, + "isolatedSum": { + "p50": 122.04800173640251, + "p90": 149.82400089502335, + "p95": 180.06400018930435, + "p99": 191.8720006942749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e594e335", + "identity": "gb300|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||6c4175e2b7b86cb", + "colorKey": "gb300_11303bbb", + "comparisonKey": "36f1c4fa70b678cc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:19.478235+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + 
"routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6c4175e2b7b86cb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 93.05600076913834, + "p90": 104.032002389431, + "p95": 107.64800012111664, + "p99": 115.29599875211716 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 72.80000299215317, + "p95": 74.52800124883652, + "p99": 79.58400249481201 + }, + "roundtrip": { + "p50": 137.63199746608734, + "p90": 146.2399959564209, + "p95": 149.59999918937683, + "p99": 156.19200468063354 + }, + "isolatedSum": { + "p50": 161.6000011563301, + "p90": 176.83200538158417, + "p95": 182.17600136995316, + "p99": 194.88000124692917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 131072, + "combineLogicalBytes": 131072, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 92.16000139713287, + "p90": 101.53599828481674, + "p95": 105.40799796581268, + "p99": 111.00800335407257 + }, + "combine": { + "p50": 68.83200258016586, + "p90": 72.67200201749802, + "p95": 74.33599978685379, + "p99": 81.60000294446945 + }, + "roundtrip": { + "p50": 137.34400272369385, + "p90": 146.4959979057312, + "p95": 150.84800124168396, + "p99": 156.70399367809296 + }, + "isolatedSum": { + "p50": 160.99200397729874, + "p90": 174.20800030231476, + "p95": 179.74399775266647, + "p99": 192.60800629854202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 245760, + "combineLogicalBytes": 245760, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 92.32000261545181, + "p90": 103.4879982471466, + "p95": 107.71200060844421, + "p99": 130.20800054073334 + }, + "combine": { + "p50": 70.43199986219406, + "p90": 73.95199686288834, + "p95": 76.76800340414047, + "p99": 82.84799754619598 + }, + "roundtrip": { + "p50": 137.85600662231445, + "p90": 150.01599490642548, + "p95": 154.1759967803955, + "p99": 187.23200261592865 + }, + "isolatedSum": { + "p50": 162.75200247764587, + "p90": 177.43999511003494, + "p95": 184.4800040125847, + "p99": 213.05599808692932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 491520, + "combineLogicalBytes": 491520, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 93.91999989748001, + "p90": 105.31199723482132, + "p95": 110.91200262308121, + "p99": 132.6719969511032 + }, + "combine": { + "p50": 71.26399874687195, + "p90": 75.16799867153168, + "p95": 77.69600301980972, + "p99": 84.70399677753448 + }, + "roundtrip": { + "p50": 139.48799669742584, + "p90": 
150.2079963684082, + "p95": 153.82400155067444, + "p99": 178.17600071430206 + }, + "isolatedSum": { + "p50": 165.18399864435196, + "p90": 180.479995906353, + "p95": 188.60800564289093, + "p99": 217.3759937286377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 958464, + "combineLogicalBytes": 958464, + "fanoutMean": 3.65625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 94.52799707651138, + "p90": 105.66399991512299, + "p95": 110.62400043010712, + "p99": 137.11999356746674 + }, + "combine": { + "p50": 71.77600264549255, + "p90": 75.6480023264885, + "p95": 79.29600030183792, + "p99": 83.80799740552902 + }, + "roundtrip": { + "p50": 140.9599930047989, + "p90": 152.0320028066635, + "p95": 155.5519998073578, + "p99": 182.3360025882721 + }, + "isolatedSum": { + "p50": 166.30399972200394, + "p90": 181.31200224161148, + "p95": 189.92000073194504, + "p99": 220.92799097299576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1884160, + "combineLogicalBytes": 1884160, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 94.97600048780441, + "p90": 105.92000186443329, + "p95": 108.86400192975998, + "p99": 142.81600713729858 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 79.55200225114822, + "p95": 81.56800270080566, + "p99": 86.07999980449677 + }, + "roundtrip": { + "p50": 142.2400027513504, + "p90": 153.4080058336258, + "p95": 158.1439971923828, + "p99": 188.86399269104004 + }, + "isolatedSum": { + "p50": 167.42400079965591, + "p90": 185.4720041155815, + "p95": 190.43200463056564, + "p99": 228.89600694179535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3768320, + "combineLogicalBytes": 3768320, + "fanoutMean": 3.59375, + "recvTokensMax": 121, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 103.26399654150009, + "p90": 110.68800091743469, + "p95": 115.1999980211258, + "p99": 124.38400089740753 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 81.69600367546082, + "p95": 83.96799862384796, + "p99": 123.36000055074692 + }, + "roundtrip": { + "p50": 149.21599626541138, + "p90": 157.6640009880066, + "p95": 160.47999262809753, + "p99": 164.51199352741241 + }, + "isolatedSum": { + "p50": 176.63999646902084, + "p90": 192.3840045928955, + "p95": 199.16799664497375, + "p99": 247.74400144815445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7585792, + "combineLogicalBytes": 7585792, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 115.7120019197464, + "p90": 124.38400089740753, + "p95": 130.3039938211441, + "p99": 162.33600676059723 + }, + "combine": { + "p50": 85.31200140714645, + "p90": 93.05600076913834, + "p95": 95.04000097513199, + "p99": 99.71199929714203 + }, + "roundtrip": { + "p50": 171.03999853134155, + "p90": 178.68800461292267, + "p95": 180.67200481891632, + "p99": 189.82400000095367 + }, + "isolatedSum": { + "p50": 201.02400332689285, + "p90": 217.44000166654587, + "p95": 225.3439947962761, + "p99": 262.04800605773926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 15294464, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-81847484", + "identity": "gb300|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||3bda3dd7d4e88bf", + "colorKey": "gb300_11303bbb", + "comparisonKey": "920d43eb6d9fa630", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T09:33:29.905869+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3bda3dd7d4e88bf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 85.50400286912918, + "p90": 95.96800059080124, + "p95": 101.24800354242325, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 
66.14399701356888, + "p90": 73.88799637556076, + "p95": 75.93599706888199, + "p99": 97.6639986038208 + }, + "roundtrip": { + "p50": 133.02400708198547, + "p90": 141.27999544143677, + "p95": 145.88800072669983, + "p99": 152.63999998569489 + }, + "isolatedSum": { + "p50": 151.64799988269806, + "p90": 169.855996966362, + "p95": 177.18400061130524, + "p99": 208.70400220155716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 153600, + "combineLogicalBytes": 153600, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 85.56800335645676, + "p90": 94.68799829483032, + "p95": 97.18400239944458, + "p99": 105.24799674749374 + }, + "combine": { + "p50": 66.56000018119812, + "p90": 74.17599856853485, + "p95": 75.55200159549713, + "p99": 80.99199831485748 + }, + "roundtrip": { + "p50": 135.93600690364838, + "p90": 143.16800236701965, + "p95": 147.45600521564484, + "p99": 151.8079936504364 + }, + "isolatedSum": { + "p50": 152.12800353765488, + "p90": 168.86399686336517, + "p95": 172.7360039949417, + "p99": 186.23999506235123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 307200, + "combineLogicalBytes": 307200, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 85.91999858617783, + "p90": 95.42399644851685, + "p95": 100.51199793815613, + "p99": 133.82400572299957 + }, + "combine": { + "p50": 66.880002617836, + "p90": 74.46400076150894, + "p95": 75.96799731254578, + "p99": 81.34400099515915 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 145.4080045223236, + "p95": 149.27999675273895, + "p99": 199.96799528598785 + }, + "isolatedSum": { + "p50": 152.80000120401382, + "p90": 169.8879972100258, + "p95": 176.4799952507019, + "p99": 215.16800671815872 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 604160, + "combineLogicalBytes": 604160, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 85.82399785518646, + "p90": 94.97600048780441, + "p95": 100.09600222110748, + "p99": 116.99199676513672 + }, + "combine": { + "p50": 70.65600156784058, + "p90": 75.52000135183334, + "p95": 76.86399668455124, + "p99": 95.07200121879578 + }, + "roundtrip": { + "p50": 139.48799669742584, + "p90": 148.67199957370758, + "p95": 152.67199277877808, + "p99": 171.1360067129135 + }, + "isolatedSum": { + "p50": 156.47999942302704, + "p90": 170.49600183963776, + "p95": 176.95999890565872, + "p99": 212.0639979839325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1208320, + "combineLogicalBytes": 1208320, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 86.97599917650223, + "p90": 97.53599762916565, + "p95": 102.33599692583084, + "p99": 130.52800297737122 + }, + "combine": { + "p50": 71.55200093984604, + "p90": 75.3600001335144, + "p95": 76.60800218582153, + "p99": 82.5280025601387 + }, + "roundtrip": { + "p50": 139.0720009803772, + "p90": 148.6400067806244, + "p95": 155.008003115654, + "p99": 174.6239960193634 + }, + "isolatedSum": { + "p50": 158.52800011634827, + "p90": 172.89599776268005, + "p95": 178.94399911165237, + "p99": 213.05600553750992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2447360, + "combineLogicalBytes": 2447360, + "fanoutMean": 3.734375, + "recvTokensMax": 62, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 87.13600039482117, + "p90": 97.05600142478943, + "p95": 
101.85600072145462, + "p99": 137.7280056476593 + }, + "combine": { + "p50": 71.9040036201477, + "p90": 75.99999755620956, + "p95": 77.72800326347351, + "p99": 85.75999736785889 + }, + "roundtrip": { + "p50": 139.67999815940857, + "p90": 149.24800395965576, + "p95": 154.88000214099884, + "p99": 180.25599420070648 + }, + "isolatedSum": { + "p50": 159.04000401496887, + "p90": 173.055998980999, + "p95": 179.58400398492813, + "p99": 223.4880030155182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4792320, + "combineLogicalBytes": 4792320, + "fanoutMean": 3.65625, + "recvTokensMax": 122, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 98.88000041246414, + "p90": 107.19999670982361, + "p95": 111.1999973654747, + "p99": 122.3360002040863 + }, + "combine": { + "p50": 75.9039968252182, + "p90": 81.11999928951263, + "p95": 84.86399799585342, + "p99": 90.65599739551544 + }, + "roundtrip": { + "p50": 149.05600249767303, + "p90": 155.35999834537506, + "p95": 157.50400722026825, + "p99": 165.8560037612915 + }, + "isolatedSum": { + "p50": 174.78399723768234, + "p90": 188.31999599933624, + "p95": 196.06399536132812, + "p99": 212.99199759960175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9523200, + "combineLogicalBytes": 9523200, + "fanoutMean": 3.6328125, + "recvTokensMax": 242, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 110.78400164842606, + "p90": 119.00799721479416, + "p95": 124.22399967908859, + "p99": 147.48799800872803 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 92.19200164079666, + "p95": 97.24800288677216, + "p99": 100.73599964380264 + }, + "roundtrip": { + "p50": 172.41600155830383, + "p90": 179.07199263572693, + "p95": 181.63199722766876, + "p99": 190.2720034122467 + }, + "isolatedSum": { + "p50": 
198.72000068426132, + "p90": 211.19999885559082, + "p95": 221.47200256586075, + "p99": 248.22399765253067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19097600, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-66fadd9d", + "identity": "gb300|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_11303bbb", + "comparisonKey": "2c0b4c3fa6d51392", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:39.804230+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, 
+ "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 91.67999774217606, + "p90": 105.82400113344193, + "p95": 112.73600161075592, + "p99": 144.70399916172028 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 76.64000242948532, + "p95": 79.32800054550171, + "p99": 100.16000270843506 + }, + "roundtrip": { + "p50": 141.34399592876434, + "p90": 154.55999970436096, + "p95": 163.87200355529785, + "p99": 197.1520036458969 + }, + "isolatedSum": { + "p50": 164.06399756669998, + "p90": 182.46400356292725, + "p95": 192.06400215625763, + "p99": 244.86400187015533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 184320, + "combineLogicalBytes": 184320, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 92.28800237178802, + "p90": 103.84000092744827, + "p95": 110.78400164842606, + "p99": 148.41599762439728 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 78.07999849319458, + "p95": 83.55200290679932, + "p99": 106.04800283908844 + }, + "roundtrip": { + "p50": 143.23200285434723, + "p90": 156.95999562740326, + "p95": 164.70399498939514, + "p99": 204.99199628829956 + }, + "isolatedSum": { + "p50": 165.24799913167953, + "p90": 181.91999942064285, + "p95": 194.33600455522537, + "p99": 254.46400046348572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368640, + "combineLogicalBytes": 368640, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 92.6399976015091, + "p90": 105.40799796581268, + "p95": 118.56000125408173, + "p99": 155.16799688339233 + }, + "combine": { + "p50": 73.44000041484833, + "p90": 77.60000228881836, + "p95": 86.7839977145195, + "p99": 118.78400295972824 + }, + "roundtrip": { + "p50": 145.60000598430634, + "p90": 157.24800527095795, + "p95": 166.9439971446991, + "p99": 202.55999267101288 + }, + "isolatedSum": { + "p50": 166.07999801635742, + "p90": 183.00800025463104, + "p95": 205.34399896860123, + "p99": 273.9519998431206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 712704, + "combineLogicalBytes": 712704, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 92.54399687051773, + "p90": 105.05600273609161, + "p95": 111.455999314785, + "p99": 143.10400187969208 + }, + "combine": { + "p50": 74.17599856853485, + "p90": 78.68800312280655, + "p95": 82.8159973025322, + "p99": 114.01599645614624 + }, + "roundtrip": { + "p50": 146.7200070619583, + "p90": 159.55199301242828, + "p95": 167.29600727558136, + "p99": 202.94399559497833 + }, + "isolatedSum": { + "p50": 166.71999543905258, + "p90": 183.74400585889816, + "p95": 194.2719966173172, + "p99": 257.1199983358383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1400832, + "combineLogicalBytes": 1400832, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 92.32000261545181, + "p90": 103.80800068378448, + "p95": 110.81600189208984, + "p99": 139.80799913406372 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 82.5280025601387, + "p95": 88.70399743318558, + "p99": 120.44800072908401 + }, + "roundtrip": { 
+ "p50": 147.48799800872803, + "p90": 158.24000537395477, + "p95": 167.87199676036835, + "p99": 198.97599518299103 + }, + "isolatedSum": { + "p50": 166.88000410795212, + "p90": 186.3360032439232, + "p95": 199.51999932527542, + "p99": 260.25599986314774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2826240, + "combineLogicalBytes": 2826240, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 93.75999867916107, + "p90": 106.20799660682678, + "p95": 114.49600011110306, + "p99": 145.24799585342407 + }, + "combine": { + "p50": 75.6160020828247, + "p90": 82.68799632787704, + "p95": 85.37600189447403, + "p99": 117.69600212574005 + }, + "roundtrip": { + "p50": 149.34399724006653, + "p90": 160.76800227165222, + "p95": 169.3120002746582, + "p99": 204.76800203323364 + }, + "isolatedSum": { + "p50": 169.37600076198578, + "p90": 188.89599293470383, + "p95": 199.8720020055771, + "p99": 262.9439979791641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5750784, + "combineLogicalBytes": 5750784, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 103.74400019645691, + "p90": 111.80800199508667, + "p95": 116.80000275373459, + "p99": 139.55199718475342 + }, + "combine": { + "p50": 83.83999764919281, + "p90": 88.03199976682663, + "p95": 90.81599861383438, + "p99": 113.98400366306305 + }, + "roundtrip": { + "p50": 156.41599893569946, + "p90": 169.11999881267548, + "p95": 178.8800060749054, + "p99": 214.65599536895752 + }, + "isolatedSum": { + "p50": 187.58399784564972, + "p90": 199.8400017619133, + "p95": 207.61600136756897, + "p99": 253.53600084781647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11587584, + "combineLogicalBytes": 11587584, 
+ "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.34399944543839, + "p90": 129.50399518013, + "p95": 140.03199338912964, + "p99": 170.46399414539337 + }, + "combine": { + "p50": 97.75999933481216, + "p90": 102.88000106811523, + "p95": 108.86400192975998, + "p99": 137.92000710964203 + }, + "roundtrip": { + "p50": 186.0480010509491, + "p90": 195.13599574565887, + "p95": 202.2400051355362, + "p99": 233.8559925556183 + }, + "isolatedSum": { + "p50": 215.10399878025055, + "p90": 232.38399624824524, + "p95": 248.89599531888962, + "p99": 308.3840012550354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22941696, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-095dbbd2", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_ffffac8f", + "comparisonKey": "e5ff3a78c6b1e24c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:10.684971+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 99.58399832248688, + "p90": 113.08799684047699, + "p95": 117.60000139474869, + "p99": 130.8480054140091 + }, + "combine": { + "p50": 74.78400319814682, + "p90": 83.29600095748901, + "p95": 86.5280032157898, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 151.2320041656494, + "p90": 162.36799955368042, + "p95": 166.30400717258453, + "p99": 173.0239987373352 + }, + "isolatedSum": { + "p50": 174.3680015206337, + "p90": 196.383997797966, + "p95": 204.12800461053848, + "p99": 244.09600347280502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 97.88800030946732, + "p90": 112.31999844312668, + "p95": 119.23199892044067, + 
"p99": 138.33600282669067 + }, + "combine": { + "p50": 75.52000135183334, + "p90": 84.06399935483932, + "p95": 87.77599781751633, + "p99": 113.92000317573547 + }, + "roundtrip": { + "p50": 155.87200224399567, + "p90": 169.88800466060638, + "p95": 176.4480024576187, + "p99": 191.3599967956543 + }, + "isolatedSum": { + "p50": 173.40800166130066, + "p90": 196.383997797966, + "p95": 207.007996737957, + "p99": 252.25600600242615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 98.94400089979172, + "p90": 112.2559979557991, + "p95": 116.7680025100708, + "p99": 125.50400197505951 + }, + "combine": { + "p50": 75.83999633789062, + "p90": 83.71199667453766, + "p95": 85.88799834251404, + "p99": 94.33600306510925 + }, + "roundtrip": { + "p50": 155.35999834537506, + "p90": 167.9999977350235, + "p95": 171.9360053539276, + "p99": 180.7679980993271 + }, + "isolatedSum": { + "p50": 174.78399723768234, + "p90": 195.96799463033676, + "p95": 202.65600085258484, + "p99": 219.84000504016876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 100.92800110578537, + "p90": 115.87200313806534, + "p95": 123.80799651145935, + "p99": 141.59999787807465 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 86.496002972126, + "p95": 89.1840010881424, + "p99": 122.079998254776 + }, + "roundtrip": { + "p50": 157.9200029373169, + "p90": 169.18399930000305, + "p95": 173.75999689102173, + "p99": 207.64799416065216 + }, + "isolatedSum": { + "p50": 179.6800047159195, + "p90": 202.36800611019135, + "p95": 
212.99199759960175, + "p99": 263.67999613285065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 101.9200012087822, + "p90": 115.64800143241882, + "p95": 120.67200243473053, + "p99": 138.75199854373932 + }, + "combine": { + "p50": 79.1039988398552, + "p90": 86.04799956083298, + "p95": 89.66399729251862, + "p99": 103.2319962978363 + }, + "roundtrip": { + "p50": 158.36800634860992, + "p90": 169.95200514793396, + "p95": 176.86399817466736, + "p99": 209.75999534130096 + }, + "isolatedSum": { + "p50": 181.0240000486374, + "p90": 201.6960009932518, + "p95": 210.33599972724915, + "p99": 241.98399484157562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 102.4319976568222, + "p90": 115.99999666213989, + "p95": 119.9679970741272, + "p99": 130.75199723243713 + }, + "combine": { + "p50": 83.10399949550629, + "p90": 88.35200220346451, + "p95": 90.97599983215332, + "p99": 102.65599936246872 + }, + "roundtrip": { + "p50": 158.62399339675903, + "p90": 171.00800573825836, + "p95": 174.5920032262802, + "p99": 183.29599499702454 + }, + "isolatedSum": { + "p50": 185.5359971523285, + "p90": 204.3519988656044, + "p95": 210.94399690628052, + "p99": 233.40799659490585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 
108.73600095510483, + "p90": 121.72800302505493, + "p95": 126.24000012874603, + "p99": 156.54399991035461 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 94.71999853849411, + "p95": 97.95200079679489, + "p99": 104.73600029945374 + }, + "roundtrip": { + "p50": 166.72000288963318, + "p90": 178.52799594402313, + "p95": 184.1599941253662, + "p99": 206.7520022392273 + }, + "isolatedSum": { + "p50": 196.03200256824493, + "p90": 216.44800156354904, + "p95": 224.19200092554092, + "p99": 261.28000020980835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 124.83199685811996, + "p90": 134.0160071849823, + "p95": 137.472003698349, + "p99": 148.22399616241455 + }, + "combine": { + "p50": 104.12800312042236, + "p90": 111.16799712181091, + "p95": 113.56800049543381, + "p99": 125.44000148773193 + }, + "roundtrip": { + "p50": 197.76000082492828, + "p90": 208.22399854660034, + "p95": 211.90400421619415, + "p99": 222.49600291252136 + }, + "isolatedSum": { + "p50": 228.95999997854233, + "p90": 245.1840043067932, + "p95": 251.0400041937828, + "p99": 273.6639976501465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b773d90a", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_11303bbb", + "comparisonKey": "153d0db711d4a10d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:14.809206+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": 
"gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 86.87999844551086, + "p90": 97.79199957847595, + "p95": 101.6639992594719, + "p99": 113.53600025177002 + }, + "combine": { + "p50": 71.58400118350983, + "p90": 75.42400062084198, + "p95": 76.9599974155426, + "p99": 83.5840031504631 + }, + "roundtrip": { + "p50": 139.74399864673615, + "p90": 
149.56800639629364, + "p95": 154.55999970436096, + "p99": 159.93599593639374 + }, + "isolatedSum": { + "p50": 158.4639996290207, + "p90": 173.21600019931793, + "p95": 178.6239966750145, + "p99": 197.12000340223312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 86.91199868917465, + "p90": 97.28000313043594, + "p95": 101.15200281143188, + "p99": 109.66400057077408 + }, + "combine": { + "p50": 73.60000163316727, + "p90": 76.54400169849396, + "p95": 78.5600021481514, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 142.39999651908875, + "p90": 151.5520066022873, + "p95": 154.55999970436096, + "p99": 159.58400070667267 + }, + "isolatedSum": { + "p50": 160.51200032234192, + "p90": 173.8240048289299, + "p95": 179.71200495958328, + "p99": 197.59999960660934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 87.2960016131401, + "p90": 97.53599762916565, + "p95": 103.20000350475311, + "p99": 111.68000102043152 + }, + "combine": { + "p50": 74.30399954319, + "p90": 77.72800326347351, + "p95": 79.55200225114822, + "p99": 85.40800213813782 + }, + "roundtrip": { + "p50": 140.99200069904327, + "p90": 151.07199549674988, + "p95": 154.33600544929504, + "p99": 162.01600432395935 + }, + "isolatedSum": { + "p50": 161.6000011563301, + "p90": 175.26400089263916, + "p95": 182.75200575590134, + "p99": 197.08800315856934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 87.5839963555336, + "p90": 97.98400104045868, + "p95": 101.53599828481674, + "p99": 112.2559979557991 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 80.32000064849854, + "p95": 83.52000266313553, + "p99": 91.10400080680847 + }, + "roundtrip": { + "p50": 144.80000734329224, + "p90": 154.33600544929504, + "p95": 157.75999426841736, + "p99": 166.84800386428833 + }, + "isolatedSum": { + "p50": 163.16799819469452, + "p90": 178.30400168895721, + "p95": 185.05600094795227, + "p99": 203.35999876260757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 88.57599645853043, + "p90": 97.53599762916565, + "p95": 102.7199998497963, + "p99": 110.91200262308121 + }, + "combine": { + "p50": 75.99999755620956, + "p90": 81.05599880218506, + "p95": 83.45600217580795, + "p99": 89.24800157546997 + }, + "roundtrip": { + "p50": 143.96800100803375, + "p90": 153.9199948310852, + "p95": 158.30400586128235, + "p99": 166.46400094032288 + }, + "isolatedSum": { + "p50": 164.57599401474, + "p90": 178.5919964313507, + "p95": 186.17600202560425, + "p99": 200.16000419855118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 89.72799777984619, + "p90": 98.7199991941452, + "p95": 102.65599936246872, + "p99": 108.57599973678589 + }, + "combine": { + "p50": 77.40800082683563, + "p90": 84.86399799585342, + "p95": 86.5280032157898, + "p99": 90.87999910116196 
+ }, + "roundtrip": { + "p50": 147.5519984960556, + "p90": 156.3519984483719, + "p95": 159.39199924468994, + "p99": 167.80799627304077 + }, + "isolatedSum": { + "p50": 167.13599860668182, + "p90": 183.58399718999863, + "p95": 189.18400257825851, + "p99": 199.45599883794785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 100.63999891281128, + "p90": 108.76800119876862, + "p95": 112.76800185441971, + "p99": 119.52000111341476 + }, + "combine": { + "p50": 83.5840031504631, + "p90": 87.74399757385254, + "p95": 89.1840010881424, + "p99": 96.89600020647049 + }, + "roundtrip": { + "p50": 157.8879952430725, + "p90": 165.75999557971954, + "p95": 168.67199540138245, + "p99": 174.23999309539795 + }, + "isolatedSum": { + "p50": 184.22400206327438, + "p90": 196.51199877262115, + "p95": 201.9520029425621, + "p99": 216.41600131988525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 113.50400000810623, + "p90": 122.81599640846252, + "p95": 125.50400197505951, + "p99": 130.49599528312683 + }, + "combine": { + "p50": 100.41599720716476, + "p90": 105.66399991512299, + "p95": 108.57599973678589, + "p99": 115.00799655914307 + }, + "roundtrip": { + "p50": 190.46400487422943, + "p90": 197.63199985027313, + "p95": 200.06400346755981, + "p99": 207.32800662517548 + }, + "isolatedSum": { + "p50": 213.919997215271, + "p90": 228.4799963235855, + "p95": 234.0800017118454, + "p99": 245.5039918422699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + 
"combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-249b63d1", + "identity": "gb300|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||7c1cc7238ca9a52", + "colorKey": "gb300_11303bbb", + "comparisonKey": "e09e7e0e2685971e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:31:10.466285+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "7c1cc7238ca9a52", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + 
"run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 88.41600269079208, + "p90": 97.95200079679489, + "p95": 102.52799838781357, + "p99": 118.33599954843521 + }, + "combine": { + "p50": 70.68800181150436, + "p90": 73.85600358247757, + "p95": 75.39200037717819, + "p99": 80.6720033288002 + }, + "roundtrip": { + "p50": 140.1599943637848, + "p90": 149.1200029850006, + "p95": 152.16000378131866, + "p99": 159.7760021686554 + }, + "isolatedSum": { + "p50": 159.10400450229645, + "p90": 171.80800437927246, + "p95": 177.91999876499176, + "p99": 199.0080028772354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 89.21600133180618, + "p90": 100.25600343942642, + "p95": 104.3199971318245, + "p99": 110.72000116109848 + }, + "combine": { + "p50": 71.68000191450119, + "p90": 75.29599964618683, + "p95": 78.23999971151352, + "p99": 84.16000008583069 + }, + "roundtrip": { + "p50": 141.95199310779572, + "p90": 151.90400183200836, + "p95": 156.63999319076538, + "p99": 167.1680063009262 + }, + "isolatedSum": { + "p50": 160.89600324630737, + "p90": 175.55200308561325, + "p95": 182.559996843338, + "p99": 194.88000124692917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 89.02399986982346, + "p90": 98.78399968147278, + "p95": 102.68799960613251, + "p99": 
108.64000022411346 + }, + "combine": { + "p50": 72.57600128650665, + "p90": 76.48000121116638, + "p95": 80.6720033288002, + "p99": 85.34400165081024 + }, + "roundtrip": { + "p50": 143.64799857139587, + "p90": 152.41600573062897, + "p95": 156.0640037059784, + "p99": 165.12000560760498 + }, + "isolatedSum": { + "p50": 161.6000011563301, + "p90": 175.26400089263916, + "p95": 183.3600029349327, + "p99": 193.9840018749237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 860160, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 89.50400352478027, + "p90": 98.36799651384354, + "p95": 102.4319976568222, + "p99": 110.59200018644333 + }, + "combine": { + "p50": 74.81600344181061, + "p90": 82.33600109815598, + "p95": 83.99999886751175, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 145.21600306034088, + "p90": 154.40000593662262, + "p95": 157.21599757671356, + "p99": 163.2000058889389 + }, + "isolatedSum": { + "p50": 164.32000696659088, + "p90": 180.7039976119995, + "p95": 186.43199652433395, + "p99": 202.4639993906021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 30, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 90.43200314044952, + "p90": 100.54399818181992, + "p95": 103.93600165843964, + "p99": 115.35999923944473 + }, + "combine": { + "p50": 74.20799881219864, + "p90": 81.79199695587158, + "p95": 83.10399949550629, + "p99": 86.62399649620056 + }, + "roundtrip": { + "p50": 145.91999351978302, + "p90": 153.98399531841278, + "p95": 157.21599757671356, + "p99": 163.00800442695618 + }, + "isolatedSum": { + "p50": 164.64000195264816, + "p90": 182.3359951376915, + 
"p95": 187.04000115394592, + "p99": 201.9839957356453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 59, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 93.18400174379349, + "p90": 101.21600329875946, + "p95": 104.16000336408615, + "p99": 110.23999750614166 + }, + "combine": { + "p50": 79.23199981451035, + "p90": 84.41600203514099, + "p95": 85.9839990735054, + "p99": 92.47999638319016 + }, + "roundtrip": { + "p50": 147.90399372577667, + "p90": 156.22399747371674, + "p95": 159.67999398708344, + "p99": 166.07999801635742 + }, + "isolatedSum": { + "p50": 172.41600155830383, + "p90": 185.63200533390045, + "p95": 190.14400243759155, + "p99": 202.71999388933182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6623232, + "combineLogicalBytes": 6623232, + "fanoutMean": 3.609375, + "recvTokensMax": 117, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 101.75999999046326, + "p90": 110.23999750614166, + "p95": 113.88800293207169, + "p99": 121.0239976644516 + }, + "combine": { + "p50": 83.45600217580795, + "p90": 86.68799698352814, + "p95": 88.19200098514557, + "p99": 94.24000233411789 + }, + "roundtrip": { + "p50": 159.2320054769516, + "p90": 167.10400581359863, + "p95": 170.01600563526154, + "p99": 176.1920005083084 + }, + "isolatedSum": { + "p50": 185.2160021662712, + "p90": 196.9279944896698, + "p95": 202.08000391721725, + "p99": 215.2639999985695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13189120, + "combineLogicalBytes": 13189120, + "fanoutMean": 3.59375, + "recvTokensMax": 234, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { 
+ "p50": 117.3119992017746, + "p90": 125.82400441169739, + "p95": 129.08799946308136, + "p99": 142.20799505710602 + }, + "combine": { + "p50": 98.4639972448349, + "p90": 106.4319983124733, + "p95": 108.44799876213074, + "p99": 116.38399958610535 + }, + "roundtrip": { + "p50": 191.03999435901642, + "p90": 197.05599546432495, + "p95": 200.00000298023224, + "p99": 206.7520022392273 + }, + "isolatedSum": { + "p50": 215.7759964466095, + "p90": 232.25600272417068, + "p95": 237.5359982252121, + "p99": 258.59199464321136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26621952, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e3f6d433", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|decode|normal|none|none|0|tuned||bb358a3c2e68578", + "colorKey": "gb300_a16423a9", + "comparisonKey": "5c966daf4cb292d0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:13.957217+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, 
+ "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "bb358a3c2e68578", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 93.63199770450592, + "p90": 105.72800040245056, + "p95": 111.04000359773636, + "p99": 123.55200201272964 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 79.96799796819687, + "p95": 81.95199817419052, + "p99": 86.01599931716919 + }, + "roundtrip": { + "p50": 147.039994597435, + "p90": 156.41599893569946, + "p95": 160.96000373363495, + "p99": 168.32000017166138 + }, + "isolatedSum": { + "p50": 166.62399470806122, + "p90": 185.69599837064743, + "p95": 192.99200177192688, + "p99": 209.56800132989883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 94.2080020904541, + "p90": 105.21599650382996, + "p95": 109.18399691581726, + "p99": 115.07199704647064 + }, + "combine": { + "p50": 74.20799881219864, + "p90": 82.36800134181976, + "p95": 84.06399935483932, + "p99": 88.57599645853043 + }, + "roundtrip": { + "p50": 
149.1200029850006, + "p90": 158.84800255298615, + "p95": 161.9199961423874, + "p99": 166.36799275875092 + }, + "isolatedSum": { + "p50": 168.41600090265274, + "p90": 187.58399784564972, + "p95": 193.24799627065659, + "p99": 203.64799350500107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 4, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 96.12800180912018, + "p90": 108.03200304508209, + "p95": 113.34399878978729, + "p99": 121.24799937009811 + }, + "combine": { + "p50": 75.87199658155441, + "p90": 83.61600339412689, + "p95": 85.66399663686752, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 150.68799257278442, + "p90": 160.64000129699707, + "p95": 166.36799275875092, + "p99": 175.4560023546219 + }, + "isolatedSum": { + "p50": 171.9999983906746, + "p90": 191.64800643920898, + "p95": 199.00799542665482, + "p99": 213.18399906158447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 4, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 95.93600034713745, + "p90": 107.55199939012527, + "p95": 111.55200004577637, + "p99": 118.1119978427887 + }, + "combine": { + "p50": 80.19199967384338, + "p90": 85.4400023818016, + "p95": 86.36800199747086, + "p99": 93.66399794816971 + }, + "roundtrip": { + "p50": 150.62400698661804, + "p90": 161.47199273109436, + "p95": 165.82399606704712, + "p99": 174.04800653457642 + }, + "isolatedSum": { + "p50": 176.12800002098083, + "p90": 192.99200177192688, + "p95": 197.92000204324722, + "p99": 211.7759957909584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 4, + 
"recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 97.02400118112564, + "p90": 108.64000022411346, + "p95": 113.92000317573547, + "p99": 125.66399574279785 + }, + "combine": { + "p50": 80.06399869918823, + "p90": 85.40800213813782, + "p95": 87.07199990749359, + "p99": 94.11200135946274 + }, + "roundtrip": { + "p50": 151.48800611495972, + "p90": 160.99199652671814, + "p95": 165.69599509239197, + "p99": 172.92800545692444 + }, + "isolatedSum": { + "p50": 177.08799988031387, + "p90": 194.04800236225128, + "p95": 200.99200308322906, + "p99": 219.7759971022606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 4, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 97.4079966545105, + "p90": 108.03200304508209, + "p95": 112.38399893045425, + "p99": 123.32800030708313 + }, + "combine": { + "p50": 82.87999778985977, + "p90": 86.91199868917465, + "p95": 88.70399743318558, + "p99": 95.29600292444229 + }, + "roundtrip": { + "p50": 153.56799960136414, + "p90": 164.32000696659088, + "p95": 167.42399334907532, + "p99": 173.63199591636658 + }, + "isolatedSum": { + "p50": 180.28799444437027, + "p90": 194.94400173425674, + "p95": 201.08799636363983, + "p99": 218.62400323152542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 4, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 109.76000130176544, + "p90": 120.89599668979645, + "p95": 124.1919994354248, + "p99": 130.78400492668152 + }, + "combine": { + "p50": 86.43200248479843, + "p90": 94.55999732017517, + "p95": 
96.54399752616882, + "p99": 106.175996363163 + }, + "roundtrip": { + "p50": 165.3120070695877, + "p90": 174.112007021904, + "p95": 177.63200402259827, + "p99": 183.9359998703003 + }, + "isolatedSum": { + "p50": 196.19200378656387, + "p90": 215.45599400997162, + "p95": 220.73599696159363, + "p99": 236.9600012898445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 4, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 123.77600371837616, + "p90": 133.18400084972382, + "p95": 135.80800592899323, + "p99": 143.00799369812012 + }, + "combine": { + "p50": 106.46399855613708, + "p90": 111.23199760913849, + "p95": 112.67200112342834, + "p99": 117.85600334405899 + }, + "roundtrip": { + "p50": 198.30399751663208, + "p90": 206.36799931526184, + "p95": 210.27199923992157, + "p99": 215.71199595928192 + }, + "isolatedSum": { + "p50": 230.24000227451324, + "p90": 244.4159984588623, + "p95": 248.48000705242157, + "p99": 260.8639970421791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ec927f95", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|decode|normal|none|none|0|tuned||c9bbf5a132d7fdf", + "colorKey": "gb300_4f334ae0", + "comparisonKey": "d5abda10e896cffd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:45.598443+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9bbf5a132d7fdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 92.83199906349182, + "p90": 104.09600287675858, + "p95": 108.38399827480316, + "p99": 116.15999788045883 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 74.97599720954895, + "p95": 77.91999727487564, + "p99": 100.03200173377991 + }, + "roundtrip": { + "p50": 140.22399485111237, + "p90": 151.16800367832184, + "p95": 157.82399475574493, + "p99": 167.07199811935425 + }, + "isolatedSum": { + "p50": 161.24799847602844, + 
"p90": 179.07200008630753, + "p95": 186.3039955496788, + "p99": 216.19199961423874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 57344, + "combineLogicalBytes": 57344, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 94.4959968328476, + "p90": 107.55199939012527, + "p95": 112.83200234174728, + "p99": 126.17599964141846 + }, + "combine": { + "p50": 61.43999844789505, + "p90": 66.78400188684464, + "p95": 70.56000083684921, + "p99": 99.90400075912476 + }, + "roundtrip": { + "p50": 138.3039951324463, + "p90": 152.0320028066635, + "p95": 157.27999806404114, + "p99": 184.03199315071106 + }, + "isolatedSum": { + "p50": 155.93599528074265, + "p90": 174.3360012769699, + "p95": 183.3920031785965, + "p99": 226.0800004005432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 97.59999811649323, + "p90": 110.84800213575363, + "p95": 115.29599875211716, + "p99": 124.09599870443344 + }, + "combine": { + "p50": 73.27999919652939, + "p90": 78.52800190448761, + "p95": 82.8159973025322, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 148.51200580596924, + "p90": 160.288006067276, + "p95": 165.02399742603302, + "p99": 176.54399573802948 + }, + "isolatedSum": { + "p50": 170.8799973130226, + "p90": 189.37600404024124, + "p95": 198.11199605464935, + "p99": 237.11999505758286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + 
"p50": 105.02400249242783, + "p90": 113.50400000810623, + "p95": 116.60800129175186, + "p99": 127.71199643611908 + }, + "combine": { + "p50": 82.84799754619598, + "p90": 87.96799927949905, + "p95": 90.68799763917923, + "p99": 100.96000134944916 + }, + "roundtrip": { + "p50": 156.67200088500977, + "p90": 169.08800601959229, + "p95": 175.80799758434296, + "p99": 207.2959989309311 + }, + "isolatedSum": { + "p50": 187.8720000386238, + "p90": 201.47199928760529, + "p95": 207.2959989309311, + "p99": 228.67199778556824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0170e96a", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|decode|normal|none|none|0|tuned||4dc6cbd03327f4e", + "colorKey": "gb300_05480265", + "comparisonKey": "7e4a37f3f1b68108", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:38:20.587842+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + 
"configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "4dc6cbd03327f4e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 89.40800279378891, + "p90": 99.39199686050415, + "p95": 103.93600165843964, + "p99": 109.79200154542923 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 73.53600114583969, + "p95": 74.52800124883652, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 138.2399946451187, + "p90": 146.464005112648, + "p95": 149.6960073709488, + "p99": 154.08000349998474 + }, + "isolatedSum": { + "p50": 155.74400126934052, + "p90": 172.92799800634384, + "p95": 178.46400290727615, + "p99": 191.71199947595596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 89.31200206279755, + "p90": 98.27200323343277, + "p95": 103.4879982471466, + "p99": 111.93600296974182 + }, + "combine": { + "p50": 68.92800331115723, + "p90": 73.88799637556076, + "p95": 75.13599842786789, + "p99": 79.48800176382065 + }, + "roundtrip": { + "p50": 
139.8719996213913, + "p90": 148.51200580596924, + "p95": 151.5520066022873, + "p99": 160.09600460529327 + }, + "isolatedSum": { + "p50": 158.24000537395477, + "p90": 172.15999960899353, + "p95": 178.6239966750145, + "p99": 191.42400473356247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 89.63199704885483, + "p90": 99.04000163078308, + "p95": 104.19200360774994, + "p99": 114.56000059843063 + }, + "combine": { + "p50": 70.27199864387512, + "p90": 74.87999647855759, + "p95": 76.19199901819229, + "p99": 81.28000050783157 + }, + "roundtrip": { + "p50": 141.53599739074707, + "p90": 149.08799529075623, + "p95": 153.76000106334686, + "p99": 158.9760035276413 + }, + "isolatedSum": { + "p50": 159.90399569272995, + "p90": 173.91999810934067, + "p95": 180.38400262594223, + "p99": 195.8400011062622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 89.6959975361824, + "p90": 98.7199991941452, + "p95": 101.79200023412704, + "p99": 108.70400071144104 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 75.52000135183334, + "p95": 77.15199887752533, + "p99": 85.05599945783615 + }, + "roundtrip": { + "p50": 143.48800480365753, + "p90": 151.90400183200836, + "p95": 155.39200603961945, + "p99": 162.30399906635284 + }, + "isolatedSum": { + "p50": 161.75999492406845, + "p90": 174.24000054597855, + "p95": 178.94399911165237, + "p99": 193.7600001692772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 
24, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 89.6959975361824, + "p90": 98.62399846315384, + "p95": 102.24000364542007, + "p99": 107.87200182676315 + }, + "combine": { + "p50": 71.52000069618225, + "p90": 75.16799867153168, + "p95": 77.18399912118912, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 141.4400041103363, + "p90": 151.07199549674988, + "p95": 155.32800555229187, + "p99": 163.26400637626648 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 173.79199713468552, + "p95": 179.4240027666092, + "p99": 197.2160041332245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 90.08000046014786, + "p90": 98.68799895048141, + "p95": 102.27199643850327, + "p99": 110.72000116109848 + }, + "combine": { + "p50": 72.22399860620499, + "p90": 76.22399926185608, + "p95": 77.60000228881836, + "p99": 83.52000266313553 + }, + "roundtrip": { + "p50": 144.0960019826889, + "p90": 153.6639928817749, + "p95": 156.76799416542053, + "p99": 165.02399742603302 + }, + "isolatedSum": { + "p50": 162.30399906635284, + "p90": 174.9119982123375, + "p95": 179.87199872732162, + "p99": 194.240003824234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2752512, + "combineLogicalBytes": 2752512, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 92.44800359010696, + "p90": 100.54399818181992, + "p95": 103.29599678516388, + "p99": 110.88000237941742 + }, + "combine": { + "p50": 75.32799988985062, + "p90": 79.52000200748444, + "p95": 81.44000172615051, + 
"p99": 88.32000195980072 + }, + "roundtrip": { + "p50": 146.27200365066528, + "p90": 155.58399260044098, + "p95": 159.29600596427917, + "p99": 168.38400065898895 + }, + "isolatedSum": { + "p50": 167.77600347995758, + "p90": 180.06400018930435, + "p95": 184.7359985113144, + "p99": 199.20000433921814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5505024, + "combineLogicalBytes": 5505024, + "fanoutMean": 1.5, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 101.56799852848053, + "p90": 108.92800241708755, + "p95": 112.96000331640244, + "p99": 120.44800072908401 + }, + "combine": { + "p50": 88.70399743318558, + "p90": 95.39200365543365, + "p95": 97.4079966545105, + "p99": 102.91200131177902 + }, + "roundtrip": { + "p50": 166.49599373340607, + "p90": 172.5119948387146, + "p95": 174.5920032262802, + "p99": 181.66400492191315 + }, + "isolatedSum": { + "p50": 190.2719959616661, + "p90": 204.3200060725212, + "p95": 210.36799997091293, + "p99": 223.36000204086304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-74277841", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|decode|normal|none|none|0|tuned||0d921f8a9d2cb27", + "colorKey": "gb300_5ef5ae4f", + "comparisonKey": "d8e5053e53411c0e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:15.508378+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", 
+ "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "0d921f8a9d2cb27", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 87.96799927949905, + "p90": 97.75999933481216, + "p95": 101.72799974679947, + "p99": 109.11999642848969 + }, + "combine": { + "p50": 71.80800288915634, + "p90": 75.96799731254578, + "p95": 78.65600287914276, + "p99": 100.00000149011612 + }, + "roundtrip": { + "p50": 140.19200205802917, + "p90": 148.8959938287735, + "p95": 152.6080071926117, + "p99": 161.69600188732147 + }, + "isolatedSum": { + "p50": 159.7760021686554, + "p90": 173.72799664735794, + "p95": 
180.38400262594223, + "p99": 209.1199979186058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 200704, + "combineLogicalBytes": 200704, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 88.60799670219421, + "p90": 97.05600142478943, + "p95": 101.75999999046326, + "p99": 111.35999858379364 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 75.93599706888199, + "p95": 78.20799946784973, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 140.1599943637848, + "p90": 150.68799257278442, + "p95": 154.59200739860535, + "p99": 163.42400014400482 + }, + "isolatedSum": { + "p50": 161.1199975013733, + "p90": 172.99199849367142, + "p95": 179.967999458313, + "p99": 199.90400224924088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 87.8399983048439, + "p90": 96.09600156545639, + "p95": 99.61599856615067, + "p99": 104.19200360774994 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 77.66400277614594, + "p95": 80.92799782752991, + "p99": 85.63199639320374 + }, + "roundtrip": { + "p50": 141.66399836540222, + "p90": 150.84800124168396, + "p95": 153.02400290966034, + "p99": 162.9440039396286 + }, + "isolatedSum": { + "p50": 161.3439992070198, + "p90": 173.76000434160233, + "p95": 180.54399639368057, + "p99": 189.82400000095367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 88.70399743318558, + "p90": 
98.62399846315384, + "p95": 101.95200145244598, + "p99": 109.0560033917427 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 83.29600095748901, + "p95": 84.57600325345993, + "p99": 88.44800293445587 + }, + "roundtrip": { + "p50": 143.96800100803375, + "p90": 152.92799472808838, + "p95": 156.25600516796112, + "p99": 163.68000209331512 + }, + "isolatedSum": { + "p50": 164.2879992723465, + "p90": 181.91999942064285, + "p95": 186.52800470590591, + "p99": 197.50400632619858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 88.8959988951683, + "p90": 98.91200065612793, + "p95": 103.71199995279312, + "p99": 131.1040073633194 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 83.20000022649765, + "p95": 84.99199897050858, + "p99": 94.04800087213516 + }, + "roundtrip": { + "p50": 144.99199390411377, + "p90": 155.4879993200302, + "p95": 159.93599593639374, + "p99": 185.40799617767334 + }, + "isolatedSum": { + "p50": 164.70400243997574, + "p90": 182.11200088262558, + "p95": 188.7039989233017, + "p99": 225.15200823545456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3282944, + "combineLogicalBytes": 3282944, + "fanoutMean": 3.578125, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 91.87199920415878, + "p90": 99.90400075912476, + "p95": 103.07200253009796, + "p99": 109.43999886512756 + }, + "combine": { + "p50": 79.8719972372055, + "p90": 85.28000116348267, + "p95": 87.23200112581253, + "p99": 93.02400052547455 + }, + "roundtrip": { + "p50": 146.464005112648, + "p90": 154.7520011663437, + "p95": 158.33599865436554, + "p99": 165.69599509239197 + }, + "isolatedSum": { 
+ "p50": 171.7439964413643, + "p90": 185.18400192260742, + "p95": 190.3040036559105, + "p99": 202.4639993906021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6694912, + "combineLogicalBytes": 6694912, + "fanoutMean": 3.6484375, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 100.73599964380264, + "p90": 109.56799983978271, + "p95": 112.5119999051094, + "p99": 119.77600306272507 + }, + "combine": { + "p50": 83.90399813652039, + "p90": 87.87199854850769, + "p95": 89.05600011348724, + "p99": 112.44799941778183 + }, + "roundtrip": { + "p50": 159.16800498962402, + "p90": 166.17600619792938, + "p95": 169.15200650691986, + "p99": 176.7359972000122 + }, + "isolatedSum": { + "p50": 184.63999778032303, + "p90": 197.4399983882904, + "p95": 201.56800001859665, + "p99": 232.2240024805069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13318144, + "combineLogicalBytes": 13318144, + "fanoutMean": 3.62890625, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.97599697113037, + "p90": 125.95200538635254, + "p95": 128.25599312782288, + "p99": 135.16800105571747 + }, + "combine": { + "p50": 99.96800124645233, + "p90": 107.26399719715118, + "p95": 109.27999764680862, + "p99": 113.66400122642517 + }, + "roundtrip": { + "p50": 191.74399971961975, + "p90": 199.23199713230133, + "p95": 201.664000749588, + "p99": 208.03199708461761 + }, + "isolatedSum": { + "p50": 218.9439982175827, + "p90": 233.21600258350372, + "p95": 237.5359907746315, + "p99": 248.83200228214264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
} + ] + }, + { + "id": "cx-864ceb66", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|decode|normal|none|none|0|tuned||cc5ad1cb2e95ef6", + "colorKey": "gb300_43b106ef", + "comparisonKey": "8929b863888e8c72", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:42:08.692751+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cc5ad1cb2e95ef6", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.3408203125, + "eplbImbalanceAfter": 1.000390625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 88.0960002541542, + "p90": 97.98400104045868, + "p95": 101.6319990158081, + "p99": 108.57599973678589 + }, + "combine": { + "p50": 71.3919997215271, + "p90": 74.36800003051758, + "p95": 75.58400183916092, + "p99": 82.94399827718735 + }, + "roundtrip": { + "p50": 140.44800400733948, + "p90": 148.00000190734863, + "p95": 152.12799608707428, + "p99": 157.53600001335144 + }, + "isolatedSum": { + "p50": 159.4879999756813, + "p90": 172.35200107097626, + "p95": 177.21600085496902, + "p99": 191.51999801397324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 88.06400001049042, + "p90": 101.6639992594719, + "p95": 116.95999652147293, + "p99": 164.51199352741241 + }, + "combine": { + "p50": 73.21599870920181, + "p90": 78.23999971151352, + "p95": 85.28000116348267, + "p99": 120.99199742078781 + }, + "roundtrip": { + "p50": 141.88799262046814, + "p90": 153.53600680828094, + "p95": 165.12000560760498, + "p99": 220.89600563049316 + }, + "isolatedSum": { + "p50": 161.27999871969223, + "p90": 179.9039989709854, + "p95": 202.2399976849556, + "p99": 285.5039909482002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 88.83199840784073, + "p90": 100.35199671983719, + "p95": 105.43999820947647, + "p99": 135.96799969673157 + }, + "combine": { + 
"p50": 73.88799637556076, + "p90": 78.91199737787247, + "p95": 84.83199775218964, + "p99": 115.32799899578094 + }, + "roundtrip": { + "p50": 143.327996134758, + "p90": 155.03999590873718, + "p95": 162.33600676059723, + "p99": 240.9600019454956 + }, + "isolatedSum": { + "p50": 162.7199947834015, + "p90": 179.26399409770966, + "p95": 190.2719959616661, + "p99": 251.2959986925125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 860160, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 89.40800279378891, + "p90": 100.54399818181992, + "p95": 107.00800269842148, + "p99": 156.54399991035461 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 84.03199911117554, + "p95": 86.496002972126, + "p99": 97.56799787282944 + }, + "roundtrip": { + "p50": 145.91999351978302, + "p90": 157.53600001335144, + "p95": 164.35199975967407, + "p99": 221.0559993982315 + }, + "isolatedSum": { + "p50": 165.21600633859634, + "p90": 184.57599729299545, + "p95": 193.50400567054749, + "p99": 254.11199778318405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1705984, + "combineLogicalBytes": 1705984, + "fanoutMean": 3.71875, + "recvTokensMax": 31, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 88.83199840784073, + "p90": 98.81599992513657, + "p95": 101.85600072145462, + "p99": 109.6000000834465 + }, + "combine": { + "p50": 75.39200037717819, + "p90": 82.97599852085114, + "p95": 86.04799956083298, + "p99": 107.90400207042694 + }, + "roundtrip": { + "p50": 145.63199877738953, + "p90": 157.98400342464447, + "p95": 162.08000481128693, + "p99": 194.91200149059296 + }, + "isolatedSum": { + "p50": 164.22399878501892, + "p90": 181.7919984459877, + "p95": 187.9040002822876, + "p99": 
217.50400215387344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3411968, + "combineLogicalBytes": 3411968, + "fanoutMean": 3.71875, + "recvTokensMax": 62, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.44800359010696, + "p90": 103.90400141477585, + "p95": 120.64000219106674, + "p99": 171.36000096797943 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 85.85599809885025, + "p95": 88.70399743318558, + "p99": 134.88000631332397 + }, + "roundtrip": { + "p50": 148.51200580596924, + "p90": 163.13600540161133, + "p95": 185.248002409935, + "p99": 223.64799678325653 + }, + "isolatedSum": { + "p50": 169.6000024676323, + "p90": 189.7599995136261, + "p95": 209.34399962425232, + "p99": 306.2400072813034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6680576, + "combineLogicalBytes": 6680576, + "fanoutMean": 3.640625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 101.72799974679947, + "p90": 112.70400136709213, + "p95": 121.60000205039978, + "p99": 170.6559956073761 + }, + "combine": { + "p50": 84.09599959850311, + "p90": 87.74399757385254, + "p95": 89.66399729251862, + "p99": 113.79200220108032 + }, + "roundtrip": { + "p50": 159.84000265598297, + "p90": 167.32800006866455, + "p95": 170.49600183963776, + "p99": 220.19200026988983 + }, + "isolatedSum": { + "p50": 185.82399934530258, + "p90": 200.44799894094467, + "p95": 211.2639993429184, + "p99": 284.4479978084564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13432832, + "combineLogicalBytes": 13432832, + "fanoutMean": 3.66015625, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 115.61600118875504, + 
"p90": 128.80000472068787, + "p95": 147.39200472831726, + "p99": 183.67999792099 + }, + "combine": { + "p50": 100.19200295209885, + "p90": 109.47199910879135, + "p95": 117.98399686813354, + "p99": 150.30400454998016 + }, + "roundtrip": { + "p50": 191.26400351524353, + "p90": 202.14399695396423, + "p95": 223.13599288463593, + "p99": 261.31200790405273 + }, + "isolatedSum": { + "p50": 215.80800414085388, + "p90": 238.27200382947922, + "p95": 265.3760015964508, + "p99": 333.98400247097015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26464256, + "combineLogicalBytes": 26464256, + "fanoutMean": 3.60546875, + "recvTokensMax": 471, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9880f6da", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|decode|normal|none|none|0|tuned||c186e8c8d66ece3", + "colorKey": "gb300_339552af", + "comparisonKey": "f0af0b032b8c046a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:48.387410+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + 
"deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c186e8c8d66ece3", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.091796875, + "eplbImbalanceAfter": 1.00146484375, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 91.90399944782257, + "p90": 100.89600086212158, + "p95": 104.3199971318245, + "p99": 111.80800199508667 + }, + "combine": { + "p50": 73.44000041484833, + "p90": 77.2479996085167, + "p95": 79.03999835252762, + "p99": 86.87999844551086 + }, + "roundtrip": { + "p50": 145.7280069589615, + "p90": 156.3200056552887, + "p95": 159.8079949617386, + "p99": 168.7999963760376 + }, + "isolatedSum": { + "p50": 165.3439998626709, + "p90": 178.14400047063828, + "p95": 183.3599954843521, + "p99": 198.68800044059753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 91.80799871683121, + "p90": 101.59999877214432, + "p95": 107.744000852108, + "p99": 119.99999731779099 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 83.93599838018417, + "p95": 124.51200187206268, + "p99": 132.25600123405457 + }, + "roundtrip": { + "p50": 
147.90399372577667, + "p90": 154.78399395942688, + "p95": 158.9760035276413, + "p99": 167.77600347995758 + }, + "isolatedSum": { + "p50": 165.53600132465363, + "p90": 185.5359971523285, + "p95": 232.25600272417068, + "p99": 252.25599855184555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 93.08800101280212, + "p90": 103.35999727249146, + "p95": 106.72000050544739, + "p99": 111.7120012640953 + }, + "combine": { + "p50": 74.8480036854744, + "p90": 78.78399640321732, + "p95": 81.15199953317642, + "p99": 84.54400300979614 + }, + "roundtrip": { + "p50": 149.21599626541138, + "p90": 157.50400722026825, + "p95": 161.3759994506836, + "p99": 165.6000018119812 + }, + "isolatedSum": { + "p50": 167.93600469827652, + "p90": 182.14399367570877, + "p95": 187.8720000386238, + "p99": 196.25600427389145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 745472, + "combineLogicalBytes": 745472, + "fanoutMean": 3.25, + "recvTokensMax": 15, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 93.31200271844864, + "p90": 102.52799838781357, + "p95": 106.55999928712845, + "p99": 113.37599903345108 + }, + "combine": { + "p50": 76.25599950551987, + "p90": 80.6720033288002, + "p95": 83.52000266313553, + "p99": 88.67199718952179 + }, + "roundtrip": { + "p50": 150.39999783039093, + "p90": 159.71200168132782, + "p95": 162.88000345230103, + "p99": 169.3439930677414 + }, + "isolatedSum": { + "p50": 169.5680022239685, + "p90": 183.20000171661377, + "p95": 190.08000195026398, + "p99": 202.04799622297287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + 
"recvTokensMax": 30, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 93.40800344944, + "p90": 102.94400155544281, + "p95": 106.175996363163, + "p99": 112.35199868679047 + }, + "combine": { + "p50": 76.28799974918365, + "p90": 81.216000020504, + "p95": 84.70399677753448, + "p99": 90.01599997282028 + }, + "roundtrip": { + "p50": 151.0079950094223, + "p90": 161.05599701404572, + "p95": 164.73600268363953, + "p99": 170.9119975566864 + }, + "isolatedSum": { + "p50": 169.69600319862366, + "p90": 184.1600015759468, + "p95": 190.87999314069748, + "p99": 202.36799865961075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3225600, + "combineLogicalBytes": 3225600, + "fanoutMean": 3.515625, + "recvTokensMax": 60, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 94.71999853849411, + "p90": 104.3199971318245, + "p95": 107.00800269842148, + "p99": 114.3999993801117 + }, + "combine": { + "p50": 77.88799703121185, + "p90": 85.21600067615509, + "p95": 87.16800063848495, + "p99": 91.90399944782257 + }, + "roundtrip": { + "p50": 151.5520066022873, + "p90": 161.6320013999939, + "p95": 164.2879992723465, + "p99": 171.23199999332428 + }, + "isolatedSum": { + "p50": 172.60799556970596, + "p90": 189.53599780797958, + "p95": 194.17600333690643, + "p99": 206.30399882793427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6522880, + "combineLogicalBytes": 6522880, + "fanoutMean": 3.5546875, + "recvTokensMax": 118, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 105.66399991512299, + "p90": 112.70400136709213, + "p95": 115.77600240707397, + "p99": 122.72000312805176 + }, + "combine": { + "p50": 85.88799834251404, + "p90": 90.01599997282028, + 
"p95": 91.48799628019333, + "p99": 100.63999891281128 + }, + "roundtrip": { + "p50": 162.88000345230103, + "p90": 170.46399414539337, + "p95": 173.7920045852661, + "p99": 179.07199263572693 + }, + "isolatedSum": { + "p50": 191.55199825763702, + "p90": 202.72000133991241, + "p95": 207.2639986872673, + "p99": 223.36000204086304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13160448, + "combineLogicalBytes": 13160448, + "fanoutMean": 3.5859375, + "recvTokensMax": 238, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.88800358772278, + "p90": 124.92799758911133, + "p95": 127.96799838542938, + "p99": 135.8720064163208 + }, + "combine": { + "p50": 101.18400305509567, + "p90": 106.72000050544739, + "p95": 109.82400178909302, + "p99": 113.08799684047699 + }, + "roundtrip": { + "p50": 192.44800508022308, + "p90": 199.64799284934998, + "p95": 202.55999267101288, + "p99": 209.4399929046631 + }, + "isolatedSum": { + "p50": 219.07200664281845, + "p90": 231.64799809455872, + "p95": 237.7920001745224, + "p99": 248.9600032567978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26406912, + "combineLogicalBytes": 26406912, + "fanoutMean": 3.59765625, + "recvTokensMax": 474, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d74ae843", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_b554fd9a", + "comparisonKey": "838d77b5f62aa967", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:10.920543+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 88.06400001049042, + "p90": 98.04800152778625, + "p95": 101.6319990158081, + "p99": 107.58399963378906 + }, + "combine": { + "p50": 71.35999947786331, + "p90": 75.39200037717819, + "p95": 77.08799839019775, + "p99": 85.15200018882751 + }, + "roundtrip": { + "p50": 141.50400459766388, + "p90": 151.19999647140503, + "p95": 154.91199493408203, + "p99": 161.3440066576004 + }, + "isolatedSum": { + "p50": 159.42399948835373, + "p90": 173.44000190496445, + 
"p95": 178.71999740600586, + "p99": 192.73599982261658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 87.26400136947632, + "p90": 95.74399888515472, + "p95": 100.92800110578537, + "p99": 105.40799796581268 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 76.51200145483017, + "p95": 79.39200103282928, + "p99": 86.07999980449677 + }, + "roundtrip": { + "p50": 143.16800236701965, + "p90": 153.53600680828094, + "p95": 158.01599621772766, + "p99": 164.95999693870544 + }, + "isolatedSum": { + "p50": 159.29599851369858, + "p90": 172.2560003399849, + "p95": 180.32000213861465, + "p99": 191.48799777030945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 87.23200112581253, + "p90": 97.08800166845322, + "p95": 100.28800368309021, + "p99": 112.41599917411804 + }, + "combine": { + "p50": 72.83200323581696, + "p90": 76.48000121116638, + "p95": 79.26400005817413, + "p99": 86.33600175380707 + }, + "roundtrip": { + "p50": 142.84799993038177, + "p90": 153.6960005760193, + "p95": 157.56799280643463, + "p99": 164.60800170898438 + }, + "isolatedSum": { + "p50": 160.0640043616295, + "p90": 173.5680028796196, + "p95": 179.55200374126434, + "p99": 198.7520009279251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 87.96799927949905, + 
"p90": 97.75999933481216, + "p95": 101.31199657917023, + "p99": 107.90400207042694 + }, + "combine": { + "p50": 74.33599978685379, + "p90": 79.32800054550171, + "p95": 82.24000036716461, + "p99": 86.14400029182434 + }, + "roundtrip": { + "p50": 145.63199877738953, + "p90": 154.91199493408203, + "p95": 158.39999914169312, + "p99": 165.6319946050644 + }, + "isolatedSum": { + "p50": 162.30399906635284, + "p90": 177.08799988031387, + "p95": 183.55199694633484, + "p99": 194.04800236225128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 87.5839963555336, + "p90": 96.92800045013428, + "p95": 101.53599828481674, + "p99": 115.84000289440155 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 82.68799632787704, + "p95": 84.25600081682205, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 145.28000354766846, + "p90": 156.00000321865082, + "p95": 159.32799875736237, + "p99": 163.55200111865997 + }, + "isolatedSum": { + "p50": 162.59199380874634, + "p90": 179.61599677801132, + "p95": 185.7919991016388, + "p99": 205.18400520086288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 90.36800265312195, + "p90": 99.48799759149551, + "p95": 101.95200145244598, + "p99": 108.0000028014183 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 84.86399799585342, + "p95": 86.56000345945358, + "p99": 94.91200000047684 + }, + "roundtrip": { + "p50": 147.039994597435, + "p90": 155.61600029468536, + "p95": 158.4320068359375, + "p99": 165.6000018119812 + }, + 
"isolatedSum": { + "p50": 167.61600226163864, + "p90": 184.35199558734894, + "p95": 188.51200491189957, + "p99": 202.91200280189514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 99.84000027179718, + "p90": 107.32799768447876, + "p95": 111.455999314785, + "p99": 118.56000125408173 + }, + "combine": { + "p50": 83.5840031504631, + "p90": 87.2960016131401, + "p95": 88.95999938249588, + "p99": 93.59999746084213 + }, + "roundtrip": { + "p50": 156.09599649906158, + "p90": 164.89599645137787, + "p95": 167.4560010433197, + "p99": 173.6000031232834 + }, + "isolatedSum": { + "p50": 183.42400342226028, + "p90": 194.62399929761887, + "p95": 200.41599869728088, + "p99": 212.15999871492386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 112.64000087976456, + "p90": 120.7680031657219, + "p95": 123.32800030708313, + "p99": 130.3360015153885 + }, + "combine": { + "p50": 97.9200005531311, + "p90": 102.20800340175629, + "p95": 105.82400113344193, + "p99": 110.62400043010712 + }, + "roundtrip": { + "p50": 186.62400543689728, + "p90": 193.92000138759613, + "p95": 196.76800072193146, + "p99": 205.53599298000336 + }, + "isolatedSum": { + "p50": 210.56000143289566, + "p90": 222.97600656747818, + "p95": 229.15200144052505, + "p99": 240.9600019454956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + } + ] + }, + { + "id": "cx-7691f722", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|decode|normal|none|none|0|tuned||3f8ffeba9f65629", + "colorKey": "gb300_b0a58d70", + "comparisonKey": "0f0b5267adf7b166", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:43.130621+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3f8ffeba9f65629", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + 
"createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 90.84799885749817, + "p90": 99.90400075912476, + "p95": 104.47999835014343, + "p99": 112.03200370073318 + }, + "combine": { + "p50": 70.592001080513, + "p90": 75.1039981842041, + "p95": 77.44000107049942, + "p99": 83.71199667453766 + }, + "roundtrip": { + "p50": 141.56800508499146, + "p90": 149.63200688362122, + "p95": 153.98399531841278, + "p99": 162.84799575805664 + }, + "isolatedSum": { + "p50": 161.43999993801117, + "p90": 175.00799894332886, + "p95": 181.91999942064285, + "p99": 195.74400037527084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 71680, + "combineLogicalBytes": 71680, + "fanoutMean": 1.25, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 89.9839997291565, + "p90": 99.5199978351593, + "p95": 102.68799960613251, + "p99": 108.89600217342377 + }, + "combine": { + "p50": 70.94399631023407, + "p90": 75.45600086450577, + "p95": 76.86399668455124, + "p99": 82.17599987983704 + }, + "roundtrip": { + "p50": 142.30400323867798, + "p90": 150.36800503730774, + "p95": 153.76000106334686, + "p99": 160.96000373363495 + }, + "isolatedSum": { + "p50": 160.92799603939056, + "p90": 174.97599869966507, + "p95": 179.55199629068375, + "p99": 191.0720020532608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 1.375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 92.22400188446045, + "p90": 100.70399940013885, + "p95": 104.47999835014343, + "p99": 111.64800077676773 + }, + "combine": { + "p50": 71.1359977722168, + "p90": 76.03199779987335, + "p95": 
78.94399762153625, + "p99": 84.86399799585342 + }, + "roundtrip": { + "p50": 142.39999651908875, + "p90": 151.87199413776398, + "p95": 155.7759940624237, + "p99": 166.72000288963318 + }, + "isolatedSum": { + "p50": 163.35999965667725, + "p90": 176.7359972000122, + "p95": 183.4239959716797, + "p99": 196.51199877262115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 286720, + "combineLogicalBytes": 286720, + "fanoutMean": 1.25, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 93.44000369310379, + "p90": 102.68799960613251, + "p95": 107.39199817180634, + "p99": 113.63200098276138 + }, + "combine": { + "p50": 71.96799665689468, + "p90": 77.08799839019775, + "p95": 79.96799796819687, + "p99": 84.57600325345993 + }, + "roundtrip": { + "p50": 146.01600170135498, + "p90": 153.05599570274353, + "p95": 154.9759954214096, + "p99": 160.70400178432465 + }, + "isolatedSum": { + "p50": 165.40800034999847, + "p90": 179.77599799633026, + "p95": 187.3599961400032, + "p99": 198.2080042362213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 1.21875, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 94.04800087213516, + "p90": 104.16000336408615, + "p95": 106.59199953079224, + "p99": 124.54400211572647 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 76.67200267314911, + "p95": 78.62400263547897, + "p99": 83.00799876451492 + }, + "roundtrip": { + "p50": 148.0640023946762, + "p90": 155.96799552440643, + "p95": 159.96800363063812, + "p99": 165.95199704170227 + }, + "isolatedSum": { + "p50": 166.4320006966591, + "p90": 180.83200603723526, + "p95": 185.2160021662712, + "p99": 207.5520008802414 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1161216, + "combineLogicalBytes": 1161216, + "fanoutMean": 1.265625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 93.50399672985077, + "p90": 102.46399790048599, + "p95": 105.82400113344193, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 80.57600259780884, + "p95": 82.97599852085114, + "p99": 91.00800007581711 + }, + "roundtrip": { + "p50": 148.73600006103516, + "p90": 157.1200042963028, + "p95": 159.93599593639374, + "p99": 165.43999314308167 + }, + "isolatedSum": { + "p50": 168.12799870967865, + "p90": 183.04000049829483, + "p95": 188.79999965429306, + "p99": 202.5279998779297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2279424, + "combineLogicalBytes": 2279424, + "fanoutMean": 1.2421875, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 99.74399954080582, + "p90": 108.06400328874588, + "p95": 111.48799955844879, + "p99": 119.77600306272507 + }, + "combine": { + "p50": 83.93599838018417, + "p90": 88.19200098514557, + "p95": 89.9839997291565, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 157.1200042963028, + "p90": 164.48000073432922, + "p95": 168.7999963760376, + "p99": 178.8800060749054 + }, + "isolatedSum": { + "p50": 183.67999792099, + "p90": 196.25600427389145, + "p95": 201.47199928760529, + "p99": 216.48000180721283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4587520, + "combineLogicalBytes": 4587520, + "fanoutMean": 1.25, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 111.07199639081955, + "p90": 118.23999881744385, + "p95": 121.56800180673599, + "p99": 
130.20800054073334 + }, + "combine": { + "p50": 96.73599898815155, + "p90": 102.36799716949463, + "p95": 104.41599786281586, + "p99": 110.20799726247787 + }, + "roundtrip": { + "p50": 182.5920045375824, + "p90": 189.18399512767792, + "p95": 191.96799397468567, + "p99": 198.88000190258026 + }, + "isolatedSum": { + "p50": 207.8079953789711, + "p90": 220.60799598693848, + "p95": 225.98399966955185, + "p99": 240.4159978032112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ad56df1d", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|decode|normal|none|none|0|tuned||e9a6e5febe08793", + "colorKey": "gb300_40a5347e", + "comparisonKey": "f3539dd99e238ac9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:35.826390+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e9a6e5febe08793", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86328125, + "eplbImbalanceAfter": 1.0003348214285714, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 87.93599903583527, + "p90": 98.24000298976898, + "p95": 102.49599814414978, + "p99": 107.4879989027977 + }, + "combine": { + "p50": 69.50400024652481, + "p90": 74.43200051784515, + "p95": 76.80000364780426, + "p99": 81.53600245714188 + }, + "roundtrip": { + "p50": 140.09599387645721, + "p90": 149.21599626541138, + "p95": 152.8320014476776, + "p99": 160.44799983501434 + }, + "isolatedSum": { + "p50": 157.43999928236008, + "p90": 172.67200350761414, + "p95": 179.29600179195404, + "p99": 189.02400135993958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 88.25600147247314, + "p90": 98.08000177145004, + "p95": 102.01600193977356, + "p99": 107.55199939012527 + }, + "combine": { + "p50": 71.9359964132309, + "p90": 75.93599706888199, + "p95": 79.39200103282928, + "p99": 85.75999736785889 + }, + "roundtrip": { + "p50": 142.30400323867798, + "p90": 149.9200016260147, + 
"p95": 153.82400155067444, + "p99": 159.93599593639374 + }, + "isolatedSum": { + "p50": 160.19199788570404, + "p90": 174.01599884033203, + "p95": 181.40800297260284, + "p99": 193.31199675798416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 89.05600011348724, + "p90": 100.47999769449234, + "p95": 105.53599894046783, + "p99": 143.23200285434723 + }, + "combine": { + "p50": 73.82400333881378, + "p90": 78.65600287914276, + "p95": 81.50400221347809, + "p99": 89.6959975361824 + }, + "roundtrip": { + "p50": 143.99999380111694, + "p90": 153.18399667739868, + "p95": 156.51200711727142, + "p99": 179.00800704956055 + }, + "isolatedSum": { + "p50": 162.88000345230103, + "p90": 179.1360005736351, + "p95": 187.04000115394592, + "p99": 232.92800039052963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 788480, + "combineLogicalBytes": 788480, + "fanoutMean": 3.4375, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 88.44800293445587, + "p90": 98.78399968147278, + "p95": 101.88800096511841, + "p99": 110.01600325107574 + }, + "combine": { + "p50": 75.71200281381607, + "p90": 82.43200182914734, + "p95": 84.3840017914772, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 144.86399292945862, + "p90": 154.88000214099884, + "p95": 157.56799280643463, + "p99": 164.92800414562225 + }, + "isolatedSum": { + "p50": 164.16000574827194, + "p90": 181.21600151062012, + "p95": 186.2720027565956, + "p99": 199.5519995689392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 31, + "stragglerRank": 1, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 89.1840010881424, + "p90": 99.39199686050415, + "p95": 102.68799960613251, + "p99": 112.09599673748016 + }, + "combine": { + "p50": 76.09599828720093, + "p90": 82.94399827718735, + "p95": 84.76799726486206, + "p99": 91.16800129413605 + }, + "roundtrip": { + "p50": 146.11199498176575, + "p90": 155.20000457763672, + "p95": 159.5200002193451, + "p99": 168.32000017166138 + }, + "isolatedSum": { + "p50": 165.27999937534332, + "p90": 182.3359951376915, + "p95": 187.45599687099457, + "p99": 203.2639980316162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3196928, + "combineLogicalBytes": 3196928, + "fanoutMean": 3.484375, + "recvTokensMax": 59, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 91.77599847316742, + "p90": 100.19200295209885, + "p95": 104.3199971318245, + "p99": 110.20799726247787 + }, + "combine": { + "p50": 79.19999957084656, + "p90": 84.83199775218964, + "p95": 86.27200126647949, + "p99": 93.66399794816971 + }, + "roundtrip": { + "p50": 147.96799421310425, + "p90": 155.39200603961945, + "p95": 159.2320054769516, + "p99": 165.8879965543747 + }, + "isolatedSum": { + "p50": 170.97599804401398, + "p90": 185.02400070428848, + "p95": 190.59199839830399, + "p99": 203.87199521064758 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6408192, + "combineLogicalBytes": 6408192, + "fanoutMean": 3.4921875, + "recvTokensMax": 114, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 102.20800340175629, + "p90": 110.36799848079681, + "p95": 114.84800279140472, + "p99": 128.51199507713318 + }, + "combine": { + "p50": 83.61600339412689, + "p90": 88.28800171613693, + "p95": 92.38400310277939, + "p99": 
134.91199910640717 + }, + "roundtrip": { + "p50": 158.27199816703796, + "p90": 167.55199432373047, + "p95": 171.48800194263458, + "p99": 235.48799753189087 + }, + "isolatedSum": { + "p50": 185.82400679588318, + "p90": 198.65600019693375, + "p95": 207.2320058941841, + "p99": 263.42399418354034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12773376, + "combineLogicalBytes": 12773376, + "fanoutMean": 3.48046875, + "recvTokensMax": 226, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 114.14399743080139, + "p90": 123.87199699878693, + "p95": 127.55200266838074, + "p99": 155.008003115654 + }, + "combine": { + "p50": 99.96800124645233, + "p90": 106.04800283908844, + "p95": 107.35999792814255, + "p99": 110.27199774980545 + }, + "roundtrip": { + "p50": 190.3039962053299, + "p90": 197.05599546432495, + "p95": 199.77599382400513, + "p99": 207.42399990558624 + }, + "isolatedSum": { + "p50": 214.11199867725372, + "p90": 229.91999983787537, + "p95": 234.91200059652328, + "p99": 265.28000086545944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25661440, + "combineLogicalBytes": 25661440, + "fanoutMean": 3.49609375, + "recvTokensMax": 454, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-010fd955", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|decode|normal|none|none|0|tuned||e596902aaaeb56c", + "colorKey": "gb300_95d14aab", + "comparisonKey": "6300fdd5434bd2aa", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:39:24.879583+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e596902aaaeb56c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 97.120001912117, + "p90": 110.72000116109848, + "p95": 119.64800208806992, + "p99": 194.2719966173172 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 79.64800298213959, + "p95": 86.68799698352814, + "p99": 128.9599984884262 + }, + "roundtrip": { + "p50": 149.4079977273941, + "p90": 161.40800714492798, + "p95": 167.7439957857132, + "p99": 248.79999458789825 + }, + "isolatedSum": { + "p50": 172.12799936532974, + "p90": 190.36800414323807, + "p95": 
206.33599907159805, + "p99": 323.2319951057434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 95.90400010347366, + "p90": 109.37599837779999, + "p95": 118.43200027942657, + "p99": 183.67999792099 + }, + "combine": { + "p50": 75.52000135183334, + "p90": 85.05599945783615, + "p95": 99.5199978351593, + "p99": 139.1039937734604 + }, + "roundtrip": { + "p50": 150.56000649929047, + "p90": 161.21600568294525, + "p95": 165.95199704170227, + "p99": 177.85599827766418 + }, + "isolatedSum": { + "p50": 171.424001455307, + "p90": 194.43199783563614, + "p95": 217.95199811458588, + "p99": 322.7839916944504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 387072, + "combineLogicalBytes": 387072, + "fanoutMean": 3.375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 95.8079993724823, + "p90": 106.1440035700798, + "p95": 111.7440015077591, + "p99": 144.44799721240997 + }, + "combine": { + "p50": 76.4480009675026, + "p90": 84.73599702119827, + "p95": 87.77599781751633, + "p99": 125.69600343704224 + }, + "roundtrip": { + "p50": 155.03999590873718, + "p90": 167.26399958133698, + "p95": 173.12000691890717, + "p99": 266.04801416397095 + }, + "isolatedSum": { + "p50": 172.2560003399849, + "p90": 190.88000059127808, + "p95": 199.51999932527542, + "p99": 270.1440006494522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 731136, + "combineLogicalBytes": 731136, + "fanoutMean": 3.1875, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 99.35999661684036, + "p90": 
113.24799805879593, + "p95": 135.3279948234558, + "p99": 190.11199474334717 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 87.13600039482117, + "p95": 89.28000181913376, + "p99": 105.31199723482132 + }, + "roundtrip": { + "p50": 155.87200224399567, + "p90": 170.56000232696533, + "p95": 190.5599981546402, + "p99": 276.3519883155823 + }, + "isolatedSum": { + "p50": 177.75999754667282, + "p90": 200.3839984536171, + "p95": 224.60799664258957, + "p99": 295.4239919781685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1519616, + "combineLogicalBytes": 1519616, + "fanoutMean": 3.3125, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 99.16800260543823, + "p90": 112.8000020980835, + "p95": 120.99199742078781, + "p99": 177.3120015859604 + }, + "combine": { + "p50": 78.49600166082382, + "p90": 87.80799806118011, + "p95": 92.99200028181076, + "p99": 146.68799936771393 + }, + "roundtrip": { + "p50": 156.22399747371674, + "p90": 167.90400445461273, + "p95": 177.05599963665009, + "p99": 255.10400533676147 + }, + "isolatedSum": { + "p50": 177.66400426626205, + "p90": 200.6080001592636, + "p95": 213.98399770259857, + "p99": 324.0000009536743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3082240, + "combineLogicalBytes": 3082240, + "fanoutMean": 3.359375, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 99.45599734783173, + "p90": 112.35199868679047, + "p95": 124.12799894809723, + "p99": 187.16800212860107 + }, + "combine": { + "p50": 83.3280012011528, + "p90": 89.85599875450134, + "p95": 101.75999999046326, + "p99": 148.8640010356903 + }, + "roundtrip": { + "p50": 157.6319932937622, + "p90": 171.51999473571777, + "p95": 184.92799997329712, + "p99": 258.11201333999634 + }, + 
"isolatedSum": { + "p50": 182.78399854898453, + "p90": 202.2079974412918, + "p95": 225.8879989385605, + "p99": 336.0320031642914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6121472, + "combineLogicalBytes": 6121472, + "fanoutMean": 3.3359375, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 108.15999656915665, + "p90": 117.85600334405899, + "p95": 121.31199985742569, + "p99": 133.18400084972382 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 93.66399794816971, + "p95": 97.79199957847595, + "p99": 126.01600587368011 + }, + "roundtrip": { + "p50": 164.99200463294983, + "p90": 173.8560050725937, + "p95": 178.0800074338913, + "p99": 226.01599991321564 + }, + "isolatedSum": { + "p50": 195.93599438667297, + "p90": 211.5200012922287, + "p95": 219.10399943590164, + "p99": 259.20000672340393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12271616, + "combineLogicalBytes": 12271616, + "fanoutMean": 3.34375, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 124.28800016641617, + "p90": 136.28800213336945, + "p95": 149.63200688362122, + "p99": 209.98400449752808 + }, + "combine": { + "p50": 101.79200023412704, + "p90": 111.16799712181091, + "p95": 113.79200220108032, + "p99": 146.65600657463074 + }, + "roundtrip": { + "p50": 196.31999731063843, + "p90": 207.7759951353073, + "p95": 216.09599888324738, + "p99": 270.04799246788025 + }, + "isolatedSum": { + "p50": 226.0800004005432, + "p90": 247.45599925518036, + "p95": 263.42400908470154, + "p99": 356.6400110721588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 507, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d7daef4", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|decode|normal|none|none|0|tuned||194008255dcd869", + "colorKey": "gb300_f6eb4093", + "comparisonKey": "cee6d2745c462bab", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:39:38.491972+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "194008255dcd869", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.865234375, + "eplbImbalanceAfter": 1.0003580729166668, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 97.15200215578079, + "p90": 110.07999628782272, + "p95": 115.00799655914307, + "p99": 123.32800030708313 + }, + "combine": { + "p50": 71.42399996519089, + "p90": 76.25599950551987, + "p95": 77.56800204515457, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 142.7839994430542, + "p90": 154.78399395942688, + "p95": 158.24000537395477, + "p99": 164.09599781036377 + }, + "isolatedSum": { + "p50": 168.57600212097168, + "p90": 186.3359957933426, + "p95": 192.57599860429764, + "p99": 206.9760039448738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 96.09600156545639, + "p90": 109.11999642848969, + "p95": 114.52800035476685, + "p99": 169.18399930000305 + }, + "combine": { + "p50": 74.46400076150894, + "p90": 79.9039974808693, + "p95": 88.95999938249588, + "p99": 126.39999389648438 + }, + "roundtrip": { + "p50": 152.63999998569489, + "p90": 168.2240068912506, + "p95": 182.68799781799316, + "p99": 257.82400369644165 + }, + "isolatedSum": { + "p50": 170.56000232696533, + "p90": 189.02399390935898, + "p95": 203.48799973726273, + "p99": 295.5839931964874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 3, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 96.25600278377533, + "p90": 107.77600109577179, + "p95": 112.99200356006622, + "p99": 126.88000500202179 + }, + "combine": { + 
"p50": 75.80800354480743, + "p90": 82.0159986615181, + "p95": 85.63199639320374, + "p99": 90.43200314044952 + }, + "roundtrip": { + "p50": 152.67199277877808, + "p90": 164.2879992723465, + "p95": 169.76000368595123, + "p99": 177.279993891716 + }, + "isolatedSum": { + "p50": 172.06400632858276, + "p90": 189.7919997572899, + "p95": 198.62399995326996, + "p99": 217.3120081424713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 774144, + "combineLogicalBytes": 774144, + "fanoutMean": 3.375, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 96.76799923181534, + "p90": 108.76800119876862, + "p95": 114.78400230407715, + "p99": 161.28000617027283 + }, + "combine": { + "p50": 77.27999985218048, + "p90": 85.75999736785889, + "p95": 88.3840024471283, + "p99": 112.15999722480774 + }, + "roundtrip": { + "p50": 154.14400398731232, + "p90": 167.39200055599213, + "p95": 173.43999445438385, + "p99": 217.72800385951996 + }, + "isolatedSum": { + "p50": 174.04799908399582, + "p90": 194.5279985666275, + "p95": 203.16800475120544, + "p99": 273.44000339508057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 98.62399846315384, + "p90": 110.23999750614166, + "p95": 114.33599889278412, + "p99": 123.61600250005722 + }, + "combine": { + "p50": 76.83199644088745, + "p90": 85.02399921417236, + "p95": 88.06400001049042, + "p99": 96.25600278377533 + }, + "roundtrip": { + "p50": 155.87200224399567, + "p90": 167.1999990940094, + "p95": 171.1679995059967, + "p99": 186.97600066661835 + }, + "isolatedSum": { + "p50": 175.4559949040413, + "p90": 195.26399672031403, + "p95": 202.39999890327454, + "p99": 
219.87200528383255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3268608, + "combineLogicalBytes": 3268608, + "fanoutMean": 3.5625, + "recvTokensMax": 60, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 96.83199971914291, + "p90": 109.6000000834465, + "p95": 113.79200220108032, + "p99": 147.8399932384491 + }, + "combine": { + "p50": 78.87999713420868, + "p90": 88.35200220346451, + "p95": 91.32800251245499, + "p99": 126.81600451469421 + }, + "roundtrip": { + "p50": 155.45600652694702, + "p90": 169.37600076198578, + "p95": 175.99999904632568, + "p99": 255.13601303100586 + }, + "isolatedSum": { + "p50": 175.7119968533516, + "p90": 197.952002286911, + "p95": 205.1200047135353, + "p99": 274.6559977531433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6393856, + "combineLogicalBytes": 6393856, + "fanoutMean": 3.484375, + "recvTokensMax": 115, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 108.8000014424324, + "p90": 120.2239990234375, + "p95": 128.25599312782288, + "p99": 165.56799411773682 + }, + "combine": { + "p50": 87.80799806118011, + "p90": 94.17600184679031, + "p95": 100.76799988746643, + "p99": 128.7039965391159 + }, + "roundtrip": { + "p50": 167.4560010433197, + "p90": 178.94400656223297, + "p95": 185.63200533390045, + "p99": 237.15199530124664 + }, + "isolatedSum": { + "p50": 196.60799950361252, + "p90": 214.4000008702278, + "p95": 229.0239930152893, + "p99": 294.2719906568527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13045760, + "combineLogicalBytes": 13045760, + "fanoutMean": 3.5546875, + "recvTokensMax": 234, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 121.72800302505493, + "p90": 
133.85599851608276, + "p95": 141.15199446678162, + "p99": 179.07199263572693 + }, + "combine": { + "p50": 101.47199779748917, + "p90": 108.8000014424324, + "p95": 113.95200341939926, + "p99": 130.78400492668152 + }, + "roundtrip": { + "p50": 195.96800208091736, + "p90": 205.88800311088562, + "p95": 213.53599429130554, + "p99": 254.88001108169556 + }, + "isolatedSum": { + "p50": 223.2000008225441, + "p90": 242.65599995851517, + "p95": 255.10399788618088, + "p99": 309.85599756240845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26263552, + "combineLogicalBytes": 26263552, + "fanoutMean": 3.578125, + "recvTokensMax": 469, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f112a523", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_f231b710", + "comparisonKey": "69b6362d34e5dcb9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:40:30.589441+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 
152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 90.68799763917923, + "p90": 102.04800218343735, + "p95": 105.47199845314026, + "p99": 115.39199948310852 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 78.3040001988411, + "p95": 82.30400085449219, + "p99": 87.26400136947632 + }, + "roundtrip": { + "p50": 143.99999380111694, + "p90": 154.81600165367126, + "p95": 157.72800147533417, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 162.75199502706528, + "p90": 180.35200238227844, + "p95": 187.77599930763245, + "p99": 202.65600085258484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 90.62399715185165, + "p90": 102.11200267076492, + "p95": 106.59199953079224, + "p99": 116.60800129175186 + }, + "combine": { + "p50": 72.73600250482559, + "p90": 78.46400141716003, + "p95": 81.11999928951263, + "p99": 85.56800335645676 + }, + "roundtrip": { + "p50": 145.05599439144135, + "p90": 
156.8319946527481, + "p95": 160.60799360275269, + "p99": 168.16000640392303 + }, + "isolatedSum": { + "p50": 163.35999965667725, + "p90": 180.57600408792496, + "p95": 187.71199882030487, + "p99": 202.17600464820862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 91.77599847316742, + "p90": 102.88000106811523, + "p95": 107.39199817180634, + "p99": 120.19199877977371 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 80.4160013794899, + "p95": 82.8159973025322, + "p99": 86.5280032157898 + }, + "roundtrip": { + "p50": 146.5280055999756, + "p90": 157.05600380897522, + "p95": 161.21600568294525, + "p99": 172.35200107097626 + }, + "isolatedSum": { + "p50": 165.15199840068817, + "p90": 183.29600244760513, + "p95": 190.20799547433853, + "p99": 206.7200019955635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 92.19200164079666, + "p90": 104.12800312042236, + "p95": 107.84000158309937, + "p99": 115.39199948310852 + }, + "combine": { + "p50": 76.67200267314911, + "p90": 83.3280012011528, + "p95": 84.73599702119827, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 148.54399859905243, + "p90": 158.33599865436554, + "p95": 162.11199760437012, + "p99": 172.19200730323792 + }, + "isolatedSum": { + "p50": 168.86400431394577, + "p90": 187.45600432157516, + "p95": 192.57599860429764, + "p99": 205.24799823760986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 32, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 92.92799979448318, + "p90": 103.55199873447418, + "p95": 107.29599744081497, + "p99": 115.58400094509125 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 84.51200276613235, + "p95": 86.01599931716919, + "p99": 93.28000247478485 + }, + "roundtrip": { + "p50": 149.1519957780838, + "p90": 159.32799875736237, + "p95": 162.4000072479248, + "p99": 168.5120016336441 + }, + "isolatedSum": { + "p50": 170.71999609470367, + "p90": 188.06400150060654, + "p95": 193.31199675798416, + "p99": 208.8640034198761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 93.05600076913834, + "p90": 103.00800204277039, + "p95": 106.65600001811981, + "p99": 114.88000303506851 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 85.88799834251404, + "p95": 87.80799806118011, + "p99": 96.67199850082397 + }, + "roundtrip": { + "p50": 148.8959938287735, + "p90": 159.87199544906616, + "p95": 164.35199975967407, + "p99": 173.21600019931793 + }, + "isolatedSum": { + "p50": 174.5920032262802, + "p90": 188.89600038528442, + "p95": 194.46399807929993, + "p99": 211.5520015358925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 102.78400033712387, + "p90": 111.39199882745743, + "p95": 115.9679964184761, + "p99": 122.20799922943115 + }, + "combine": { + "p50": 83.99999886751175, + "p90": 88.79999816417694, + "p95": 
91.45600348711014, + "p99": 97.75999933481216 + }, + "roundtrip": { + "p50": 160.5439931154251, + "p90": 169.91999745368958, + "p95": 173.15199971199036, + "p99": 179.19999361038208 + }, + "isolatedSum": { + "p50": 186.78399920463562, + "p90": 200.19199699163437, + "p95": 207.42399990558624, + "p99": 219.96799856424332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 116.12799763679504, + "p90": 125.15200674533844, + "p95": 127.42400169372559, + "p99": 137.11999356746674 + }, + "combine": { + "p50": 98.84800016880035, + "p90": 106.81600123643875, + "p95": 108.31999778747559, + "p99": 112.31999844312668 + }, + "roundtrip": { + "p50": 189.53600525856018, + "p90": 197.50399887561798, + "p95": 200.15999674797058, + "p99": 208.3200067281723 + }, + "isolatedSum": { + "p50": 214.9759978055954, + "p90": 231.9680079817772, + "p95": 235.74399948120117, + "p99": 249.43999201059341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-25977ab3", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_19caa41e", + "comparisonKey": "b530fd5cd90e1185", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:40:44.366989+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 92.00000017881393, + "p90": 101.08800232410431, + "p95": 104.63999956846237, + "p99": 110.84800213575363 + }, + "combine": { + "p50": 71.9040036201477, + "p90": 76.51200145483017, + "p95": 78.04799824953079, + "p99": 84.54400300979614 + }, + "roundtrip": { + "p50": 144.6080058813095, + "p90": 154.81600165367126, + "p95": 157.6319932937622, + "p99": 166.1120057106018 + }, + 
"isolatedSum": { + "p50": 163.90400379896164, + "p90": 177.60000377893448, + "p95": 182.68799781799316, + "p99": 195.39200514554977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 92.12800115346909, + "p90": 101.31199657917023, + "p95": 105.82400113344193, + "p99": 112.8000020980835 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 77.2479996085167, + "p95": 79.1039988398552, + "p99": 85.37600189447403 + }, + "roundtrip": { + "p50": 145.6640064716339, + "p90": 153.888002038002, + "p95": 157.21599757671356, + "p99": 163.96799683570862 + }, + "isolatedSum": { + "p50": 165.3760001063347, + "p90": 178.55999618768692, + "p95": 184.92799997329712, + "p99": 198.17600399255753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 92.47999638319016, + "p90": 103.39199751615524, + "p95": 107.68000036478043, + "p99": 116.28799885511398 + }, + "combine": { + "p50": 74.78400319814682, + "p90": 78.40000092983246, + "p95": 81.79199695587158, + "p99": 87.64799684286118 + }, + "roundtrip": { + "p50": 147.64800667762756, + "p90": 156.12800419330597, + "p95": 160.0320041179657, + "p99": 172.09599912166595 + }, + "isolatedSum": { + "p50": 167.26399958133698, + "p90": 181.7919984459877, + "p95": 189.471997320652, + "p99": 203.93599569797516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 15, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 92.12800115346909, + "p90": 101.08800232410431, + "p95": 105.31199723482132, + "p99": 113.34399878978729 + }, + "combine": { + "p50": 76.7040029168129, + "p90": 82.75199681520462, + "p95": 84.60800349712372, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 149.50400590896606, + "p90": 158.9439958333969, + "p95": 162.75200247764587, + "p99": 170.52799463272095 + }, + "isolatedSum": { + "p50": 168.83200407028198, + "p90": 183.83999913930893, + "p95": 189.92000073194504, + "p99": 203.61600071191788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 30, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 92.8959995508194, + "p90": 102.55999863147736, + "p95": 106.6880002617836, + "p99": 115.55200070142746 + }, + "combine": { + "p50": 76.73600316047668, + "p90": 83.39200168848038, + "p95": 85.4720026254654, + "p99": 90.04800021648407 + }, + "roundtrip": { + "p50": 149.88799393177032, + "p90": 158.87999534606934, + "p95": 162.36799955368042, + "p99": 168.35199296474457 + }, + "isolatedSum": { + "p50": 169.63200271129608, + "p90": 185.95200031995773, + "p95": 192.160002887249, + "p99": 205.60000091791153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 57, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 94.59199756383896, + "p90": 103.90400141477585, + "p95": 106.62399977445602, + "p99": 111.93600296974182 + }, + "combine": { + "p50": 78.27199995517731, + "p90": 85.21600067615509, + "p95": 87.0399996638298, + "p99": 95.551997423172 + }, + "roundtrip": { + "p50": 151.5199989080429, + "p90": 
160.12799739837646, + "p95": 164.06400501728058, + "p99": 172.0000058412552 + }, + "isolatedSum": { + "p50": 172.86399751901627, + "p90": 189.12000209093094, + "p95": 193.66399943828583, + "p99": 207.48800039291382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 115, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 106.6880002617836, + "p90": 115.58400094509125, + "p95": 119.29599940776825, + "p99": 125.47199428081512 + }, + "combine": { + "p50": 84.32000130414963, + "p90": 88.86399865150452, + "p95": 90.2400016784668, + "p99": 96.63999825716019 + }, + "roundtrip": { + "p50": 163.35999965667725, + "p90": 171.1679995059967, + "p95": 173.15199971199036, + "p99": 180.2240014076233 + }, + "isolatedSum": { + "p50": 191.00800156593323, + "p90": 204.44799959659576, + "p95": 209.53600108623505, + "p99": 222.1119925379753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 232, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.14399808645248, + "p90": 126.78399682044983, + "p95": 130.3039938211441, + "p99": 134.3040019273758 + }, + "combine": { + "p50": 100.60799866914749, + "p90": 106.175996363163, + "p95": 108.22399705648422, + "p99": 115.80800265073776 + }, + "roundtrip": { + "p50": 192.22399592399597, + "p90": 199.0080028772354, + "p95": 201.1519968509674, + "p99": 206.9759964942932 + }, + "isolatedSum": { + "p50": 218.75199675559998, + "p90": 232.95999318361282, + "p95": 238.52799087762833, + "p99": 250.11200457811356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + 
"recvTokensMax": 462, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-82316a19", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_f0245a54", + "comparisonKey": "a20e183114919a8e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:38:51.037528+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": 
{ + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 89.40800279378891, + "p90": 107.61599987745285, + "p95": 130.36799430847168, + "p99": 158.78400206565857 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 87.0399996638298, + "p95": 99.29600358009338, + "p99": 112.03200370073318 + }, + "roundtrip": { + "p50": 142.33599603176117, + "p90": 155.58399260044098, + "p95": 161.6320013999939, + "p99": 203.36000621318817 + }, + "isolatedSum": { + "p50": 160.60800105333328, + "p90": 194.65599954128265, + "p95": 229.66399788856506, + "p99": 270.81600576639175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 94.78399902582169, + "p90": 148.60799908638, + "p95": 153.24799716472626, + "p99": 166.72000288963318 + }, + "combine": { + "p50": 73.63200187683105, + "p90": 104.2879968881607, + "p95": 116.48000031709671, + "p99": 143.8719928264618 + }, + "roundtrip": { + "p50": 143.90400052070618, + "p90": 164.32000696659088, + "p95": 186.3040030002594, + "p99": 213.47199380397797 + }, + "isolatedSum": { + "p50": 168.41600090265274, + "p90": 252.8959959745407, + "p95": 269.72799748182297, + "p99": 310.59199571609497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 90.87999910116196, + "p90": 119.52000111341476, + "p95": 135.6160044670105, + "p99": 
152.5759994983673 + }, + "combine": { + "p50": 73.7600028514862, + "p90": 80.57600259780884, + "p95": 83.5840031504631, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 143.93599331378937, + "p90": 155.39200603961945, + "p95": 159.96800363063812, + "p99": 174.17599260807037 + }, + "isolatedSum": { + "p50": 164.64000195264816, + "p90": 200.0960037112236, + "p95": 219.2000076174736, + "p99": 265.53600281476974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 15, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 89.53599631786346, + "p90": 100.09600222110748, + "p95": 103.90400141477585, + "p99": 113.08799684047699 + }, + "combine": { + "p50": 76.7040029168129, + "p90": 83.83999764919281, + "p95": 89.63199704885483, + "p99": 127.42400169372559 + }, + "roundtrip": { + "p50": 145.21600306034088, + "p90": 154.4959992170334, + "p95": 160.35200655460358, + "p99": 167.4560010433197 + }, + "isolatedSum": { + "p50": 166.23999923467636, + "p90": 183.9359998703003, + "p95": 193.53599846363068, + "p99": 240.51199853420258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 30, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 89.59999680519104, + "p90": 108.92800241708755, + "p95": 133.40799510478973, + "p99": 157.4079990386963 + }, + "combine": { + "p50": 77.15199887752533, + "p90": 109.82400178909302, + "p95": 120.54400146007538, + "p99": 140.3840035200119 + }, + "roundtrip": { + "p50": 146.43199741840363, + "p90": 172.992005944252, + "p95": 185.18400192260742, + "p99": 216.63999557495117 + }, + "isolatedSum": { + "p50": 166.75199568271637, + "p90": 218.75200420618057, + 
"p95": 253.9519965648651, + "p99": 297.7920025587082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 57, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 94.59199756383896, + "p90": 144.22400295734406, + "p95": 150.65599977970123, + "p99": 162.81600296497345 + }, + "combine": { + "p50": 80.92799782752991, + "p90": 93.98400038480759, + "p95": 116.80000275373459, + "p99": 139.80799913406372 + }, + "roundtrip": { + "p50": 149.3760049343109, + "p90": 187.04000115394592, + "p95": 194.20799612998962, + "p99": 218.07999908924103 + }, + "isolatedSum": { + "p50": 175.51999539136887, + "p90": 238.20800334215164, + "p95": 267.4560025334358, + "p99": 302.62400209903717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 115, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 103.58399897813797, + "p90": 130.68799674510956, + "p95": 148.60799908638, + "p99": 163.5199934244156 + }, + "combine": { + "p50": 84.09599959850311, + "p90": 123.96799772977829, + "p95": 137.82399892807007, + "p99": 149.79200065135956 + }, + "roundtrip": { + "p50": 162.56000101566315, + "p90": 192.22399592399597, + "p95": 202.4639993906021, + "p99": 229.76000607013702 + }, + "isolatedSum": { + "p50": 187.67999857664108, + "p90": 254.65599447488785, + "p95": 286.4319980144501, + "p99": 313.31199407577515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 232, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": 
{ + "p50": 117.5680011510849, + "p90": 147.5519984960556, + "p95": 155.83999454975128, + "p99": 176.83200538158417 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 148.28799664974213, + "p95": 152.41600573062897, + "p99": 158.9439958333969 + }, + "roundtrip": { + "p50": 192.00000166893005, + "p90": 223.4559953212738, + "p95": 234.78400707244873, + "p99": 254.59200143814087 + }, + "isolatedSum": { + "p50": 221.8879982829094, + "p90": 295.83999514579773, + "p95": 308.25600028038025, + "p99": 335.7760012149811 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 462, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8777173e", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_30ab8c37", + "comparisonKey": "6f755917922b7fa1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:42.988385+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, 
+ "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 80.60800284147263, + "p90": 90.62399715185165, + "p95": 94.71999853849411, + "p99": 104.2879968881607 + }, + "combine": { + "p50": 74.94399696588516, + "p90": 82.75199681520462, + "p95": 84.41600203514099, + "p99": 92.99200028181076 + }, + "roundtrip": { + "p50": 134.49600338935852, + "p90": 143.00799369812012, + "p95": 147.039994597435, + "p99": 155.4879993200302 + }, + "isolatedSum": { + "p50": 155.5519998073578, + "p90": 173.37599396705627, + "p95": 179.1360005736351, + "p99": 197.27999716997147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 80.28800040483475, + "p90": 90.7519981265068, + "p95": 95.32800316810608, + "p99": 103.42399775981903 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 84.1279998421669, + "p95": 86.04799956083298, + "p99": 93.18400174379349 + }, + "roundtrip": { + "p50": 136.25599443912506, + 
"p90": 144.76799964904785, + "p95": 147.2959965467453, + "p99": 156.51200711727142 + }, + "isolatedSum": { + "p50": 155.29599785804749, + "p90": 174.8799979686737, + "p95": 181.37600272893906, + "p99": 196.60799950361252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 81.66400343179703, + "p90": 92.06400066614151, + "p95": 95.29600292444229, + "p99": 103.87200117111206 + }, + "combine": { + "p50": 75.83999633789062, + "p90": 84.1279998421669, + "p95": 86.2400010228157, + "p99": 92.86399930715561 + }, + "roundtrip": { + "p50": 138.87999951839447, + "p90": 151.19999647140503, + "p95": 156.12800419330597, + "p99": 165.43999314308167 + }, + "isolatedSum": { + "p50": 157.50399976968765, + "p90": 176.1920005083084, + "p95": 181.536003947258, + "p99": 196.73600047826767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 82.49600231647491, + "p90": 92.00000017881393, + "p95": 95.07200121879578, + "p99": 102.7199998497963 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 85.28000116348267, + "p95": 87.2960016131401, + "p99": 93.98400038480759 + }, + "roundtrip": { + "p50": 139.93600010871887, + "p90": 150.33599734306335, + "p95": 153.6960005760193, + "p99": 161.9199961423874 + }, + "isolatedSum": { + "p50": 159.7440019249916, + "p90": 177.2800013422966, + "p95": 182.36800283193588, + "p99": 196.70400023460388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 83.13599973917007, + "p90": 93.21600198745728, + "p95": 97.21600264310837, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 77.47200131416321, + "p90": 84.95999872684479, + "p95": 86.36800199747086, + "p99": 92.12800115346909 + }, + "roundtrip": { + "p50": 139.48799669742584, + "p90": 148.80000054836273, + "p95": 152.67199277877808, + "p99": 160.7999950647354 + }, + "isolatedSum": { + "p50": 160.60800105333328, + "p90": 178.17600071430206, + "p95": 183.58400464057922, + "p99": 202.11200416088104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 82.75199681520462, + "p90": 91.77599847316742, + "p95": 95.96800059080124, + "p99": 104.44799810647964 + }, + "combine": { + "p50": 82.2720006108284, + "p90": 86.7839977145195, + "p95": 89.6959975361824, + "p99": 97.53599762916565 + }, + "roundtrip": { + "p50": 139.93600010871887, + "p90": 149.6960073709488, + "p95": 154.30399775505066, + "p99": 167.00799763202667 + }, + "isolatedSum": { + "p50": 165.02399742603302, + "p90": 178.55999618768692, + "p95": 185.66399812698364, + "p99": 201.9839957356453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 95.20000219345093, + "p90": 103.42399775981903, + "p95": 106.6880002617836, + "p99": 113.50400000810623 + }, + "combine": { + "p50": 87.8399983048439, + "p90": 96.47999703884125, + "p95": 98.27200323343277, + 
"p99": 105.34399747848511 + }, + "roundtrip": { + "p50": 150.87999403476715, + "p90": 159.04000401496887, + "p95": 162.01600432395935, + "p99": 168.70400309562683 + }, + "isolatedSum": { + "p50": 183.04000049829483, + "p90": 199.90399479866028, + "p95": 204.96000349521637, + "p99": 218.84799748659134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 108.35199803113937, + "p90": 116.64000153541565, + "p95": 118.94399672746658, + "p99": 126.11199915409088 + }, + "combine": { + "p50": 102.20800340175629, + "p90": 110.23999750614166, + "p95": 112.12799698114395, + "p99": 118.94399672746658 + }, + "roundtrip": { + "p50": 180.80000579357147, + "p90": 188.92799317836761, + "p95": 192.25600361824036, + "p99": 207.10399746894836 + }, + "isolatedSum": { + "p50": 210.56000143289566, + "p90": 226.8799990415573, + "p95": 231.07199370861053, + "p99": 245.05599588155746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6d74d59d", + "identity": "gb300|deepep|v2|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_dd475aee", + "comparisonKey": "9d7853553a4a9ba2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:47.379529+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 40.32000154256821, + "p90": 48.608001321554184, + "p95": 49.47200044989586, + "p99": 58.97599831223488 + }, + "combine": { + "p50": 35.80800071358681, + "p90": 42.080000042915344, + "p95": 43.20000112056732, + "p99": 45.56800052523613 + }, + "roundtrip": { + "p50": 60.60799956321716, + "p90": 64.51199948787689, + "p95": 65.72800129652023, + "p99": 70.78400254249573 + }, + "isolatedSum": { + "p50": 76.12800225615501, + "p90": 90.68800136446953, + "p95": 92.67200157046318, + 
"p99": 104.54399883747101 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 40.800001472234726, + "p90": 49.02400076389313, + "p95": 49.75999891757965, + "p99": 52.38400027155876 + }, + "combine": { + "p50": 35.87200120091438, + "p90": 43.296001851558685, + "p95": 43.99999976158142, + "p99": 46.14400118589401 + }, + "roundtrip": { + "p50": 60.99199876189232, + "p90": 64.83200192451477, + "p95": 65.85600227117538, + "p99": 71.00799679756165 + }, + "isolatedSum": { + "p50": 76.67200267314911, + "p90": 92.32000261545181, + "p95": 93.75999867916107, + "p99": 98.52800145745277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 40.28800129890442, + "p90": 48.576001077890396, + "p95": 49.855999648571014, + "p99": 53.0879981815815 + }, + "combine": { + "p50": 36.288000643253326, + "p90": 43.55200007557869, + "p95": 44.47999969124794, + "p99": 59.55199897289276 + }, + "roundtrip": { + "p50": 61.055999249219894, + "p90": 64.67200070619583, + "p95": 66.0799965262413, + "p99": 71.07199728488922 + }, + "isolatedSum": { + "p50": 76.57600194215775, + "p90": 92.12800115346909, + "p95": 94.33599933981895, + "p99": 112.63999715447426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 42.17600077390671, + "p90": 51.392000168561935, + "p95": 
53.69599908590317, + "p99": 64.44799900054932 + }, + "combine": { + "p50": 37.79200091958046, + "p90": 46.9760000705719, + "p95": 48.25599864125252, + "p99": 59.55199897289276 + }, + "roundtrip": { + "p50": 62.3680017888546, + "p90": 68.31999868154526, + "p95": 72.92799651622772, + "p99": 76.4160007238388 + }, + "isolatedSum": { + "p50": 79.96800169348717, + "p90": 98.36800023913383, + "p95": 101.95199772715569, + "p99": 123.99999797344208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 41.79200157523155, + "p90": 50.11200159788132, + "p95": 51.19999870657921, + "p99": 62.81600147485733 + }, + "combine": { + "p50": 36.479998379945755, + "p90": 43.74400153756142, + "p95": 44.51199993491173, + "p99": 47.488000243902206 + }, + "roundtrip": { + "p50": 62.78400123119354, + "p90": 66.14399701356888, + "p95": 67.26399809122086, + "p99": 73.08799773454666 + }, + "isolatedSum": { + "p50": 78.27199995517731, + "p90": 93.85600313544273, + "p95": 95.71199864149094, + "p99": 110.30400171875954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 43.455999344587326, + "p90": 50.11200159788132, + "p95": 50.81599950790405, + "p99": 53.47200110554695 + }, + "combine": { + "p50": 43.455999344587326, + "p90": 45.632001012563705, + "p95": 46.39999940991402, + "p99": 52.319999784231186 + }, + "roundtrip": { + "p50": 66.84800237417221, + "p90": 73.08799773454666, + "p95": 74.07999783754349, + "p99": 77.79199630022049 + }, + "isolatedSum": { + "p50": 86.91199868917465, + "p90": 
95.74400261044502, + "p95": 97.21599891781807, + "p99": 105.79200088977814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 49.72799867391586, + "p90": 63.64800035953522, + "p95": 66.27199798822403, + "p99": 100.22400319576263 + }, + "combine": { + "p50": 46.9760000705719, + "p90": 48.576001077890396, + "p95": 49.75999891757965, + "p99": 55.743999779224396 + }, + "roundtrip": { + "p50": 77.05599814653397, + "p90": 83.74399691820145, + "p95": 85.85599809885025, + "p99": 99.10400211811066 + }, + "isolatedSum": { + "p50": 96.70399874448776, + "p90": 112.22400143742561, + "p95": 116.03199690580368, + "p99": 155.96800297498703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 59.007998555898666, + "p90": 63.29599767923355, + "p95": 64.54399973154068, + "p99": 80.64000308513641 + }, + "combine": { + "p50": 56.703999638557434, + "p90": 59.4559982419014, + "p95": 60.99199876189232, + "p99": 70.23999840021133 + }, + "roundtrip": { + "p50": 95.93600034713745, + "p90": 112.70400136709213, + "p95": 125.44000148773193, + "p99": 143.36000382900238 + }, + "isolatedSum": { + "p50": 115.7119981944561, + "p90": 122.75199592113495, + "p95": 125.535998493433, + "p99": 150.88000148534775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-87368d75", + 
"identity": "gb300|deepep|v2|7168|8|256|bf16|ll|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_982102a2", + "comparisonKey": "d44d62fc12233448", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:04.588592+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + 
"rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 40.44799879193306, + "p90": 49.0880012512207, + "p95": 50.175998359918594, + "p99": 54.11199852824211 + }, + "combine": { + "p50": 43.327998369932175, + "p90": 45.3759990632534, + "p95": 46.23999819159508, + "p99": 50.20799860358238 + }, + "roundtrip": { + "p50": 62.30400130152702, + "p90": 66.27199798822403, + "p95": 69.34399902820587, + "p99": 73.47200065851212 + }, + "isolatedSum": { + "p50": 83.77599716186523, + "p90": 94.4640003144741, + "p95": 96.41599655151367, + "p99": 104.3199971318245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 40.64000025391579, + "p90": 49.12000149488449, + "p95": 50.144001841545105, + "p99": 53.79199981689453 + }, + "combine": { + "p50": 43.42399910092354, + "p90": 45.72800174355507, + "p95": 46.560000628232956, + "p99": 50.65599828958511 + }, + "roundtrip": { + "p50": 62.111999839544296, + "p90": 66.6240006685257, + "p95": 69.98399645090103, + "p99": 74.5600014925003 + }, + "isolatedSum": { + "p50": 84.06399935483932, + "p90": 94.84800323843956, + "p95": 96.70400246977806, + "p99": 104.44799810647964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 40.76800122857094, + "p90": 49.215998500585556, + "p95": 50.84799975156784, + "p99": 56.76800012588501 + }, + "combine": { + "p50": 43.807998299598694, + "p90": 48.22399839758873, + "p95": 56.063998490571976, + "p99": 59.328000992536545 + }, + "roundtrip": { + "p50": 62.591999769210815, + "p90": 
66.68800115585327, + "p95": 68.76800209283829, + "p99": 73.82400333881378 + }, + "isolatedSum": { + "p50": 84.57599952816963, + "p90": 97.43999689817429, + "p95": 106.91199824213982, + "p99": 116.09600111842155 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 42.30400174856186, + "p90": 49.6320016682148, + "p95": 50.49600079655647, + "p99": 54.43200096487999 + }, + "combine": { + "p50": 42.75200143456459, + "p90": 45.40799930691719, + "p95": 46.431999653577805, + "p99": 50.4320003092289 + }, + "roundtrip": { + "p50": 62.880001962184906, + "p90": 66.20799750089645, + "p95": 69.72800195217133, + "p99": 73.44000041484833 + }, + "isolatedSum": { + "p50": 85.05600318312645, + "p90": 95.04000097513199, + "p95": 96.92800045013428, + "p99": 104.86400127410889 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 48.54400083422661, + "p90": 51.29599943757057, + "p95": 53.85600030422211, + "p99": 64.4799992442131 + }, + "combine": { + "p50": 44.096000492572784, + "p90": 46.04800045490265, + "p95": 46.94399982690811, + "p99": 54.52800169587135 + }, + "roundtrip": { + "p50": 64.15999680757523, + "p90": 70.75200229883194, + "p95": 73.05599749088287, + "p99": 77.44000107049942 + }, + "isolatedSum": { + "p50": 92.64000132679939, + "p90": 97.34399989247322, + "p95": 100.80000013113022, + "p99": 119.00800094008446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 
1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 50.04800111055374, + "p90": 52.38400027155876, + "p95": 53.53600159287453, + "p99": 59.167999774217606 + }, + "combine": { + "p50": 45.60000076889992, + "p90": 47.42399975657463, + "p95": 48.41599985957146, + "p99": 53.3440001308918 + }, + "roundtrip": { + "p50": 69.60000097751617, + "p90": 74.23999905586243, + "p95": 75.32799988985062, + "p99": 77.69600301980972 + }, + "isolatedSum": { + "p50": 95.64800187945366, + "p90": 99.80800002813339, + "p95": 101.95200145244598, + "p99": 112.5119999051094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 50.303999334573746, + "p90": 53.0879981815815, + "p95": 53.98400127887726, + "p99": 60.28800085186958 + }, + "combine": { + "p50": 47.29599878191948, + "p90": 55.67999929189682, + "p95": 57.11999908089638, + "p99": 59.39200147986412 + }, + "roundtrip": { + "p50": 76.51200145483017, + "p90": 84.3840017914772, + "p95": 86.17600053548813, + "p99": 89.72799777984619 + }, + "isolatedSum": { + "p50": 97.59999811649323, + "p90": 108.76799747347832, + "p95": 111.10400035977364, + "p99": 119.6800023317337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 62.65600025653839, + "p90": 65.79200178384781, + "p95": 71.77600264549255, + "p99": 76.60800218582153 + }, + "combine": { + "p50": 58.46399813890457, + "p90": 60.67200005054474, + "p95": 61.72800064086914, + "p99": 
68.51200014352798 + }, + "roundtrip": { + "p50": 97.47199714183807, + "p90": 100.8640006184578, + "p95": 102.39999741315842, + "p99": 106.9440022110939 + }, + "isolatedSum": { + "p50": 121.11999839544296, + "p90": 126.46400183439255, + "p95": 133.5040032863617, + "p99": 145.12000232934952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a6a71ae", + "identity": "gb300|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||6c4175e2b7b86cb", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "30637dac2e3dc497", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:31:45.837518+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + 
"routingConsistent": true, + "traceSignature": "6c4175e2b7b86cb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 328.67199182510376, + "p90": 346.1439907550812, + "p95": 350.9120047092438, + "p99": 360.3520095348358 + }, + "combine": { + "p50": 59.61599946022034, + "p90": 63.1679967045784, + "p95": 65.60000032186508, + "p99": 72.80000299215317 + }, + "roundtrip": { + "p50": 370.4639971256256, + "p90": 387.2320055961609, + "p95": 390.5920088291168, + "p99": 400.64001083374023 + }, + "isolatedSum": { + "p50": 388.2879912853241, + "p90": 409.3119874596596, + "p95": 416.51200503110886, + "p99": 433.152012526989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 65536, + "combineLogicalBytes": 131072, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 327.9680013656616, + "p90": 346.8480110168457, + "p95": 352.8960049152374, + "p99": 384.2560052871704 + }, + "combine": { + "p50": 60.095999389886856, + "p90": 63.93600255250931, + "p95": 68.31999868154526, + "p99": 74.68800246715546 + }, + "roundtrip": { + "p50": 369.6959912776947, + "p90": 388.19199800491333, + "p95": 392.0319974422455, + "p99": 416.31999611854553 + }, + "isolatedSum": { + "p50": 388.0640007555485, + "p90": 410.784013569355, + "p95": 421.2160035967827, + "p99": 458.94400775432587 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 122880, + "combineLogicalBytes": 245760, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 329.24801111221313, + "p90": 349.08801317214966, + "p95": 352.54400968551636, + "p99": 368.3199882507324 + }, + "combine": { + "p50": 61.24800071120262, + "p90": 64.54399973154068, + "p95": 66.27199798822403, + "p99": 73.63200187683105 + }, + "roundtrip": { + "p50": 370.36800384521484, + "p90": 387.29599118232727, + "p95": 392.2240138053894, + "p99": 405.7280123233795 + }, + "isolatedSum": { + "p50": 390.49601182341576, + "p90": 413.63201290369034, + "p95": 418.8160076737404, + "p99": 441.9519901275635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 245760, + "combineLogicalBytes": 491520, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 329.27998900413513, + "p90": 348.1920063495636, + "p95": 352.1920144557953, + "p99": 367.5520122051239 + }, + "combine": { + "p50": 62.24000081419945, + "p90": 65.24799764156342, + "p95": 66.04799628257751, + "p99": 68.06399673223495 + }, + "roundtrip": { + "p50": 371.5839982032776, + "p90": 386.7200016975403, + "p95": 391.32800698280334, + "p99": 400.9599983692169 + }, + "isolatedSum": { + "p50": 391.5199898183346, + "p90": 413.440003991127, + "p95": 418.2400107383728, + "p99": 435.61600893735886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 479232, + "combineLogicalBytes": 958464, + "fanoutMean": 3.65625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 329.6639919281006, + "p90": 349.92000460624695, + "p95": 354.3680012226105, + "p99": 366.2079870700836 + }, + 
"combine": { + "p50": 62.52799928188324, + "p90": 65.50399959087372, + "p95": 66.91200286149979, + "p99": 70.14399766921997 + }, + "roundtrip": { + "p50": 372.8959858417511, + "p90": 388.7360095977783, + "p95": 392.192006111145, + "p99": 400.06399154663086 + }, + "isolatedSum": { + "p50": 392.1919912099838, + "p90": 415.42400419712067, + "p95": 421.28000408411026, + "p99": 436.3519847393036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 942080, + "combineLogicalBytes": 1884160, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 329.15198802948, + "p90": 346.6239869594574, + "p95": 350.847989320755, + "p99": 358.7520122528076 + }, + "combine": { + "p50": 63.391998410224915, + "p90": 66.68800115585327, + "p95": 67.96800345182419, + "p99": 71.71200215816498 + }, + "roundtrip": { + "p50": 371.5200126171112, + "p90": 389.1200125217438, + "p95": 393.0560052394867, + "p99": 400.160014629364 + }, + "isolatedSum": { + "p50": 392.5439864397049, + "p90": 413.31198811531067, + "p95": 418.8159927725792, + "p99": 430.4640144109726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1884160, + "combineLogicalBytes": 3768320, + "fanoutMean": 3.59375, + "recvTokensMax": 121, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 334.52799916267395, + "p90": 356.6719889640808, + "p95": 362.8160059452057, + "p99": 372.3840117454529 + }, + "combine": { + "p50": 67.77600198984146, + "p90": 71.87200337648392, + "p95": 74.01599735021591, + "p99": 80.54400235414505 + }, + "roundtrip": { + "p50": 376.1279881000519, + "p90": 393.3440148830414, + "p95": 397.7920114994049, + "p99": 410.91200709342957 + }, + "isolatedSum": { + "p50": 402.3040011525154, + "p90": 428.5439923405647, + "p95": 436.8320032954216, + "p99": 
452.92801409959793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3792896, + "combineLogicalBytes": 7585792, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 331.7759931087494, + "p90": 349.5039939880371, + "p95": 353.7920117378235, + "p99": 372.0319867134094 + }, + "combine": { + "p50": 78.33600044250488, + "p90": 81.69600367546082, + "p95": 83.16799998283386, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 386.46399974823, + "p90": 403.328001499176, + "p95": 406.7839980125427, + "p99": 413.4080111980438 + }, + "isolatedSum": { + "p50": 410.1119935512543, + "p90": 431.1999976634979, + "p95": 436.96001172065735, + "p99": 458.9439854025841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7647232, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3cd0de4d", + "identity": "gb300|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||3bda3dd7d4e88bf", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "de82be498031e7bf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:54.813682+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3bda3dd7d4e88bf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 359.391987323761, + "p90": 380.8960020542145, + "p95": 387.4239921569824, + "p99": 398.81598949432373 + }, + "combine": { + "p50": 64.7360011935234, + "p90": 71.99999690055847, + "p95": 75.6480023264885, + "p99": 81.44000172615051 + }, + "roundtrip": { + "p50": 402.46400237083435, + "p90": 420.3200042247772, + "p95": 423.96798729896545, + "p99": 434.4319999217987 + }, + "isolatedSum": { + "p50": 424.1279885172844, + "p90": 452.89599895477295, + "p95": 463.0719944834709, + "p99": 480.25599122047424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76800, + "combineLogicalBytes": 153600, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { 
+ "p50": 357.2799861431122, + "p90": 380.67200779914856, + "p95": 386.1120045185089, + "p99": 398.9439904689789 + }, + "combine": { + "p50": 66.0799965262413, + "p90": 69.95200365781784, + "p95": 72.25599884986877, + "p99": 81.53600245714188 + }, + "roundtrip": { + "p50": 401.12000703811646, + "p90": 418.11200976371765, + "p95": 423.23198914527893, + "p99": 433.56800079345703 + }, + "isolatedSum": { + "p50": 423.3599826693535, + "p90": 450.6240114569664, + "p95": 458.3680033683777, + "p99": 480.47999292612076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 153600, + "combineLogicalBytes": 307200, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 360.6719970703125, + "p90": 385.6000006198883, + "p95": 397.69598841667175, + "p99": 418.9760088920593 + }, + "combine": { + "p50": 67.32799857854843, + "p90": 88.03199976682663, + "p95": 99.0080013871193, + "p99": 117.34399944543839 + }, + "roundtrip": { + "p50": 404.7040045261383, + "p90": 426.2720048427582, + "p95": 440.12799859046936, + "p99": 463.5840058326721 + }, + "isolatedSum": { + "p50": 427.99999564886093, + "p90": 473.63200038671494, + "p95": 496.70398980379105, + "p99": 536.3200083374977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 302080, + "combineLogicalBytes": 604160, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 358.72000455856323, + "p90": 379.4879913330078, + "p95": 385.8560025691986, + "p99": 395.04000544548035 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 72.28799909353256, + "p95": 75.03999769687653, + "p99": 84.83199775218964 + }, + "roundtrip": { + "p50": 402.17599272727966, + "p90": 419.8080003261566, + "p95": 423.552006483078, + "p99": 431.551992893219 + }, 
+ "isolatedSum": { + "p50": 427.4880066514015, + "p90": 451.7759904265404, + "p95": 460.89600026607513, + "p99": 479.87200319767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 604160, + "combineLogicalBytes": 1208320, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 360.1920008659363, + "p90": 381.76000118255615, + "p95": 387.5519931316376, + "p99": 407.26399421691895 + }, + "combine": { + "p50": 68.86400282382965, + "p90": 72.09599763154984, + "p95": 75.07199794054031, + "p99": 84.70399677753448 + }, + "roundtrip": { + "p50": 405.023992061615, + "p90": 421.6960072517395, + "p95": 426.144003868103, + "p99": 434.6559941768646 + }, + "isolatedSum": { + "p50": 429.05600368976593, + "p90": 453.855998814106, + "p95": 462.6239910721779, + "p99": 491.96799099445343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1223680, + "combineLogicalBytes": 2447360, + "fanoutMean": 3.734375, + "recvTokensMax": 62, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 359.16799306869507, + "p90": 378.59201431274414, + "p95": 385.53598523139954, + "p99": 395.87199687957764 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 72.95999675989151, + "p95": 74.62400197982788, + "p99": 83.23200047016144 + }, + "roundtrip": { + "p50": 405.5039882659912, + "p90": 423.5199987888336, + "p95": 428.44799160957336, + "p99": 436.2879991531372 + }, + "isolatedSum": { + "p50": 428.5759925842285, + "p90": 451.55201107263565, + "p95": 460.1599872112274, + "p99": 479.1039973497391 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2396160, + "combineLogicalBytes": 4792320, + "fanoutMean": 3.65625, + "recvTokensMax": 122, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 362.2719943523407, + "p90": 384.41601395606995, + "p95": 391.3919925689697, + "p99": 405.37598729133606 + }, + "combine": { + "p50": 74.49600100517273, + "p90": 107.80800133943558, + "p95": 118.20799857378006, + "p99": 143.19999516010284 + }, + "roundtrip": { + "p50": 409.0240001678467, + "p90": 433.82400274276733, + "p95": 451.4879882335663, + "p99": 477.1200120449066 + }, + "isolatedSum": { + "p50": 436.7679953575134, + "p90": 492.2240152955055, + "p95": 509.5999911427498, + "p99": 548.5759824514389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4761600, + "combineLogicalBytes": 9523200, + "fanoutMean": 3.6328125, + "recvTokensMax": 242, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 360.9600067138672, + "p90": 382.4000060558319, + "p95": 386.7200016975403, + "p99": 396.67201042175293 + }, + "combine": { + "p50": 86.27200126647949, + "p90": 90.08000046014786, + "p95": 91.71199798583984, + "p99": 98.84800016880035 + }, + "roundtrip": { + "p50": 420.54399847984314, + "p90": 437.75999546051025, + "p95": 442.52800941467285, + "p99": 451.7120122909546 + }, + "isolatedSum": { + "p50": 447.2320079803467, + "p90": 472.48000651597977, + "p95": 478.4319996833801, + "p99": 495.5200105905533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9548800, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aa98472e", + "identity": "gb300|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "f2145b6c280d36c3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:04.184456+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 337.40800619125366, + "p90": 359.2959940433502, + "p95": 372.2879886627197, + "p99": 447.84000515937805 + }, + "combine": { + "p50": 64.70400094985962, + "p90": 69.82400268316269, + "p95": 76.06399804353714, + "p99": 119.90399658679962 + 
}, + "roundtrip": { + "p50": 378.7199854850769, + "p90": 399.7119963169098, + "p95": 411.6480052471161, + "p99": 501.6639828681946 + }, + "isolatedSum": { + "p50": 402.1120071411133, + "p90": 429.1199967265129, + "p95": 448.35198670625687, + "p99": 567.7440017461777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 92160, + "combineLogicalBytes": 184320, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 338.3359909057617, + "p90": 360.0639998912811, + "p95": 394.01599764823914, + "p99": 449.2799937725067 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 70.56000083684921, + "p95": 72.54400104284286, + "p99": 80.28800040483475 + }, + "roundtrip": { + "p50": 380.19201159477234, + "p90": 399.3600010871887, + "p95": 412.76800632476807, + "p99": 497.3439872264862 + }, + "isolatedSum": { + "p50": 404.60798889398575, + "p90": 430.62400072813034, + "p95": 466.559998691082, + "p99": 529.5679941773415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 184320, + "combineLogicalBytes": 368640, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 342.81599521636963, + "p90": 368.4479892253876, + "p95": 403.3919870853424, + "p99": 471.45599126815796 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 72.09599763154984, + "p95": 75.48800110816956, + "p99": 91.77599847316742 + }, + "roundtrip": { + "p50": 384.799987077713, + "p90": 404.2559862136841, + "p95": 422.04800248146057, + "p99": 505.3120255470276 + }, + "isolatedSum": { + "p50": 410.43199598789215, + "p90": 440.5439868569374, + "p95": 478.87998819351196, + "p99": 563.2319897413254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 356352, + "combineLogicalBytes": 712704, + "fanoutMean": 
3.625, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 337.98399567604065, + "p90": 357.4399948120117, + "p95": 365.1840090751648, + "p99": 446.30399346351624 + }, + "combine": { + "p50": 69.15199756622314, + "p90": 73.44000041484833, + "p95": 76.12799853086472, + "p99": 106.27199709415436 + }, + "roundtrip": { + "p50": 385.3119909763336, + "p90": 403.5840034484863, + "p95": 411.5520119667053, + "p99": 494.4959878921509 + }, + "isolatedSum": { + "p50": 407.1359932422638, + "p90": 430.87999522686005, + "p95": 441.3120076060295, + "p99": 552.5759905576706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 700416, + "combineLogicalBytes": 1400832, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 339.1999900341034, + "p90": 361.952006816864, + "p95": 373.4720051288605, + "p99": 456.928014755249 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 73.18399846553802, + "p95": 74.94399696588516, + "p99": 82.8159973025322 + }, + "roundtrip": { + "p50": 385.0240111351013, + "p90": 403.4239947795868, + "p95": 413.2480025291443, + "p99": 509.7600221633911 + }, + "isolatedSum": { + "p50": 408.57598930597305, + "p90": 435.13600528240204, + "p95": 448.41600209474564, + "p99": 539.7440120577812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1413120, + "combineLogicalBytes": 2826240, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 341.18399024009705, + "p90": 360.76799035072327, + "p95": 370.07999420166016, + "p99": 446.8800127506256 + }, + "combine": { + "p50": 70.88000327348709, + "p90": 75.13599842786789, + "p95": 
77.08799839019775, + "p99": 105.02400249242783 + }, + "roundtrip": { + "p50": 387.58400082588196, + "p90": 406.43200278282166, + "p95": 413.4719967842102, + "p99": 511.55197620391846 + }, + "isolatedSum": { + "p50": 412.06399351358414, + "p90": 435.90398877859116, + "p95": 447.1679925918579, + "p99": 551.9040152430534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2875392, + "combineLogicalBytes": 5750784, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 344.9920117855072, + "p90": 368.0959939956665, + "p95": 378.495991230011, + "p99": 453.247994184494 + }, + "combine": { + "p50": 75.83999633789062, + "p90": 81.66400343179703, + "p95": 88.54400366544724, + "p99": 123.6800029873848 + }, + "roundtrip": { + "p50": 394.432008266449, + "p90": 414.91198539733887, + "p95": 424.0640103816986, + "p99": 509.8559856414795 + }, + "isolatedSum": { + "p50": 420.8320081233978, + "p90": 449.75999742746353, + "p95": 467.0399948954582, + "p99": 576.9279971718788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5793792, + "combineLogicalBytes": 11587584, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 344.0000116825104, + "p90": 367.0719861984253, + "p95": 379.8399865627289, + "p99": 465.4400050640106 + }, + "combine": { + "p50": 91.80799871683121, + "p90": 97.59999811649323, + "p95": 102.24000364542007, + "p99": 122.81599640846252 + }, + "roundtrip": { + "p50": 411.8080139160156, + "p90": 436.6399943828583, + "p95": 457.0559859275818, + "p99": 518.4959769248962 + }, + "isolatedSum": { + "p50": 435.8080103993416, + "p90": 464.6719843149185, + "p95": 482.07999020814896, + "p99": 588.2560014724731 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 11470848, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1aaf639b", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_6b2cb596", + "comparisonKey": "7a48ac4db6058716", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:49.049027+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 92.19200164079666, + "p90": 108.67200046777725, + "p95": 126.01600587368011, + "p99": 148.5760062932968 + }, + "combine": { + "p50": 68.60800087451935, + "p90": 81.15199953317642, + "p95": 94.84799951314926, + "p99": 117.66400188207626 + }, + "roundtrip": { + "p50": 220.768004655838, + "p90": 244.51200664043427, + "p95": 256.28799200057983, + "p99": 274.1119861602783 + }, + "isolatedSum": { + "p50": 160.800002515316, + "p90": 189.82400000095367, + "p95": 220.86400538682938, + "p99": 266.2400081753731 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 91.96799993515015, + "p90": 102.27199643850327, + "p95": 106.88000172376633, + "p99": 113.98400366306305 + }, + "combine": { + "p50": 68.9919963479042, + "p90": 72.9919970035553, + "p95": 74.87999647855759, + "p99": 84.99199897050858 + }, + "roundtrip": { + "p50": 219.87199783325195, + "p90": 236.54399812221527, + "p95": 241.34400486946106, + "p99": 251.45599246025085 + }, + "isolatedSum": { + "p50": 160.95999628305435, + "p90": 175.26399344205856, + "p95": 181.7599982023239, + "p99": 198.97600263357162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 96.09600156545639, + "p90": 
132.7359974384308, + "p95": 146.91199362277985, + "p99": 159.45599973201752 + }, + "combine": { + "p50": 70.81600278615952, + "p90": 75.83999633789062, + "p95": 84.3840017914772, + "p99": 119.26399916410446 + }, + "roundtrip": { + "p50": 224.2559939622879, + "p90": 249.66399371623993, + "p95": 261.9520127773285, + "p99": 286.8160009384155 + }, + "isolatedSum": { + "p50": 166.9120043516159, + "p90": 208.5759937763214, + "p95": 231.29599541425705, + "p99": 278.719998896122 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 92.38400310277939, + "p90": 103.5199984908104, + "p95": 108.03200304508209, + "p99": 119.80800330638885 + }, + "combine": { + "p50": 73.05599749088287, + "p90": 78.43200117349625, + "p95": 83.13599973917007, + "p99": 93.28000247478485 + }, + "roundtrip": { + "p50": 223.4880030155182, + "p90": 240.03200232982635, + "p95": 243.6159998178482, + "p99": 257.6960027217865 + }, + "isolatedSum": { + "p50": 165.44000059366226, + "p90": 181.95199966430664, + "p95": 191.16800278425217, + "p99": 213.0880057811737 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 103.45599800348282, + "p90": 148.3840048313141, + "p95": 154.1759967803955, + "p99": 167.9680049419403 + }, + "combine": { + "p50": 73.63200187683105, + "p90": 101.15200281143188, + "p95": 114.3679991364479, + "p99": 138.65600526332855 + }, + "roundtrip": { + "p50": 224.16000068187714, + "p90": 245.15199661254883, + "p95": 258.9440047740936, + "p99": 284.35200452804565 + }, + "isolatedSum": { + "p50": 
177.08799988031387, + "p90": 249.53600764274597, + "p95": 268.5439959168434, + "p99": 306.62401020526886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 92.0960009098053, + "p90": 102.91200131177902, + "p95": 106.23999685049057, + "p99": 113.76000195741653 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 79.45600152015686, + "p95": 82.33600109815598, + "p99": 89.21600133180618 + }, + "roundtrip": { + "p50": 225.8879989385605, + "p90": 240.57599902153015, + "p95": 247.55200743675232, + "p99": 268.19199323654175 + }, + "isolatedSum": { + "p50": 166.97599738836288, + "p90": 182.36800283193588, + "p95": 188.57599794864655, + "p99": 202.97600328922272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 99.10400211811066, + "p90": 107.77600109577179, + "p95": 112.09599673748016, + "p99": 122.01599776744843 + }, + "combine": { + "p50": 80.89599758386612, + "p90": 84.41600203514099, + "p95": 86.01599931716919, + "p99": 96.16000205278397 + }, + "roundtrip": { + "p50": 234.43199694156647, + "p90": 248.83200228214264, + "p95": 253.79198789596558, + "p99": 260.4160010814667 + }, + "isolatedSum": { + "p50": 179.99999970197678, + "p90": 192.19200313091278, + "p95": 198.11199605464935, + "p99": 218.1759998202324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 110.27199774980545, + "p90": 116.35199934244156, + "p95": 119.32799965143204, + "p99": 125.98399817943573 + }, + "combine": { + "p50": 97.72799909114838, + "p90": 101.72799974679947, + "p95": 102.9760017991066, + "p99": 110.3999987244606 + }, + "roundtrip": { + "p50": 248.1600046157837, + "p90": 263.7760043144226, + "p95": 268.0639922618866, + "p99": 281.3760042190552 + }, + "isolatedSum": { + "p50": 207.99999684095383, + "p90": 218.07999908924103, + "p95": 222.30400145053864, + "p99": 236.38399690389633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d98c708f", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "716392dacda86f4d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:58.289884+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 395.4559862613678, + "p90": 432.0639967918396, + "p95": 535.1999998092651, + "p99": 580.2879929542542 + }, + "combine": { + "p50": 68.00000369548798, + "p90": 73.79200309515, + "p95": 78.43200117349625, + "p99": 128.57599556446075 + }, + "roundtrip": { + "p50": 435.263991355896, + "p90": 490.01601338386536, + "p95": 571.1039900779724, + "p99": 784.8640084266663 + }, + "isolatedSum": { + "p50": 463.4559899568558, + "p90": 505.8559998869896, + "p95": 613.6320009827614, + "p99": 708.8639885187149 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 362.36798763275146, + "p90": 416.48000478744507, + "p95": 431.93599581718445, + "p99": 570.2400207519531 + }, + "combine": { + "p50": 68.89600306749344, + "p90": 73.47200065851212, + "p95": 75.6160020828247, + "p99": 94.08000111579895 + 
}, + "roundtrip": { + "p50": 407.45601058006287, + "p90": 468.8960015773773, + "p95": 477.9520034790039, + "p99": 604.4800281524658 + }, + "isolatedSum": { + "p50": 431.2639907002449, + "p90": 489.9520054459572, + "p95": 507.55199790000916, + "p99": 664.3200218677521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 356.83199763298035, + "p90": 434.04799699783325, + "p95": 442.55998730659485, + "p99": 452.38399505615234 + }, + "combine": { + "p50": 69.95200365781784, + "p90": 75.6160020828247, + "p95": 80.19199967384338, + "p99": 106.88000172376633 + }, + "roundtrip": { + "p50": 404.38398718833923, + "p90": 493.24798583984375, + "p95": 502.1439790725708, + "p99": 519.2319750785828 + }, + "isolatedSum": { + "p50": 426.7840012907982, + "p90": 509.66399908065796, + "p95": 522.7519869804382, + "p99": 559.2639967799187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 360.9920144081116, + "p90": 430.59200048446655, + "p95": 487.61600255966187, + "p99": 581.3440084457397 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 77.79199630022049, + "p95": 82.46400207281113, + "p99": 126.8479973077774 + }, + "roundtrip": { + "p50": 406.97601437568665, + "p90": 475.5519926548004, + "p95": 489.1200065612793, + "p99": 625.0560283660889 + }, + "isolatedSum": { + "p50": 433.50401520729065, + "p90": 508.38399678468704, + "p95": 570.080004632473, + "p99": 708.1920057535172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + 
"fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 354.46399450302124, + "p90": 437.0560050010681, + "p95": 475.19999742507935, + "p99": 595.1039791107178 + }, + "combine": { + "p50": 72.15999811887741, + "p90": 77.47200131416321, + "p95": 82.17599987983704, + "p99": 120.41600048542023 + }, + "roundtrip": { + "p50": 406.1119854450226, + "p90": 481.3759922981262, + "p95": 502.7199983596802, + "p99": 637.7919912338257 + }, + "isolatedSum": { + "p50": 426.62399262189865, + "p90": 514.5280063152313, + "p95": 557.3759973049164, + "p99": 715.519979596138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 363.9039993286133, + "p90": 423.9040017127991, + "p95": 443.5519874095917, + "p99": 577.0559906959534 + }, + "combine": { + "p50": 74.0479975938797, + "p90": 80.54400235414505, + "p95": 85.31200140714645, + "p99": 113.66400122642517 + }, + "roundtrip": { + "p50": 412.51200437545776, + "p90": 479.42399978637695, + "p95": 510.24001836776733, + "p99": 631.2320232391357 + }, + "isolatedSum": { + "p50": 437.951996922493, + "p90": 504.4480040669441, + "p95": 528.8639888167381, + "p99": 690.7199919223785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 361.407995223999, + "p90": 443.2959854602814, + "p95": 451.200008392334, + "p99": 474.62400794029236 + }, + "combine": { + "p50": 80.03199845552444, + "p90": 
84.41600203514099, + "p95": 85.9839990735054, + "p99": 91.71199798583984 + }, + "roundtrip": { + "p50": 413.536012172699, + "p90": 497.24799394607544, + "p95": 504.7360062599182, + "p99": 518.6880230903625 + }, + "isolatedSum": { + "p50": 441.43999367952347, + "p90": 527.7119874954224, + "p95": 537.1840074658394, + "p99": 566.3360059261322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 362.43200302124023, + "p90": 443.1680142879486, + "p95": 456.60799741744995, + "p99": 480.47998547554016 + }, + "combine": { + "p50": 97.120001912117, + "p90": 102.08000242710114, + "p95": 104.47999835014343, + "p99": 114.1119971871376 + }, + "roundtrip": { + "p50": 435.0079894065857, + "p90": 517.6960229873657, + "p95": 531.4559936523438, + "p99": 647.1359729766846 + }, + "isolatedSum": { + "p50": 459.55200493335724, + "p90": 545.2480167150497, + "p95": 561.0879957675934, + "p99": 594.5919826626778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db2bf688", + "identity": "gb300|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||7c1cc7238ca9a52", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "ac1c5bf87d2ccb89", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:33.671712+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "7c1cc7238ca9a52", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 370.6879913806915, + "p90": 390.8799886703491, + "p95": 397.95199036598206, + "p99": 435.10401248931885 + }, + "combine": { + "p50": 69.76000219583511, + "p90": 73.72800260782242, + "p95": 76.57600194215775, + "p99": 83.3280012011528 + }, + "roundtrip": { + "p50": 417.08800196647644, + "p90": 432.9279959201813, + "p95": 436.8639886379242, + "p99": 444.95999813079834 + }, + "isolatedSum": { + "p50": 440.44799357652664, + "p90": 464.60799127817154, + "p95": 
474.5279923081398, + "p99": 518.4320136904716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 370.1440095901489, + "p90": 387.4559998512268, + "p95": 392.41600036621094, + "p99": 400.41598677635193 + }, + "combine": { + "p50": 71.99999690055847, + "p90": 75.6480023264885, + "p95": 78.14399898052216, + "p99": 86.5280032157898 + }, + "roundtrip": { + "p50": 416.9600009918213, + "p90": 432.51198530197144, + "p95": 435.87198853492737, + "p99": 443.5519874095917 + }, + "isolatedSum": { + "p50": 442.1440064907074, + "p90": 463.1040021777153, + "p95": 470.5599993467331, + "p99": 486.9439899921417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 375.328004360199, + "p90": 396.86399698257446, + "p95": 403.00801396369934, + "p99": 413.2480025291443 + }, + "combine": { + "p50": 72.76800274848938, + "p90": 76.12799853086472, + "p95": 78.23999971151352, + "p99": 85.85599809885025 + }, + "roundtrip": { + "p50": 420.4480051994324, + "p90": 439.2000138759613, + "p95": 445.279985666275, + "p99": 457.40801095962524 + }, + "isolatedSum": { + "p50": 448.09600710868835, + "p90": 472.9919955134392, + "p95": 481.24801367521286, + "p99": 499.10400062799454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 373.1519877910614, + "p90": 
392.89599657058716, + "p95": 397.599995136261, + "p99": 406.46401047706604 + }, + "combine": { + "p50": 75.45600086450577, + "p90": 79.1039988398552, + "p95": 81.216000020504, + "p99": 88.67199718952179 + }, + "roundtrip": { + "p50": 420.03199458122253, + "p90": 435.9999895095825, + "p95": 439.9360120296478, + "p99": 448.89599084854126 + }, + "isolatedSum": { + "p50": 448.60798865556717, + "p90": 471.99999541044235, + "p95": 478.815995156765, + "p99": 495.13600766658783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 824320, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 30, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 374.08000230789185, + "p90": 393.3440148830414, + "p95": 400.7039964199066, + "p99": 416.128009557724 + }, + "combine": { + "p50": 74.81600344181061, + "p90": 78.49600166082382, + "p95": 81.02399855852127, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 422.08001017570496, + "p90": 437.9520118236542, + "p95": 443.32799315452576, + "p99": 450.49598813056946 + }, + "isolatedSum": { + "p50": 448.89600574970245, + "p90": 471.8400165438652, + "p95": 481.7279949784279, + "p99": 505.12000918388367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619968, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 59, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 372.8320002555847, + "p90": 392.8320109844208, + "p95": 399.58399534225464, + "p99": 408.6720049381256 + }, + "combine": { + "p50": 77.05599814653397, + "p90": 80.70400357246399, + "p95": 82.78399705886841, + "p99": 88.79999816417694 + }, + "roundtrip": { + "p50": 423.2960045337677, + "p90": 439.4240081310272, + "p95": 443.90401244163513, + "p99": 450.6239891052246 + }, + "isolatedSum": { + "p50": 
449.8879984021187, + "p90": 473.53601455688477, + "p95": 482.36799240112305, + "p99": 497.47200310230255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3311616, + "combineLogicalBytes": 6623232, + "fanoutMean": 3.609375, + "recvTokensMax": 117, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 375.67999958992004, + "p90": 404.12798523902893, + "p95": 412.6400053501129, + "p99": 433.9199960231781 + }, + "combine": { + "p50": 82.97599852085114, + "p90": 87.0399996638298, + "p95": 90.11200070381165, + "p99": 96.73599898815155 + }, + "roundtrip": { + "p50": 432.12801218032837, + "p90": 454.0480077266693, + "p95": 459.55199003219604, + "p99": 471.9040095806122 + }, + "isolatedSum": { + "p50": 458.6559981107712, + "p90": 491.16798490285873, + "p95": 502.75200605392456, + "p99": 530.6559950113297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6594560, + "combineLogicalBytes": 13189120, + "fanoutMean": 3.59375, + "recvTokensMax": 234, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 374.9760091304779, + "p90": 395.00799775123596, + "p95": 400.9920060634613, + "p99": 413.9519929885864 + }, + "combine": { + "p50": 99.13600236177444, + "p90": 102.84800082445145, + "p95": 104.41599786281586, + "p99": 111.48799955844879 + }, + "roundtrip": { + "p50": 447.58400321006775, + "p90": 465.37598967552185, + "p95": 467.8080081939697, + "p99": 479.36001420021057 + }, + "isolatedSum": { + "p50": 474.11201149225235, + "p90": 497.8559985756874, + "p95": 505.40800392627716, + "p99": 525.4399925470352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13310976, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { 
+ "id": "cx-7c98cf73", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_b8a1aafe", + "comparisonKey": "3e65ca156c4a2ded", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:22.620457+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 73.15199822187424, + "p90": 80.73599636554718, + "p95": 85.75999736785889, + "p99": 93.88799965381622 + }, + "combine": { + "p50": 65.98400324583054, + "p90": 69.40799951553345, + "p95": 71.48800045251846, + "p99": 79.29600030183792 + }, + "roundtrip": { + "p50": 199.90399479866028, + "p90": 213.6320024728775, + "p95": 217.056006193161, + "p99": 224.5119959115982 + }, + "isolatedSum": { + "p50": 139.13600146770477, + "p90": 150.14399588108063, + "p95": 157.24799782037735, + "p99": 173.18399995565414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 73.79200309515, + "p90": 81.15199953317642, + "p95": 88.3840024471283, + "p99": 97.05600142478943 + }, + "combine": { + "p50": 67.4239993095398, + "p90": 70.94399631023407, + "p95": 72.73600250482559, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 200.99200308322906, + "p90": 213.1199985742569, + "p95": 218.62399578094482, + "p99": 231.29600286483765 + }, + "isolatedSum": { + "p50": 141.2160024046898, + "p90": 152.0959958434105, + "p95": 161.1200049519539, + "p99": 177.47200280427933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 73.85600358247757, + "p90": 80.83199709653854, + "p95": 84.28800106048584, + "p99": 94.08000111579895 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 71.87200337648392, + "p95": 74.0479975938797, + "p99": 79.77599650621414 + }, + "roundtrip": { + 
"p50": 203.07199656963348, + "p90": 215.58399498462677, + "p95": 219.55199539661407, + "p99": 227.13600099086761 + }, + "isolatedSum": { + "p50": 142.08000153303146, + "p90": 152.70400047302246, + "p95": 158.33599865436554, + "p99": 173.8559976220131 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 73.79200309515, + "p90": 81.08799904584885, + "p95": 86.07999980449677, + "p99": 93.53599697351456 + }, + "combine": { + "p50": 70.46400010585785, + "p90": 73.72800260782242, + "p95": 75.45600086450577, + "p99": 82.40000158548355 + }, + "roundtrip": { + "p50": 205.4399996995926, + "p90": 217.6000028848648, + "p95": 221.79199755191803, + "p99": 227.07200050354004 + }, + "isolatedSum": { + "p50": 144.25600320100784, + "p90": 154.81600165367126, + "p95": 161.53600066900253, + "p99": 175.9359985589981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 74.11199808120728, + "p90": 83.10399949550629, + "p95": 87.39200234413147, + "p99": 95.93600034713745 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 74.23999905586243, + "p95": 75.32799988985062, + "p99": 79.6160027384758 + }, + "roundtrip": { + "p50": 206.33600652217865, + "p90": 218.75199675559998, + "p95": 222.46399521827698, + "p99": 228.96000742912292 + }, + "isolatedSum": { + "p50": 144.96000111103058, + "p90": 157.3439985513687, + "p95": 162.7200022339821, + "p99": 175.55200308561325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + 
"recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 74.46400076150894, + "p90": 79.96799796819687, + "p95": 83.99999886751175, + "p99": 93.05600076913834 + }, + "combine": { + "p50": 72.57600128650665, + "p90": 76.03199779987335, + "p95": 77.31200009584427, + "p99": 83.71199667453766 + }, + "roundtrip": { + "p50": 207.42399990558624, + "p90": 219.4560021162033, + "p95": 223.83999824523926, + "p99": 233.5360050201416 + }, + "isolatedSum": { + "p50": 147.0400020480156, + "p90": 155.99999576807022, + "p95": 161.31199896335602, + "p99": 176.767997443676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 83.20000022649765, + "p90": 87.64799684286118, + "p95": 91.10400080680847, + "p99": 97.63199836015701 + }, + "combine": { + "p50": 78.36800068616867, + "p90": 82.04799890518188, + "p95": 84.73599702119827, + "p99": 90.71999788284302 + }, + "roundtrip": { + "p50": 214.7199958562851, + "p90": 228.32000255584717, + "p95": 233.08800160884857, + "p99": 242.8160011768341 + }, + "isolatedSum": { + "p50": 161.56800091266632, + "p90": 169.69599574804306, + "p95": 175.83999782800674, + "p99": 188.35199624300003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 94.43199634552002, + "p90": 98.88000041246414, + "p95": 101.69599950313568, + "p99": 108.96000266075134 + }, + "combine": { + "p50": 95.42399644851685, + "p90": 99.84000027179718, + 
"p95": 101.43999755382538, + "p99": 110.68800091743469 + }, + "roundtrip": { + "p50": 229.72799837589264, + "p90": 241.72799289226532, + "p95": 245.1840043067932, + "p99": 256.0960054397583 + }, + "isolatedSum": { + "p50": 189.85599279403687, + "p90": 198.72000068426132, + "p95": 203.13599705696106, + "p99": 219.64800357818604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cbd02980", + "identity": "gb300|deepep|v2|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_a02b594b", + "comparisonKey": "f74b65b4dd74052c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:39.338510+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + 
"scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 42.01599955558777, + "p90": 48.0320006608963, + "p95": 53.50400134921074, + "p99": 90.84799885749817 + }, + "combine": { + "p50": 61.91999837756157, + "p90": 69.47200000286102, + "p95": 71.71200215816498, + "p99": 79.00799810886383 + }, + "roundtrip": { + "p50": 1526.9440412521362, + "p90": 1533.7599515914917, + "p95": 1536.0959768295288, + "p99": 1540.9280061721802 + }, + "isolatedSum": { + "p50": 103.93599793314934, + "p90": 117.50400066375732, + "p95": 125.21600350737572, + "p99": 169.855996966362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 41.88799858093262, + "p90": 45.88799923658371, + "p95": 48.64000156521797, + "p99": 57.21599981188774 + }, + "combine": { + "p50": 53.15199866890907, + "p90": 65.40799885988235, + "p95": 67.391999065876, + "p99": 71.96799665689468 + }, + "roundtrip": { + "p50": 1523.0079889297485, + "p90": 1535.5520248413086, + "p95": 1538.6240482330322, + "p99": 1549.8559474945068 + }, + "isolatedSum": { + "p50": 95.03999724984169, + "p90": 111.29599809646606, + "p95": 116.03200063109398, + "p99": 129.18399646878242 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 42.527999728918076, + "p90": 48.00000041723251, + "p95": 51.231998950242996, + "p99": 68.83200258016586 + }, + "combine": { + "p50": 58.079998940229416, + "p90": 69.24799829721451, + "p95": 72.15999811887741, + "p99": 78.33600044250488 + }, + "roundtrip": { + "p50": 1525.056004524231, + "p90": 1537.0880365371704, + "p95": 1540.2560234069824, + "p99": 1548.1280088424683 + }, + "isolatedSum": { + "p50": 100.60799866914749, + "p90": 117.24799871444702, + "p95": 123.3919970691204, + "p99": 147.16800302267075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 42.55999997258186, + "p90": 48.09600114822388, + "p95": 52.960000932216644, + "p99": 88.79999816417694 + }, + "combine": { + "p50": 62.94400244951248, + "p90": 69.40799951553345, + "p95": 72.57600128650665, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 1530.56001663208, + "p90": 1538.4000539779663, + "p95": 1541.0879850387573, + "p99": 1545.1840162277222 + }, + "isolatedSum": { + "p50": 105.50400242209435, + "p90": 117.50400066375732, + "p95": 125.5360022187233, + "p99": 179.6799972653389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 42.4639992415905, + "p90": 48.16000163555145, + "p95": 
51.77599936723709, + "p99": 62.55999952554703 + }, + "combine": { + "p50": 62.591999769210815, + "p90": 70.46400010585785, + "p95": 73.18399846553802, + "p99": 84.19200032949448 + }, + "roundtrip": { + "p50": 1531.999945640564, + "p90": 1540.9280061721802, + "p95": 1544.9919700622559, + "p99": 1562.2080564498901 + }, + "isolatedSum": { + "p50": 105.05599901080132, + "p90": 118.6240017414093, + "p95": 124.95999783277512, + "p99": 146.7519998550415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 41.95199906826019, + "p90": 46.751998364925385, + "p95": 50.56000128388405, + "p99": 57.82400071620941 + }, + "combine": { + "p50": 60.06399914622307, + "p90": 72.38399982452393, + "p95": 78.23999971151352, + "p99": 104.70400005578995 + }, + "roundtrip": { + "p50": 1527.6479721069336, + "p90": 1540.4479503631592, + "p95": 1543.0400371551514, + "p99": 1552.0000457763672 + }, + "isolatedSum": { + "p50": 102.01599821448326, + "p90": 119.13599818944931, + "p95": 128.80000099539757, + "p99": 162.52800077199936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 45.184001326560974, + "p90": 50.75199902057648, + "p95": 52.57600173354149, + "p99": 62.97600269317627 + }, + "combine": { + "p50": 61.91999837756157, + "p90": 75.6160020828247, + "p95": 78.94399762153625, + "p99": 85.37600189447403 + }, + "roundtrip": { + "p50": 1532.7999591827393, + "p90": 1544.6079969406128, + "p95": 1547.4239587783813, + "p99": 1554.4960498809814 + }, + "isolatedSum": { + "p50": 
107.10399970412254, + "p90": 126.36800110340118, + "p95": 131.51999935507774, + "p99": 148.3520045876503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 50.40000006556511, + "p90": 54.84800040721893, + "p95": 58.01599845290184, + "p99": 93.50399672985077 + }, + "combine": { + "p50": 69.31199878454208, + "p90": 77.11999863386154, + "p95": 79.93599772453308, + "p99": 86.81599795818329 + }, + "roundtrip": { + "p50": 1546.3039875030518, + "p90": 1554.527997970581, + "p95": 1558.0799579620361, + "p99": 1573.9200115203857 + }, + "isolatedSum": { + "p50": 119.7119988501072, + "p90": 131.96799904108047, + "p95": 137.95199617743492, + "p99": 180.31999468803406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ddd52884", + "identity": "gb300|deepep|v2|7168|8|256|fp8|ll|runtime-visible-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_d936aa1f", + "comparisonKey": "3d684ef4096d5610", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:13.956587+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 40.16000032424927, + "p90": 43.616000562906265, + "p95": 47.968000173568726, + "p99": 75.9039968252182 + }, + "combine": { + "p50": 52.352000027894974, + "p90": 68.06399673223495, + "p95": 72.89600372314453, + "p99": 77.63200253248215 + }, + "roundtrip": { + "p50": 1520.7040309906006, + "p90": 1557.5679540634155, + "p95": 1570.304036140442, + "p99": 1583.8079452514648 + }, + "isolatedSum": { + "p50": 92.51200035214424, + "p90": 111.67999729514122, + "p95": 120.86400389671326, + "p99": 153.53599935770035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 41.439998894929886, + "p90": 66.3359984755516, + "p95": 71.96799665689468, + "p99": 102.78400033712387 + }, + "combine": { + "p50": 52.319999784231186, + "p90": 67.61600077152252, + "p95": 71.71200215816498, + "p99": 95.10400146245956 + }, + "roundtrip": { + "p50": 1520.2239751815796, + "p90": 1544.927954673767, + "p95": 1555.4239749908447, + "p99": 1571.071982383728 + }, + "isolatedSum": { + "p50": 93.75999867916107, + "p90": 133.95199924707413, + "p95": 143.67999881505966, + "p99": 197.88800179958344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 41.18400067090988, + "p90": 45.69600149989128, + "p95": 49.18399825692177, + "p99": 101.50399804115295 + }, + "combine": { + "p50": 53.247999399900436, + "p90": 65.11999666690826, + "p95": 70.01599669456482, + "p99": 80.86399734020233 + }, + "roundtrip": { + "p50": 1521.6000080108643, + "p90": 1534.783959388733, + "p95": 1543.1679487228394, + "p99": 1560.8960390090942 + }, + "isolatedSum": { + "p50": 94.43200007081032, + "p90": 110.81599816679955, + "p95": 119.19999495148659, + "p99": 182.36799538135529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 41.152000427246094, + "p90": 44.863998889923096, + "p95": 48.928000032901764, + "p99": 96.19200229644775 + }, + "combine": { + "p50": 53.53600159287453, + "p90": 62.6240000128746, + "p95": 66.46399945020676, + "p99": 101.18400305509567 + }, + "roundtrip": { + "p50": 1522.3040580749512, 
+ "p90": 1538.7519598007202, + "p95": 1547.584056854248, + "p99": 1569.1839456558228 + }, + "isolatedSum": { + "p50": 94.68800202012062, + "p90": 107.4879989027977, + "p95": 115.39199948310852, + "p99": 197.37600535154343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 40.383998304605484, + "p90": 44.35199871659279, + "p95": 47.488000243902206, + "p99": 55.39200082421303 + }, + "combine": { + "p50": 55.64799904823303, + "p90": 64.03200328350067, + "p95": 66.68800115585327, + "p99": 71.23199850320816 + }, + "roundtrip": { + "p50": 1523.1679677963257, + "p90": 1530.3679704666138, + "p95": 1533.1840515136719, + "p99": 1536.895990371704 + }, + "isolatedSum": { + "p50": 96.03199735283852, + "p90": 108.38400200009346, + "p95": 114.17600139975548, + "p99": 126.62399932742119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 41.21600091457367, + "p90": 44.44799944758415, + "p95": 47.26399853825569, + "p99": 53.75999957323074 + }, + "combine": { + "p50": 56.2559999525547, + "p90": 64.60800021886826, + "p95": 68.00000369548798, + "p99": 84.89599823951721 + }, + "roundtrip": { + "p50": 1525.1840353012085, + "p90": 1534.432053565979, + "p95": 1540.4160022735596, + "p99": 1567.296028137207 + }, + "isolatedSum": { + "p50": 97.47200086712837, + "p90": 109.05599966645241, + "p95": 115.26400223374367, + "p99": 138.65599781274796 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + 
"recvTokensMax": 270, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 42.94399917125702, + "p90": 47.26399853825569, + "p95": 51.7439991235733, + "p99": 98.84800016880035 + }, + "combine": { + "p50": 58.14399942755699, + "p90": 74.78400319814682, + "p95": 80.73599636554718, + "p99": 106.91200196743011 + }, + "roundtrip": { + "p50": 1528.5439491271973, + "p90": 1540.3200387954712, + "p95": 1548.1280088424683, + "p99": 1574.5919942855835 + }, + "isolatedSum": { + "p50": 101.08799859881401, + "p90": 122.04800173640251, + "p95": 132.47999548912048, + "p99": 205.76000213623047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 49.215998500585556, + "p90": 52.76799947023392, + "p95": 54.23999950289726, + "p99": 61.5679994225502 + }, + "combine": { + "p50": 66.880002617836, + "p90": 77.53600180149078, + "p95": 81.95199817419052, + "p99": 118.65600198507309 + }, + "roundtrip": { + "p50": 1543.071985244751, + "p90": 1556.831955909729, + "p95": 1562.6239776611328, + "p99": 1584.7359895706177 + }, + "isolatedSum": { + "p50": 116.09600111842155, + "p90": 130.3040012717247, + "p95": 136.19199767708778, + "p99": 180.2240014076233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1708e07", + "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "c86d940414a55991", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:34.120470+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 99.07200187444687, + "p90": 112.89600282907486, + "p95": 117.15199798345566, + "p99": 131.29599392414093 + }, + "combine": { 
+ "p50": 71.6480016708374, + "p90": 76.19199901819229, + "p95": 81.91999793052673, + "p99": 85.60000360012054 + }, + "roundtrip": { + "p50": 143.68000626564026, + "p90": 157.0879966020584, + "p95": 161.15200519561768, + "p99": 178.56000363826752 + }, + "isolatedSum": { + "p50": 170.72000354528427, + "p90": 189.08800184726715, + "p95": 199.0719959139824, + "p99": 216.89599752426147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.76799923181534, + "p90": 112.03200370073318, + "p95": 115.90400338172913, + "p99": 122.01599776744843 + }, + "combine": { + "p50": 72.73600250482559, + "p90": 81.56800270080566, + "p95": 84.51200276613235, + "p99": 99.71199929714203 + }, + "roundtrip": { + "p50": 146.7519998550415, + "p90": 157.05600380897522, + "p95": 162.6559942960739, + "p99": 177.69600450992584 + }, + "isolatedSum": { + "p50": 169.50400173664093, + "p90": 193.60000640153885, + "p95": 200.41600614786148, + "p99": 221.72799706459045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 101.24800354242325, + "p90": 140.54399728775024, + "p95": 149.72800016403198, + "p99": 166.04800522327423 + }, + "combine": { + "p50": 73.21599870920181, + "p90": 95.93600034713745, + "p95": 106.4319983124733, + "p99": 134.20799374580383 + }, + "roundtrip": { + "p50": 148.41599762439728, + "p90": 183.3599954843521, + "p95": 192.86400079727173, + "p99": 212.2880071401596 + }, + "isolatedSum": { + "p50": 174.46400225162506, + "p90": 236.4799976348877, + "p95": 256.1599984765053, + "p99": 
300.25599896907806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.28800368309021, + "p90": 112.06399649381638, + "p95": 115.77600240707397, + "p99": 122.04799801111221 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 83.13599973917007, + "p95": 85.4400023818016, + "p99": 97.43999689817429 + }, + "roundtrip": { + "p50": 151.39199793338776, + "p90": 161.69600188732147, + "p95": 168.32000017166138, + "p99": 175.4239946603775 + }, + "isolatedSum": { + "p50": 174.94400590658188, + "p90": 195.19999623298645, + "p95": 201.21600478887558, + "p99": 219.4879949092865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 99.87200051546097, + "p90": 112.64000087976456, + "p95": 115.68000167608261, + "p99": 124.38400089740753 + }, + "combine": { + "p50": 76.48000121116638, + "p90": 83.67999643087387, + "p95": 85.4400023818016, + "p99": 98.1760025024414 + }, + "roundtrip": { + "p50": 151.8400013446808, + "p90": 162.6559942960739, + "p95": 166.6879951953888, + "p99": 175.20000040531158 + }, + "isolatedSum": { + "p50": 176.35200172662735, + "p90": 196.31999731063843, + "p95": 201.12000405788422, + "p99": 222.56000339984894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.9760011434555, + "p90": 
111.64800077676773, + "p95": 116.7680025100708, + "p99": 125.47199428081512 + }, + "combine": { + "p50": 80.79999685287476, + "p90": 84.76799726486206, + "p95": 86.81599795818329, + "p99": 95.32800316810608 + }, + "roundtrip": { + "p50": 153.02400290966034, + "p90": 164.5440012216568, + "p95": 169.15200650691986, + "p99": 179.1040003299713 + }, + "isolatedSum": { + "p50": 179.77599799633026, + "p90": 196.4159980416298, + "p95": 203.5840004682541, + "p99": 220.7999974489212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.27199774980545, + "p90": 143.5520052909851, + "p95": 154.30399775505066, + "p99": 177.98399925231934 + }, + "combine": { + "p50": 94.2080020904541, + "p90": 120.57600170373917, + "p95": 136.4160031080246, + "p99": 157.69599378108978 + }, + "roundtrip": { + "p50": 171.61600291728973, + "p90": 197.9839950799942, + "p95": 212.96000480651855, + "p99": 238.49600553512573 + }, + "isolatedSum": { + "p50": 204.47999984025955, + "p90": 264.1280069947243, + "p95": 290.72000086307526, + "p99": 335.6799930334091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.31999975442886, + "p90": 148.51200580596924, + "p95": 157.75999426841736, + "p99": 182.91200697422028 + }, + "combine": { + "p50": 108.47999900579453, + "p90": 135.6160044670105, + "p95": 147.93600142002106, + "p99": 158.4320068359375 + }, + "roundtrip": { + "p50": 195.99999487400055, + "p90": 214.52799439430237, + "p95": 227.90400683879852, + "p99": 248.25599789619446 + 
}, + "isolatedSum": { + "p50": 228.7999987602234, + "p90": 284.12801027297974, + "p95": 305.6959956884384, + "p99": 341.3440138101578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e8c8650", + "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "94583a6ef392e3d0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:34.352949+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.73599898815155, + "p90": 136.06399297714233, + "p95": 149.3760049343109, + "p99": 160.96000373363495 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 94.04800087213516, + "p95": 109.8880022764206, + "p99": 134.36800241470337 + }, + "roundtrip": { + "p50": 144.0960019826889, + "p90": 159.07199680805206, + "p95": 164.35199975967407, + "p99": 174.52800273895264 + }, + "isolatedSum": { + "p50": 170.43200135231018, + "p90": 230.1119938492775, + "p95": 259.2640072107315, + "p99": 295.3280061483383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 94.94400024414062, + "p90": 110.55999994277954, + "p95": 119.19999867677689, + "p99": 130.52800297737122 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 80.79999685287476, + "p95": 83.55200290679932, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 145.6959992647171, + "p90": 161.05599701404572, + "p95": 167.87199676036835, + "p99": 181.0240000486374 + }, + "isolatedSum": { + "p50": 168.64000260829926, + "p90": 191.3599967956543, + "p95": 202.7520015835762, + "p99": 217.3760011792183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 96.16000205278397, + "p90": 117.40799993276596, + "p95": 126.49600207805634, + "p99": 139.20000195503235 + }, + "combine": { + "p50": 76.54400169849396, + "p90": 94.59199756383896, + "p95": 111.42399907112122, + "p99": 132.51200318336487 + }, + "roundtrip": { + "p50": 148.12800288200378, + "p90": 173.50399494171143, + "p95": 184.09599363803864, + "p99": 208.12800526618958 + }, + "isolatedSum": { + "p50": 172.70400375127792, + "p90": 211.99999749660492, + "p95": 237.92000114917755, + "p99": 271.7120051383972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.96800124645233, + "p90": 136.73600554466248, + "p95": 148.28799664974213, + "p99": 170.97599804401398 + }, + "combine": { + "p50": 80.6720033288002, + "p90": 99.04000163078308, + "p95": 117.15199798345566, + "p99": 138.2399946451187 + }, + "roundtrip": { + "p50": 150.751993060112, + "p90": 184.12800133228302, + "p95": 194.75199282169342, + "p99": 214.88000452518463 + }, + "isolatedSum": { + "p50": 180.64000457525253, + "p90": 235.77600717544556, + "p95": 265.4399946331978, + "p99": 309.2159926891327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 100.99200159311295, + "p90": 131.00799918174744, + "p95": 146.2399959564209, + "p99": 171.29600048065186 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 97.72799909114838, + "p95": 110.36799848079681, + "p99": 129.60000336170197 
+ }, + "roundtrip": { + "p50": 151.64799988269806, + "p90": 185.47199666500092, + "p95": 193.9840018749237, + "p99": 210.52800118923187 + }, + "isolatedSum": { + "p50": 182.5920045375824, + "p90": 228.7359982728958, + "p95": 256.6079944372177, + "p99": 300.8960038423538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.65599936246872, + "p90": 141.53599739074707, + "p95": 149.88799393177032, + "p99": 169.3439930677414 + }, + "combine": { + "p50": 83.96799862384796, + "p90": 110.78400164842606, + "p95": 122.17599898576736, + "p99": 146.7519998550415 + }, + "roundtrip": { + "p50": 155.61600029468536, + "p90": 184.03199315071106, + "p95": 191.03999435901642, + "p99": 209.60000157356262 + }, + "isolatedSum": { + "p50": 186.62399798631668, + "p90": 252.31999903917313, + "p95": 272.0639929175377, + "p99": 316.0959929227829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.64800012111664, + "p90": 137.63199746608734, + "p95": 145.82400023937225, + "p99": 166.87999665737152 + }, + "combine": { + "p50": 96.92800045013428, + "p90": 108.06400328874588, + "p95": 130.75199723243713, + "p99": 154.08000349998474 + }, + "roundtrip": { + "p50": 175.77600479125977, + "p90": 194.62400674819946, + "p95": 206.11199736595154, + "p99": 242.5280064344406 + }, + "isolatedSum": { + "p50": 204.57600057125092, + "p90": 245.69600075483322, + "p95": 276.5759974718094, + "p99": 320.96000015735626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + 
"combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.99999797344208, + "p90": 138.43199610710144, + "p95": 149.72800016403198, + "p99": 190.33600389957428 + }, + "combine": { + "p50": 118.20799857378006, + "p90": 146.43199741840363, + "p95": 150.9760022163391, + "p99": 160.35200655460358 + }, + "roundtrip": { + "p50": 209.6640020608902, + "p90": 229.91999983787537, + "p95": 239.3919974565506, + "p99": 255.5519938468933 + }, + "isolatedSum": { + "p50": 242.20799654722214, + "p90": 284.86399352550507, + "p95": 300.7040023803711, + "p99": 350.68801045417786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7c993840", + "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "1c929d1cf59e66d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:33.506221+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": 
"bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.11200201511383, + "p90": 116.12799763679504, + "p95": 122.56000190973282, + "p99": 134.62400436401367 + }, + "combine": { + "p50": 81.40800148248672, + "p90": 87.10400015115738, + "p95": 90.40000289678574, + "p99": 105.47199845314026 + }, + "roundtrip": { + "p50": 154.23999726772308, + "p90": 169.53599452972412, + "p95": 174.97600615024567, + "p99": 186.8479996919632 + }, + "isolatedSum": { + "p50": 179.52000349760056, + "p90": 203.23199778795242, + "p95": 212.96000480651855, + "p99": 240.09600281715393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 99.20000284910202, + "p90": 114.30399864912033, + "p95": 
122.43200093507767, + "p99": 134.783998131752 + }, + "combine": { + "p50": 80.03199845552444, + "p90": 85.88799834251404, + "p95": 89.34400230646133, + "p99": 96.96000069379807 + }, + "roundtrip": { + "p50": 154.88000214099884, + "p90": 171.1360067129135, + "p95": 176.1920005083084, + "p99": 187.51999735832214 + }, + "isolatedSum": { + "p50": 179.23200130462646, + "p90": 200.19199699163437, + "p95": 211.776003241539, + "p99": 231.74399882555008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 101.18400305509567, + "p90": 118.59200149774551, + "p95": 126.39999389648438, + "p99": 159.9999964237213 + }, + "combine": { + "p50": 83.42400193214417, + "p90": 87.13600039482117, + "p95": 89.53599631786346, + "p99": 99.32799637317657 + }, + "roundtrip": { + "p50": 157.0879966020584, + "p90": 170.9440052509308, + "p95": 179.58399653434753, + "p99": 190.08000195026398 + }, + "isolatedSum": { + "p50": 184.60800498723984, + "p90": 205.72800189256668, + "p95": 215.93599021434784, + "p99": 259.3279927968979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 102.65599936246872, + "p90": 119.1679984331131, + "p95": 123.48800152540207, + "p99": 143.90400052070618 + }, + "combine": { + "p50": 84.03199911117554, + "p90": 90.91199934482574, + "p95": 96.99200093746185, + "p99": 107.45599865913391 + }, + "roundtrip": { + "p50": 159.0079963207245, + "p90": 175.74399709701538, + "p95": 182.68799781799316, + "p99": 197.63199985027313 + }, + "isolatedSum": { + "p50": 186.68799847364426, + 
"p90": 210.07999777793884, + "p95": 220.48000246286392, + "p99": 251.3599991798401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 102.81600058078766, + "p90": 117.98399686813354, + "p95": 123.3920007944107, + "p99": 140.35199582576752 + }, + "combine": { + "p50": 85.1840004324913, + "p90": 90.59199690818787, + "p95": 95.0080007314682, + "p99": 99.58399832248688 + }, + "roundtrip": { + "p50": 160.0639969110489, + "p90": 175.52000284194946, + "p95": 182.5920045375824, + "p99": 194.11200284957886 + }, + "isolatedSum": { + "p50": 188.00000101327896, + "p90": 208.5759937763214, + "p95": 218.4000015258789, + "p99": 239.9359941482544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.87200117111206, + "p90": 118.23999881744385, + "p95": 122.56000190973282, + "p99": 133.18400084972382 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 95.58399766683578, + "p95": 98.08000177145004, + "p99": 108.19199681282043 + }, + "roundtrip": { + "p50": 165.12000560760498, + "p90": 177.69600450992584, + "p95": 183.32800269126892, + "p99": 199.13600385189056 + }, + "isolatedSum": { + "p50": 191.16800278425217, + "p90": 213.82399648427963, + "p95": 220.64000368118286, + "p99": 241.37599766254425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 112.15999722480774, + "p90": 124.51200187206268, + "p95": 129.2479932308197, + "p99": 185.5040043592453 + }, + "combine": { + "p50": 101.27999633550644, + "p90": 109.02400314807892, + "p95": 111.455999314785, + "p99": 117.11999773979187 + }, + "roundtrip": { + "p50": 184.1599941253662, + "p90": 195.26399672031403, + "p95": 200.15999674797058, + "p99": 214.9759978055954 + }, + "isolatedSum": { + "p50": 213.43999356031418, + "p90": 233.5360050201416, + "p95": 240.7039925456047, + "p99": 302.62400209903717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.791996717453, + "p90": 136.09600067138672, + "p95": 139.8719996213913, + "p99": 154.4319987297058 + }, + "combine": { + "p50": 122.75200337171555, + "p90": 129.05600666999817, + "p95": 132.4159950017929, + "p99": 137.7280056476593 + }, + "roundtrip": { + "p50": 217.18400716781616, + "p90": 227.29599475860596, + "p95": 231.55200481414795, + "p99": 245.7599937915802 + }, + "isolatedSum": { + "p50": 248.54400008916855, + "p90": 265.1520073413849, + "p95": 272.2879946231842, + "p99": 292.1600043773651 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7324ba0b", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_74218200", + "comparisonKey": "771769a5e7987ff5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:10.548213+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.05600076913834, + "p90": 109.3439981341362, + "p95": 116.95999652147293, + "p99": 126.11199915409088 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 86.01599931716919, + "p95": 88.57599645853043, + "p99": 
96.73599898815155 + }, + "roundtrip": { + "p50": 152.41600573062897, + "p90": 166.81599617004395, + "p95": 173.15199971199036, + "p99": 186.8479996919632 + }, + "isolatedSum": { + "p50": 174.72000420093536, + "p90": 195.3599974513054, + "p95": 205.53599298000336, + "p99": 222.84799814224243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 94.55999732017517, + "p90": 111.84000223875046, + "p95": 118.81600320339203, + "p99": 131.071999669075 + }, + "combine": { + "p50": 82.78399705886841, + "p90": 87.55200356245041, + "p95": 93.02400052547455, + "p99": 98.4639972448349 + }, + "roundtrip": { + "p50": 154.08000349998474, + "p90": 167.00799763202667, + "p95": 173.24799299240112, + "p99": 185.98400056362152 + }, + "isolatedSum": { + "p50": 177.34399437904358, + "p90": 199.39200580120087, + "p95": 211.84000372886658, + "p99": 229.5359969139099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.07200121879578, + "p90": 110.72000116109848, + "p95": 115.29599875211716, + "p99": 127.45599448680878 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 90.17600119113922, + "p95": 93.85599941015244, + "p99": 110.33599823713303 + }, + "roundtrip": { + "p50": 156.80000185966492, + "p90": 172.06400632858276, + "p95": 177.15199291706085, + "p99": 183.77600610256195 + }, + "isolatedSum": { + "p50": 179.55200374126434, + "p90": 200.8960023522377, + "p95": 209.1519981622696, + "p99": 237.7919927239418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + 
"combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.57599776983261, + "p90": 111.90400272607803, + "p95": 116.92799627780914, + "p99": 125.88800489902496 + }, + "combine": { + "p50": 84.57600325345993, + "p90": 92.00000017881393, + "p95": 95.07200121879578, + "p99": 108.44799876213074 + }, + "roundtrip": { + "p50": 158.9439958333969, + "p90": 173.7920045852661, + "p95": 179.1040003299713, + "p99": 189.40800428390503 + }, + "isolatedSum": { + "p50": 181.15200102329254, + "p90": 203.90400290489197, + "p95": 211.99999749660492, + "p99": 234.3360036611557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.15200215578079, + "p90": 113.72800171375275, + "p95": 119.00799721479416, + "p99": 139.96799290180206 + }, + "combine": { + "p50": 85.7279971241951, + "p90": 93.21600198745728, + "p95": 95.87199985980988, + "p99": 100.35199671983719 + }, + "roundtrip": { + "p50": 160.70400178432465, + "p90": 173.6000031232834, + "p95": 178.9119988679886, + "p99": 188.6720061302185 + }, + "isolatedSum": { + "p50": 182.8799992799759, + "p90": 206.94400370121002, + "p95": 214.87999707460403, + "p99": 240.31998962163925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.80000013113022, + "p90": 114.97599631547928, + "p95": 120.25599926710129, + "p99": 132.7359974384308 + }, + "combine": { 
+ "p50": 93.37600320577621, + "p90": 97.79199957847595, + "p95": 100.25600343942642, + "p99": 108.2879975438118 + }, + "roundtrip": { + "p50": 165.12000560760498, + "p90": 178.1120002269745, + "p95": 183.16799402236938, + "p99": 197.24799692630768 + }, + "isolatedSum": { + "p50": 194.17600333690643, + "p90": 212.76799589395523, + "p95": 220.5120027065277, + "p99": 241.02399498224258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.92800307273865, + "p90": 123.45600128173828, + "p95": 128.1919926404953, + "p99": 138.047993183136 + }, + "combine": { + "p50": 107.04000294208527, + "p90": 111.42399907112122, + "p95": 116.80000275373459, + "p99": 124.54400211572647 + }, + "roundtrip": { + "p50": 189.4720047712326, + "p90": 200.70399343967438, + "p95": 204.57600057125092, + "p99": 219.52000260353088 + }, + "isolatedSum": { + "p50": 219.9680060148239, + "p90": 234.8800003528595, + "p95": 244.9919953942299, + "p99": 262.59199529886246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.43200159072876, + "p90": 137.79200613498688, + "p95": 142.14399456977844, + "p99": 162.62400150299072 + }, + "combine": { + "p50": 123.99999797344208, + "p90": 132.54399597644806, + "p95": 135.23200154304504, + "p99": 142.81600713729858 + }, + "roundtrip": { + "p50": 224.86400604248047, + "p90": 233.5679978132248, + "p95": 238.91200125217438, + "p99": 250.65600872039795 + }, + "isolatedSum": { + "p50": 250.43199956417084, + "p90": 270.33600211143494, + "p95": 
277.3759961128235, + "p99": 305.4400086402893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0d0d8f23", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "61b32b843c8fbec1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:15.234732+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 90.84799885749817, + "p90": 112.76800185441971, + "p95": 124.1919994354248, + "p99": 185.02399325370789 + }, + "combine": { + "p50": 80.92799782752991, + "p90": 86.30400151014328, + "p95": 90.33600240945816, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 147.2640037536621, + "p90": 164.22399878501892, + "p95": 171.58399522304535, + "p99": 237.18400299549103 + }, + "isolatedSum": { + "p50": 171.77599668502808, + "p90": 199.072003364563, + "p95": 214.52800184488297, + "p99": 283.26399624347687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 90.84799885749817, + "p90": 104.92800176143646, + "p95": 112.06399649381638, + "p99": 122.91199713945389 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 87.71199733018875, + "p95": 92.44800359010696, + "p99": 100.35199671983719 + }, + "roundtrip": { + "p50": 148.51200580596924, + "p90": 160.67199409008026, + "p95": 165.8560037612915, + "p99": 183.3920031785965 + }, + "isolatedSum": { + "p50": 171.87199741601944, + "p90": 192.6399990916252, + "p95": 204.51200008392334, + "p99": 223.26399385929108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 93.47199648618698, + "p90": 112.5440001487732, + "p95": 121.37600034475327, + "p99": 291.3919985294342 + }, + "combine": { + "p50": 82.8159973025322, + "p90": 89.53599631786346, + "p95": 93.08800101280212, + "p99": 105.66399991512299 + }, + "roundtrip": { + "p50": 151.87199413776398, + "p90": 167.32800006866455, + "p95": 172.92800545692444, + "p99": 185.31200289726257 + }, + "isolatedSum": { + "p50": 176.28799378871918, + "p90": 202.07999646663666, + "p95": 214.4640013575554, + "p99": 397.0559984445572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 92.92799979448318, + "p90": 107.4879989027977, + "p95": 116.12799763679504, + "p99": 130.3039938211441 + }, + "combine": { + "p50": 83.26400071382523, + "p90": 90.52799642086029, + "p95": 92.99200028181076, + "p99": 99.90400075912476 + }, + "roundtrip": { + "p50": 153.6639928817749, + "p90": 168.70400309562683, + "p95": 176.35199427604675, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 176.1920005083084, + "p90": 198.015995323658, + "p95": 209.1199979186058, + "p99": 230.20799458026886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.42399644851685, + "p90": 111.10399663448334, + "p95": 117.40799993276596, + "p99": 160.38399934768677 + }, + "combine": { + "p50": 84.89599823951721, + "p90": 92.32000261545181, + "p95": 95.13600170612335, + "p99": 104.60799932479858 + }, + "roundtrip": { + "p50": 156.2879979610443, + "p90": 169.66399550437927, + 
"p95": 177.15199291706085, + "p99": 227.07200050354004 + }, + "isolatedSum": { + "p50": 180.31999468803406, + "p90": 203.42399924993515, + "p95": 212.5440016388893, + "p99": 264.99199867248535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.88800030946732, + "p90": 109.79200154542923, + "p95": 114.94400352239609, + "p99": 127.68000364303589 + }, + "combine": { + "p50": 92.44800359010696, + "p90": 98.39999675750732, + "p95": 102.7199998497963, + "p99": 109.31199789047241 + }, + "roundtrip": { + "p50": 161.3759994506836, + "p90": 175.87199807167053, + "p95": 182.559996843338, + "p99": 193.9840018749237 + }, + "isolatedSum": { + "p50": 190.33600389957428, + "p90": 208.19199830293655, + "p95": 217.66400337219238, + "p99": 236.9920015335083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.04800349473953, + "p90": 123.10399860143661, + "p95": 127.51999497413635, + "p99": 149.9200016260147 + }, + "combine": { + "p50": 105.47199845314026, + "p90": 111.90400272607803, + "p95": 116.7680025100708, + "p99": 154.88000214099884 + }, + "roundtrip": { + "p50": 186.14399433135986, + "p90": 198.88000190258026, + "p95": 202.91200280189514, + "p99": 248.1600046157837 + }, + "isolatedSum": { + "p50": 215.5200019478798, + "p90": 235.00800132751465, + "p95": 244.28799748420715, + "p99": 304.80000376701355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + 
"stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.32800030708313, + "p90": 133.82400572299957, + "p95": 137.63199746608734, + "p99": 154.52800691127777 + }, + "combine": { + "p50": 124.35200065374374, + "p90": 133.5040032863617, + "p95": 137.66400516033173, + "p99": 155.35999834537506 + }, + "roundtrip": { + "p50": 221.66399657726288, + "p90": 232.4800044298172, + "p95": 236.51200532913208, + "p99": 253.24800610542297 + }, + "isolatedSum": { + "p50": 247.68000096082687, + "p90": 267.32800900936127, + "p95": 275.29600262641907, + "p99": 309.88800525665283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cb8753e8", + "identity": "gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "e0f3959bcbc3fc9a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:35.811751+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.83199971914291, + "p90": 111.455999314785, + "p95": 119.52000111341476, + "p99": 140.9599930047989 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 86.71999722719193, + "p95": 93.47199648618698, + "p99": 120.31999975442886 + }, + "roundtrip": { + "p50": 151.93599462509155, + "p90": 167.39200055599213, + "p95": 177.0240068435669, + "p99": 209.1200053691864 + }, + "isolatedSum": { + "p50": 177.7919977903366, + "p90": 198.17599654197693, + "p95": 212.99199759960175, + "p99": 261.27999275922775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.44799679517746, + "p90": 114.27199840545654, + "p95": 120.7680031657219, + "p99": 155.5200070142746 + }, + "combine": { + "p50": 
83.16799998283386, + "p90": 88.32000195980072, + "p95": 92.06400066614151, + "p99": 110.75200140476227 + }, + "roundtrip": { + "p50": 154.40000593662262, + "p90": 169.5999950170517, + "p95": 173.567995429039, + "p99": 192.22399592399597 + }, + "isolatedSum": { + "p50": 179.61599677801132, + "p90": 202.59200036525726, + "p95": 212.8320038318634, + "p99": 266.27200841903687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.4639972448349, + "p90": 118.1119978427887, + "p95": 126.65599584579468, + "p99": 163.93600404262543 + }, + "combine": { + "p50": 83.26400071382523, + "p90": 89.28000181913376, + "p95": 94.01600062847137, + "p99": 112.22399771213531 + }, + "roundtrip": { + "p50": 155.2319973707199, + "p90": 172.8000044822693, + "p95": 181.21600151062012, + "p99": 219.4879949092865 + }, + "isolatedSum": { + "p50": 181.72799795866013, + "p90": 207.39199966192245, + "p95": 220.67199647426605, + "p99": 276.16000175476074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.52799773216248, + "p90": 114.68800157308578, + "p95": 119.48800086975098, + "p99": 133.44000279903412 + }, + "combine": { + "p50": 84.79999750852585, + "p90": 91.51999652385712, + "p95": 95.71199864149094, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 157.72800147533417, + "p90": 172.0000058412552, + "p95": 177.88800597190857, + "p99": 191.64800643920898 + }, + "isolatedSum": { + "p50": 183.32799524068832, + "p90": 206.2079980969429, + "p95": 215.1999995112419, + "p99": 
237.92000114917755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.04800152778625, + "p90": 114.27199840545654, + "p95": 119.03999745845795, + "p99": 127.9039978981018 + }, + "combine": { + "p50": 85.50400286912918, + "p90": 93.44000369310379, + "p95": 96.19200229644775, + "p99": 99.74399954080582 + }, + "roundtrip": { + "p50": 159.67999398708344, + "p90": 173.2800006866455, + "p95": 177.95200645923615, + "p99": 185.92000007629395 + }, + "isolatedSum": { + "p50": 183.55200439691544, + "p90": 207.71200209856033, + "p95": 215.2319997549057, + "p99": 227.64799743890762 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.82400047779083, + "p90": 116.54400080442429, + "p95": 121.21599912643433, + "p99": 149.75999295711517 + }, + "combine": { + "p50": 92.79999881982803, + "p90": 97.6639986038208, + "p95": 99.7759997844696, + "p99": 107.19999670982361 + }, + "roundtrip": { + "p50": 165.0560051202774, + "p90": 178.75200510025024, + "p95": 184.38400328159332, + "p99": 226.04799270629883 + }, + "isolatedSum": { + "p50": 194.62399929761887, + "p90": 214.2079994082451, + "p95": 220.99199891090393, + "p99": 256.9599896669388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.76800185441971, + 
"p90": 123.74400347471237, + "p95": 127.26399302482605, + "p99": 138.7840062379837 + }, + "combine": { + "p50": 107.68000036478043, + "p90": 112.57600039243698, + "p95": 118.17599833011627, + "p99": 123.48800152540207 + }, + "roundtrip": { + "p50": 190.8160001039505, + "p90": 200.70399343967438, + "p95": 207.20000565052032, + "p99": 224.03199970722198 + }, + "isolatedSum": { + "p50": 220.44800221920013, + "p90": 236.32000386714935, + "p95": 245.43999135494232, + "p99": 262.2720077633858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.23200023174286, + "p90": 139.13600146770477, + "p95": 144.28800344467163, + "p99": 154.33600544929504 + }, + "combine": { + "p50": 124.64000284671783, + "p90": 132.47999548912048, + "p95": 135.48800349235535, + "p99": 142.71999895572662 + }, + "roundtrip": { + "p50": 225.40800273418427, + "p90": 237.56800591945648, + "p95": 241.63199961185455, + "p99": 255.90398907661438 + }, + "isolatedSum": { + "p50": 251.8720030784607, + "p90": 271.61599695682526, + "p95": 279.776006937027, + "p99": 297.05600440502167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f069cd1a", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||f1c99f5cf8ca9ed", + "colorKey": "gb300_d4c8afb8", + "comparisonKey": "947bc78137c317bf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:50.543471+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": 
"gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f1c99f5cf8ca9ed", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.31200337409973, + "p90": 112.70400136709213, + "p95": 118.49600076675415, + "p99": 132.64000415802002 + }, + "combine": { + "p50": 84.1279998421669, + "p90": 88.41600269079208, + "p95": 92.47999638319016, + "p99": 100.832000374794 + }, + "roundtrip": { + "p50": 
156.12800419330597, + "p90": 170.20800709724426, + "p95": 177.63200402259827, + "p99": 204.73599433898926 + }, + "isolatedSum": { + "p50": 181.44000321626663, + "p90": 201.12000405788422, + "p95": 210.9759971499443, + "p99": 233.47200453281403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.04800152778625, + "p90": 112.09599673748016, + "p95": 116.89600348472595, + "p99": 127.45599448680878 + }, + "combine": { + "p50": 85.11999994516373, + "p90": 91.39200299978256, + "p95": 95.58399766683578, + "p99": 111.7120012640953 + }, + "roundtrip": { + "p50": 159.32799875736237, + "p90": 171.55200242996216, + "p95": 177.85599827766418, + "p99": 192.47999787330627 + }, + "isolatedSum": { + "p50": 183.16800147294998, + "p90": 203.48799973726273, + "p95": 212.48000115156174, + "p99": 239.16799575090408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 102.78400033712387, + "p90": 139.93600010871887, + "p95": 148.19200336933136, + "p99": 157.69599378108978 + }, + "combine": { + "p50": 87.13600039482117, + "p90": 116.12799763679504, + "p95": 130.20800054073334, + "p99": 154.04799580574036 + }, + "roundtrip": { + "p50": 163.4880006313324, + "p90": 189.69599902629852, + "p95": 197.05599546432495, + "p99": 228.19200158119202 + }, + "isolatedSum": { + "p50": 189.92000073194504, + "p90": 256.0639977455139, + "p95": 278.4000039100647, + "p99": 311.74398958683014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + 
"recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.0080013871193, + "p90": 113.3119985461235, + "p95": 118.84800344705582, + "p99": 130.52800297737122 + }, + "combine": { + "p50": 87.07199990749359, + "p90": 96.16000205278397, + "p95": 98.33600372076035, + "p99": 108.96000266075134 + }, + "roundtrip": { + "p50": 161.8880033493042, + "p90": 175.07199943065643, + "p95": 180.89599907398224, + "p99": 207.87200331687927 + }, + "isolatedSum": { + "p50": 186.08000129461288, + "p90": 209.47200059890747, + "p95": 217.18400716781616, + "p99": 239.48800563812256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 101.18400305509567, + "p90": 116.31999909877777, + "p95": 120.38400024175644, + "p99": 133.69600474834442 + }, + "combine": { + "p50": 89.50400352478027, + "p90": 97.21600264310837, + "p95": 99.7759997844696, + "p99": 115.03999680280685 + }, + "roundtrip": { + "p50": 164.35199975967407, + "p90": 176.35199427604675, + "p95": 180.67200481891632, + "p99": 194.39999759197235 + }, + "isolatedSum": { + "p50": 190.68800657987595, + "p90": 213.53600174188614, + "p95": 220.16000002622604, + "p99": 248.73600155115128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 105.59999942779541, + "p90": 116.64000153541565, + "p95": 122.17599898576736, + "p99": 134.71999764442444 + }, + "combine": { + "p50": 95.67999839782715, + "p90": 99.67999905347824, + 
"p95": 102.88000106811523, + "p99": 115.10399729013443 + }, + "roundtrip": { + "p50": 172.06400632858276, + "p90": 181.85600638389587, + "p95": 187.9040002822876, + "p99": 198.55999946594238 + }, + "isolatedSum": { + "p50": 201.27999782562256, + "p90": 216.3200005888939, + "p95": 225.0560000538826, + "p99": 249.82399493455887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 121.31199985742569, + "p90": 146.7839926481247, + "p95": 163.55200111865997, + "p99": 664.8960113525391 + }, + "combine": { + "p50": 112.03200370073318, + "p90": 148.6400067806244, + "p95": 158.27199816703796, + "p99": 897.5039720535278 + }, + "roundtrip": { + "p50": 203.36000621318817, + "p90": 234.17599499225616, + "p95": 248.416006565094, + "p99": 326.1120021343231 + }, + "isolatedSum": { + "p50": 233.34400355815887, + "p90": 295.4239994287491, + "p95": 321.82399928569794, + "p99": 1562.399983406067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 143.64799857139587, + "p90": 176.12800002098083, + "p95": 183.87199938297272, + "p99": 195.48800587654114 + }, + "combine": { + "p50": 146.33600413799286, + "p90": 155.008003115654, + "p95": 160.73599457740784, + "p99": 183.61599743366241 + }, + "roundtrip": { + "p50": 258.5279941558838, + "p90": 268.73600482940674, + "p95": 274.4959890842438, + "p99": 282.6560139656067 + }, + "isolatedSum": { + "p50": 289.98400270938873, + "p90": 331.1360031366348, + "p95": 344.60799396038055, + "p99": 379.10400331020355 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8068f2a4", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70", + "colorKey": "gb300_f163949b", + "comparisonKey": "13efb5d3604f8176", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:18.225965+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0bc700e9998f70", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.43199700117111, + "p90": 115.55200070142746, + "p95": 122.65600264072418, + "p99": 134.5600038766861 + }, + "combine": { + "p50": 72.12799787521362, + "p90": 79.23199981451035, + "p95": 82.49600231647491, + "p99": 87.20000088214874 + }, + "roundtrip": { + "p50": 144.22400295734406, + "p90": 157.3439985513687, + "p95": 162.59199380874634, + "p99": 174.46400225162506 + }, + "isolatedSum": { + "p50": 170.55999487638474, + "p90": 194.7840005159378, + "p95": 205.1520049571991, + "p99": 221.76000475883484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 102.94400155544281, + "p90": 118.367999792099, + "p95": 122.81599640846252, + "p99": 138.46400380134583 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 82.8159973025322, + "p95": 85.40800213813782, + "p99": 94.81599926948547 + }, + "roundtrip": { + "p50": 151.61600708961487, + "p90": 164.22399878501892, + "p95": 169.3120002746582, + "p99": 179.48800325393677 + }, + "isolatedSum": { + "p50": 176.92799866199493, + "p90": 201.1839970946312, + "p95": 208.22399854660034, + "p99": 233.2800030708313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + 
"globalTokens": 256, + "dispatch": { + "p50": 103.87200117111206, + "p90": 118.81600320339203, + "p95": 122.65600264072418, + "p99": 132.32000172138214 + }, + "combine": { + "p50": 83.10399949550629, + "p90": 88.3840024471283, + "p95": 92.6079973578453, + "p99": 102.27199643850327 + }, + "roundtrip": { + "p50": 157.50400722026825, + "p90": 171.10399901866913, + "p95": 176.09600722789764, + "p99": 183.61599743366241 + }, + "isolatedSum": { + "p50": 186.97600066661835, + "p90": 207.20000565052032, + "p95": 215.2639999985695, + "p99": 234.5919981598854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.13600367307663, + "p90": 118.40000003576279, + "p95": 125.34399330615997, + "p99": 144.76799964904785 + }, + "combine": { + "p50": 85.02399921417236, + "p90": 91.58399701118469, + "p95": 95.0080007314682, + "p99": 101.56799852848053 + }, + "roundtrip": { + "p50": 162.36799955368042, + "p90": 172.35200107097626, + "p95": 175.9680062532425, + "p99": 187.6160055398941 + }, + "isolatedSum": { + "p50": 192.160002887249, + "p90": 209.98399704694748, + "p95": 220.35199403762817, + "p99": 246.33599817752838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e180de44", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f", + "colorKey": "gb300_c93e2296", + "comparisonKey": "657a9fa446798c99", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:20.865248+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": 
"gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0456df9778e5c0f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.87199985980988, + "p90": 117.69600212574005, + "p95": 133.2480013370514, + "p99": 464.2240107059479 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 81.66400343179703, + "p95": 86.30400151014328, + "p99": 145.6640064716339 + }, + 
"roundtrip": { + "p50": 147.61599898338318, + "p90": 166.49599373340607, + "p95": 182.8799992799759, + "p99": 5796.576023101807 + }, + "isolatedSum": { + "p50": 168.7680035829544, + "p90": 199.36000555753708, + "p95": 219.55200284719467, + "p99": 609.8880171775818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.22400254011154, + "p90": 113.66400122642517, + "p95": 120.80000340938568, + "p99": 203.0400037765503 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 82.04799890518188, + "p95": 84.89599823951721, + "p99": 97.50399738550186 + }, + "roundtrip": { + "p50": 148.83199334144592, + "p90": 160.0639969110489, + "p95": 167.13599860668182, + "p99": 761.3760232925415 + }, + "isolatedSum": { + "p50": 169.72800344228745, + "p90": 195.71200013160706, + "p95": 205.6960016489029, + "p99": 300.54400116205215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.68799895048141, + "p90": 119.23199892044067, + "p95": 127.93600559234619, + "p99": 176.2239933013916 + }, + "combine": { + "p50": 74.07999783754349, + "p90": 82.30400085449219, + "p95": 85.66399663686752, + "p99": 123.1359988451004 + }, + "roundtrip": { + "p50": 148.51200580596924, + "p90": 164.15999829769135, + "p95": 173.95199835300446, + "p99": 221.53599560260773 + }, + "isolatedSum": { + "p50": 172.7679967880249, + "p90": 201.53599977493286, + "p95": 213.60000222921371, + "p99": 299.359992146492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + 
"recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.79199957847595, + "p90": 116.22399836778641, + "p95": 128.28800082206726, + "p99": 210.65600216388702 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 85.4720026254654, + "p95": 90.27200192213058, + "p99": 118.9119964838028 + }, + "roundtrip": { + "p50": 149.05600249767303, + "p90": 164.15999829769135, + "p95": 170.20800709724426, + "p99": 195.23200392723083 + }, + "isolatedSum": { + "p50": 174.40000176429749, + "p90": 201.6960009932518, + "p95": 218.56000274419785, + "p99": 329.5679986476898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.95200079679489, + "p90": 116.92799627780914, + "p95": 123.83999675512314, + "p99": 187.1359944343567 + }, + "combine": { + "p50": 77.66400277614594, + "p90": 84.41600203514099, + "p95": 88.51200342178345, + "p99": 106.6880002617836 + }, + "roundtrip": { + "p50": 150.91200172901154, + "p90": 169.8240041732788, + "p95": 186.65599822998047, + "p99": 255.0080120563507 + }, + "isolatedSum": { + "p50": 175.61600357294083, + "p90": 201.34399831295013, + "p95": 212.35200017690659, + "p99": 293.8239946961403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.59999811649323, + "p90": 115.29599875211716, + "p95": 123.45600128173828, + "p99": 173.47200214862823 + }, + "combine": { + "p50": 81.79199695587158, + "p90": 86.11200004816055, + "p95": 
88.83199840784073, + "p99": 127.74400413036346 + }, + "roundtrip": { + "p50": 152.319997549057, + "p90": 167.64800250530243, + "p95": 175.32800137996674, + "p99": 238.24000358581543 + }, + "isolatedSum": { + "p50": 179.3919950723648, + "p90": 201.4079988002777, + "p95": 212.287999689579, + "p99": 301.2160062789917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.96800124645233, + "p90": 118.17599833011627, + "p95": 128.7039965391159, + "p99": 165.27999937534332 + }, + "combine": { + "p50": 83.3280012011528, + "p90": 88.3840024471283, + "p95": 94.87999975681305, + "p99": 136.09600067138672 + }, + "roundtrip": { + "p50": 156.5759927034378, + "p90": 173.50399494171143, + "p95": 183.00800025463104, + "p99": 236.83199286460876 + }, + "isolatedSum": { + "p50": 183.29600244760513, + "p90": 206.56000077724457, + "p95": 223.58399629592896, + "p99": 301.37600004673004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.49599879980087, + "p90": 120.51200121641159, + "p95": 128.60800325870514, + "p99": 207.23199844360352 + }, + "combine": { + "p50": 97.56799787282944, + "p90": 105.31199723482132, + "p95": 108.76800119876862, + "p99": 131.8719983100891 + }, + "roundtrip": { + "p50": 177.95200645923615, + "p90": 188.63999843597412, + "p95": 193.92000138759613, + "p99": 244.51200664043427 + }, + "isolatedSum": { + "p50": 204.0639966726303, + "p90": 225.8239984512329, + "p95": 237.37600445747375, + "p99": 339.1039967536926 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e28fe10b", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||15404c7c0ec01b5", + "colorKey": "gb300_440d13a2", + "comparisonKey": "aa2d44f964843de7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:23.788375+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "15404c7c0ec01b5", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 90.01599997282028, + "p90": 105.05600273609161, + "p95": 111.68000102043152, + "p99": 124.28800016641617 + }, + "combine": { + "p50": 79.19999957084656, + "p90": 84.57600325345993, + "p95": 86.36800199747086, + "p99": 92.67199784517288 + }, + "roundtrip": { + "p50": 146.2080031633377, + "p90": 160.92799603939056, + "p95": 166.07999801635742, + "p99": 174.84800517559052 + }, + "isolatedSum": { + "p50": 169.21599954366684, + "p90": 189.63200598955154, + "p95": 198.04800301790237, + "p99": 216.95999801158905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 91.48799628019333, + "p90": 106.4319983124733, + "p95": 113.24799805879593, + "p99": 127.6479959487915 + }, + "combine": { + "p50": 81.44000172615051, + "p90": 85.91999858617783, + "p95": 90.2400016784668, + "p99": 96.57599776983261 + }, + "roundtrip": { + "p50": 148.99200201034546, + "p90": 162.1759980916977, + "p95": 168.7680035829544, + "p99": 178.43200266361237 + }, + "isolatedSum": { + "p50": 172.92799800634384, + "p90": 192.35199689865112, + "p95": 203.48799973726273, + "p99": 224.22399371862411 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 91.16800129413605, + "p90": 108.22399705648422, + "p95": 112.67200112342834, + "p99": 120.70400267839432 + }, + "combine": { + "p50": 82.33600109815598, + "p90": 87.3280018568039, + "p95": 93.47199648618698, + "p99": 107.19999670982361 + }, + "roundtrip": { + "p50": 149.85600113868713, + "p90": 163.00800442695618, + "p95": 171.2000072002411, + "p99": 181.37599527835846 + }, + "isolatedSum": { + "p50": 173.50400239229202, + "p90": 195.55199891328812, + "p95": 206.14399760961533, + "p99": 227.90399938821793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 93.98400038480759, + "p90": 107.32799768447876, + "p95": 113.11999708414078, + "p99": 123.64800274372101 + }, + "combine": { + "p50": 83.36000144481659, + "p90": 87.71199733018875, + "p95": 92.76799857616425, + "p99": 105.31199723482132 + }, + "roundtrip": { + "p50": 153.34400534629822, + "p90": 165.95199704170227, + "p95": 172.12800681591034, + "p99": 181.98400735855103 + }, + "isolatedSum": { + "p50": 177.34400182962418, + "p90": 195.0399950146675, + "p95": 205.88799566030502, + "p99": 228.95999997854233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 94.84799951314926, + "p90": 108.38399827480316, + "p95": 112.57600039243698, + "p99": 124.7360035777092 + }, + "combine": { + "p50": 84.54400300979614, + "p90": 90.30400216579437, + "p95": 95.90400010347366, + "p99": 104.19200360774994 + }, + "roundtrip": { + "p50": 155.45600652694702, + "p90": 
169.3439930677414, + "p95": 177.66399681568146, + "p99": 189.40800428390503 + }, + "isolatedSum": { + "p50": 179.3920025229454, + "p90": 198.68800044059753, + "p95": 208.48000049591064, + "p99": 228.92800718545914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.88000041246414, + "p90": 110.1439967751503, + "p95": 116.89600348472595, + "p99": 131.20000064373016 + }, + "combine": { + "p50": 90.62399715185165, + "p90": 96.54399752616882, + "p95": 100.12800246477127, + "p99": 107.39199817180634 + }, + "roundtrip": { + "p50": 161.8880033493042, + "p90": 172.41600155830383, + "p95": 179.80800569057465, + "p99": 197.40800559520721 + }, + "isolatedSum": { + "p50": 189.5039975643158, + "p90": 206.68799430131912, + "p95": 217.02400594949722, + "p99": 238.5919988155365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 111.07199639081955, + "p90": 120.80000340938568, + "p95": 126.36800110340118, + "p99": 148.99200201034546 + }, + "combine": { + "p50": 106.46399855613708, + "p90": 110.43199896812439, + "p95": 113.47199976444244, + "p99": 121.15199863910675 + }, + "roundtrip": { + "p50": 189.37599658966064, + "p90": 198.59200716018677, + "p95": 204.28800582885742, + "p99": 228.2239943742752 + }, + "isolatedSum": { + "p50": 217.53599494695663, + "p90": 231.23200237751007, + "p95": 239.84000086784363, + "p99": 270.1440006494522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + 
"recvTokensMax": 512, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.89599800109863, + "p90": 139.90400731563568, + "p95": 145.4080045223236, + "p99": 157.6640009880066 + }, + "combine": { + "p50": 135.83999872207642, + "p90": 144.3520039319992, + "p95": 145.9839940071106, + "p99": 156.25600516796112 + }, + "roundtrip": { + "p50": 242.46400594711304, + "p90": 251.80798768997192, + "p95": 255.67999482154846, + "p99": 266.7520046234131 + }, + "isolatedSum": { + "p50": 264.73599672317505, + "p90": 284.2560112476349, + "p95": 291.3919985294342, + "p99": 313.9200061559677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-419170bd", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c", + "colorKey": "gb300_87f4d4ec", + "comparisonKey": "fd229a6aff63668c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:34.903102+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + 
"dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5793a02d08aaa9c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.0640013217926, + "p90": 111.84000223875046, + "p95": 117.88800358772278, + "p99": 132.09599256515503 + }, + "combine": { + "p50": 81.40800148248672, + "p90": 87.10400015115738, + "p95": 90.30400216579437, + "p99": 99.87200051546097 + }, + "roundtrip": { + "p50": 152.63999998569489, + "p90": 165.02399742603302, + "p95": 169.50400173664093, + "p99": 178.68800461292267 + }, + "isolatedSum": { + "p50": 177.47200280427933, + "p90": 198.94400238990784, + "p95": 208.19200575351715, + "p99": 231.967993080616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.00000083446503, + "p90": 
112.19199746847153, + "p95": 117.79200285673141, + "p99": 132.09599256515503 + }, + "combine": { + "p50": 82.62400329113007, + "p90": 87.07199990749359, + "p95": 93.75999867916107, + "p99": 98.14400225877762 + }, + "roundtrip": { + "p50": 153.3759981393814, + "p90": 166.07999801635742, + "p95": 171.07200622558594, + "p99": 187.74400651454926 + }, + "isolatedSum": { + "p50": 178.6240041255951, + "p90": 199.26399737596512, + "p95": 211.5520015358925, + "p99": 230.23999482393265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.55999797582626, + "p90": 114.59200084209442, + "p95": 120.35199999809265, + "p99": 141.85599982738495 + }, + "combine": { + "p50": 84.32000130414963, + "p90": 88.44800293445587, + "p95": 95.39200365543365, + "p99": 102.49599814414978 + }, + "roundtrip": { + "p50": 158.4320068359375, + "p90": 169.88800466060638, + "p95": 175.48799514770508, + "p99": 197.24799692630768 + }, + "isolatedSum": { + "p50": 182.8799992799759, + "p90": 203.0400037765503, + "p95": 215.7440036535263, + "p99": 244.35199797153473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.24000298976898, + "p90": 113.56800049543381, + "p95": 119.9679970741272, + "p99": 141.88799262046814 + }, + "combine": { + "p50": 84.76799726486206, + "p90": 89.75999802350998, + "p95": 94.01600062847137, + "p99": 99.96800124645233 + }, + "roundtrip": { + "p50": 159.45599973201752, + "p90": 170.9119975566864, + "p95": 176.38400197029114, + "p99": 190.3039962053299 + }, + "isolatedSum": { + 
"p50": 183.00800025463104, + "p90": 203.3279985189438, + "p95": 213.98399770259857, + "p99": 241.85599386692047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.9760011434555, + "p90": 112.70400136709213, + "p95": 117.69600212574005, + "p99": 127.36000120639801 + }, + "combine": { + "p50": 85.75999736785889, + "p90": 94.14400160312653, + "p95": 96.25600278377533, + "p99": 99.7759997844696 + }, + "roundtrip": { + "p50": 161.21600568294525, + "p90": 173.72800409793854, + "p95": 176.92799866199493, + "p99": 188.4479969739914 + }, + "isolatedSum": { + "p50": 184.7359985113144, + "p90": 206.84800297021866, + "p95": 213.95200490951538, + "p99": 227.13600099086761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.94400155544281, + "p90": 114.30399864912033, + "p95": 118.8800036907196, + "p99": 129.69599664211273 + }, + "combine": { + "p50": 94.01600062847137, + "p90": 98.39999675750732, + "p95": 99.90400075912476, + "p99": 110.20799726247787 + }, + "roundtrip": { + "p50": 166.6560024023056, + "p90": 178.43200266361237, + "p95": 182.91200697422028, + "p99": 189.98399376869202 + }, + "isolatedSum": { + "p50": 196.96000218391418, + "p90": 212.70399540662766, + "p95": 218.78400444984436, + "p99": 239.9039939045906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.18399757146835, + "p90": 124.03199821710587, + "p95": 129.31199371814728, + "p99": 136.1600011587143 + }, + "combine": { + "p50": 107.29599744081497, + "p90": 112.03200370073318, + "p95": 115.99999666213989, + "p99": 123.55200201272964 + }, + "roundtrip": { + "p50": 189.43999707698822, + "p90": 198.55999946594238, + "p95": 201.7280012369156, + "p99": 207.8080028295517 + }, + "isolatedSum": { + "p50": 220.47999501228333, + "p90": 236.06400191783905, + "p95": 245.31199038028717, + "p99": 259.71200317144394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.2800009250641, + "p90": 138.46400380134583, + "p95": 141.9840008020401, + "p99": 152.92799472808838 + }, + "combine": { + "p50": 129.40800189971924, + "p90": 135.0719928741455, + "p95": 137.53600418567657, + "p99": 150.7200002670288 + }, + "roundtrip": { + "p50": 229.12000119686127, + "p90": 237.69600689411163, + "p95": 240.7359927892685, + "p99": 249.66399371623993 + }, + "isolatedSum": { + "p50": 258.6880028247833, + "p90": 273.53599667549133, + "p95": 279.5200049877167, + "p99": 303.6479949951172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c886abc0", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0", + "colorKey": "gb300_8b7def4e", + "comparisonKey": "8adbe858ea6e1f63", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:53.671003+00:00", + "status": "valid", 
+ "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a572344820478f0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 104.73600029945374, + "p90": 148.0640023946762, + "p95": 152.44799852371216, + "p99": 169.27999258041382 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 
95.93600034713745, + "p95": 121.50400131940842, + "p99": 143.39199662208557 + }, + "roundtrip": { + "p50": 166.24000668525696, + "p90": 200.19200444221497, + "p95": 209.08799767494202, + "p99": 239.80799317359924 + }, + "isolatedSum": { + "p50": 185.69599837064743, + "p90": 244.00000274181366, + "p95": 273.9519998431206, + "p99": 312.6719892024994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.80800265073776, + "p90": 149.9519944190979, + "p95": 155.29599785804749, + "p99": 168.19199919700623 + }, + "combine": { + "p50": 86.04799956083298, + "p90": 121.37600034475327, + "p95": 135.29600203037262, + "p99": 147.5519984960556 + }, + "roundtrip": { + "p50": 167.10400581359863, + "p90": 194.72000002861023, + "p95": 201.75999402999878, + "p99": 218.46400201320648 + }, + "isolatedSum": { + "p50": 201.85600221157074, + "p90": 271.32799476385117, + "p95": 290.5919998884201, + "p99": 315.74399769306183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 102.88000106811523, + "p90": 127.10399925708771, + "p95": 138.46400380134583, + "p99": 159.87199544906616 + }, + "combine": { + "p50": 85.69599688053131, + "p90": 123.07199835777283, + "p95": 134.65599715709686, + "p99": 153.888002038002 + }, + "roundtrip": { + "p50": 163.64799439907074, + "p90": 196.4160054922104, + "p95": 210.04800498485565, + "p99": 234.17599499225616 + }, + "isolatedSum": { + "p50": 188.57599794864655, + "p90": 250.17599761486053, + "p95": 273.1200009584427, + "p99": 313.7599974870682 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 104.70400005578995, + "p90": 147.35999703407288, + "p95": 153.1520038843155, + "p99": 167.29600727558136 + }, + "combine": { + "p50": 86.46400272846222, + "p90": 123.4240010380745, + "p95": 131.6159963607788, + "p99": 148.03199470043182 + }, + "roundtrip": { + "p50": 161.47199273109436, + "p90": 196.16000354290009, + "p95": 205.34400641918182, + "p99": 222.52799570560455 + }, + "isolatedSum": { + "p50": 191.16800278425217, + "p90": 270.78399807214737, + "p95": 284.7680002450943, + "p99": 315.3280019760132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 101.40799731016159, + "p90": 129.66400384902954, + "p95": 149.85600113868713, + "p99": 167.23200678825378 + }, + "combine": { + "p50": 86.04799956083298, + "p90": 121.5360015630722, + "p95": 137.56799697875977, + "p99": 157.8879952430725 + }, + "roundtrip": { + "p50": 161.98399662971497, + "p90": 192.6400065422058, + "p95": 211.90400421619415, + "p99": 235.74399948120117 + }, + "isolatedSum": { + "p50": 187.45599687099457, + "p90": 251.20000541210175, + "p95": 287.4239981174469, + "p99": 325.1200020313263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.63999956846237, + "p90": 144.96000111103058, + "p95": 
151.8400013446808, + "p99": 167.52000153064728 + }, + "combine": { + "p50": 95.87199985980988, + "p90": 143.19999516010284, + "p95": 151.296004652977, + "p99": 159.10400450229645 + }, + "roundtrip": { + "p50": 173.0560064315796, + "p90": 203.0400037765503, + "p95": 218.55999529361725, + "p99": 240.79999327659607 + }, + "isolatedSum": { + "p50": 200.51199942827225, + "p90": 288.1599962711334, + "p95": 303.1360059976578, + "p99": 326.6240060329437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 119.64800208806992, + "p90": 149.08799529075623, + "p95": 156.47999942302704, + "p99": 167.00799763202667 + }, + "combine": { + "p50": 109.37599837779999, + "p90": 149.59999918937683, + "p95": 157.27999806404114, + "p99": 165.98400473594666 + }, + "roundtrip": { + "p50": 193.12000274658203, + "p90": 225.69599747657776, + "p95": 237.56800591945648, + "p99": 267.07199215888977 + }, + "isolatedSum": { + "p50": 229.0240004658699, + "p90": 298.68799448013306, + "p95": 313.7599974870682, + "p99": 332.9920023679733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.36799430847168, + "p90": 151.71200037002563, + "p95": 162.81600296497345, + "p99": 188.35200369358063 + }, + "combine": { + "p50": 131.58400356769562, + "p90": 155.61600029468536, + "p95": 166.55999422073364, + "p99": 188.4479969739914 + }, + "roundtrip": { + "p50": 227.55199670791626, + "p90": 250.91201066970825, + "p95": 263.5200023651123, + "p99": 283.488005399704 + }, + "isolatedSum": { + 
"p50": 261.9519978761673, + "p90": 307.328000664711, + "p95": 329.3759971857071, + "p99": 376.800000667572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e3eecced", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_b3a88763", + "comparisonKey": "1521f576cce519c9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:39.337805+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.83199971914291, + "p90": 113.72800171375275, + "p95": 118.65600198507309, + "p99": 128.38399410247803 + }, + "combine": { + "p50": 76.22399926185608, + "p90": 84.63999629020691, + "p95": 86.59200370311737, + "p99": 94.43199634552002 + }, + "roundtrip": { + "p50": 151.8400013446808, + "p90": 166.143998503685, + "p95": 169.91999745368958, + "p99": 189.88800048828125 + }, + "isolatedSum": { + "p50": 173.055998980999, + "p90": 198.36799800395966, + "p95": 205.24800568819046, + "p99": 222.81599044799805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.78399968147278, + "p90": 115.32799899578094, + "p95": 119.84000355005264, + "p99": 132.89600610733032 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 85.88799834251404, + "p95": 87.67999708652496, + "p99": 95.71199864149094 + }, + "roundtrip": { + "p50": 153.18399667739868, + "p90": 165.69599509239197, + "p95": 171.9679981470108, + "p99": 183.67999792099 + }, + "isolatedSum": { + "p50": 178.30400168895721, + "p90": 201.21599733829498, + "p95": 207.5200006365776, + "p99": 228.60800474882126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 99.80800002813339, + "p90": 116.31999909877777, + "p95": 122.43200093507767, + "p99": 136.48000359535217 + }, + "combine": { + "p50": 82.87999778985977, + "p90": 89.34400230646133, + "p95": 95.16800194978714, + "p99": 100.35199671983719 + }, + "roundtrip": { + "p50": 153.3759981393814, + "p90": 168.06399822235107, + "p95": 172.95999825000763, + "p99": 187.51999735832214 + }, + "isolatedSum": { + "p50": 182.68799781799316, + "p90": 205.6640014052391, + "p95": 217.6000028848648, + "p99": 236.83200031518936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.12800246477127, + "p90": 114.23999816179276, + "p95": 119.99999731779099, + "p99": 132.89600610733032 + }, + "combine": { + "p50": 84.70399677753448, + "p90": 92.38400310277939, + "p95": 96.28800302743912, + "p99": 110.68800091743469 + }, + "roundtrip": { + "p50": 157.31200575828552, + "p90": 171.1679995059967, + "p95": 176.60799622535706, + "p99": 187.29600310325623 + }, + "isolatedSum": { + "p50": 184.83199924230576, + "p90": 206.62400126457214, + "p95": 216.2880003452301, + "p99": 243.58400702476501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 100.8640006184578, + "p90": 115.13599753379822, + "p95": 119.39200013875961, + "p99": 128.89599800109863 + }, + "combine": { + "p50": 84.3840017914772, + "p90": 88.60799670219421, + "p95": 94.55999732017517, + "p99": 104.032002389431 + }, 
+ "roundtrip": { + "p50": 156.76799416542053, + "p90": 171.90399765968323, + "p95": 177.5359958410263, + "p99": 191.96799397468567 + }, + "isolatedSum": { + "p50": 185.248002409935, + "p90": 203.74399423599243, + "p95": 213.95199745893478, + "p99": 232.92800039052963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.92800110578537, + "p90": 114.14399743080139, + "p95": 118.40000003576279, + "p99": 136.76799833774567 + }, + "combine": { + "p50": 87.20000088214874, + "p90": 95.29600292444229, + "p95": 97.95200079679489, + "p99": 101.53599828481674 + }, + "roundtrip": { + "p50": 163.35999965667725, + "p90": 173.8239973783493, + "p95": 180.31999468803406, + "p99": 191.45600497722626 + }, + "isolatedSum": { + "p50": 188.1280019879341, + "p90": 209.44000035524368, + "p95": 216.35200083255768, + "p99": 238.3039966225624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.47999900579453, + "p90": 121.24799937009811, + "p95": 126.8479973077774, + "p99": 141.76000654697418 + }, + "combine": { + "p50": 101.08800232410431, + "p90": 110.01600325107574, + "p95": 112.64000087976456, + "p99": 125.31200051307678 + }, + "roundtrip": { + "p50": 188.57599794864655, + "p90": 200.095996260643, + "p95": 204.48000729084015, + "p99": 212.79999613761902 + }, + "isolatedSum": { + "p50": 209.56800132989883, + "p90": 231.26400262117386, + "p95": 239.48799818754196, + "p99": 267.07200706005096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + 
"combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.42400169372559, + "p90": 137.5039964914322, + "p95": 141.92000031471252, + "p99": 152.8639942407608 + }, + "combine": { + "p50": 135.23200154304504, + "p90": 144.22400295734406, + "p95": 146.59200608730316, + "p99": 151.8079936504364 + }, + "roundtrip": { + "p50": 239.3919974565506, + "p90": 248.35200607776642, + "p95": 251.0719895362854, + "p99": 258.7200105190277 + }, + "isolatedSum": { + "p50": 262.65600323677063, + "p90": 281.72799944877625, + "p95": 288.5120064020157, + "p99": 304.6719878911972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-04585c92", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fc79fe5fdca4c", + "colorKey": "gb300_961589b9", + "comparisonKey": "484727a851531c1a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:56.380486+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fc79fe5fdca4c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.27200257778168, + "p90": 115.68000167608261, + "p95": 120.4800009727478, + "p99": 134.5919966697693 + }, + "combine": { + "p50": 73.02399724721909, + "p90": 79.8719972372055, + "p95": 83.13599973917007, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 142.39999651908875, + "p90": 156.76799416542053, + "p95": 160.67199409008026, + "p99": 170.1440066099167 + }, + "isolatedSum": { + "p50": 167.29599982500076, + "p90": 195.55199891328812, + "p95": 203.61600071191788, + "p99": 225.50399601459503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.67199850082397, + "p90": 
114.30399864912033, + "p95": 122.14399874210358, + "p99": 141.9840008020401 + }, + "combine": { + "p50": 73.08799773454666, + "p90": 81.08799904584885, + "p95": 84.32000130414963, + "p99": 96.3200032711029 + }, + "roundtrip": { + "p50": 144.41600441932678, + "p90": 159.87199544906616, + "p95": 166.143998503685, + "p99": 173.50399494171143 + }, + "isolatedSum": { + "p50": 169.75999623537064, + "p90": 195.39199769496918, + "p95": 206.4640000462532, + "p99": 238.304004073143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 96.57599776983261, + "p90": 116.19199812412262, + "p95": 124.38400089740753, + "p99": 136.06399297714233 + }, + "combine": { + "p50": 73.53600114583969, + "p90": 81.44000172615051, + "p95": 84.927998483181, + "p99": 93.88799965381622 + }, + "roundtrip": { + "p50": 148.22399616241455, + "p90": 158.91200304031372, + "p95": 164.70399498939514, + "p99": 175.74399709701538 + }, + "isolatedSum": { + "p50": 170.1119989156723, + "p90": 197.63199985027313, + "p95": 209.31199938058853, + "p99": 229.95199263095856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.71199929714203, + "p90": 114.75200206041336, + "p95": 121.5360015630722, + "p99": 130.23999333381653 + }, + "combine": { + "p50": 75.42400062084198, + "p90": 84.16000008583069, + "p95": 85.95199882984161, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 151.16800367832184, + "p90": 164.22399878501892, + "p95": 170.3680008649826, + "p99": 178.14399302005768 + }, + "isolatedSum": { + "p50": 
175.135999917984, + "p90": 198.91200214624405, + "p95": 207.48800039291382, + "p99": 226.20799392461777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 101.59999877214432, + "p90": 118.33599954843521, + "p95": 128.1919926404953, + "p99": 142.17600226402283 + }, + "combine": { + "p50": 80.32000064849854, + "p90": 85.9839990735054, + "p95": 88.48000317811966, + "p99": 97.08800166845322 + }, + "roundtrip": { + "p50": 153.18399667739868, + "p90": 166.52800142765045, + "p95": 172.41600155830383, + "p99": 181.34400248527527 + }, + "isolatedSum": { + "p50": 181.91999942064285, + "p90": 204.3199986219406, + "p95": 216.67199581861496, + "p99": 239.26400393247604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.32799702882767, + "p90": 118.6240017414093, + "p95": 124.95999783277512, + "p99": 142.5279974937439 + }, + "combine": { + "p50": 85.75999736785889, + "p90": 92.73599833250046, + "p95": 94.97600048780441, + "p99": 98.11200201511383 + }, + "roundtrip": { + "p50": 159.39199924468994, + "p90": 173.3119934797287, + "p95": 177.5680035352707, + "p99": 188.1919950246811 + }, + "isolatedSum": { + "p50": 189.08799439668655, + "p90": 211.36000007390976, + "p95": 219.93599832057953, + "p99": 240.63999950885773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 107.71200060844421, + "p90": 121.47200107574463, + "p95": 127.45599448680878, + "p99": 141.24800264835358 + }, + "combine": { + "p50": 98.59199821949005, + "p90": 106.39999806880951, + "p95": 108.89600217342377, + "p99": 115.4559999704361 + }, + "roundtrip": { + "p50": 180.16000092029572, + "p90": 191.00800156593323, + "p95": 195.23200392723083, + "p99": 208.38400721549988 + }, + "isolatedSum": { + "p50": 206.30399882793427, + "p90": 227.87199914455414, + "p95": 236.35199666023254, + "p99": 256.7040026187897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.50400197505951, + "p90": 137.2479945421219, + "p95": 142.62400567531586, + "p99": 148.92800152301788 + }, + "combine": { + "p50": 132.09599256515503, + "p90": 137.34400272369385, + "p95": 141.92000031471252, + "p99": 149.1519957780838 + }, + "roundtrip": { + "p50": 228.06400060653687, + "p90": 239.51999843120575, + "p95": 243.48799884319305, + "p99": 253.28001379966736 + }, + "isolatedSum": { + "p50": 257.59999454021454, + "p90": 274.59199726581573, + "p95": 284.5440059900284, + "p99": 298.0799973011017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-33e9cd0d", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da", + "colorKey": "gb300_db9a43b5", + "comparisonKey": "d24055c7960098e6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:05.616491+00:00", + "status": 
"valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "39778bd75f046da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.88800030946732, + "p90": 114.23999816179276, + "p95": 117.91999638080597, + "p99": 130.52800297737122 + }, + "combine": { + "p50": 
79.52000200748444, + "p90": 85.82399785518646, + "p95": 91.13600105047226, + "p99": 100.28800368309021 + }, + "roundtrip": { + "p50": 151.2639969587326, + "p90": 164.92800414562225, + "p95": 167.55199432373047, + "p99": 183.80799889564514 + }, + "isolatedSum": { + "p50": 177.40800231695175, + "p90": 200.06399601697922, + "p95": 209.05599743127823, + "p99": 230.81600666046143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.24000298976898, + "p90": 114.43199962377548, + "p95": 120.92799693346024, + "p99": 135.45599579811096 + }, + "combine": { + "p50": 82.17599987983704, + "p90": 89.75999802350998, + "p95": 93.75999867916107, + "p99": 99.61599856615067 + }, + "roundtrip": { + "p50": 155.2319973707199, + "p90": 171.23199999332428, + "p95": 175.52000284194946, + "p99": 222.91199862957 + }, + "isolatedSum": { + "p50": 180.41600286960602, + "p90": 204.19199764728546, + "p95": 214.6879956126213, + "p99": 235.07199436426163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 99.16800260543823, + "p90": 114.656001329422, + "p95": 118.49600076675415, + "p99": 129.82399761676788 + }, + "combine": { + "p50": 83.10399949550629, + "p90": 89.21600133180618, + "p95": 92.00000017881393, + "p99": 101.24800354242325 + }, + "roundtrip": { + "p50": 158.33599865436554, + "p90": 172.28800058364868, + "p95": 177.44000256061554, + "p99": 191.96799397468567 + }, + "isolatedSum": { + "p50": 182.27200210094452, + "p90": 203.87200266122818, + "p95": 210.49600094556808, + "p99": 
231.07200115919113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.42399710416794, + "p90": 116.19199812412262, + "p95": 122.46400117874146, + "p99": 140.86399972438812 + }, + "combine": { + "p50": 85.1840004324913, + "p90": 94.04800087213516, + "p95": 96.73599898815155, + "p99": 104.99200224876404 + }, + "roundtrip": { + "p50": 162.36799955368042, + "p90": 173.43999445438385, + "p95": 179.1359931230545, + "p99": 192.54399836063385 + }, + "isolatedSum": { + "p50": 184.60799753665924, + "p90": 210.23999899625778, + "p95": 219.200000166893, + "p99": 245.85600197315216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 100.35199671983719, + "p90": 114.59200084209442, + "p95": 119.35999989509583, + "p99": 136.9280070066452 + }, + "combine": { + "p50": 85.95199882984161, + "p90": 94.91200000047684, + "p95": 96.99200093746185, + "p99": 106.52799904346466 + }, + "roundtrip": { + "p50": 162.88000345230103, + "p90": 174.78400468826294, + "p95": 179.1680008172989, + "p99": 187.19999492168427 + }, + "isolatedSum": { + "p50": 186.3039955496788, + "p90": 209.50400084257126, + "p95": 216.35200083255768, + "p99": 243.45600605010986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.39199751615524, + "p90": 
115.90400338172913, + "p95": 119.39200013875961, + "p99": 132.1280002593994 + }, + "combine": { + "p50": 93.24800223112106, + "p90": 98.36799651384354, + "p95": 101.95200145244598, + "p99": 111.07199639081955 + }, + "roundtrip": { + "p50": 168.19199919700623, + "p90": 178.81600558757782, + "p95": 182.65600502490997, + "p99": 192.32000410556793 + }, + "isolatedSum": { + "p50": 196.6399997472763, + "p90": 214.27199989557266, + "p95": 221.3440015912056, + "p99": 243.19999665021896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.60800063610077, + "p90": 123.45600128173828, + "p95": 127.45599448680878, + "p99": 139.26400244235992 + }, + "combine": { + "p50": 107.32799768447876, + "p90": 113.34399878978729, + "p95": 117.40799993276596, + "p99": 121.56800180673599 + }, + "roundtrip": { + "p50": 189.82400000095367, + "p90": 200.28799772262573, + "p95": 204.8960030078888, + "p99": 224.06400740146637 + }, + "isolatedSum": { + "p50": 219.93599832057953, + "p90": 236.80000007152557, + "p95": 244.86399441957474, + "p99": 260.8320042490959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.9039978981018, + "p90": 138.2720023393631, + "p95": 141.9840008020401, + "p99": 153.9520025253296 + }, + "combine": { + "p50": 125.91999769210815, + "p90": 132.64000415802002, + "p95": 134.43200290203094, + "p99": 139.93600010871887 + }, + "roundtrip": { + "p50": 225.8560061454773, + "p90": 234.72000658512115, + "p95": 238.20799589157104, + "p99": 
242.91199445724487 + }, + "isolatedSum": { + "p50": 253.82399559020996, + "p90": 270.9120064973831, + "p95": 276.41600370407104, + "p99": 293.88800263404846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f55a7c17", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717", + "colorKey": "gb300_15a35db4", + "comparisonKey": "2d8b83ad658760e4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:11.450461+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a3b13bb200bb717", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 89.24800157546997, + "p90": 105.95200210809708, + "p95": 111.64800077676773, + "p99": 125.21600723266602 + }, + "combine": { + "p50": 79.9039974808693, + "p90": 84.44800227880478, + "p95": 87.20000088214874, + "p99": 94.40000355243683 + }, + "roundtrip": { + "p50": 147.35999703407288, + "p90": 160.22400557994843, + "p95": 166.33599996566772, + "p99": 176.5120029449463 + }, + "isolatedSum": { + "p50": 169.15199905633926, + "p90": 190.40000438690186, + "p95": 198.84800165891647, + "p99": 219.61601078510284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 89.82399851083755, + "p90": 106.6880002617836, + "p95": 112.09599673748016, + "p99": 123.36000055074692 + }, + "combine": { + "p50": 80.19199967384338, + "p90": 84.86399799585342, + "p95": 87.48800307512283, + "p99": 93.75999867916107 + }, + "roundtrip": { + "p50": 147.77599275112152, + "p90": 159.13599729537964, + "p95": 163.7440025806427, + "p99": 170.49600183963776 + }, + "isolatedSum": { + "p50": 170.01599818468094, + "p90": 191.55199825763702, + "p95": 199.583999812603, + "p99": 217.119999229908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + 
"recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 92.47999638319016, + "p90": 107.39199817180634, + "p95": 115.03999680280685, + "p99": 142.65599846839905 + }, + "combine": { + "p50": 81.44000172615051, + "p90": 87.07199990749359, + "p95": 91.42400324344635, + "p99": 96.41599655151367 + }, + "roundtrip": { + "p50": 150.59199929237366, + "p90": 164.000004529953, + "p95": 169.5680022239685, + "p99": 178.49600315093994 + }, + "isolatedSum": { + "p50": 173.91999810934067, + "p90": 194.46399807929993, + "p95": 206.4640000462532, + "p99": 239.07199501991272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 93.50399672985077, + "p90": 106.78400099277496, + "p95": 113.37599903345108, + "p99": 131.9359987974167 + }, + "combine": { + "p50": 83.03999900817871, + "p90": 88.54400366544724, + "p95": 93.44000369310379, + "p99": 103.80800068378448 + }, + "roundtrip": { + "p50": 153.34400534629822, + "p90": 166.4000004529953, + "p95": 173.98400604724884, + "p99": 184.35199558734894 + }, + "isolatedSum": { + "p50": 176.54399573802948, + "p90": 195.3280046582222, + "p95": 206.81600272655487, + "p99": 235.74399948120117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 94.62399780750275, + "p90": 108.5439994931221, + "p95": 112.76800185441971, + "p99": 124.15999919176102 + }, + "combine": { + "p50": 84.22400057315826, + "p90": 92.0960009098053, + "p95": 
95.48799693584442, + "p99": 109.3439981341362 + }, + "roundtrip": { + "p50": 154.81600165367126, + "p90": 167.90400445461273, + "p95": 174.5920032262802, + "p99": 191.23199582099915 + }, + "isolatedSum": { + "p50": 178.847998380661, + "p90": 200.6400004029274, + "p95": 208.25599879026413, + "p99": 233.50399732589722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.65599870681763, + "p90": 109.8880022764206, + "p95": 114.75200206041336, + "p99": 126.94400548934937 + }, + "combine": { + "p50": 91.39200299978256, + "p90": 95.64799815416336, + "p95": 98.4639972448349, + "p99": 107.26399719715118 + }, + "roundtrip": { + "p50": 160.96000373363495, + "p90": 172.95999825000763, + "p95": 177.40799486637115, + "p99": 201.50400698184967 + }, + "isolatedSum": { + "p50": 190.0480017066002, + "p90": 205.53600043058395, + "p95": 213.21599930524826, + "p99": 234.20800268650055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.83200168609619, + "p90": 120.80000340938568, + "p95": 127.9039978981018, + "p99": 143.8719928264618 + }, + "combine": { + "p50": 105.0880029797554, + "p90": 110.59200018644333, + "p95": 113.98400366306305, + "p99": 122.78400361537933 + }, + "roundtrip": { + "p50": 187.6160055398941, + "p90": 198.14400374889374, + "p95": 201.88799500465393, + "p99": 212.25599944591522 + }, + "isolatedSum": { + "p50": 213.9200046658516, + "p90": 231.392003595829, + "p95": 241.88800156116486, + "p99": 266.6559964418411 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.48000228405, + "p90": 138.0160003900528, + "p95": 142.84799993038177, + "p99": 153.50399911403656 + }, + "combine": { + "p50": 134.17600095272064, + "p90": 142.71999895572662, + "p95": 146.36799693107605, + "p99": 153.47200632095337 + }, + "roundtrip": { + "p50": 238.8480007648468, + "p90": 247.16800451278687, + "p95": 250.91201066970825, + "p99": 260.76799631118774 + }, + "isolatedSum": { + "p50": 262.65600323677063, + "p90": 280.7359993457794, + "p95": 289.2159968614578, + "p99": 306.97600543498993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9d14c709", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b", + "colorKey": "gb300_46b172da", + "comparisonKey": "23a6c8c598f2838f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:21.117125+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": 
"zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ab982093c4eac2b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.51199728250504, + "p90": 115.23199826478958, + "p95": 120.89599668979645, + "p99": 147.39200472831726 + }, + "combine": { + "p50": 81.37600123882294, + "p90": 86.62399649620056, + "p95": 88.83199840784073, + "p99": 95.48799693584442 + }, + "roundtrip": { + "p50": 153.47200632095337, + "p90": 168.09600591659546, + "p95": 174.9120056629181, + "p99": 187.99999356269836 + }, + "isolatedSum": { + "p50": 177.88799852132797, + "p90": 201.85599476099014, + "p95": 209.72799509763718, + "p99": 242.88000166416168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.96000069379807, + "p90": 115.29599875211716, + "p95": 124.51200187206268, + "p99": 139.00800049304962 + }, + "combine": { + "p50": 82.40000158548355, + "p90": 87.3280018568039, + "p95": 89.75999802350998, + "p99": 97.82399982213974 + }, + "roundtrip": { + "p50": 155.90399503707886, + "p90": 169.5999950170517, + "p95": 173.69599640369415, + "p99": 188.1600022315979 + }, + "isolatedSum": { + "p50": 179.36000227928162, + "p90": 202.62400060892105, + "p95": 214.27199989557266, + "p99": 236.83200031518936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 99.07200187444687, + "p90": 113.79200220108032, + "p95": 120.67200243473053, + "p99": 139.615997672081 + }, + "combine": { + "p50": 83.96799862384796, + "p90": 89.50400352478027, + "p95": 93.9520001411438, + "p99": 103.35999727249146 + }, + "roundtrip": { + "p50": 157.56799280643463, + "p90": 171.7119961977005, + "p95": 177.5359958410263, + "p99": 192.60799884796143 + }, + "isolatedSum": { + "p50": 183.04000049829483, + "p90": 203.2960057258606, + "p95": 214.62400257587433, + "p99": 242.97599494457245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.84800016880035, + "p90": 113.88800293207169, + "p95": 119.26399916410446, + "p99": 132.09599256515503 + }, + "combine": { + "p50": 85.79199761152267, + "p90": 92.6399976015091, + "p95": 96.28800302743912, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 160.16000509262085, + "p90": 
175.29599368572235, + "p95": 182.40000307559967, + "p99": 208.70399475097656 + }, + "isolatedSum": { + "p50": 184.63999778032303, + "p90": 206.52800053358078, + "p95": 215.55200219154358, + "p99": 235.55199056863785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 100.51199793815613, + "p90": 114.3999993801117, + "p95": 119.61600184440613, + "p99": 130.40000200271606 + }, + "combine": { + "p50": 86.68799698352814, + "p90": 94.33600306510925, + "p95": 98.04800152778625, + "p99": 102.9760017991066 + }, + "roundtrip": { + "p50": 162.6559942960739, + "p90": 183.00800025463104, + "p95": 195.71200013160706, + "p99": 211.35999262332916 + }, + "isolatedSum": { + "p50": 187.19999492168427, + "p90": 208.73600244522095, + "p95": 217.66400337219238, + "p99": 233.37600380182266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.44799810647964, + "p90": 116.86400324106216, + "p95": 123.00799787044525, + "p99": 136.06399297714233 + }, + "combine": { + "p50": 93.50399672985077, + "p90": 99.23200309276581, + "p95": 101.40799731016159, + "p99": 109.82400178909302 + }, + "roundtrip": { + "p50": 167.1999990940094, + "p90": 181.66400492191315, + "p95": 186.3040030002594, + "p99": 204.99199628829956 + }, + "isolatedSum": { + "p50": 197.9519948363304, + "p90": 216.09600633382797, + "p95": 224.41599518060684, + "p99": 245.88799476623535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + 
"recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 114.14399743080139, + "p90": 124.41600114107132, + "p95": 130.11200726032257, + "p99": 142.5279974937439 + }, + "combine": { + "p50": 107.42399841547012, + "p90": 112.60800063610077, + "p95": 115.07199704647064, + "p99": 122.65600264072418 + }, + "roundtrip": { + "p50": 191.42399728298187, + "p90": 202.04800367355347, + "p95": 206.01600408554077, + "p99": 226.75199806690216 + }, + "isolatedSum": { + "p50": 221.56799584627151, + "p90": 237.0240017771721, + "p95": 245.1840043067932, + "p99": 265.1840001344681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.05600666999817, + "p90": 139.8719996213913, + "p95": 143.74400675296783, + "p99": 172.03199863433838 + }, + "combine": { + "p50": 124.86399710178375, + "p90": 131.84000551700592, + "p95": 134.0160071849823, + "p99": 140.44800400733948 + }, + "roundtrip": { + "p50": 225.8879989385605, + "p90": 236.15999519824982, + "p95": 241.11999571323395, + "p99": 256.28799200057983 + }, + "isolatedSum": { + "p50": 253.92000377178192, + "p90": 271.7120051383972, + "p95": 277.76001393795013, + "p99": 312.48000264167786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e40c9223", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_6e04dda3", + "comparisonKey": 
"a225bda519f2d24b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:08.624842+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 99.90400075912476, + "p90": 136.54400408267975, + "p95": 
148.70400726795197, + "p99": 165.98400473594666 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 97.98400104045868, + "p95": 116.70400202274323, + "p99": 138.33600282669067 + }, + "roundtrip": { + "p50": 153.21600437164307, + "p90": 184.12800133228302, + "p95": 190.49599766731262, + "p99": 203.80799472332 + }, + "isolatedSum": { + "p50": 180.60800433158875, + "p90": 234.52800512313843, + "p95": 265.4080092906952, + "p99": 304.32000756263733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 100.89600086212158, + "p90": 146.88000082969666, + "p95": 153.47200632095337, + "p99": 176.1920005083084 + }, + "combine": { + "p50": 82.33600109815598, + "p90": 119.87199634313583, + "p95": 136.4160031080246, + "p99": 149.3760049343109 + }, + "roundtrip": { + "p50": 153.9199948310852, + "p90": 182.72000551223755, + "p95": 200.80000162124634, + "p99": 228.35199534893036 + }, + "isolatedSum": { + "p50": 183.23200196027756, + "p90": 266.7519971728325, + "p95": 289.88800942897797, + "p99": 325.5680054426193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 101.75999999046326, + "p90": 140.19200205802917, + "p95": 148.15999567508698, + "p99": 164.19200599193573 + }, + "combine": { + "p50": 83.64800363779068, + "p90": 104.96000200510025, + "p95": 116.92799627780914, + "p99": 138.91200721263885 + }, + "roundtrip": { + "p50": 157.69599378108978, + "p90": 184.64000523090363, + "p95": 196.57599925994873, + "p99": 210.78400313854218 + }, + "isolatedSum": { + "p50": 
185.40800362825394, + "p90": 245.15200406312943, + "p95": 265.0879919528961, + "p99": 303.1040132045746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 102.20800340175629, + "p90": 132.22399353981018, + "p95": 144.19199526309967, + "p99": 161.1199975013733 + }, + "combine": { + "p50": 84.76799726486206, + "p90": 97.9200005531311, + "p95": 103.93600165843964, + "p99": 145.9520012140274 + }, + "roundtrip": { + "p50": 157.82399475574493, + "p90": 179.1680008172989, + "p95": 192.28799641132355, + "p99": 220.32000124454498 + }, + "isolatedSum": { + "p50": 186.97600066661835, + "p90": 230.14399409294128, + "p95": 248.1279969215393, + "p99": 307.0719987154007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 102.84800082445145, + "p90": 147.5519984960556, + "p95": 155.32800555229187, + "p99": 171.1360067129135 + }, + "combine": { + "p50": 85.50400286912918, + "p90": 119.35999989509583, + "p95": 136.9599997997284, + "p99": 153.24799716472626 + }, + "roundtrip": { + "p50": 165.0560051202774, + "p90": 191.29599630832672, + "p95": 200.22399723529816, + "p99": 223.39199483394623 + }, + "isolatedSum": { + "p50": 188.35200369358063, + "p90": 266.9119983911514, + "p95": 292.28800535202026, + "p99": 324.38400387763977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
32, + "globalTokens": 256, + "dispatch": { + "p50": 104.80000078678131, + "p90": 130.14400005340576, + "p95": 142.07999408245087, + "p99": 171.9679981470108 + }, + "combine": { + "p50": 91.16800129413605, + "p90": 112.67200112342834, + "p95": 132.25600123405457, + "p99": 151.7760008573532 + }, + "roundtrip": { + "p50": 168.2880073785782, + "p90": 201.50400698184967, + "p95": 209.47200059890747, + "p99": 230.24000227451324 + }, + "isolatedSum": { + "p50": 195.96800208091736, + "p90": 242.8160011768341, + "p95": 274.33599531650543, + "p99": 323.743999004364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.91200262308121, + "p90": 131.6159963607788, + "p95": 142.4960047006607, + "p99": 181.34400248527527 + }, + "combine": { + "p50": 106.11200332641602, + "p90": 143.8080072402954, + "p95": 151.0400027036667, + "p99": 165.8560037612915 + }, + "roundtrip": { + "p50": 188.7039989233017, + "p90": 208.41600000858307, + "p95": 227.7120053768158, + "p99": 246.39999866485596 + }, + "isolatedSum": { + "p50": 217.02400594949722, + "p90": 275.4240036010742, + "p95": 293.5360074043274, + "p99": 347.2000062465668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.87999820709229, + "p90": 147.2640037536621, + "p95": 153.60000729560852, + "p99": 172.15999960899353 + }, + "combine": { + "p50": 136.31999492645264, + "p90": 148.41599762439728, + "p95": 156.38400614261627, + "p99": 175.23199319839478 + }, + "roundtrip": { + "p50": 241.63199961185455, + 
"p90": 257.56800174713135, + "p95": 272.5760042667389, + "p99": 296.25600576400757 + }, + "isolatedSum": { + "p50": 267.1999931335449, + "p90": 295.6800013780594, + "p95": 309.9840134382248, + "p99": 347.3919928073883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3ab662a4", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_04de5a5b", + "comparisonKey": "3a5f0bb6e0d0b96c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:18.538662+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.28000313043594, + "p90": 114.23999816179276, + "p95": 119.74400281906128, + "p99": 130.97600638866425 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 86.496002972126, + "p95": 89.15200084447861, + "p99": 97.56799787282944 + }, + "roundtrip": { + "p50": 153.9520025253296, + "p90": 167.52000153064728, + "p95": 173.15199971199036, + "p99": 196.1279958486557 + }, + "isolatedSum": { + "p50": 178.336001932621, + "p90": 200.73600113391876, + "p95": 208.8960036635399, + "p99": 228.54400426149368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 100.12800246477127, + "p90": 117.27999895811081, + "p95": 121.91999703645706, + "p99": 137.15200126171112 + }, + "combine": { + "p50": 84.09599959850311, + "p90": 88.0960002541542, + "p95": 90.43200314044952, + "p99": 99.80800002813339 + }, + "roundtrip": { + "p50": 155.87200224399567, + "p90": 172.19200730323792, + "p95": 179.45599555969238, + "p99": 191.71200692653656 + }, + "isolatedSum": { + "p50": 184.22400206327438, + "p90": 205.37599921226501, + "p95": 212.35200017690659, + "p99": 236.9600012898445 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 100.54399818181992, + "p90": 114.72000181674957, + "p95": 120.25599926710129, + "p99": 149.05600249767303 + }, + "combine": { + "p50": 83.83999764919281, + "p90": 87.93599903583527, + "p95": 90.14400094747543, + "p99": 99.74399954080582 + }, + "roundtrip": { + "p50": 157.4079990386963, + "p90": 171.03999853134155, + "p95": 178.3359944820404, + "p99": 189.88800048828125 + }, + "isolatedSum": { + "p50": 184.38399583101273, + "p90": 202.65600085258484, + "p95": 210.40000021457672, + "p99": 248.80000203847885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 101.47199779748917, + "p90": 115.26399850845337, + "p95": 120.60800194740295, + "p99": 131.71200454235077 + }, + "combine": { + "p50": 85.02399921417236, + "p90": 93.1520015001297, + "p95": 96.76799923181534, + "p99": 102.62399911880493 + }, + "roundtrip": { + "p50": 160.99199652671814, + "p90": 173.15199971199036, + "p95": 179.1680008172989, + "p99": 213.34399282932281 + }, + "isolatedSum": { + "p50": 186.49599701166153, + "p90": 208.41600000858307, + "p95": 217.3760011792183, + "p99": 234.3360036611557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 100.8640006184578, + "p90": 116.48000031709671, + "p95": 
121.15199863910675, + "p99": 134.17600095272064 + }, + "combine": { + "p50": 86.36800199747086, + "p90": 94.78399902582169, + "p95": 97.37599641084671, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 164.19200599193573, + "p90": 176.28799378871918, + "p95": 180.89599907398224, + "p99": 189.34400379657745 + }, + "isolatedSum": { + "p50": 187.23200261592865, + "p90": 211.2639993429184, + "p95": 218.52799504995346, + "p99": 239.55199867486954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.76800054311752, + "p90": 117.11999773979187, + "p95": 121.21599912643433, + "p99": 128.9599984884262 + }, + "combine": { + "p50": 93.98400038480759, + "p90": 99.67999905347824, + "p95": 102.36799716949463, + "p99": 109.31199789047241 + }, + "roundtrip": { + "p50": 168.44800114631653, + "p90": 181.43999576568604, + "p95": 188.89600038528442, + "p99": 202.65600085258484 + }, + "isolatedSum": { + "p50": 198.7520009279251, + "p90": 216.7999967932701, + "p95": 223.58399629592896, + "p99": 238.27199637889862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.63200098276138, + "p90": 126.71999633312225, + "p95": 131.71200454235077, + "p99": 153.08800339698792 + }, + "combine": { + "p50": 108.5439994931221, + "p90": 113.21599781513214, + "p95": 117.72800236940384, + "p99": 124.35200065374374 + }, + "roundtrip": { + "p50": 191.00800156593323, + "p90": 201.31200551986694, + "p95": 206.1759978532791, + "p99": 213.02400529384613 + }, + "isolatedSum": { + 
"p50": 222.17600047588348, + "p90": 239.9359941482544, + "p95": 249.4400069117546, + "p99": 277.44000405073166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.54400277137756, + "p90": 141.2159949541092, + "p95": 144.6399986743927, + "p99": 156.73600137233734 + }, + "combine": { + "p50": 124.60800260305405, + "p90": 132.9279989004135, + "p95": 134.68800485134125, + "p99": 140.28799533843994 + }, + "roundtrip": { + "p50": 228.2560020685196, + "p90": 238.0799949169159, + "p95": 241.63199961185455, + "p99": 256.0639977455139 + }, + "isolatedSum": { + "p50": 253.1520053744316, + "p90": 274.1439938545227, + "p95": 279.32800352573395, + "p99": 297.0239967107773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-48c02d24", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_8cda999b", + "comparisonKey": "f43e80b5c2df2021", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:44.909033+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB 
physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.44000369310379, + "p90": 110.55999994277954, + "p95": 115.84000289440155, + "p99": 132.7040046453476 + }, + "combine": { + "p50": 77.37600058317184, + "p90": 84.3840017914772, + "p95": 86.43200248479843, + "p99": 92.47999638319016 + }, + "roundtrip": { + "p50": 150.56000649929047, + "p90": 163.2319986820221, + "p95": 169.5999950170517, + "p99": 179.48800325393677 + }, + "isolatedSum": { + "p50": 170.81600427627563, + "p90": 194.94400173425674, + "p95": 202.27200537919998, + "p99": 225.18400102853775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + 
"recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 93.75999867916107, + "p90": 109.98400300741196, + "p95": 115.90400338172913, + "p99": 127.32799351215363 + }, + "combine": { + "p50": 82.07999914884567, + "p90": 86.97599917650223, + "p95": 91.00800007581711, + "p99": 96.96000069379807 + }, + "roundtrip": { + "p50": 153.56799960136414, + "p90": 167.1999990940094, + "p95": 174.78400468826294, + "p99": 183.96799266338348 + }, + "isolatedSum": { + "p50": 175.83999782800674, + "p90": 196.96000218391418, + "p95": 206.91200345754623, + "p99": 224.2879942059517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 94.84799951314926, + "p90": 111.80800199508667, + "p95": 118.23999881744385, + "p99": 138.08000087738037 + }, + "combine": { + "p50": 82.91199803352356, + "p90": 87.48800307512283, + "p95": 91.51999652385712, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 155.93600273132324, + "p90": 170.1119989156723, + "p95": 176.28799378871918, + "p99": 196.06399536132812 + }, + "isolatedSum": { + "p50": 177.75999754667282, + "p90": 199.2960050702095, + "p95": 209.75999534130096, + "p99": 234.0480014681816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.67999839782715, + "p90": 110.33599823713303, + "p95": 114.3999993801117, + "p99": 123.26399981975555 + }, + "combine": { + "p50": 84.28800106048584, + "p90": 90.30400216579437, + "p95": 
94.43199634552002, + "p99": 101.72799974679947 + }, + "roundtrip": { + "p50": 157.6640009880066, + "p90": 172.41600155830383, + "p95": 179.61600422859192, + "p99": 190.68799912929535 + }, + "isolatedSum": { + "p50": 179.967999458313, + "p90": 200.6400004029274, + "p95": 208.8319957256317, + "p99": 224.99199956655502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.98400104045868, + "p90": 114.94400352239609, + "p95": 121.60000205039978, + "p99": 135.04000008106232 + }, + "combine": { + "p50": 85.53600311279297, + "p90": 93.24800223112106, + "p95": 95.67999839782715, + "p99": 100.35199671983719 + }, + "roundtrip": { + "p50": 160.60799360275269, + "p90": 173.50399494171143, + "p95": 179.58399653434753, + "p99": 199.8399943113327 + }, + "isolatedSum": { + "p50": 183.52000415325165, + "p90": 208.19200575351715, + "p95": 217.28000044822693, + "p99": 235.3919968008995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.75999999046326, + "p90": 113.56800049543381, + "p95": 117.60000139474869, + "p99": 127.74400413036346 + }, + "combine": { + "p50": 90.94399958848953, + "p90": 98.36799651384354, + "p95": 101.1200025677681, + "p99": 113.66400122642517 + }, + "roundtrip": { + "p50": 165.43999314308167, + "p90": 177.69600450992584, + "p95": 183.1360012292862, + "p99": 198.84799420833588 + }, + "isolatedSum": { + "p50": 192.7039995789528, + "p90": 211.93599700927734, + "p95": 218.72000396251678, + "p99": 241.40800535678864 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.38399893045425, + "p90": 123.29600006341934, + "p95": 127.80800461769104, + "p99": 135.903999209404 + }, + "combine": { + "p50": 106.84800148010254, + "p90": 112.12799698114395, + "p95": 115.48800021409988, + "p99": 122.27199971675873 + }, + "roundtrip": { + "p50": 188.9919936656952, + "p90": 199.16799664497375, + "p95": 205.4399996995926, + "p99": 233.024001121521 + }, + "isolatedSum": { + "p50": 219.2320004105568, + "p90": 235.4239970445633, + "p95": 243.29600483179092, + "p99": 258.1759989261627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.24000012874603, + "p90": 137.40800321102142, + "p95": 141.4400041103363, + "p99": 148.95999431610107 + }, + "combine": { + "p50": 123.45600128173828, + "p90": 131.96800649166107, + "p95": 134.0479999780655, + "p99": 148.44800531864166 + }, + "roundtrip": { + "p50": 225.0880002975464, + "p90": 235.6799989938736, + "p95": 239.80799317359924, + "p99": 258.1759989261627 + }, + "isolatedSum": { + "p50": 249.6960014104843, + "p90": 269.3760097026825, + "p95": 275.4880040884018, + "p99": 297.40799963474274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-99af315f", + "identity": 
"gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_20de545c", + "comparisonKey": "fcd0e10182ca372c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:48.913470+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.5920017361641, + "p90": 91.96799993515015, + "p95": 99.04000163078308, + "p99": 118.68800222873688 + }, + "combine": { + "p50": 81.63200318813324, + "p90": 87.36000210046768, + "p95": 92.47999638319016, + "p99": 99.32799637317657 + }, + "roundtrip": { + "p50": 134.8479986190796, + "p90": 148.54399859905243, + "p95": 152.16000378131866, + "p99": 168.35199296474457 + }, + "isolatedSum": { + "p50": 156.22400492429733, + "p90": 179.32800203561783, + "p95": 191.51999801397324, + "p99": 218.01599860191345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.74400305747986, + "p90": 91.87199920415878, + "p95": 99.64799880981445, + "p99": 116.41599982976913 + }, + "combine": { + "p50": 82.33600109815598, + "p90": 87.90399879217148, + "p95": 94.01600062847137, + "p99": 102.04800218343735 + }, + "roundtrip": { + "p50": 136.22400164604187, + "p90": 148.47999811172485, + "p95": 153.08800339698792, + "p99": 160.0639969110489 + }, + "isolatedSum": { + "p50": 158.08000415563583, + "p90": 179.77599799633026, + "p95": 193.66399943828583, + "p99": 218.46400201320648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.88799703121185, + "p90": 93.56799721717834, + "p95": 101.47199779748917, + "p99": 133.18400084972382 + }, + "combine": { + "p50": 83.61600339412689, + "p90": 87.74399757385254, + "p95": 92.3520028591156, + "p99": 97.72799909114838 + }, + 
"roundtrip": { + "p50": 140.06400108337402, + "p90": 151.93599462509155, + "p95": 157.72800147533417, + "p99": 166.87999665737152 + }, + "isolatedSum": { + "p50": 161.50400042533875, + "p90": 181.31199479103088, + "p95": 193.82400065660477, + "p99": 230.9119999408722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 79.83999699354172, + "p90": 91.90399944782257, + "p95": 98.59199821949005, + "p99": 111.455999314785 + }, + "combine": { + "p50": 84.3840017914772, + "p90": 90.33600240945816, + "p95": 95.16800194978714, + "p99": 103.93600165843964 + }, + "roundtrip": { + "p50": 139.77600634098053, + "p90": 152.5759994983673, + "p95": 157.53600001335144, + "p99": 166.52800142765045 + }, + "isolatedSum": { + "p50": 164.22399878501892, + "p90": 182.24000185728073, + "p95": 193.7600001692772, + "p99": 215.39200097322464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.4720019698143, + "p90": 92.51199662685394, + "p95": 97.37599641084671, + "p99": 113.56800049543381 + }, + "combine": { + "p50": 85.40800213813782, + "p90": 94.11200135946274, + "p95": 96.41599655151367, + "p99": 101.98400169610977 + }, + "roundtrip": { + "p50": 142.20799505710602, + "p90": 155.58399260044098, + "p95": 160.0639969110489, + "p99": 186.11200153827667 + }, + "isolatedSum": { + "p50": 166.88000410795212, + "p90": 186.62399798631668, + "p95": 193.79199296236038, + "p99": 215.55200219154358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 
9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.57600325345993, + "p90": 95.93600034713745, + "p95": 99.61599856615067, + "p99": 113.92000317573547 + }, + "combine": { + "p50": 92.96000003814697, + "p90": 98.4639972448349, + "p95": 100.60799866914749, + "p99": 108.51199924945831 + }, + "roundtrip": { + "p50": 148.47999811172485, + "p90": 159.87199544906616, + "p95": 163.32800686359406, + "p99": 172.35200107097626 + }, + "isolatedSum": { + "p50": 177.5360032916069, + "p90": 194.39999759197235, + "p95": 200.22399723529816, + "p99": 222.4320024251938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.63999825716019, + "p90": 105.3759977221489, + "p95": 109.24799740314484, + "p99": 119.32799965143204 + }, + "combine": { + "p50": 106.81600123643875, + "p90": 112.41599917411804, + "p95": 116.86400324106216, + "p99": 122.01599776744843 + }, + "roundtrip": { + "p50": 173.6000031232834, + "p90": 182.97599256038666, + "p95": 186.5919977426529, + "p99": 217.21599996089935 + }, + "isolatedSum": { + "p50": 203.45599949359894, + "p90": 217.79199689626694, + "p95": 226.112000644207, + "p99": 241.34399741888046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.59200018644333, + "p90": 119.6800023317337, + "p95": 123.3920007944107, + "p99": 130.68799674510956 + }, + "combine": { + "p50": 
123.9359974861145, + "p90": 132.35199451446533, + "p95": 134.0479999780655, + "p99": 143.8400000333786 + }, + "roundtrip": { + "p50": 208.51199328899384, + "p90": 217.40800142288208, + "p95": 220.5120027065277, + "p99": 229.5680046081543 + }, + "isolatedSum": { + "p50": 234.52799767255783, + "p90": 252.03199684619904, + "p95": 257.4400007724762, + "p99": 274.52799677848816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bd5b38a4", + "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_8d02a479", + "comparisonKey": "661dd1b497fcaeac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:30.442289+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": 
{ + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 48.31999912858009, + "p90": 51.711998879909515, + "p95": 53.0879981815815, + "p99": 61.503998935222626 + }, + "combine": { + "p50": 37.376001477241516, + "p90": 45.24800181388855, + "p95": 46.84799909591675, + "p99": 53.69599908590317 + }, + "roundtrip": { + "p50": 64.28799778223038, + "p90": 71.32799923419952, + "p95": 73.79200309515, + "p99": 77.08799839019775 + }, + "isolatedSum": { + "p50": 85.69600060582161, + "p90": 96.96000069379807, + "p95": 99.93599727749825, + "p99": 115.1999980211258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 47.488000243902206, + "p90": 50.65599828958511, + "p95": 52.41600051522255, + "p99": 62.144000083208084 + }, + "combine": { + "p50": 39.23200070858002, + "p90": 45.53600028157234, + "p95": 46.52800038456917, + "p99": 54.52800169587135 + }, + "roundtrip": { + "p50": 63.519999384880066, + "p90": 68.12799721956253, + "p95": 71.74400240182877, + "p99": 75.71200281381607 + }, + "isolatedSum": { + "p50": 86.72000095248222, + "p90": 96.19199857115746, + 
"p95": 98.94400089979172, + "p99": 116.67200177907944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 48.31999912858009, + "p90": 52.12799832224846, + "p95": 54.27199974656105, + "p99": 62.20800057053566 + }, + "combine": { + "p50": 39.64800015091896, + "p90": 45.75999826192856, + "p95": 47.68000170588493, + "p99": 57.023998349905014 + }, + "roundtrip": { + "p50": 64.41599875688553, + "p90": 72.54400104284286, + "p95": 74.94399696588516, + "p99": 81.15199953317642 + }, + "isolatedSum": { + "p50": 87.96799927949905, + "p90": 97.88799658417702, + "p95": 101.95200145244598, + "p99": 119.23199892044067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 48.41599985957146, + "p90": 51.263999193906784, + "p95": 52.191998809576035, + "p99": 58.30400064587593 + }, + "combine": { + "p50": 38.047999143600464, + "p90": 45.53600028157234, + "p95": 46.39999940991402, + "p99": 53.0879981815815 + }, + "roundtrip": { + "p50": 64.03200328350067, + "p90": 70.65600156784058, + "p95": 72.7040022611618, + "p99": 78.97599786520004 + }, + "isolatedSum": { + "p50": 86.46399900317192, + "p90": 96.79999947547913, + "p95": 98.59199821949005, + "p99": 111.39199882745743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
49.247998744249344, + "p90": 51.90400034189224, + "p95": 53.02400141954422, + "p99": 60.80000102519989 + }, + "combine": { + "p50": 42.75200143456459, + "p90": 46.01600021123886, + "p95": 47.39199951291084, + "p99": 55.36000058054924 + }, + "roundtrip": { + "p50": 66.97600334882736, + "p90": 74.97599720954895, + "p95": 77.08799839019775, + "p99": 82.71999657154083 + }, + "isolatedSum": { + "p50": 92.00000017881393, + "p90": 97.9200005531311, + "p95": 100.41600093245506, + "p99": 116.16000160574913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 49.18399825692177, + "p90": 52.12799832224846, + "p95": 53.95200103521347, + "p99": 62.591999769210815 + }, + "combine": { + "p50": 45.343998819589615, + "p90": 47.520000487565994, + "p95": 48.767998814582825, + "p99": 54.976001381874084 + }, + "roundtrip": { + "p50": 72.38399982452393, + "p90": 76.03199779987335, + "p95": 78.14399898052216, + "p99": 83.45600217580795 + }, + "isolatedSum": { + "p50": 94.52799707651138, + "p90": 99.64799880981445, + "p95": 102.7199998497963, + "p99": 117.5680011510849 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 52.70399898290634, + "p90": 60.63999980688095, + "p95": 62.272001057863235, + "p99": 68.51200014352798 + }, + "combine": { + "p50": 48.09600114822388, + "p90": 55.26399984955788, + "p95": 57.0559985935688, + "p99": 63.77600133419037 + }, + "roundtrip": { + "p50": 85.15200018882751, + "p90": 88.8959988951683, + "p95": 90.68799763917923, + "p99": 97.31200337409973 + 
}, + "isolatedSum": { + "p50": 100.80000013113022, + "p90": 115.90399965643883, + "p95": 119.32799965143204, + "p99": 132.28800147771835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 63.61600011587143, + "p90": 67.1359971165657, + "p95": 72.73600250482559, + "p99": 77.85599678754807 + }, + "combine": { + "p50": 58.97599831223488, + "p90": 61.43999844789505, + "p95": 66.94400310516357, + "p99": 70.97599655389786 + }, + "roundtrip": { + "p50": 104.51199859380722, + "p90": 110.49599945545197, + "p95": 111.93600296974182, + "p99": 119.99999731779099 + }, + "isolatedSum": { + "p50": 122.59199842810631, + "p90": 128.57599556446075, + "p95": 139.68000560998917, + "p99": 148.83199334144592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7b4b7034", + "identity": "gb300|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_99f44a59", + "comparisonKey": "b22da9163d34e85f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:43.676440+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + 
"shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 48.67200180888176, + "p90": 51.83999985456467, + "p95": 53.02400141954422, + "p99": 57.08799883723259 + }, + "combine": { + "p50": 42.047999799251556, + "p90": 46.68800160288811, + "p95": 47.74399846792221, + "p99": 54.368000477552414 + }, + "roundtrip": { + "p50": 64.89600241184235, + "p90": 68.76800209283829, + "p95": 72.4480003118515, + "p99": 77.504001557827 + }, + "isolatedSum": { + "p50": 90.72000160813332, + "p90": 98.52800145745277, + "p95": 100.76799988746643, + "p99": 111.455999314785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.215998500585556, + "p90": 52.15999856591225, + "p95": 53.44000086188316, + "p99": 62.111999839544296 + }, + "combine": { + "p50": 38.24000060558319, + "p90": 45.85599899291992, + "p95": 47.71199822425842, + "p99": 54.9440011382103 + }, + "roundtrip": { + "p50": 64.70400094985962, + "p90": 68.96000355482101, + "p95": 70.97599655389786, + "p99": 78.23999971151352 + }, + "isolatedSum": { + "p50": 87.45599910616875, + "p90": 98.01599755883217, + "p95": 101.15199908614159, + "p99": 117.0560009777546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 49.15200173854828, + "p90": 51.83999985456467, + "p95": 53.53600159287453, + "p99": 61.5679994225502 + }, + "combine": { + "p50": 38.55999931693077, + "p90": 45.951999723911285, + "p95": 47.200001776218414, + "p99": 54.976001381874084 + }, + "roundtrip": { + "p50": 64.86400216817856, + "p90": 69.56800073385239, + "p95": 71.55200093984604, + "p99": 77.95199751853943 + }, + "isolatedSum": { + "p50": 87.71200105547905, + "p90": 97.79199957847595, + "p95": 100.73600336909294, + "p99": 116.54400080442429 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 49.215998500585556, + "p90": 52.51200124621391, + "p95": 53.44000086188316, + "p99": 60.80000102519989 + }, + "combine": { + "p50": 38.176000118255615, + "p90": 45.31199857592583, + "p95": 46.52800038456917, + "p99": 51.29599943757057 + }, + "roundtrip": 
{ + "p50": 65.05600363016129, + "p90": 69.88800317049026, + "p95": 71.74400240182877, + "p99": 76.54400169849396 + }, + "isolatedSum": { + "p50": 87.39199861884117, + "p90": 97.82399982213974, + "p95": 99.96800124645233, + "p99": 112.09600046277046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 49.82399940490723, + "p90": 52.57600173354149, + "p95": 53.82400006055832, + "p99": 64.00000303983688 + }, + "combine": { + "p50": 44.256001710891724, + "p90": 46.68800160288811, + "p95": 48.73599857091904, + "p99": 55.26399984955788 + }, + "roundtrip": { + "p50": 67.48799979686737, + "p90": 72.12799787521362, + "p95": 73.98399710655212, + "p99": 80.38400113582611 + }, + "isolatedSum": { + "p50": 94.08000111579895, + "p90": 99.2640033364296, + "p95": 102.55999863147736, + "p99": 119.26400288939476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 50.11200159788132, + "p90": 53.247999399900436, + "p95": 54.687999188899994, + "p99": 60.47999858856201 + }, + "combine": { + "p50": 46.23999819159508, + "p90": 48.8319993019104, + "p95": 49.92000013589859, + "p99": 57.151999324560165 + }, + "roundtrip": { + "p50": 73.37599992752075, + "p90": 77.69600301980972, + "p95": 79.52000200748444, + "p99": 86.46400272846222 + }, + "isolatedSum": { + "p50": 96.3519997894764, + "p90": 102.07999870181084, + "p95": 104.60799932479858, + "p99": 117.63199791312218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 
5.375, + "recvTokensMax": 287, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 53.50400134921074, + "p90": 61.3120011985302, + "p95": 63.519999384880066, + "p99": 71.61600142717361 + }, + "combine": { + "p50": 48.767998814582825, + "p90": 56.832000613212585, + "p95": 58.687999844551086, + "p99": 65.15199691057205 + }, + "roundtrip": { + "p50": 85.53600311279297, + "p90": 89.05600011348724, + "p95": 89.9839997291565, + "p99": 95.74399888515472 + }, + "isolatedSum": { + "p50": 102.27200016379356, + "p90": 118.14400181174278, + "p95": 122.20799922943115, + "p99": 136.76799833774567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 64.51199948787689, + "p90": 67.391999065876, + "p95": 72.51200079917908, + "p99": 77.34400033950806 + }, + "combine": { + "p50": 59.99999865889549, + "p90": 62.6240000128746, + "p95": 64.92800265550613, + "p99": 69.82400268316269 + }, + "roundtrip": { + "p50": 105.59999942779541, + "p90": 111.07199639081955, + "p95": 112.31999844312668, + "p99": 116.06399714946747 + }, + "isolatedSum": { + "p50": 124.51199814677238, + "p90": 130.0159990787506, + "p95": 137.4400034546852, + "p99": 147.16800302267075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1a41c2ea", + "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "100b396b86e03573", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:05.813535+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 429.2159974575043, + "p90": 465.8240079879761, + "p95": 490.6879961490631, + "p99": 524.9599814414978 + }, + "combine": { + 
"p50": 64.96000289916992, + "p90": 73.15199822187424, + "p95": 77.63200253248215, + "p99": 105.76000064611435 + }, + "roundtrip": { + "p50": 480.9280037879944, + "p90": 507.23201036453247, + "p95": 526.4000296592712, + "p99": 586.1759781837463 + }, + "isolatedSum": { + "p50": 494.1760003566742, + "p90": 538.9760062098503, + "p95": 568.3199986815453, + "p99": 630.7199820876122 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 435.13599038124084, + "p90": 466.8479859828949, + "p95": 489.6320104598999, + "p99": 527.9039740562439 + }, + "combine": { + "p50": 65.76000154018402, + "p90": 75.23199915885925, + "p95": 87.52000331878662, + "p99": 125.2480000257492 + }, + "roundtrip": { + "p50": 480.6079864501953, + "p90": 508.03202390670776, + "p95": 525.2799987792969, + "p99": 567.6479935646057 + }, + "isolatedSum": { + "p50": 500.89599192142487, + "p90": 542.0799851417542, + "p95": 577.1520137786865, + "p99": 653.1519740819931 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 430.2079975605011, + "p90": 453.8879990577698, + "p95": 461.0239863395691, + "p99": 493.21600794792175 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 73.63200187683105, + "p95": 77.05599814653397, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 478.4640073776245, + "p90": 502.78401374816895, + "p95": 509.2160105705261, + "p99": 537.0879769325256 + }, + "isolatedSum": { + "p50": 497.50399589538574, + "p90": 527.5200009346008, + "p95": 538.0799844861031, + "p99": 601.3440042734146 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 430.431991815567, + "p90": 457.69599080085754, + "p95": 479.775995016098, + "p99": 523.9999890327454 + }, + "combine": { + "p50": 69.11999732255936, + "p90": 77.91999727487564, + "p95": 85.24800091981888, + "p99": 128.7039965391159 + }, + "roundtrip": { + "p50": 478.94400358200073, + "p90": 508.4480047225952, + "p95": 522.6240158081055, + "p99": 564.8319721221924 + }, + "isolatedSum": { + "p50": 499.5519891381264, + "p90": 535.6159880757332, + "p95": 565.0239959359169, + "p99": 652.7039855718613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 429.8880100250244, + "p90": 452.92800664901733, + "p95": 460.83199977874756, + "p99": 517.3119902610779 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 76.4160007238388, + "p95": 79.00799810886383, + "p99": 89.4400030374527 + }, + "roundtrip": { + "p50": 478.5279929637909, + "p90": 502.1759867668152, + "p95": 508.8000297546387, + "p99": 533.2480072975159 + }, + "isolatedSum": { + "p50": 500.8000135421753, + "p90": 529.3440073728561, + "p95": 539.8399978876114, + "p99": 606.7519932985306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 432.9279959201813, + "p90": 456.57598972320557, + "p95": 
463.1679952144623, + "p99": 493.151992559433 + }, + "combine": { + "p50": 72.80000299215317, + "p90": 79.8719972372055, + "p95": 82.97599852085114, + "p99": 92.54399687051773 + }, + "roundtrip": { + "p50": 481.2160134315491, + "p90": 506.816029548645, + "p95": 512.8639936447144, + "p99": 551.360011100769 + }, + "isolatedSum": { + "p50": 505.72799891233444, + "p90": 536.4479869604111, + "p95": 546.1439937353134, + "p99": 585.6959894299507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 430.65598607063293, + "p90": 458.0160081386566, + "p95": 466.14399552345276, + "p99": 523.7119793891907 + }, + "combine": { + "p50": 85.05599945783615, + "p90": 92.79999881982803, + "p95": 97.56799787282944, + "p99": 143.23200285434723 + }, + "roundtrip": { + "p50": 482.1760058403015, + "p90": 506.27201795578003, + "p95": 515.4560208320618, + "p99": 571.4240074157715 + }, + "isolatedSum": { + "p50": 515.7119855284691, + "p90": 550.8160069584846, + "p95": 563.7119933962822, + "p99": 666.9439822435379 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 431.42399191856384, + "p90": 454.68801259994507, + "p95": 459.4239890575409, + "p99": 492.2879934310913 + }, + "combine": { + "p50": 100.60799866914749, + "p90": 107.80800133943558, + "p95": 110.78400164842606, + "p99": 147.42399752140045 + }, + "roundtrip": { + "p50": 498.3679950237274, + "p90": 518.5920000076294, + "p95": 527.679979801178, + "p99": 576.3840079307556 + }, + "isolatedSum": { + "p50": 
532.0319905877113, + "p90": 562.4960139393806, + "p95": 570.207990705967, + "p99": 639.7119909524918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-214b01d3", + "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "6b1e52df2e686455", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:04.646768+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 426.94398760795593, + "p90": 447.4239945411682, + "p95": 452.9919922351837, + "p99": 480.2879989147186 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 73.15199822187424, + "p95": 76.73600316047668, + "p99": 80.54400235414505 + }, + "roundtrip": { + "p50": 478.62398624420166, + "p90": 500.0320076942444, + "p95": 504.7680139541626, + "p99": 541.8239831924438 + }, + "isolatedSum": { + "p50": 493.21598559617996, + "p90": 520.5759927630424, + "p95": 529.7279953956604, + "p99": 560.8320012688637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 427.67998576164246, + "p90": 447.1040070056915, + "p95": 454.0480077266693, + "p99": 497.6640045642853 + }, + "combine": { + "p50": 66.68800115585327, + "p90": 74.5600014925003, + "p95": 79.48800176382065, + "p99": 101.98400169610977 + }, + "roundtrip": { + "p50": 477.6960015296936, + "p90": 499.455988407135, + "p95": 508.28802585601807, + "p99": 579.6480178833008 + }, + "isolatedSum": { + "p50": 494.3679869174957, + "p90": 521.6640084981918, + "p95": 533.53600949049, + "p99": 599.648006260395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 439.35999274253845, + "p90": 522.5920081138611, + "p95": 532.9599976539612, + "p99": 549.1520166397095 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 76.60800218582153, + "p95": 80.12799918651581, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 489.8560047149658, + "p90": 582.9759836196899, + "p95": 591.7760133743286, + "p99": 607.3279976844788 + }, + "isolatedSum": { + "p50": 508.09599459171295, + "p90": 599.2000102996826, + "p95": 613.087996840477, + "p99": 638.1440162658691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 425.9200096130371, + "p90": 450.6880044937134, + "p95": 461.08800172805786, + "p99": 77811.61499023438 + }, + "combine": { + "p50": 71.52000069618225, + "p90": 78.65600287914276, + "p95": 83.96799862384796, + "p99": 103.74400019645691 + }, + "roundtrip": { + "p50": 479.64799404144287, + "p90": 502.8799772262573, + "p95": 512.1600031852722, + "p99": 6331.424236297607 + }, + "isolatedSum": { + "p50": 497.44001030921936, + "p90": 529.3440073728561, + "p95": 545.0560003519058, + "p99": 77915.35899043083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 427.13600397109985, + "p90": 449.2799937725067, + "p95": 458.3680033683777, + "p99": 532.1599841117859 + }, + "combine": { + "p50": 71.26399874687195, + "p90": 76.99199765920639, + "p95": 80.83199709653854, + "p99": 88.03199976682663 + }, + "roundtrip": { + "p50": 474.65598583221436, + "p90": 
495.10401487350464, + "p95": 504.96000051498413, + "p99": 542.1440005302429 + }, + "isolatedSum": { + "p50": 498.4000027179718, + "p90": 526.2719914317131, + "p95": 539.2000004649162, + "p99": 620.1919838786125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 425.85599422454834, + "p90": 446.4319944381714, + "p95": 453.11999320983887, + "p99": 471.0719883441925 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 81.69600367546082, + "p95": 84.57600325345993, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 473.66398572921753, + "p90": 493.9520061016083, + "p95": 499.7760057449341, + "p99": 512.8960013389587 + }, + "isolatedSum": { + "p50": 500.4799962043762, + "p90": 528.1279981136322, + "p95": 537.6959964632988, + "p99": 558.8799864053726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 443.231999874115, + "p90": 507.3919892311096, + "p95": 544.3840026855469, + "p99": 24590.368270874023 + }, + "combine": { + "p50": 90.40000289678574, + "p90": 99.2640033364296, + "p95": 101.98400169610977, + "p99": 110.88000237941742 + }, + "roundtrip": { + "p50": 491.0080134868622, + "p90": 571.5519785881042, + "p95": 581.8560123443604, + "p99": 600.2240180969238 + }, + "isolatedSum": { + "p50": 533.6320027709007, + "p90": 606.6559925675392, + "p95": 646.3680043816566, + "p99": 24701.24827325344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, 
+ "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 426.144003868103, + "p90": 448.89599084854126, + "p95": 455.1039934158325, + "p99": 480.4159998893738 + }, + "combine": { + "p50": 107.19999670982361, + "p90": 113.82400244474411, + "p95": 117.11999773979187, + "p99": 124.44800138473511 + }, + "roundtrip": { + "p50": 495.93600630760193, + "p90": 516.0959959030151, + "p95": 523.1999754905701, + "p99": 561.6959929466248 + }, + "isolatedSum": { + "p50": 533.3440005779266, + "p90": 562.7199932932854, + "p95": 572.2239911556244, + "p99": 604.8640012741089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5095ae79", + "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "e0aaecfc18971490", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:03.665070+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": 
"none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 429.1200041770935, + "p90": 454.17600870132446, + "p95": 461.5359902381897, + "p99": 490.3680086135864 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 76.4480009675026, + "p95": 79.68000322580338, + "p99": 85.56800335645676 + }, + "roundtrip": { + "p50": 480.47998547554016, + "p90": 503.64798307418823, + "p95": 509.5999836921692, + "p99": 517.2479748725891 + }, + "isolatedSum": { + "p50": 500.1920014619827, + "p90": 530.6240096688271, + "p95": 541.2159934639931, + "p99": 575.9360119700432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 438.81601095199585, + "p90": 467.9679870605469, + "p95": 476.83200240135193, + "p99": 527.6479721069336 + }, + "combine": { + "p50": 71.9040036201477, + "p90": 
78.84799689054489, + "p95": 83.96799862384796, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 480.54400086402893, + "p90": 507.07197189331055, + "p95": 519.7759866714478, + "p99": 588.7359976768494 + }, + "isolatedSum": { + "p50": 510.72001457214355, + "p90": 546.8159839510918, + "p95": 560.8000010251999, + "p99": 633.6319744586945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 435.2959990501404, + "p90": 459.83999967575073, + "p95": 469.4080054759979, + "p99": 519.3920135498047 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 81.08799904584885, + "p95": 84.51200276613235, + "p99": 93.47199648618698 + }, + "roundtrip": { + "p50": 482.40000009536743, + "p90": 505.0240159034729, + "p95": 513.0239725112915, + "p99": 531.7440032958984 + }, + "isolatedSum": { + "p50": 509.2799961566925, + "p90": 540.9279987215996, + "p95": 553.9200082421303, + "p99": 612.8640100359917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 436.2879991531372, + "p90": 458.079993724823, + "p95": 465.37598967552185, + "p99": 479.8719882965088 + }, + "combine": { + "p50": 75.45600086450577, + "p90": 82.68799632787704, + "p95": 85.56800335645676, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 484.67200994491577, + "p90": 509.92000102996826, + "p95": 517.408013343811, + "p99": 547.4240183830261 + }, + "isolatedSum": { + "p50": 511.744000017643, + "p90": 540.7679900527, + "p95": 550.9439930319786, + "p99": 583.3279862999916 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 429.9199879169464, + "p90": 452.12799310684204, + "p95": 458.3359956741333, + "p99": 474.94399547576904 + }, + "combine": { + "p50": 76.89599692821503, + "p90": 84.89599823951721, + "p95": 88.48000317811966, + "p99": 96.25600278377533 + }, + "roundtrip": { + "p50": 473.66398572921753, + "p90": 499.1680085659027, + "p95": 507.6479911804199, + "p99": 536.4480018615723 + }, + "isolatedSum": { + "p50": 506.81598484516144, + "p90": 537.0239913463593, + "p95": 546.815998852253, + "p99": 571.1999982595444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 431.93599581718445, + "p90": 462.8159999847412, + "p95": 475.13601183891296, + "p99": 520.576000213623 + }, + "combine": { + "p50": 81.34400099515915, + "p90": 87.74399757385254, + "p95": 90.94399958848953, + "p99": 120.06399780511856 + }, + "roundtrip": { + "p50": 483.99999737739563, + "p90": 509.2480182647705, + "p95": 519.2000269889832, + "p99": 564.1599893569946 + }, + "isolatedSum": { + "p50": 513.2799968123436, + "p90": 550.5599975585938, + "p95": 566.0800114274025, + "p99": 640.6399980187416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 433.3760142326355, + "p90": 455.7439982891083, + "p95": 461.91999316215515, + "p99": 
477.9840111732483 + }, + "combine": { + "p50": 95.04000097513199, + "p90": 102.08000242710114, + "p95": 105.79200088977814, + "p99": 110.68800091743469 + }, + "roundtrip": { + "p50": 491.7759895324707, + "p90": 512.6399993896484, + "p95": 519.7759866714478, + "p99": 535.2320075035095 + }, + "isolatedSum": { + "p50": 528.4160152077675, + "p90": 557.8240007162094, + "p95": 567.7119940519333, + "p99": 588.672012090683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 428.3199906349182, + "p90": 455.07198572158813, + "p95": 464.28799629211426, + "p99": 491.13601446151733 + }, + "combine": { + "p50": 114.46399986743927, + "p90": 121.0239976644516, + "p95": 124.25599992275238, + "p99": 134.11200046539307 + }, + "roundtrip": { + "p50": 510.24001836776733, + "p90": 534.1439843177795, + "p95": 541.0559773445129, + "p99": 591.8400287628174 + }, + "isolatedSum": { + "p50": 542.7839905023575, + "p90": 576.0959833860397, + "p95": 588.5439962148666, + "p99": 625.2480149269104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d75d6ecc", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_c4ac4643", + "comparisonKey": "817cf09679b30bf0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:44.367885+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": 
"tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.88800030946732, + "p90": 117.95199662446976, + "p95": 128.89599800109863, + "p99": 144.80000734329224 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 81.98399841785431, + "p95": 85.66399663686752, + "p99": 93.50399672985077 + }, + "roundtrip": { + "p50": 258.87998938560486, + "p90": 280.0320088863373, + "p95": 287.9039943218231, + "p99": 307.3599934577942 + }, + 
"isolatedSum": { + "p50": 173.47200214862823, + "p90": 199.93599504232407, + "p95": 214.55999463796616, + "p99": 238.304004073143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.1760025024414, + "p90": 115.9679964184761, + "p95": 125.2480000257492, + "p99": 138.87999951839447 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 81.98399841785431, + "p95": 86.11200004816055, + "p99": 90.71999788284302 + }, + "roundtrip": { + "p50": 259.74398851394653, + "p90": 282.01600909233093, + "p95": 287.51999139785767, + "p99": 350.5600094795227 + }, + "isolatedSum": { + "p50": 173.18399995565414, + "p90": 197.9519948363304, + "p95": 211.36000007390976, + "p99": 229.5999974012375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 96.54399752616882, + "p90": 118.75200271606445, + "p95": 127.87200510501862, + "p99": 156.80000185966492 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 86.17600053548813, + "p95": 91.32800251245499, + "p99": 98.43199700117111 + }, + "roundtrip": { + "p50": 257.7599883079529, + "p90": 279.776006937027, + "p95": 287.26398944854736, + "p99": 298.94399642944336 + }, + "isolatedSum": { + "p50": 175.07199943065643, + "p90": 204.92800325155258, + "p95": 219.2000076174736, + "p99": 255.23199886083603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.98400104045868, + "p90": 114.49600011110306, + "p95": 124.35200065374374, + "p99": 139.3599957227707 + }, + "combine": { + "p50": 79.00799810886383, + "p90": 84.73599702119827, + "p95": 89.08800035715103, + "p99": 96.44799679517746 + }, + "roundtrip": { + "p50": 260.127991437912, + "p90": 283.488005399704, + "p95": 290.2719974517822, + "p99": 316.4159953594208 + }, + "isolatedSum": { + "p50": 176.9919991493225, + "p90": 199.23199713230133, + "p95": 213.44000101089478, + "p99": 235.80799251794815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.50399738550186, + "p90": 115.93600362539291, + "p95": 125.85599720478058, + "p99": 144.70399916172028 + }, + "combine": { + "p50": 81.18399977684021, + "p90": 87.48800307512283, + "p95": 92.51199662685394, + "p99": 97.56799787282944 + }, + "roundtrip": { + "p50": 262.5280022621155, + "p90": 284.09600257873535, + "p95": 291.55200719833374, + "p99": 317.31200218200684 + }, + "isolatedSum": { + "p50": 178.68799716234207, + "p90": 203.42400670051575, + "p95": 218.36799383163452, + "p99": 242.2719970345497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.14400225877762, + "p90": 115.7120019197464, + "p95": 124.92799758911133, + "p99": 181.43999576568604 + }, + "combine": { + "p50": 85.66399663686752, + "p90": 93.88799965381622, + "p95": 97.9200005531311, + "p99": 106.75200074911118 + }, + "roundtrip": { + "p50": 266.9439911842346, + "p90": 
287.84000873565674, + "p95": 295.23199796676636, + "p99": 315.93599915504456 + }, + "isolatedSum": { + "p50": 183.80799889564514, + "p90": 209.60000157356262, + "p95": 222.84799814224243, + "p99": 288.1919965147972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 102.62399911880493, + "p90": 116.48000031709671, + "p95": 121.91999703645706, + "p99": 134.0160071849823 + }, + "combine": { + "p50": 100.832000374794, + "p90": 107.744000852108, + "p95": 111.35999858379364, + "p99": 120.83200365304947 + }, + "roundtrip": { + "p50": 283.58399868011475, + "p90": 303.26399207115173, + "p95": 308.4479868412018, + "p99": 327.58399844169617 + }, + "isolatedSum": { + "p50": 203.45599949359894, + "p90": 224.2240011692047, + "p95": 233.2799956202507, + "p99": 254.84801083803177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.43199962377548, + "p90": 124.9919980764389, + "p95": 131.58400356769562, + "p99": 157.18400478363037 + }, + "combine": { + "p50": 121.24799937009811, + "p90": 126.94400548934937, + "p95": 130.5920034646988, + "p99": 135.3279948234558 + }, + "roundtrip": { + "p50": 305.1519989967346, + "p90": 326.81599259376526, + "p95": 333.407998085022, + "p99": 357.05599188804626 + }, + "isolatedSum": { + "p50": 235.6799989938736, + "p90": 251.93600356578827, + "p95": 262.1760070323944, + "p99": 292.5119996070862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + 
"recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7733ba4c", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "44ee0b05a8b4a1e8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:44.625010+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 432.51198530197144, + "p90": 461.08800172805786, + "p95": 472.00000286102295, + "p99": 504.12797927856445 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 79.13599908351898, + "p95": 83.36000144481659, + "p99": 97.02400118112564 + }, + "roundtrip": { + "p50": 472.54401445388794, + "p90": 502.6559829711914, + "p95": 508.7360143661499, + "p99": 529.2800068855286 + }, + "isolatedSum": { + "p50": 506.879985332489, + "p90": 540.2240008115768, + "p95": 555.3600043058395, + "p99": 601.1519804596901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 429.28001284599304, + "p90": 458.3039879798889, + "p95": 466.14399552345276, + "p99": 478.5279929637909 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 78.72000336647034, + "p95": 83.55200290679932, + "p99": 94.14400160312653 + }, + "roundtrip": { + "p50": 479.8080027103424, + "p90": 503.80802154541016, + "p95": 510.78397035598755, + "p99": 522.5920081138611 + }, + "isolatedSum": { + "p50": 503.00801545381546, + "p90": 537.0239913463593, + "p95": 549.6959984302521, + "p99": 572.6719945669174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 412.2239947319031, + "p90": 440.44798612594604, + "p95": 449.40799474716187, + "p99": 468.7039852142334 + }, + "combine": { + 
"p50": 77.05599814653397, + "p90": 81.85599744319916, + "p95": 85.28000116348267, + "p99": 93.1520015001297 + }, + "roundtrip": { + "p50": 461.63201332092285, + "p90": 485.6959879398346, + "p95": 493.47200989723206, + "p99": 516.864001750946 + }, + "isolatedSum": { + "p50": 489.27999287843704, + "p90": 522.3039835691452, + "p95": 534.6879959106445, + "p99": 561.8559867143631 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 432.19199776649475, + "p90": 459.6160054206848, + "p95": 466.5279984474182, + "p99": 479.42399978637695 + }, + "combine": { + "p50": 78.23999971151352, + "p90": 83.3280012011528, + "p95": 87.80799806118011, + "p99": 95.23200243711472 + }, + "roundtrip": { + "p50": 482.91200399398804, + "p90": 508.35198163986206, + "p95": 513.5679841041565, + "p99": 526.3040065765381 + }, + "isolatedSum": { + "p50": 510.43199747800827, + "p90": 542.9440066218376, + "p95": 554.3359965085983, + "p99": 574.6560022234917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 430.88001012802124, + "p90": 461.216002702713, + "p95": 468.9599871635437, + "p99": 482.11199045181274 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 85.4720026254654, + "p95": 90.71999788284302, + "p99": 101.43999755382538 + }, + "roundtrip": { + "p50": 482.4320077896118, + "p90": 510.6559991836548, + "p95": 516.1280035972595, + "p99": 523.9359736442566 + }, + "isolatedSum": { + "p50": 511.3920122385025, + "p90": 546.6880053281784, + "p95": 559.6799850463867, + "p99": 
583.5519880056381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 428.70399355888367, + "p90": 457.5999975204468, + "p95": 465.7599925994873, + "p99": 480.6399941444397 + }, + "combine": { + "p50": 84.54400300979614, + "p90": 90.11200070381165, + "p95": 94.84799951314926, + "p99": 101.27999633550644 + }, + "roundtrip": { + "p50": 482.2719991207123, + "p90": 507.10397958755493, + "p95": 512.287974357605, + "p99": 534.7200036048889 + }, + "isolatedSum": { + "p50": 513.2479965686798, + "p90": 547.7119982242584, + "p95": 560.6079921126366, + "p99": 581.9199904799461 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 423.71198534965515, + "p90": 457.3119878768921, + "p95": 466.7840003967285, + "p99": 509.5679759979248 + }, + "combine": { + "p50": 98.11200201511383, + "p90": 102.7199998497963, + "p95": 106.55999928712845, + "p99": 113.21599781513214 + }, + "roundtrip": { + "p50": 492.5439953804016, + "p90": 520.2879905700684, + "p95": 526.9439816474915, + "p99": 540.5439734458923 + }, + "isolatedSum": { + "p50": 521.823987364769, + "p90": 560.0319877266884, + "p95": 573.343999683857, + "p99": 622.783973813057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 429.82399463653564, + "p90": 
459.48800444602966, + "p95": 465.9520089626312, + "p99": 487.10399866104126 + }, + "combine": { + "p50": 118.49600076675415, + "p90": 124.09599870443344, + "p95": 127.20000743865967, + "p99": 131.42399489879608 + }, + "roundtrip": { + "p50": 516.1280035972595, + "p90": 538.1439924240112, + "p95": 545.1840162277222, + "p99": 558.3040118217468 + }, + "isolatedSum": { + "p50": 548.3199954032898, + "p90": 583.5840031504631, + "p95": 593.1520164012909, + "p99": 618.5279935598373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8eb16503", + "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "b2acbf95773921f9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:01.755509+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 451.9360065460205, + "p90": 482.2080135345459, + "p95": 508.54402780532837, + "p99": 605.7599782943726 + }, + "combine": { + "p50": 75.96799731254578, + "p90": 81.727996468544, + "p95": 87.87199854850769, + "p99": 120.41600048542023 + }, + "roundtrip": { + "p50": 510.6559991836548, + "p90": 534.2400074005127, + "p95": 543.2000160217285, + "p99": 635.807991027832 + }, + "isolatedSum": { + "p50": 527.9040038585663, + "p90": 563.9360100030899, + "p95": 596.4160263538361, + "p99": 726.1759787797928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 456.928014755249, + "p90": 482.9440116882324, + "p95": 490.84800481796265, + "p99": 506.24001026153564 + }, + "combine": { + "p50": 77.56800204515457, + "p90": 83.48800241947174, + "p95": 87.67999708652496, + "p99": 93.1520015001297 + }, + "roundtrip": { + "p50": 513.2799744606018, + "p90": 533.2159996032715, + "p95": 540.992021560669, + "p99": 
576.5119791030884 + }, + "isolatedSum": { + "p50": 534.4960168004036, + "p90": 566.4320141077042, + "p95": 578.5280019044876, + "p99": 599.3920117616653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 449.0239918231964, + "p90": 483.13599824905396, + "p95": 489.0879988670349, + "p99": 504.1919946670532 + }, + "combine": { + "p50": 78.65600287914276, + "p90": 84.25600081682205, + "p95": 88.70399743318558, + "p99": 289.34401273727417 + }, + "roundtrip": { + "p50": 505.3120255470276, + "p90": 538.1119847297668, + "p95": 547.5199818611145, + "p99": 560.3839755058289 + }, + "isolatedSum": { + "p50": 527.6799947023392, + "p90": 567.391999065876, + "p95": 577.7919963002205, + "p99": 793.5360074043274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 459.52001214027405, + "p90": 484.607994556427, + "p95": 492.2240078449249, + "p99": 505.98400831222534 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 85.79199761152267, + "p95": 90.52799642086029, + "p99": 99.10400211811066 + }, + "roundtrip": { + "p50": 513.375997543335, + "p90": 537.9199981689453, + "p95": 544.2240238189697, + "p99": 560.5760216712952 + }, + "isolatedSum": { + "p50": 539.8080125451088, + "p90": 570.3999921679497, + "p95": 582.7520042657852, + "p99": 605.088010430336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 456.7039906978607, + "p90": 483.3599925041199, + "p95": 496.63999676704407, + "p99": 598.0160236358643 + }, + "combine": { + "p50": 82.5280025601387, + "p90": 88.86399865150452, + "p95": 93.79199892282486, + "p99": 121.95199728012085 + }, + "roundtrip": { + "p50": 509.5360279083252, + "p90": 535.647988319397, + "p95": 574.1119980812073, + "p99": 637.3440027236938 + }, + "isolatedSum": { + "p50": 539.2319932579994, + "p90": 572.2239911556244, + "p95": 590.4319956898689, + "p99": 719.9680209159851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 460.7999920845032, + "p90": 487.0080053806305, + "p95": 495.5520033836365, + "p99": 513.0559802055359 + }, + "combine": { + "p50": 87.74399757385254, + "p90": 93.12000125646591, + "p95": 96.89600020647049, + "p99": 101.40799731016159 + }, + "roundtrip": { + "p50": 516.3519978523254, + "p90": 542.3039793968201, + "p95": 548.5119819641113, + "p99": 575.4560232162476 + }, + "isolatedSum": { + "p50": 548.5439896583557, + "p90": 580.1280066370964, + "p95": 592.448003590107, + "p99": 614.4639775156975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 451.03999972343445, + "p90": 485.1840138435364, + "p95": 493.53599548339844, + "p99": 518.176019191742 + }, + "combine": { + "p50": 102.46399790048599, + "p90": 107.80800133943558, + "p95": 111.58400028944016, + "p99": 119.03999745845795 + }, + "roundtrip": { + "p50": 512.0000243186951, + "p90": 
540.6079888343811, + "p95": 547.6160049438477, + "p99": 559.9039793014526 + }, + "isolatedSum": { + "p50": 553.5039976239204, + "p90": 592.992015182972, + "p95": 605.1199957728386, + "p99": 637.2160166501999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 453.18400859832764, + "p90": 486.01600527763367, + "p95": 492.99201369285583, + "p99": 517.1200037002563 + }, + "combine": { + "p50": 122.14399874210358, + "p90": 127.87200510501862, + "p95": 131.42399489879608, + "p99": 136.86400651931763 + }, + "roundtrip": { + "p50": 533.6959958076477, + "p90": 562.1119737625122, + "p95": 569.1199898719788, + "p99": 613.9199733734131 + }, + "isolatedSum": { + "p50": 575.3280073404312, + "p90": 613.8880103826523, + "p95": 624.4160085916519, + "p99": 653.984010219574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f16587a8", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b1b733fb", + "comparisonKey": "1ce91864f23d9173", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:12.617901+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + 
"worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 84.3840017914772, + "p90": 98.7199991941452, + "p95": 106.175996363163, + "p99": 120.44800072908401 + }, + "combine": { + "p50": 76.83199644088745, + "p90": 81.98399841785431, + "p95": 87.16800063848495, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 240.48000574111938, + "p90": 259.3599855899811, + "p95": 264.51200246810913, + "p99": 283.84000062942505 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 180.7039976119995, + "p95": 193.34399700164795, + "p99": 217.15199947357178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, 
+ "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 83.93599838018417, + "p90": 98.59199821949005, + "p95": 103.35999727249146, + "p99": 115.39199948310852 + }, + "combine": { + "p50": 77.34400033950806, + "p90": 82.62400329113007, + "p95": 87.99999952316284, + "p99": 95.13600170612335 + }, + "roundtrip": { + "p50": 240.4160052537918, + "p90": 258.62398743629456, + "p95": 263.71198892593384, + "p99": 283.9680016040802 + }, + "isolatedSum": { + "p50": 161.27999871969223, + "p90": 181.21600151062012, + "p95": 191.3599967956543, + "p99": 210.52800118923187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 83.80799740552902, + "p90": 97.28000313043594, + "p95": 103.07200253009796, + "p99": 114.27199840545654 + }, + "combine": { + "p50": 81.24800026416779, + "p90": 86.43200248479843, + "p95": 89.56799656152725, + "p99": 96.63999825716019 + }, + "roundtrip": { + "p50": 242.5280064344406, + "p90": 259.8400115966797, + "p95": 265.21599292755127, + "p99": 275.90399980545044 + }, + "isolatedSum": { + "p50": 165.0559976696968, + "p90": 183.71200561523438, + "p95": 192.6399990916252, + "p99": 210.91199666261673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 89.1840010881424, + "p90": 127.10399925708771, + "p95": 147.2640037536621, + "p99": 161.8880033493042 + }, + "combine": { + "p50": 
82.04799890518188, + "p90": 118.30399930477142, + "p95": 138.97599279880524, + "p99": 150.751993060112 + }, + "roundtrip": { + "p50": 247.19999730587006, + "p90": 282.1120023727417, + "p95": 297.34399914741516, + "p99": 324.67201352119446 + }, + "isolatedSum": { + "p50": 171.23199999332428, + "p90": 245.40799856185913, + "p95": 286.23999655246735, + "p99": 312.6399964094162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 88.67199718952179, + "p90": 126.24000012874603, + "p95": 144.67200636863708, + "p99": 158.07999670505524 + }, + "combine": { + "p50": 85.34400165081024, + "p90": 123.4240010380745, + "p95": 145.1520025730133, + "p99": 154.08000349998474 + }, + "roundtrip": { + "p50": 251.583993434906, + "p90": 279.9359858036041, + "p95": 295.2960133552551, + "p99": 311.93599104881287 + }, + "isolatedSum": { + "p50": 174.01599884033203, + "p90": 249.66400116682053, + "p95": 289.8240089416504, + "p99": 312.16000020504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.50400352478027, + "p90": 136.9599997997284, + "p95": 149.08799529075623, + "p99": 166.97600483894348 + }, + "combine": { + "p50": 89.24800157546997, + "p90": 130.40000200271606, + "p95": 147.2959965467453, + "p99": 155.87200224399567 + }, + "roundtrip": { + "p50": 255.87201118469238, + "p90": 286.72000765800476, + "p95": 299.1040050983429, + "p99": 316.3520097732544 + }, + "isolatedSum": { + "p50": 178.75200510025024, + "p90": 267.36000180244446, + "p95": 296.3839918375015, + "p99": 
322.84800708293915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 90.33600240945816, + "p90": 101.18400305509567, + "p95": 106.01600259542465, + "p99": 116.86400324106216 + }, + "combine": { + "p50": 100.60799866914749, + "p90": 106.20799660682678, + "p95": 109.24799740314484, + "p99": 118.68800222873688 + }, + "roundtrip": { + "p50": 266.04801416397095, + "p90": 282.0799946784973, + "p95": 287.9680097103119, + "p99": 298.7520098686218 + }, + "isolatedSum": { + "p50": 190.94400107860565, + "p90": 207.39199966192245, + "p95": 215.2639999985695, + "p99": 235.55200546979904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.41599720716476, + "p90": 109.21599715948105, + "p95": 114.88000303506851, + "p99": 125.02400577068329 + }, + "combine": { + "p50": 121.50400131940842, + "p90": 127.6479959487915, + "p95": 132.64000415802002, + "p99": 147.93600142002106 + }, + "roundtrip": { + "p50": 293.8239872455597, + "p90": 315.0399923324585, + "p95": 323.743999004364, + "p99": 351.0720133781433 + }, + "isolatedSum": { + "p50": 221.91999852657318, + "p90": 236.86399310827255, + "p95": 247.52000719308853, + "p99": 272.96000719070435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d1adb2c7", + "identity": 
"gb300|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_4ebffb62", + "comparisonKey": "b12bc00db050e57a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:14.643799+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 44.319998472929, + "p90": 49.31199923157692, + "p95": 54.27199974656105, + "p99": 62.78400123119354 + }, + "combine": { + "p50": 64.54399973154068, + "p90": 72.60800153017044, + "p95": 74.65600222349167, + "p99": 80.48000186681747 + }, + "roundtrip": { + "p50": 1534.5280170440674, + "p90": 1544.0319776535034, + "p95": 1551.200032234192, + "p99": 1574.4960308074951 + }, + "isolatedSum": { + "p50": 108.86399820446968, + "p90": 121.92000076174736, + "p95": 128.92800197005272, + "p99": 143.26400309801102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 44.19200122356415, + "p90": 49.79199916124344, + "p95": 53.92000079154968, + "p99": 61.664000153541565 + }, + "combine": { + "p50": 65.0240033864975, + "p90": 71.84000313282013, + "p95": 74.5920017361641, + "p99": 81.44000172615051 + }, + "roundtrip": { + "p50": 1535.2319478988647, + "p90": 1542.8160429000854, + "p95": 1546.3999509811401, + "p99": 1551.584005355835 + }, + "isolatedSum": { + "p50": 109.21600461006165, + "p90": 121.63200229406357, + "p95": 128.51200252771378, + "p99": 143.10400187969208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 44.92799937725067, + "p90": 50.75199902057648, + "p95": 55.93600124120712, + "p99": 65.34399837255478 + }, + "combine": { + "p50": 63.32799792289734, + "p90": 71.29599899053574, + "p95": 73.91999661922455, + "p99": 80.89599758386612 + }, + "roundtrip": { + "p50": 1534.5920324325562, + "p90": 1549.9199628829956, 
+ "p95": 1560.0639581680298, + "p99": 1580.4799795150757 + }, + "isolatedSum": { + "p50": 108.25599730014801, + "p90": 122.04799801111221, + "p95": 129.85599786043167, + "p99": 146.2399959564209 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 44.16000097990036, + "p90": 49.375999718904495, + "p95": 53.37600037455559, + "p99": 59.776000678539276 + }, + "combine": { + "p50": 65.50399959087372, + "p90": 72.09599763154984, + "p95": 74.40000027418137, + "p99": 79.29600030183792 + }, + "roundtrip": { + "p50": 1535.7760190963745, + "p90": 1544.6079969406128, + "p95": 1547.104001045227, + "p99": 1551.743984222412 + }, + "isolatedSum": { + "p50": 109.66400057077408, + "p90": 121.47199735045433, + "p95": 127.77600064873695, + "p99": 139.0720009803772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 44.60800066590309, + "p90": 50.04800111055374, + "p95": 54.23999950289726, + "p99": 60.127999633550644 + }, + "combine": { + "p50": 67.55200028419495, + "p90": 74.23999905586243, + "p95": 77.40800082683563, + "p99": 83.5840031504631 + }, + "roundtrip": { + "p50": 1536.0000133514404, + "p90": 1544.160008430481, + "p95": 1547.0080375671387, + "p99": 1554.4960498809814 + }, + "isolatedSum": { + "p50": 112.16000095009804, + "p90": 124.28800016641617, + "p95": 131.6480003297329, + "p99": 143.71200278401375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.56800052523613, + "p90": 50.175998359918594, + "p95": 55.03999814391136, + "p99": 61.02399900555611 + }, + "combine": { + "p50": 68.89600306749344, + "p90": 76.9599974155426, + "p95": 79.64800298213959, + "p99": 84.57600325345993 + }, + "roundtrip": { + "p50": 1538.6240482330322, + "p90": 1546.7840433120728, + "p95": 1549.2160320281982, + "p99": 1554.2399883270264 + }, + "isolatedSum": { + "p50": 114.46400359272957, + "p90": 127.1359957754612, + "p95": 134.68800112605095, + "p99": 145.60000225901604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 48.48000034689903, + "p90": 52.352000027894974, + "p95": 55.1999993622303, + "p99": 62.07999959588051 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 73.66400212049484, + "p95": 75.58400183916092, + "p99": 79.8719972372055 + }, + "roundtrip": { + "p50": 1543.0079698562622, + "p90": 1574.079990386963, + "p95": 1600.640058517456, + "p99": 13291.616439819336 + }, + "isolatedSum": { + "p50": 115.77599868178368, + "p90": 126.01600214838982, + "p95": 130.78400120139122, + "p99": 141.951996833086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 53.66399884223938, + "p90": 57.50399827957153, + "p95": 59.808000922203064, + "p99": 66.6240006685257 + }, + "combine": { + "p50": 74.30399954319, + "p90": 81.08799904584885, + "p95": 82.71999657154083, + "p99": 
91.39200299978256 + }, + "roundtrip": { + "p50": 1555.6479692459106, + "p90": 1562.175989151001, + "p95": 1564.8640394210815, + "p99": 1569.6640014648438 + }, + "isolatedSum": { + "p50": 127.96799838542938, + "p90": 138.59199732542038, + "p95": 142.5279974937439, + "p99": 158.01600366830826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ef83f327", + "identity": "gb300|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_b8af531e", + "comparisonKey": "71503b9e265e42a9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:46.571965+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": 
true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 44.76799815893173, + "p90": 49.8879998922348, + "p95": 53.0879981815815, + "p99": 59.58399921655655 + }, + "combine": { + "p50": 56.63999915122986, + "p90": 65.50399959087372, + "p95": 67.52000004053116, + "p99": 71.52000069618225 + }, + "roundtrip": { + "p50": 1527.3280143737793, + "p90": 1535.871982574463, + "p95": 1538.912057876587, + "p99": 1554.2080402374268 + }, + "isolatedSum": { + "p50": 101.40799731016159, + "p90": 115.39199948310852, + "p95": 120.60799822211266, + "p99": 131.1039999127388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 45.21600157022476, + "p90": 50.71999877691269, + "p95": 54.11199852824211, + "p99": 60.92799827456474 + }, + "combine": { + "p50": 58.30400064587593, + "p90": 64.67200070619583, + "p95": 67.26399809122086, + "p99": 71.61600142717361 + }, + "roundtrip": { + "p50": 1527.1999835968018, + "p90": 1535.1680517196655, + "p95": 1537.4399423599243, + "p99": 1542.2719717025757 + }, + "isolatedSum": { + "p50": 103.52000221610069, + "p90": 115.39199948310852, + "p95": 121.37599661946297, + "p99": 132.54399970173836 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 45.27999833226204, + "p90": 50.20799860358238, + "p95": 53.15199866890907, + "p99": 59.039998799562454 + }, + "combine": { + "p50": 56.48000165820122, + "p90": 64.64000046253204, + "p95": 67.07199662923813, + "p99": 73.82400333881378 + }, + "roundtrip": { + "p50": 1525.7600545883179, + "p90": 1533.4080457687378, + "p95": 1535.9679460525513, + "p99": 1544.543981552124 + }, + "isolatedSum": { + "p50": 101.75999999046326, + "p90": 114.84799906611443, + "p95": 120.2239952981472, + "p99": 132.86400213837624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 45.21600157022476, + "p90": 50.303999334573746, + "p95": 53.79199981689453, + "p99": 57.312000542879105 + }, + "combine": { + "p50": 58.97599831223488, + "p90": 65.5359998345375, + "p95": 67.58400052785873, + "p99": 72.83200323581696 + }, + "roundtrip": { + "p50": 1529.3760299682617, + "p90": 1536.9600057601929, + "p95": 1542.1119928359985, + "p99": 1569.4400072097778 + }, + "isolatedSum": { + "p50": 104.19199988245964, + "p90": 115.83999916911125, + "p95": 121.37600034475327, + "p99": 130.14400377869606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 45.184001326560974, + "p90": 51.58400163054466, + "p95": 55.48800155520439, + "p99": 
59.84000116586685 + }, + "combine": { + "p50": 60.15999987721443, + "p90": 67.90400296449661, + "p95": 70.72000205516815, + "p99": 75.23199915885925 + }, + "roundtrip": { + "p50": 1528.864026069641, + "p90": 1536.8640422821045, + "p95": 1539.29603099823, + "p99": 1544.0959930419922 + }, + "isolatedSum": { + "p50": 105.3440012037754, + "p90": 119.48800459504128, + "p95": 126.20800361037254, + "p99": 135.0720003247261 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 46.112000942230225, + "p90": 50.592001527547836, + "p95": 54.048001766204834, + "p99": 58.78400057554245 + }, + "combine": { + "p50": 62.94400244951248, + "p90": 70.14399766921997, + "p95": 72.22399860620499, + "p99": 77.2479996085167 + }, + "roundtrip": { + "p50": 1532.3200225830078, + "p90": 1538.7840270996094, + "p95": 1540.7040119171143, + "p99": 1544.927954673767 + }, + "isolatedSum": { + "p50": 109.0560033917427, + "p90": 120.7359991967678, + "p95": 126.27200037240982, + "p99": 136.03200018405914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 48.38399961590767, + "p90": 53.727999329566956, + "p95": 57.8560009598732, + "p99": 65.11999666690826 + }, + "combine": { + "p50": 62.6240000128746, + "p90": 67.74400174617767, + "p95": 69.43999975919724, + "p99": 73.5040009021759 + }, + "roundtrip": { + "p50": 1536.3199710845947, + "p90": 1541.6959524154663, + "p95": 1543.8400506973267, + "p99": 1547.6479530334473 + }, + "isolatedSum": { + "p50": 111.00799962878227, + "p90": 121.47200107574463, 
+ "p95": 127.29600071907043, + "p99": 138.62399756908417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 54.336000233888626, + "p90": 58.01599845290184, + "p95": 61.02399900555611, + "p99": 66.65600091218948 + }, + "combine": { + "p50": 71.74400240182877, + "p90": 76.64000242948532, + "p95": 78.23999971151352, + "p99": 82.14399963617325 + }, + "roundtrip": { + "p50": 1552.8000593185425, + "p90": 1557.8880310058594, + "p95": 1559.2639446258545, + "p99": 1562.4639987945557 + }, + "isolatedSum": { + "p50": 126.08000263571739, + "p90": 134.65600088238716, + "p95": 139.26399871706963, + "p99": 148.80000054836273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-04b574d4", + "identity": "gb300|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "1e02f4e7861b9a89", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:32.248707+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": 
"uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 92.6399976015091, + "p90": 108.25599730014801, + "p95": 114.14399743080139, + "p99": 127.26399302482605 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 78.49600166082382, + "p95": 84.44800227880478, + "p99": 124.32000041007996 + }, + "roundtrip": { + "p50": 143.64799857139587, + "p90": 157.31200575828552, + "p95": 161.85599565505981, + "p99": 174.14399981498718 + }, + "isolatedSum": { + "p50": 166.33599996566772, + "p90": 186.75199896097183, + "p95": 198.59199970960617, + "p99": 251.583993434906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 92.38400310277939, + "p90": 108.09600353240967, + "p95": 112.22399771213531, + "p99": 127.3919939994812 + }, + "combine": { + "p50": 74.46400076150894, + "p90": 78.23999971151352, + "p95": 80.48000186681747, + "p99": 85.4400023818016 + }, + "roundtrip": { + "p50": 147.10399508476257, + "p90": 159.58400070667267, + "p95": 166.9120043516159, + "p99": 176.83200538158417 + }, + "isolatedSum": { + "p50": 166.84800386428833, + "p90": 186.3360032439232, + "p95": 192.7039995789528, + "p99": 212.8319963812828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 93.66399794816971, + "p90": 110.11199653148651, + "p95": 116.06399714946747, + "p99": 133.4719955921173 + }, + "combine": { + "p50": 75.03999769687653, + "p90": 80.57600259780884, + "p95": 86.59200370311737, + "p99": 97.6639986038208 + }, + "roundtrip": { + "p50": 147.13600277900696, + "p90": 160.67199409008026, + "p95": 164.99200463294983, + "p99": 172.31999337673187 + }, + "isolatedSum": { + "p50": 168.70399564504623, + "p90": 190.68799912929535, + "p95": 202.65600085258484, + "p99": 231.1359941959381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 92.99200028181076, + "p90": 108.15999656915665, + "p95": 116.99199676513672, + "p99": 140.51200449466705 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 84.25600081682205, + "p95": 86.68799698352814, + "p99": 91.39200299978256 + }, + "roundtrip": { + "p50": 150.27199685573578, + "p90": 
160.5439931154251, + "p95": 164.95999693870544, + "p99": 177.15199291706085 + }, + "isolatedSum": { + "p50": 169.91999745368958, + "p90": 192.4159973859787, + "p95": 203.67999374866486, + "p99": 231.90400749444962 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 93.05600076913834, + "p90": 107.26399719715118, + "p95": 112.03200370073318, + "p99": 126.75200402736664 + }, + "combine": { + "p50": 77.504001557827, + "p90": 86.59200370311737, + "p95": 89.08800035715103, + "p99": 123.80799651145935 + }, + "roundtrip": { + "p50": 151.48800611495972, + "p90": 164.000004529953, + "p95": 169.0240055322647, + "p99": 188.60800564289093 + }, + "isolatedSum": { + "p50": 170.56000232696533, + "p90": 193.85600090026855, + "p95": 201.12000405788422, + "p99": 250.560000538826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.59199821949005, + "p90": 109.79200154542923, + "p95": 115.1999980211258, + "p99": 127.61600315570831 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 88.06400001049042, + "p95": 90.17600119113922, + "p99": 98.84800016880035 + }, + "roundtrip": { + "p50": 153.98399531841278, + "p90": 167.10400581359863, + "p95": 173.08799922466278, + "p99": 181.95199966430664 + }, + "isolatedSum": { + "p50": 179.61599677801132, + "p90": 197.85600155591965, + "p95": 205.37599921226501, + "p99": 226.46400332450867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + 
"recvTokensMax": 186, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 103.35999727249146, + "p90": 113.72800171375275, + "p95": 117.8240031003952, + "p99": 122.30399996042252 + }, + "combine": { + "p50": 92.99200028181076, + "p90": 100.92800110578537, + "p95": 104.73600029945374, + "p99": 139.0399932861328 + }, + "roundtrip": { + "p50": 167.80799627304077, + "p90": 179.45599555969238, + "p95": 184.28799510002136, + "p99": 200.00000298023224 + }, + "isolatedSum": { + "p50": 196.35199755430222, + "p90": 214.65600281953812, + "p95": 222.56000339984894, + "p99": 261.3439932465553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.09599739313126, + "p90": 126.11199915409088, + "p95": 129.5360028743744, + "p99": 138.36799561977386 + }, + "combine": { + "p50": 109.56799983978271, + "p90": 113.79200220108032, + "p95": 115.29599875211716, + "p99": 121.11999839544296 + }, + "roundtrip": { + "p50": 194.46399807929993, + "p90": 204.73599433898926, + "p95": 207.519993185997, + "p99": 259.13599133491516 + }, + "isolatedSum": { + "p50": 225.66399723291397, + "p90": 239.9040013551712, + "p95": 244.83200162649155, + "p99": 259.4879940152168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bc2ee7f", + "identity": "gb300|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb300_bd97b71f", + "comparisonKey": 
"48e66e1ac2b71fc9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:38.681283+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 92.6399976015091, + "p90": 108.83200168609619, + "p95": 114.20799791812897, + "p99": 
125.95200538635254 + }, + "combine": { + "p50": 74.91199672222137, + "p90": 83.77599716186523, + "p95": 86.14400029182434, + "p99": 96.96000069379807 + }, + "roundtrip": { + "p50": 146.17599546909332, + "p90": 160.0320041179657, + "p95": 164.86400365829468, + "p99": 171.55200242996216 + }, + "isolatedSum": { + "p50": 167.55199432373047, + "p90": 192.60799884796143, + "p95": 200.3519982099533, + "p99": 222.9120060801506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 93.02400052547455, + "p90": 118.59200149774551, + "p95": 136.51199638843536, + "p99": 213.28000724315643 + }, + "combine": { + "p50": 75.03999769687653, + "p90": 86.20800077915192, + "p95": 92.6079973578453, + "p99": 139.20000195503235 + }, + "roundtrip": { + "p50": 150.04800260066986, + "p90": 171.9679981470108, + "p95": 212.12799847126007, + "p99": 278.6880135536194 + }, + "isolatedSum": { + "p50": 168.06399822235107, + "p90": 204.80000227689743, + "p95": 229.11999374628067, + "p99": 352.4800091981888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 92.12800115346909, + "p90": 106.55999928712845, + "p95": 111.55200004577637, + "p99": 128.48000228405 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 86.40000224113464, + "p95": 89.4400030374527, + "p99": 98.49599748849869 + }, + "roundtrip": { + "p50": 149.53599870204926, + "p90": 164.57599401474, + "p95": 168.99199783802032, + "p99": 182.27200210094452 + }, + "isolatedSum": { + "p50": 169.37600076198578, + "p90": 192.9600015282631, + "p95": 
200.99200308322906, + "p99": 226.97599977254868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 93.9520001411438, + "p90": 111.93600296974182, + "p95": 120.99199742078781, + "p99": 192.19200313091278 + }, + "combine": { + "p50": 81.24800026416779, + "p90": 88.92799913883209, + "p95": 96.67199850082397, + "p99": 131.23199343681335 + }, + "roundtrip": { + "p50": 151.99999511241913, + "p90": 169.47199404239655, + "p95": 175.6799966096878, + "p99": 259.6159875392914 + }, + "isolatedSum": { + "p50": 175.20000040531158, + "p90": 200.8640021085739, + "p95": 217.6639959216118, + "p99": 323.42399656772614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.10400146245956, + "p90": 108.60799998044968, + "p95": 115.90400338172913, + "p99": 153.34400534629822 + }, + "combine": { + "p50": 82.75199681520462, + "p90": 88.128000497818, + "p95": 91.61599725484848, + "p99": 98.27200323343277 + }, + "roundtrip": { + "p50": 153.9520025253296, + "p90": 169.0559983253479, + "p95": 173.18400740623474, + "p99": 189.69599902629852 + }, + "isolatedSum": { + "p50": 177.85599827766418, + "p90": 196.73600047826767, + "p95": 207.5200006365776, + "p99": 251.616008579731 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 
98.9760011434555, + "p90": 113.40799927711487, + "p95": 122.65600264072418, + "p99": 195.48800587654114 + }, + "combine": { + "p50": 85.4720026254654, + "p90": 96.0640013217926, + "p95": 104.73600029945374, + "p99": 150.176003575325 + }, + "roundtrip": { + "p50": 155.68000078201294, + "p90": 174.84800517559052, + "p95": 185.5359971523285, + "p99": 279.55201268196106 + }, + "isolatedSum": { + "p50": 184.4480037689209, + "p90": 209.47200059890747, + "p95": 227.39200294017792, + "p99": 345.66400945186615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 106.4319983124733, + "p90": 116.80000275373459, + "p95": 120.12799829244614, + "p99": 130.23999333381653 + }, + "combine": { + "p50": 98.81599992513657, + "p90": 107.26399719715118, + "p95": 109.50399935245514, + "p99": 115.4559999704361 + }, + "roundtrip": { + "p50": 176.06399953365326, + "p90": 185.63200533390045, + "p95": 189.56799805164337, + "p99": 199.5840072631836 + }, + "isolatedSum": { + "p50": 205.24799823760986, + "p90": 224.06399995088577, + "p95": 229.63199764490128, + "p99": 245.69599330425262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.34400010108948, + "p90": 133.18400084972382, + "p95": 138.36799561977386, + "p99": 190.14400243759155 + }, + "combine": { + "p50": 117.72800236940384, + "p90": 124.79999661445618, + "p95": 131.8719983100891, + "p99": 171.39199376106262 + }, + "roundtrip": { + "p50": 208.064004778862, + "p90": 220.12799978256226, + "p95": 225.40800273418427, 
+ "p99": 277.3120105266571 + }, + "isolatedSum": { + "p50": 239.07200247049332, + "p90": 257.98399746418, + "p95": 270.239993929863, + "p99": 361.5359961986542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0565a103", + "identity": "gb300|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "ff2d0229ca9971ae", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:48.337343+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 104.67199981212616, + "p90": 120.92799693346024, + "p95": 125.2799928188324, + "p99": 135.6160044670105 + }, + "combine": { + "p50": 82.40000158548355, + "p90": 88.73599767684937, + "p95": 95.45599669218063, + "p99": 100.38399696350098 + }, + "roundtrip": { + "p50": 160.25599837303162, + "p90": 173.24799299240112, + "p95": 176.9919991493225, + "p99": 186.3359957933426 + }, + "isolatedSum": { + "p50": 187.0720013976097, + "p90": 209.6639946103096, + "p95": 220.73598951101303, + "p99": 236.00000143051147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 104.22399640083313, + "p90": 121.50400131940842, + "p95": 126.17599964141846, + "p99": 140.6400054693222 + }, + "combine": { + "p50": 83.3280012011528, + "p90": 88.60799670219421, + "p95": 90.87999910116196, + "p99": 96.8639999628067 + }, + "roundtrip": { + "p50": 162.62400150299072, + "p90": 177.44000256061554, + "p95": 181.5679967403412, + "p99": 191.67999923229218 + }, + "isolatedSum": { + "p50": 187.55199760198593, + "p90": 210.11199802160263, + "p95": 217.0559987425804, + "p99": 237.5040054321289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 105.12000322341919, + "p90": 120.99199742078781, + "p95": 128.9599984884262, + "p99": 140.4159963130951 + }, + "combine": { + "p50": 85.1840004324913, + "p90": 90.46400338411331, + "p95": 95.10400146245956, + "p99": 100.41599720716476 + }, + "roundtrip": { + "p50": 163.55200111865997, + "p90": 177.5680035352707, + "p95": 184.09599363803864, + "p99": 205.50400018692017 + }, + "isolatedSum": { + "p50": 190.3040036559105, + "p90": 211.45600080490112, + "p95": 224.06399995088577, + "p99": 240.83199352025986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 105.3759977221489, + "p90": 122.46400117874146, + "p95": 129.02399897575378, + "p99": 139.5840048789978 + }, + "combine": { + "p50": 86.36800199747086, + "p90": 94.40000355243683, + "p95": 98.43199700117111, + "p99": 105.72800040245056 + }, + "roundtrip": { + "p50": 165.53600132465363, + "p90": 178.56000363826752, + "p95": 181.7920058965683, + "p99": 192.44800508022308 + }, + "isolatedSum": { + "p50": 191.74399971961975, + "p90": 216.86400473117828, + "p95": 227.4559959769249, + "p99": 245.31200528144836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 107.58399963378906, + "p90": 123.3920007944107, + "p95": 127.32799351215363, + "p99": 154.52800691127777 + }, + "combine": { + "p50": 87.74399757385254, + "p90": 96.16000205278397, + "p95": 
98.52799773216248, + "p99": 104.67199981212616 + }, + "roundtrip": { + "p50": 167.42399334907532, + "p90": 178.71999740600586, + "p95": 182.97599256038666, + "p99": 196.60800695419312 + }, + "isolatedSum": { + "p50": 195.3279972076416, + "p90": 219.55200284719467, + "p95": 225.8559912443161, + "p99": 259.20000672340393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 106.55999928712845, + "p90": 120.99199742078781, + "p95": 124.60800260305405, + "p99": 135.6160044670105 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 98.78399968147278, + "p95": 101.27999633550644, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 171.74400389194489, + "p90": 185.31200289726257, + "p95": 189.7280067205429, + "p99": 204.16000485420227 + }, + "isolatedSum": { + "p50": 196.8960016965866, + "p90": 219.7759971022606, + "p95": 225.8879989385605, + "p99": 245.37600576877594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 115.93600362539291, + "p90": 125.5359947681427, + "p95": 129.56799566745758, + "p99": 142.11200177669525 + }, + "combine": { + "p50": 104.47999835014343, + "p90": 112.8000020980835, + "p95": 115.03999680280685, + "p99": 123.55200201272964 + }, + "roundtrip": { + "p50": 188.48000466823578, + "p90": 200.8640021085739, + "p95": 205.1520049571991, + "p99": 215.10399878025055 + }, + "isolatedSum": { + "p50": 220.41600197553635, + "p90": 238.3359968662262, + "p95": 244.60799247026443, + "p99": 265.6640037894249 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.88799810409546, + "p90": 137.85600662231445, + "p95": 142.20799505710602, + "p99": 150.27199685573578 + }, + "combine": { + "p50": 124.1919994354248, + "p90": 131.71200454235077, + "p95": 135.23200154304504, + "p99": 143.5839980840683 + }, + "roundtrip": { + "p50": 220.57600319385529, + "p90": 231.3919961452484, + "p95": 234.65600609779358, + "p99": 242.62399971485138 + }, + "isolatedSum": { + "p50": 254.07999753952026, + "p90": 269.5680111646652, + "p95": 277.43999660015106, + "p99": 293.8559949398041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d7b6858", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_882967eb", + "comparisonKey": "54ea46d930c7cbe6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:29.932964+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.10400146245956, + "p90": 111.61600053310394, + "p95": 120.51200121641159, + "p99": 136.1600011587143 + }, + "combine": { + "p50": 85.79199761152267, + "p90": 91.77599847316742, + "p95": 96.28800302743912, + "p99": 101.95200145244598 + }, + "roundtrip": { + "p50": 153.72799336910248, + "p90": 166.72000288963318, + "p95": 171.58399522304535, + "p99": 189.05599415302277 + }, + "isolatedSum": { + "p50": 180.89599907398224, + "p90": 203.39199900627136, + "p95": 216.8000042438507, + "p99": 238.11200261116028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
94.62399780750275, + "p90": 112.86400258541107, + "p95": 120.31999975442886, + "p99": 186.81600689888 + }, + "combine": { + "p50": 86.62399649620056, + "p90": 94.46399658918381, + "p95": 99.13600236177444, + "p99": 107.80800133943558 + }, + "roundtrip": { + "p50": 156.22399747371674, + "p90": 170.9440052509308, + "p95": 177.72799730300903, + "p99": 214.36800062656403 + }, + "isolatedSum": { + "p50": 181.2479943037033, + "p90": 207.32799917459488, + "p95": 219.4560021162033, + "p99": 294.6240082383156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 94.55999732017517, + "p90": 109.98400300741196, + "p95": 116.48000031709671, + "p99": 156.5759927034378 + }, + "combine": { + "p50": 86.36800199747086, + "p90": 94.62399780750275, + "p95": 99.55199807882309, + "p99": 109.8880022764206 + }, + "roundtrip": { + "p50": 156.54399991035461, + "p90": 172.4800020456314, + "p95": 175.9359985589981, + "p99": 228.7680059671402 + }, + "isolatedSum": { + "p50": 180.92799931764603, + "p90": 204.6080008149147, + "p95": 216.0319983959198, + "p99": 266.4639949798584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.3520035147667, + "p90": 113.34399878978729, + "p95": 121.24799937009811, + "p99": 162.91199624538422 + }, + "combine": { + "p50": 87.45600283145905, + "p90": 96.09600156545639, + "p95": 99.48799759149551, + "p99": 115.68000167608261 + }, + "roundtrip": { + "p50": 159.5200002193451, + "p90": 173.15199971199036, + "p95": 179.48800325393677, + "p99": 214.6880030632019 + 
}, + "isolatedSum": { + "p50": 183.80800634622574, + "p90": 209.44000035524368, + "p95": 220.73599696159363, + "p99": 278.5919979214668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.28000313043594, + "p90": 114.27199840545654, + "p95": 122.84799665212631, + "p99": 163.90399634838104 + }, + "combine": { + "p50": 88.54400366544724, + "p90": 97.6639986038208, + "p95": 99.7759997844696, + "p99": 123.48800152540207 + }, + "roundtrip": { + "p50": 161.56800091266632, + "p90": 174.84800517559052, + "p95": 179.74400520324707, + "p99": 205.05599677562714 + }, + "isolatedSum": { + "p50": 185.82400679588318, + "p90": 211.93599700927734, + "p95": 222.62399643659592, + "p99": 287.3919978737831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.01600193977356, + "p90": 116.28799885511398, + "p95": 121.60000205039978, + "p99": 155.5519998073578 + }, + "combine": { + "p50": 96.57599776983261, + "p90": 102.30399668216705, + "p95": 108.60799998044968, + "p99": 124.06399846076965 + }, + "roundtrip": { + "p50": 166.72000288963318, + "p90": 178.8800060749054, + "p95": 183.23199450969696, + "p99": 203.8400024175644 + }, + "isolatedSum": { + "p50": 198.59199970960617, + "p90": 218.59199553728104, + "p95": 230.20800203084946, + "p99": 279.61599826812744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.86400258541107, + "p90": 125.5359947681427, + "p95": 131.20000064373016, + "p99": 164.8000031709671 + }, + "combine": { + "p50": 108.83200168609619, + "p90": 114.56000059843063, + "p95": 120.15999853610992, + "p99": 143.42400431632996 + }, + "roundtrip": { + "p50": 191.39200448989868, + "p90": 201.12000405788422, + "p95": 204.83200252056122, + "p99": 246.91200256347656 + }, + "isolatedSum": { + "p50": 221.69600427150726, + "p90": 240.09599536657333, + "p95": 251.3599991798401, + "p99": 308.22400748729706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.91199779510498, + "p90": 138.5280042886734, + "p95": 142.62400567531586, + "p99": 165.98400473594666 + }, + "combine": { + "p50": 127.07200646400452, + "p90": 135.93600690364838, + "p95": 138.36799561977386, + "p99": 160.5439931154251 + }, + "roundtrip": { + "p50": 224.99200701713562, + "p90": 235.71200668811798, + "p95": 240.63999950885773, + "p99": 276.095986366272 + }, + "isolatedSum": { + "p50": 253.9840042591095, + "p90": 274.4640111923218, + "p95": 280.9920012950897, + "p99": 326.52799785137177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-82b66a40", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "430bb0cee6504858", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:34.018422+00:00", + "status": 
"valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.76799923181534, + "p90": 113.82400244474411, + "p95": 119.4240003824234, + "p99": 157.69599378108978 + }, + "combine": { + "p50": 85.31200140714645, + "p90": 92.44800359010696, + "p95": 
97.28000313043594, + "p99": 111.61600053310394 + }, + "roundtrip": { + "p50": 155.93600273132324, + "p90": 168.70400309562683, + "p95": 173.15199971199036, + "p99": 199.10399615764618 + }, + "isolatedSum": { + "p50": 182.0800006389618, + "p90": 206.27200603485107, + "p95": 216.70400351285934, + "p99": 269.3119943141937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.47999703884125, + "p90": 113.0559965968132, + "p95": 121.50400131940842, + "p99": 184.1920018196106 + }, + "combine": { + "p50": 87.26400136947632, + "p90": 94.33600306510925, + "p95": 98.39999675750732, + "p99": 110.3999987244606 + }, + "roundtrip": { + "p50": 157.47199952602386, + "p90": 174.5920032262802, + "p95": 188.4160041809082, + "p99": 262.14399933815 + }, + "isolatedSum": { + "p50": 183.74399840831757, + "p90": 207.39199966192245, + "p95": 219.90399807691574, + "p99": 294.5920005440712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.43199700117111, + "p90": 114.52800035476685, + "p95": 118.78400295972824, + "p99": 171.2000072002411 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 96.12800180912018, + "p95": 100.67199915647507, + "p99": 111.64800077676773 + }, + "roundtrip": { + "p50": 159.84000265598297, + "p90": 173.34400117397308, + "p95": 178.71999740600586, + "p99": 230.3680032491684 + }, + "isolatedSum": { + "p50": 186.36799603700638, + "p90": 210.65600216388702, + "p95": 219.4560021162033, + "p99": 282.8480079770088 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.69599884748459, + "p90": 112.86400258541107, + "p95": 124.15999919176102, + "p99": 170.3999936580658 + }, + "combine": { + "p50": 88.44800293445587, + "p90": 95.93600034713745, + "p95": 98.88000041246414, + "p99": 113.79200220108032 + }, + "roundtrip": { + "p50": 160.76800227165222, + "p90": 175.20000040531158, + "p95": 181.72800540924072, + "p99": 235.167995095253 + }, + "isolatedSum": { + "p50": 186.14400178194046, + "p90": 208.80000293254852, + "p95": 223.03999960422516, + "p99": 284.1919958591461 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 99.29600358009338, + "p90": 112.5119999051094, + "p95": 119.52000111341476, + "p99": 138.68799805641174 + }, + "combine": { + "p50": 90.04800021648407, + "p90": 98.2080027461052, + "p95": 100.44799745082855, + "p99": 111.96800321340561 + }, + "roundtrip": { + "p50": 162.84799575805664, + "p90": 174.52800273895264, + "p95": 179.87200617790222, + "p99": 194.17600333690643 + }, + "isolatedSum": { + "p50": 189.34400379657745, + "p90": 210.7200026512146, + "p95": 219.96799856424332, + "p99": 250.65600126981735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.31199657917023, + "p90": 114.04799669981003, + "p95": 118.8800036907196, + "p99": 
132.6719969511032 + }, + "combine": { + "p50": 95.61599791049957, + "p90": 102.36799716949463, + "p95": 107.61599987745285, + "p99": 143.74400675296783 + }, + "roundtrip": { + "p50": 168.83200407028198, + "p90": 181.95199966430664, + "p95": 189.2479956150055, + "p99": 252.25600600242615 + }, + "isolatedSum": { + "p50": 196.9279944896698, + "p90": 216.41599386930466, + "p95": 226.49600356817245, + "p99": 276.41600370407104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 114.07999694347382, + "p90": 128.4160017967224, + "p95": 133.82400572299957, + "p99": 145.91999351978302 + }, + "combine": { + "p50": 109.24799740314484, + "p90": 117.88800358772278, + "p95": 124.32000041007996, + "p99": 156.15999698638916 + }, + "roundtrip": { + "p50": 191.03999435901642, + "p90": 200.8959949016571, + "p95": 205.31199872493744, + "p99": 241.37599766254425 + }, + "isolatedSum": { + "p50": 223.32799434661865, + "p90": 246.3040053844452, + "p95": 258.14400613307953, + "p99": 302.0799905061722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.49600207805634, + "p90": 140.70400595664978, + "p95": 146.81600034236908, + "p99": 206.7839950323105 + }, + "combine": { + "p50": 128.06400656700134, + "p90": 136.1600011587143, + "p95": 140.1280015707016, + "p99": 156.22399747371674 + }, + "roundtrip": { + "p50": 226.33600234985352, + "p90": 240.63999950885773, + "p95": 247.26399779319763, + "p99": 290.6560003757477 + }, + "isolatedSum": { + "p50": 254.56000864505768, + "p90": 
276.8640071153641, + "p95": 286.9440019130707, + "p99": 363.0079925060272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-15c3bc03", + "identity": "gb300|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "3ed25db3d116468d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:22.842418+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 99.61599856615067, + "p90": 119.4240003824234, + "p95": 134.94400680065155, + "p99": 174.14399981498718 + }, + "combine": { + "p50": 85.40800213813782, + "p90": 91.2960022687912, + "p95": 98.11200201511383, + "p99": 136.31999492645264 + }, + "roundtrip": { + "p50": 158.07999670505524, + "p90": 177.95200645923615, + "p95": 192.73599982261658, + "p99": 243.1039959192276 + }, + "isolatedSum": { + "p50": 185.02400070428848, + "p90": 210.7200026512146, + "p95": 233.05600881576538, + "p99": 310.4639947414398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.78399968147278, + "p90": 116.73600226640701, + "p95": 122.17599898576736, + "p99": 155.87200224399567 + }, + "combine": { + "p50": 86.46400272846222, + "p90": 91.839998960495, + "p95": 97.59999811649323, + "p99": 109.3439981341362 + }, + "roundtrip": { + "p50": 159.64800119400024, + "p90": 175.4239946603775, + "p95": 179.71199750900269, + "p99": 217.056006193161 + }, + "isolatedSum": { + "p50": 185.248002409935, + "p90": 208.576001226902, + "p95": 219.7759971022606, + "p99": 265.21600037813187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 100.44799745082855, + "p90": 118.75200271606445, + "p95": 127.87200510501862, + "p99": 163.58399391174316 + }, + "combine": { + "p50": 87.10400015115738, + "p90": 99.90400075912476, + "p95": 111.13599687814713, + "p99": 144.51199769973755 + }, + "roundtrip": { + "p50": 161.53599321842194, + "p90": 181.8239986896515, + "p95": 192.4159973859787, + "p99": 247.51999974250793 + }, + "isolatedSum": { + "p50": 187.55199760198593, + "p90": 218.6560034751892, + "p95": 239.00800198316574, + "p99": 308.0959916114807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 101.82400047779083, + "p90": 118.20799857378006, + "p95": 126.46399438381195, + "p99": 178.6240041255951 + }, + "combine": { + "p50": 88.44800293445587, + "p90": 95.48799693584442, + "p95": 98.01600128412247, + "p99": 106.65600001811981 + }, + "roundtrip": { + "p50": 165.8560037612915, + "p90": 181.85600638389587, + "p95": 186.91200017929077, + "p99": 242.62399971485138 + }, + "isolatedSum": { + "p50": 190.2720034122467, + "p90": 213.69599550962448, + "p95": 224.47999566793442, + "p99": 285.2800041437149 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 101.15200281143188, + "p90": 117.27999895811081, + "p95": 124.22399967908859, + "p99": 164.60800170898438 + }, + "combine": { + "p50": 89.4400030374527, + "p90": 97.75999933481216, + "p95": 101.21600329875946, + "p99": 124.79999661445618 + }, + "roundtrip": { + "p50": 168.19199919700623, + 
"p90": 186.27199530601501, + "p95": 201.4400064945221, + "p99": 268.8640058040619 + }, + "isolatedSum": { + "p50": 190.59200584888458, + "p90": 215.03999829292297, + "p95": 225.44000297784805, + "p99": 289.40799832344055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.92800176143646, + "p90": 118.23999881744385, + "p95": 127.55200266838074, + "p99": 174.97600615024567 + }, + "combine": { + "p50": 98.30400347709656, + "p90": 103.4879982471466, + "p95": 107.32799768447876, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 172.03199863433838, + "p90": 188.22400271892548, + "p95": 196.9279944896698, + "p99": 255.264014005661 + }, + "isolatedSum": { + "p50": 203.23200523853302, + "p90": 221.72799706459045, + "p95": 234.8800003528595, + "p99": 289.0560030937195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 114.56000059843063, + "p90": 127.51999497413635, + "p95": 132.7040046453476, + "p99": 177.7919977903366 + }, + "combine": { + "p50": 110.75200140476227, + "p90": 116.03199690580368, + "p95": 122.079998254776, + "p99": 138.3039951324463 + }, + "roundtrip": { + "p50": 194.65599954128265, + "p90": 206.84799551963806, + "p95": 215.5199944972992, + "p99": 260.76799631118774 + }, + "isolatedSum": { + "p50": 225.3120020031929, + "p90": 243.55199187994003, + "p95": 254.7840029001236, + "p99": 316.0959929227829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + 
"recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.65600395202637, + "p90": 142.97600090503693, + "p95": 147.77599275112152, + "p99": 171.61600291728973 + }, + "combine": { + "p50": 131.29599392414093, + "p90": 137.7599984407425, + "p95": 141.63200557231903, + "p99": 161.05599701404572 + }, + "roundtrip": { + "p50": 230.75200617313385, + "p90": 240.35200476646423, + "p95": 245.05600333213806, + "p99": 297.0240116119385 + }, + "isolatedSum": { + "p50": 261.9519978761673, + "p90": 280.7359993457794, + "p95": 289.40799832344055, + "p99": 332.67199993133545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dab480e3", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||f1c99f5cf8ca9ed", + "colorKey": "gb300_6379de25", + "comparisonKey": "57291e819ecd118a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:18.724787+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": 
"v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f1c99f5cf8ca9ed", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 105.27999699115753, + "p90": 149.75999295711517, + "p95": 157.1200042963028, + "p99": 171.32799327373505 + }, + "combine": { + "p50": 100.5759984254837, + "p90": 147.2959965467453, + "p95": 150.36800503730774, + "p99": 158.81599485874176 + }, + "roundtrip": { + "p50": 169.24799978733063, + "p90": 199.2959976196289, + "p95": 211.67999505996704, + "p99": 230.43200373649597 + }, + "isolatedSum": { + "p50": 205.85599541664124, + "p90": 297.0559895038605, + "p95": 307.48800933361053, + "p99": 330.1439881324768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 106.9440022110939, + "p90": 151.2639969587326, + "p95": 155.20000457763672, + "p99": 
164.22399878501892 + }, + "combine": { + "p50": 98.4639972448349, + "p90": 145.9520012140274, + "p95": 150.4639983177185, + "p99": 159.96800363063812 + }, + "roundtrip": { + "p50": 166.72000288963318, + "p90": 204.3199986219406, + "p95": 219.64800357818604, + "p99": 243.26400458812714 + }, + "isolatedSum": { + "p50": 205.4079994559288, + "p90": 297.21599817276, + "p95": 305.6640028953552, + "p99": 324.19200241565704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 108.0000028014183, + "p90": 148.83199334144592, + "p95": 154.62400019168854, + "p99": 167.26399958133698 + }, + "combine": { + "p50": 99.29600358009338, + "p90": 146.04799449443817, + "p95": 150.91200172901154, + "p99": 159.64800119400024 + }, + "roundtrip": { + "p50": 175.61599612236023, + "p90": 204.57600057125092, + "p95": 218.49599480628967, + "p99": 231.455996632576 + }, + "isolatedSum": { + "p50": 207.2960063815117, + "p90": 294.8799878358841, + "p95": 305.5360019207001, + "p99": 326.9120007753372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 116.38399958610535, + "p90": 153.72799336910248, + "p95": 157.56799280643463, + "p99": 167.90400445461273 + }, + "combine": { + "p50": 96.19200229644775, + "p90": 146.17599546909332, + "p95": 152.44799852371216, + "p99": 162.75200247764587 + }, + "roundtrip": { + "p50": 172.992005944252, + "p90": 213.82400393486023, + "p95": 226.78400576114655, + "p99": 257.63198733329773 + }, + "isolatedSum": { + "p50": 212.5760018825531, + "p90": 299.9039888381958, + "p95": 
310.0159913301468, + "p99": 330.6560069322586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 106.55999928712845, + "p90": 151.7760008573532, + "p95": 157.60000050067902, + "p99": 167.84000396728516 + }, + "combine": { + "p50": 102.39999741315842, + "p90": 150.751993060112, + "p95": 157.6319932937622, + "p99": 165.12000560760498 + }, + "roundtrip": { + "p50": 176.7359972000122, + "p90": 206.62400126457214, + "p95": 222.81600534915924, + "p99": 236.86400055885315 + }, + "isolatedSum": { + "p50": 208.95999670028687, + "p90": 302.5279939174652, + "p95": 315.2319937944412, + "p99": 332.96000957489014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 113.11999708414078, + "p90": 142.94399321079254, + "p95": 149.9200016260147, + "p99": 167.71200299263 + }, + "combine": { + "p50": 109.63200032711029, + "p90": 151.58399939537048, + "p95": 157.3760062456131, + "p99": 163.5199934244156 + }, + "roundtrip": { + "p50": 184.86399948596954, + "p90": 225.47200322151184, + "p95": 234.78400707244873, + "p99": 247.42400646209717 + }, + "isolatedSum": { + "p50": 222.75199741125107, + "p90": 294.527992606163, + "p95": 307.2960078716278, + "p99": 331.2319964170456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
124.32000041007996, + "p90": 149.85600113868713, + "p95": 156.44800662994385, + "p99": 175.90400576591492 + }, + "combine": { + "p50": 130.8159977197647, + "p90": 151.93599462509155, + "p95": 158.1760048866272, + "p99": 172.38399386405945 + }, + "roundtrip": { + "p50": 211.87199652194977, + "p90": 239.1040027141571, + "p95": 247.1040040254593, + "p99": 275.4879891872406 + }, + "isolatedSum": { + "p50": 255.13599812984467, + "p90": 301.7919957637787, + "p95": 314.62401151657104, + "p99": 348.28799962997437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 146.91199362277985, + "p90": 181.08800053596497, + "p95": 188.73600661754608, + "p99": 198.04799556732178 + }, + "combine": { + "p50": 152.70400047302246, + "p90": 187.8719925880432, + "p95": 195.3279972076416, + "p99": 210.4959934949875 + }, + "roundtrip": { + "p50": 264.73599672317505, + "p90": 302.7519881725311, + "p95": 313.8880133628845, + "p99": 326.81599259376526 + }, + "isolatedSum": { + "p50": 299.6159940958023, + "p90": 368.9599931240082, + "p95": 384.0640038251877, + "p99": 408.54398906230927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-34ed20af", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70", + "colorKey": "gb300_58c6ccd4", + "comparisonKey": "3a73d61d0db4553d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:50.903916+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": 
"gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0bc700e9998f70", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 90.97599983215332, + "p90": 103.7760004401207, + "p95": 107.26399719715118, + "p99": 120.99199742078781 + }, + "combine": { + "p50": 72.76800274848938, + "p90": 78.72000336647034, + "p95": 83.45600217580795, + "p99": 87.20000088214874 + }, + 
"roundtrip": { + "p50": 141.66399836540222, + "p90": 153.31199765205383, + "p95": 157.3760062456131, + "p99": 166.52800142765045 + }, + "isolatedSum": { + "p50": 163.7440025806427, + "p90": 182.49600380659103, + "p95": 190.71999937295914, + "p99": 208.19199830293655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.14400160312653, + "p90": 108.38399827480316, + "p95": 112.73600161075592, + "p99": 126.97599828243256 + }, + "combine": { + "p50": 75.03999769687653, + "p90": 81.40800148248672, + "p95": 84.99199897050858, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 146.43199741840363, + "p90": 159.90400314331055, + "p95": 164.19200599193573, + "p99": 180.86400628089905 + }, + "isolatedSum": { + "p50": 169.18399930000305, + "p90": 189.7919997572899, + "p95": 197.7280005812645, + "p99": 215.96799790859222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.46399790048599, + "p90": 112.73600161075592, + "p95": 116.67200177907944, + "p99": 124.86399710178375 + }, + "combine": { + "p50": 82.2720006108284, + "p90": 88.8959988951683, + "p95": 93.72799843549728, + "p99": 100.80000013113022 + }, + "roundtrip": { + "p50": 154.08000349998474, + "p90": 166.87999665737152, + "p95": 169.98399794101715, + "p99": 182.23999440670013 + }, + "isolatedSum": { + "p50": 184.7359985113144, + "p90": 201.63200050592422, + "p95": 210.40000021457672, + "p99": 225.66399723291397 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + 
"fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.38399761915207, + "p90": 114.68800157308578, + "p95": 117.69600212574005, + "p99": 132.4480026960373 + }, + "combine": { + "p50": 85.21600067615509, + "p90": 89.88799899816513, + "p95": 92.83199906349182, + "p99": 98.88000041246414 + }, + "roundtrip": { + "p50": 158.78400206565857, + "p90": 170.30400037765503, + "p95": 173.40800166130066, + "p99": 182.14400112628937 + }, + "isolatedSum": { + "p50": 189.59999829530716, + "p90": 204.57600057125092, + "p95": 210.52800118923187, + "p99": 231.32800310850143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9dbb91c3", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f", + "colorKey": "gb300_0bc52499", + "comparisonKey": "76a694c3736658c7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:11.840675+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": 
"bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0456df9778e5c0f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 99.04000163078308, + "p90": 114.20799791812897, + "p95": 120.31999975442886, + "p99": 152.12799608707428 + }, + "combine": { + "p50": 74.94399696588516, + "p90": 83.67999643087387, + "p95": 90.68799763917923, + "p99": 138.40000331401825 + }, + "roundtrip": { + "p50": 148.12800288200378, + "p90": 163.64799439907074, + "p95": 168.2880073785782, + "p99": 209.18400585651398 + }, + "isolatedSum": { + "p50": 173.98399859666824, + "p90": 197.88799434900284, + "p95": 211.0079973936081, + "p99": 290.52799940109253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 99.90400075912476, + "p90": 116.67200177907944, + "p95": 
127.96799838542938, + "p99": 177.50400304794312 + }, + "combine": { + "p50": 75.77600330114365, + "p90": 83.20000022649765, + "p95": 85.85599809885025, + "p99": 91.36000275611877 + }, + "roundtrip": { + "p50": 149.59999918937683, + "p90": 165.0879979133606, + "p95": 172.70399630069733, + "p99": 260.80000400543213 + }, + "isolatedSum": { + "p50": 175.6800040602684, + "p90": 199.8720020055771, + "p95": 213.82399648427963, + "p99": 268.8640058040619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.53599762916565, + "p90": 111.96800321340561, + "p95": 116.73600226640701, + "p99": 153.79199385643005 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 85.7279971241951, + "p95": 89.85599875450134, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 152.0639955997467, + "p90": 165.21599888801575, + "p95": 171.48800194263458, + "p99": 229.34399545192719 + }, + "isolatedSum": { + "p50": 174.46399480104446, + "p90": 197.6960003376007, + "p95": 206.59200102090836, + "p99": 268.1279927492142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.78399968147278, + "p90": 113.63200098276138, + "p95": 118.65600198507309, + "p99": 129.95199859142303 + }, + "combine": { + "p50": 78.36800068616867, + "p90": 87.2960016131401, + "p95": 92.28800237178802, + "p99": 130.91200590133667 + }, + "roundtrip": { + "p50": 154.01600301265717, + "p90": 168.2880073785782, + "p95": 181.92000687122345, + "p99": 246.62399291992188 + }, + "isolatedSum": { + "p50": 177.15200036764145, + "p90": 
200.9280025959015, + "p95": 210.94400435686111, + "p99": 260.8640044927597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 99.71199929714203, + "p90": 115.13599753379822, + "p95": 122.6240023970604, + "p99": 146.91199362277985 + }, + "combine": { + "p50": 80.83199709653854, + "p90": 89.28000181913376, + "p95": 95.20000219345093, + "p99": 137.56799697875977 + }, + "roundtrip": { + "p50": 155.58399260044098, + "p90": 169.5680022239685, + "p95": 177.40799486637115, + "p99": 247.6480007171631 + }, + "isolatedSum": { + "p50": 180.54399639368057, + "p90": 204.41599935293198, + "p95": 217.82400459051132, + "p99": 284.4799906015396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.60799866914749, + "p90": 116.28799885511398, + "p95": 123.00799787044525, + "p99": 156.0640037059784 + }, + "combine": { + "p50": 83.42400193214417, + "p90": 92.19200164079666, + "p95": 98.62399846315384, + "p99": 408.25599431991577 + }, + "roundtrip": { + "p50": 155.96799552440643, + "p90": 170.43200135231018, + "p95": 174.94399845600128, + "p99": 234.55999791622162 + }, + "isolatedSum": { + "p50": 184.03200060129166, + "p90": 208.48000049591064, + "p95": 221.6319963335991, + "p99": 564.3199980258942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + 
"p50": 103.7760004401207, + "p90": 117.18399822711945, + "p95": 123.45600128173828, + "p99": 166.04800522327423 + }, + "combine": { + "p50": 86.27200126647949, + "p90": 92.8959995508194, + "p95": 97.56799787282944, + "p99": 124.60800260305405 + }, + "roundtrip": { + "p50": 159.5200002193451, + "p90": 172.86400496959686, + "p95": 177.88800597190857, + "p99": 238.17600309848785 + }, + "isolatedSum": { + "p50": 190.0480017066002, + "p90": 210.07999777793884, + "p95": 221.02399915456772, + "p99": 290.6560078263283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.47199910879135, + "p90": 120.99199742078781, + "p95": 126.24000012874603, + "p99": 165.50399363040924 + }, + "combine": { + "p50": 101.24800354242325, + "p90": 109.56799983978271, + "p95": 112.57600039243698, + "p99": 137.15200126171112 + }, + "roundtrip": { + "p50": 181.60000443458557, + "p90": 195.19999623298645, + "p95": 203.93599569797516, + "p99": 254.7520101070404 + }, + "isolatedSum": { + "p50": 210.7200026512146, + "p90": 230.55999726057053, + "p95": 238.816000521183, + "p99": 302.65599489212036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-306e03d2", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||15404c7c0ec01b5", + "colorKey": "gb300_b8354a13", + "comparisonKey": "13a90a1911e15611", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:07.012003+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": 
"gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "15404c7c0ec01b5", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.28000247478485, + "p90": 105.76000064611435, + "p95": 108.76800119876862, + "p99": 119.9679970741272 + }, + "combine": { + "p50": 84.19200032949448, + "p90": 87.87199854850769, + "p95": 89.79199826717377, + "p99": 96.96000069379807 + }, + "roundtrip": { 
+ "p50": 151.296004652977, + "p90": 162.91199624538422, + "p95": 166.27199947834015, + "p99": 175.64800381660461 + }, + "isolatedSum": { + "p50": 177.47200280427933, + "p90": 193.63199919462204, + "p95": 198.55999946594238, + "p99": 216.92799776792526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.38399630784988, + "p90": 143.26399564743042, + "p95": 151.2320041656494, + "p99": 165.3439998626709 + }, + "combine": { + "p50": 87.48800307512283, + "p90": 123.9359974861145, + "p95": 139.3280029296875, + "p99": 151.45599842071533 + }, + "roundtrip": { + "p50": 157.151997089386, + "p90": 196.83200120925903, + "p95": 207.45599269866943, + "p99": 224.95999932289124 + }, + "isolatedSum": { + "p50": 183.87199938297272, + "p90": 267.1999931335449, + "p95": 290.5600070953369, + "p99": 316.79999828338623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 92.57599711418152, + "p90": 105.79200088977814, + "p95": 110.11199653148651, + "p99": 128.80000472068787 + }, + "combine": { + "p50": 86.14400029182434, + "p90": 90.55999666452408, + "p95": 96.09600156545639, + "p99": 101.31199657917023 + }, + "roundtrip": { + "p50": 155.90399503707886, + "p90": 166.62399470806122, + "p95": 169.44000124931335, + "p99": 187.1040016412735 + }, + "isolatedSum": { + "p50": 178.71999740600586, + "p90": 196.35199755430222, + "p95": 206.2079980969429, + "p99": 230.1120012998581 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 
5.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.56799787282944, + "p90": 131.6480040550232, + "p95": 147.77599275112152, + "p99": 160.35200655460358 + }, + "combine": { + "p50": 87.0399996638298, + "p90": 92.73599833250046, + "p95": 97.31200337409973, + "p99": 101.79200023412704 + }, + "roundtrip": { + "p50": 156.99200332164764, + "p90": 168.03200542926788, + "p95": 171.6800034046173, + "p99": 183.61599743366241 + }, + "isolatedSum": { + "p50": 184.60799753665924, + "p90": 224.38400238752365, + "p95": 245.08799612522125, + "p99": 262.1440067887306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.85600006580353, + "p90": 122.72000312805176, + "p95": 136.4160031080246, + "p99": 159.2320054769516 + }, + "combine": { + "p50": 89.53599631786346, + "p90": 132.38400220870972, + "p95": 148.3840048313141, + "p99": 154.23999726772308 + }, + "roundtrip": { + "p50": 161.72799468040466, + "p90": 182.8480064868927, + "p95": 201.05600357055664, + "p99": 256.22400641441345 + }, + "isolatedSum": { + "p50": 187.391996383667, + "p90": 255.10400533676147, + "p95": 284.8000079393387, + "p99": 313.4720027446747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.26399654150009, + "p90": 147.67999947071075, + "p95": 153.34400534629822, + "p99": 160.89600324630737 + }, + "combine": { + "p50": 100.38399696350098, + "p90": 
148.3200043439865, + "p95": 152.38399803638458, + "p99": 161.31199896335602 + }, + "roundtrip": { + "p50": 173.50399494171143, + "p90": 218.4319943189621, + "p95": 228.2239943742752, + "p99": 237.95199394226074 + }, + "isolatedSum": { + "p50": 203.64799350500107, + "p90": 296.00000381469727, + "p95": 305.7280033826828, + "p99": 322.2080022096634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.24799805879593, + "p90": 134.49600338935852, + "p95": 146.81600034236908, + "p99": 157.21599757671356 + }, + "combine": { + "p50": 110.43199896812439, + "p90": 133.95200669765472, + "p95": 148.60799908638, + "p99": 160.89600324630737 + }, + "roundtrip": { + "p50": 194.5279985666275, + "p90": 214.91199731826782, + "p95": 231.04000091552734, + "p99": 243.0720031261444 + }, + "isolatedSum": { + "p50": 223.67999702692032, + "p90": 268.44801008701324, + "p95": 295.4239994287491, + "p99": 318.11200082302094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.89600610733032, + "p90": 146.464005112648, + "p95": 153.85599434375763, + "p99": 177.824005484581 + }, + "combine": { + "p50": 144.83200013637543, + "p90": 182.8480064868927, + "p95": 188.51199746131897, + "p99": 202.01599597930908 + }, + "roundtrip": { + "p50": 251.77600979804993, + "p90": 289.5359992980957, + "p95": 303.3919930458069, + "p99": 317.8560137748718 + }, + "isolatedSum": { + "p50": 277.72800624370575, + "p90": 329.3120115995407, + "p95": 342.3679918050766, + "p99": 379.8400014638901 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e3652ba", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c", + "colorKey": "gb300_8d40934b", + "comparisonKey": "86708d4689d5e5a4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:52.283851+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5793a02d08aaa9c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + 
"backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.99999666213989, + "p90": 151.64799988269806, + "p95": 158.87999534606934, + "p99": 172.31999337673187 + }, + "combine": { + "p50": 89.53599631786346, + "p90": 131.84000551700592, + "p95": 147.87200093269348, + "p99": 154.08000349998474 + }, + "roundtrip": { + "p50": 173.37599396705627, + "p90": 209.24800634384155, + "p95": 221.5680032968521, + "p99": 238.24000358581543 + }, + "isolatedSum": { + "p50": 205.53599298000336, + "p90": 283.488005399704, + "p95": 306.7519962787628, + "p99": 326.3999968767166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 122.72000312805176, + "p90": 152.8639942407608, + "p95": 159.32799875736237, + "p99": 171.74400389194489 + }, + "combine": { + "p50": 96.79999947547913, + "p90": 148.03199470043182, + "p95": 152.0639955997467, + "p99": 159.42400693893433 + }, + "roundtrip": { + "p50": 187.45599687099457, + "p90": 225.2800017595291, + "p95": 232.16000199317932, + "p99": 249.7279942035675 + }, + "isolatedSum": { + "p50": 219.52000260353088, + "p90": 300.8959889411926, + "p95": 311.39199435710907, + "p99": 331.1680108308792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 106.75200074911118, + "p90": 125.50400197505951, + "p95": 138.94400000572205, + "p99": 166.33599996566772 + }, + "combine": { + "p50": 89.6959975361824, + "p90": 131.67999684810638, + "p95": 148.54399859905243, + "p99": 159.64800119400024 + }, + "roundtrip": { + "p50": 171.90399765968323, + "p90": 210.11200547218323, + "p95": 225.24799406528473, + "p99": 248.54399263858795 + }, + "isolatedSum": { + "p50": 196.44799828529358, + "p90": 257.1839988231659, + "p95": 287.4879986047745, + "p99": 325.98400115966797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.43200159072876, + "p90": 154.14400398731232, + "p95": 158.9439958333969, + "p99": 171.64799571037292 + }, + "combine": { + "p50": 113.95200341939926, + "p90": 153.53600680828094, + "p95": 158.6879938840866, + "p99": 165.43999314308167 + }, + "roundtrip": { + "p50": 186.17600202560425, + "p90": 227.35999524593353, + "p95": 235.4239970445633, + "p99": 249.24799799919128 + }, + "isolatedSum": { + "p50": 240.38400501012802, + "p90": 307.68001079559326, + "p95": 317.6319897174835, + "p99": 337.0879888534546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 110.3999987244606, + "p90": 148.12800288200378, + "p95": 156.3519984483719, + "p99": 174.14399981498718 + }, + "combine": { + "p50": 100.89600086212158, + "p90": 150.36800503730774, + "p95": 155.74400126934052, + "p99": 161.82400286197662 + }, + "roundtrip": 
{ + "p50": 179.29600179195404, + "p90": 221.69600427150726, + "p95": 231.61600530147552, + "p99": 248.03200364112854 + }, + "isolatedSum": { + "p50": 211.29599958658218, + "p90": 298.4960079193115, + "p95": 312.0959997177124, + "p99": 335.9680026769638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 125.37600100040436, + "p90": 151.64799988269806, + "p95": 159.743994474411, + "p99": 180.25599420070648 + }, + "combine": { + "p50": 103.58399897813797, + "p90": 150.52799880504608, + "p95": 154.91199493408203, + "p99": 165.6000018119812 + }, + "roundtrip": { + "p50": 187.04000115394592, + "p90": 231.455996632576, + "p95": 238.62400650978088, + "p99": 252.99200415611267 + }, + "isolatedSum": { + "p50": 228.95999997854233, + "p90": 302.17599868774414, + "p95": 314.65598940849304, + "p99": 345.8559960126877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 127.07200646400452, + "p90": 153.18399667739868, + "p95": 162.20800578594208, + "p99": 185.63200533390045 + }, + "combine": { + "p50": 113.02399635314941, + "p90": 123.45600128173828, + "p95": 132.4480026960373, + "p99": 160.8320027589798 + }, + "roundtrip": { + "p50": 199.77599382400513, + "p90": 217.18400716781616, + "p95": 233.72800648212433, + "p99": 268.8319981098175 + }, + "isolatedSum": { + "p50": 240.09600281715393, + "p90": 276.63999795913696, + "p95": 294.65600848197937, + "p99": 346.46400809288025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 
38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 142.97600090503693, + "p90": 185.47199666500092, + "p95": 192.25600361824036, + "p99": 204.12799715995789 + }, + "combine": { + "p50": 138.91200721263885, + "p90": 168.06399822235107, + "p95": 187.16800212860107, + "p99": 198.17599654197693 + }, + "roundtrip": { + "p50": 238.97600173950195, + "p90": 274.1119861602783, + "p95": 282.20799565315247, + "p99": 298.5599935054779 + }, + "isolatedSum": { + "p50": 281.8880081176758, + "p90": 353.535994887352, + "p95": 379.42400574684143, + "p99": 402.3039937019348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8ac5dead", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0", + "colorKey": "gb300_70e3fa53", + "comparisonKey": "4d6680947bbdbea5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:41.440654+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a572344820478f0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 100.47999769449234, + "p90": 113.66400122642517, + "p95": 116.80000275373459, + "p99": 130.0799995660782 + }, + "combine": { + "p50": 82.87999778985977, + "p90": 89.75999802350998, + "p95": 95.58399766683578, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 157.82399475574493, + "p90": 169.3439930677414, + "p95": 174.49599504470825, + "p99": 189.11999464035034 + }, + "isolatedSum": { + "p50": 183.3599954843521, + "p90": 203.42399924993515, + "p95": 212.38400042057037, + "p99": 243.32799762487411 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
99.84000027179718, + "p90": 111.55200004577637, + "p95": 116.73600226640701, + "p99": 124.1919994354248 + }, + "combine": { + "p50": 85.75999736785889, + "p90": 92.32000261545181, + "p95": 96.83199971914291, + "p99": 115.87200313806534 + }, + "roundtrip": { + "p50": 160.92799603939056, + "p90": 173.7920045852661, + "p95": 177.95200645923615, + "p99": 195.0400024652481 + }, + "isolatedSum": { + "p50": 185.59999763965607, + "p90": 203.87200266122818, + "p95": 213.56800198554993, + "p99": 240.06400257349014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 99.90400075912476, + "p90": 114.20799791812897, + "p95": 118.49600076675415, + "p99": 133.02400708198547 + }, + "combine": { + "p50": 87.10400015115738, + "p90": 95.48799693584442, + "p95": 99.64799880981445, + "p99": 112.73600161075592 + }, + "roundtrip": { + "p50": 164.38399255275726, + "p90": 177.37600207328796, + "p95": 181.5039962530136, + "p99": 214.62400257587433 + }, + "isolatedSum": { + "p50": 187.00800091028214, + "p90": 209.6959948539734, + "p95": 218.1439995765686, + "p99": 245.7600086927414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 101.82400047779083, + "p90": 114.84800279140472, + "p95": 119.64800208806992, + "p99": 132.09599256515503 + }, + "combine": { + "p50": 88.57599645853043, + "p90": 95.90400010347366, + "p95": 99.35999661684036, + "p99": 114.3999993801117 + }, + "roundtrip": { + "p50": 167.90400445461273, + "p90": 180.4800033569336, + "p95": 184.67199802398682, + "p99": 
209.27999913692474 + }, + "isolatedSum": { + "p50": 190.39999693632126, + "p90": 210.7520028948784, + "p95": 219.00799870491028, + "p99": 246.49599194526672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 101.59999877214432, + "p90": 115.77600240707397, + "p95": 122.6240023970604, + "p99": 140.6719982624054 + }, + "combine": { + "p50": 89.4400030374527, + "p90": 98.49599748849869, + "p95": 102.14400291442871, + "p99": 119.1679984331131 + }, + "roundtrip": { + "p50": 169.0240055322647, + "p90": 179.1680008172989, + "p95": 181.88799917697906, + "p99": 194.33599710464478 + }, + "isolatedSum": { + "p50": 191.04000180959702, + "p90": 214.27199989557266, + "p95": 224.7680053114891, + "p99": 259.8399966955185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 106.30399733781815, + "p90": 117.08799749612808, + "p95": 121.5360015630722, + "p99": 135.80800592899323 + }, + "combine": { + "p50": 97.21600264310837, + "p90": 103.10400277376175, + "p95": 108.12799632549286, + "p99": 120.99199742078781 + }, + "roundtrip": { + "p50": 174.5920032262802, + "p90": 186.97600066661835, + "p95": 191.39200448989868, + "p99": 208.70399475097656 + }, + "isolatedSum": { + "p50": 203.5199999809265, + "p90": 220.19200026988983, + "p95": 229.66399788856506, + "p99": 256.80000334978104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 116.28799885511398, + "p90": 125.56800246238708, + "p95": 131.48799538612366, + "p99": 154.4640064239502 + }, + "combine": { + "p50": 109.95200276374817, + "p90": 116.41599982976913, + "p95": 121.60000205039978, + "p99": 145.24799585342407 + }, + "roundtrip": { + "p50": 194.65599954128265, + "p90": 205.1839977502823, + "p95": 208.5759937763214, + "p99": 246.8159943819046 + }, + "isolatedSum": { + "p50": 226.24000161886215, + "p90": 241.98400229215622, + "p95": 253.08799743652344, + "p99": 299.71200227737427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.76800513267517, + "p90": 142.87999272346497, + "p95": 146.33600413799286, + "p99": 159.36000645160675 + }, + "combine": { + "p50": 129.2479932308197, + "p90": 137.31199502944946, + "p95": 140.44800400733948, + "p99": 150.7200002670288 + }, + "roundtrip": { + "p50": 230.335995554924, + "p90": 238.94399404525757, + "p95": 243.23199689388275, + "p99": 288.09601068496704 + }, + "isolatedSum": { + "p50": 262.0159983634949, + "p90": 280.19198775291443, + "p95": 286.78400814533234, + "p99": 310.08000671863556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-369ec090", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_ed023b5e", + "comparisonKey": "c529fcae66e93e08", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:08:54.446270+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "striped", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 101.88800096511841, + "p90": 119.32799965143204, + "p95": 126.17599964141846, + "p99": 150.4639983177185 + }, + "combine": { + "p50": 
79.29600030183792, + "p90": 88.51200342178345, + "p95": 91.48799628019333, + "p99": 121.37600034475327 + }, + "roundtrip": { + "p50": 157.69599378108978, + "p90": 172.09599912166595, + "p95": 180.60800433158875, + "p99": 218.59200298786163 + }, + "isolatedSum": { + "p50": 181.18400126695633, + "p90": 207.84000307321548, + "p95": 217.6639959216118, + "p99": 271.83999866247177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 102.01600193977356, + "p90": 114.68800157308578, + "p95": 121.21599912643433, + "p99": 132.192000746727 + }, + "combine": { + "p50": 82.46400207281113, + "p90": 88.99199962615967, + "p95": 91.74399822950363, + "p99": 102.49599814414978 + }, + "roundtrip": { + "p50": 161.9199961423874, + "p90": 172.70399630069733, + "p95": 178.24000120162964, + "p99": 205.28000593185425 + }, + "isolatedSum": { + "p50": 184.4800040125847, + "p90": 203.68000119924545, + "p95": 212.95999735593796, + "p99": 234.68799889087677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 102.1760031580925, + "p90": 118.01599711179733, + "p95": 124.64000284671783, + "p99": 152.38399803638458 + }, + "combine": { + "p50": 87.5839963555336, + "p90": 92.73599833250046, + "p95": 98.49599748849869, + "p99": 123.9359974861145 + }, + "roundtrip": { + "p50": 163.4880006313324, + "p90": 174.75199699401855, + "p95": 178.75200510025024, + "p99": 186.65599822998047 + }, + "isolatedSum": { + "p50": 189.7599995136261, + "p90": 210.7519954442978, + "p95": 223.13600033521652, + "p99": 
276.3199955224991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 103.7760004401207, + "p90": 117.5680011510849, + "p95": 124.41600114107132, + "p99": 156.95999562740326 + }, + "combine": { + "p50": 87.90399879217148, + "p90": 95.0080007314682, + "p95": 98.94400089979172, + "p99": 113.69600147008896 + }, + "roundtrip": { + "p50": 165.0560051202774, + "p90": 177.3120015859604, + "p95": 185.82400679588318, + "p99": 217.95199811458588 + }, + "isolatedSum": { + "p50": 191.67999923229218, + "p90": 212.5760018825531, + "p95": 223.36000204086304, + "p99": 270.6559970974922 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 105.05600273609161, + "p90": 117.50400066375732, + "p95": 121.76000326871872, + "p99": 155.20000457763672 + }, + "combine": { + "p50": 89.31200206279755, + "p90": 96.83199971914291, + "p95": 100.99200159311295, + "p99": 122.8799968957901 + }, + "roundtrip": { + "p50": 167.71200299263, + "p90": 179.9039989709854, + "p95": 185.5040043592453, + "p99": 217.53600239753723 + }, + "isolatedSum": { + "p50": 194.36800479888916, + "p90": 214.33600038290024, + "p95": 222.75200486183167, + "p99": 278.0800014734268 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.51199859380722, + "p90": 
115.32799899578094, + "p95": 119.19999867677689, + "p99": 130.3039938211441 + }, + "combine": { + "p50": 91.45600348711014, + "p90": 99.93600100278854, + "p95": 101.75999999046326, + "p99": 109.72800105810165 + }, + "roundtrip": { + "p50": 172.992005944252, + "p90": 181.88799917697906, + "p95": 185.63200533390045, + "p99": 196.16000354290009 + }, + "isolatedSum": { + "p50": 195.96800208091736, + "p90": 215.2639999985695, + "p95": 220.95999866724014, + "p99": 240.03199487924576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.38399893045425, + "p90": 125.69600343704224, + "p95": 131.71200454235077, + "p99": 153.31199765205383 + }, + "combine": { + "p50": 109.37599837779999, + "p90": 115.10399729013443, + "p95": 117.63200163841248, + "p99": 135.96799969673157 + }, + "roundtrip": { + "p50": 194.30400431156158, + "p90": 203.87199521064758, + "p95": 210.33599972724915, + "p99": 242.62399971485138 + }, + "isolatedSum": { + "p50": 221.75999730825424, + "p90": 240.80000072717667, + "p95": 249.34400618076324, + "p99": 289.2799973487854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.74399733543396, + "p90": 141.53599739074707, + "p95": 146.4959979057312, + "p99": 158.720001578331 + }, + "combine": { + "p50": 139.45600390434265, + "p90": 147.67999947071075, + "p95": 150.4960060119629, + "p99": 155.13600409030914 + }, + "roundtrip": { + "p50": 244.00000274181366, + "p90": 252.06398963928223, + "p95": 254.59200143814087, + "p99": 
261.6640031337738 + }, + "isolatedSum": { + "p50": 271.2000012397766, + "p90": 289.2159968614578, + "p95": 296.9920039176941, + "p99": 313.85600566864014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bafe160b", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fc79fe5fdca4c", + "colorKey": "gb300_92ddb4ac", + "comparisonKey": "34f8bcc560b1f685", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:34.369304+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fc79fe5fdca4c", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.64799815416336, + "p90": 109.02400314807892, + "p95": 112.09599673748016, + "p99": 124.51200187206268 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 81.95199817419052, + "p95": 84.54400300979614, + "p99": 92.0960009098053 + }, + "roundtrip": { + "p50": 146.40000462532043, + "p90": 159.58400070667267, + "p95": 163.42400014400482, + "p99": 169.27999258041382 + }, + "isolatedSum": { + "p50": 170.6559956073761, + "p90": 190.97600132226944, + "p95": 196.6399997472763, + "p99": 216.60800278186798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 95.61599791049957, + "p90": 107.90400207042694, + "p95": 112.12799698114395, + "p99": 121.56800180673599 + }, + "combine": { + "p50": 75.77600330114365, + "p90": 83.36000144481659, + "p95": 86.11200004816055, + "p99": 93.6959981918335 + }, + "roundtrip": { + "p50": 149.6960073709488, + "p90": 162.36799955368042, + "p95": 167.61599481105804, + "p99": 181.536003947258 + }, + "isolatedSum": { + "p50": 171.39200121164322, + "p90": 191.26400351524353, + "p95": 198.2399970293045, + "p99": 215.2639999985695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 
1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 96.51199728250504, + "p90": 109.53599959611893, + "p95": 113.27999830245972, + "p99": 120.06399780511856 + }, + "combine": { + "p50": 77.63200253248215, + "p90": 85.40800213813782, + "p95": 89.63199704885483, + "p99": 96.8639999628067 + }, + "roundtrip": { + "p50": 151.67999267578125, + "p90": 164.2560064792633, + "p95": 168.73599588871002, + "p99": 185.2159947156906 + }, + "isolatedSum": { + "p50": 174.14399981498718, + "p90": 194.94400173425674, + "p95": 202.91199535131454, + "p99": 216.92799776792526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.43199700117111, + "p90": 111.48799955844879, + "p95": 115.35999923944473, + "p99": 124.06399846076965 + }, + "combine": { + "p50": 80.79999685287476, + "p90": 88.128000497818, + "p95": 91.23200178146362, + "p99": 111.29599809646606 + }, + "roundtrip": { + "p50": 157.0879966020584, + "p90": 170.6559956073761, + "p95": 175.20000040531158, + "p99": 185.2799952030182 + }, + "isolatedSum": { + "p50": 179.23199385404587, + "p90": 199.61600005626678, + "p95": 206.59200102090836, + "p99": 235.35999655723572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.79199957847595, + "p90": 112.19199746847153, + "p95": 115.32799899578094, + "p99": 140.28799533843994 + }, + "combine": { + "p50": 83.42400193214417, + "p90": 88.128000497818, + 
"p95": 89.82399851083755, + "p99": 98.88000041246414 + }, + "roundtrip": { + "p50": 159.32799875736237, + "p90": 172.06400632858276, + "p95": 175.20000040531158, + "p99": 188.51199746131897 + }, + "isolatedSum": { + "p50": 181.21600151062012, + "p90": 200.31999796628952, + "p95": 205.1519975066185, + "p99": 239.16799575090408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.65599936246872, + "p90": 112.0000034570694, + "p95": 116.86400324106216, + "p99": 125.05599856376648 + }, + "combine": { + "p50": 87.42400258779526, + "p90": 95.16800194978714, + "p95": 98.43199700117111, + "p99": 104.12800312042236 + }, + "roundtrip": { + "p50": 163.5199934244156, + "p90": 177.279993891716, + "p95": 181.0240000486374, + "p99": 192.54399836063385 + }, + "isolatedSum": { + "p50": 190.08000195026398, + "p90": 207.16800540685654, + "p95": 215.29600024223328, + "p99": 229.18400168418884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.2879975438118, + "p90": 117.88800358772278, + "p95": 121.05599790811539, + "p99": 131.23199343681335 + }, + "combine": { + "p50": 101.27999633550644, + "p90": 108.47999900579453, + "p95": 110.91200262308121, + "p99": 115.7120019197464 + }, + "roundtrip": { + "p50": 184.60799753665924, + "p90": 194.240003824234, + "p95": 197.88800179958344, + "p99": 206.2399983406067 + }, + "isolatedSum": { + "p50": 209.56799387931824, + "p90": 226.3680025935173, + "p95": 231.9680005311966, + "p99": 246.94399535655975 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.03199821710587, + "p90": 133.59999656677246, + "p95": 136.76799833774567, + "p99": 144.22400295734406 + }, + "combine": { + "p50": 133.85599851608276, + "p90": 139.93600010871887, + "p95": 144.896000623703, + "p99": 148.8640010356903 + }, + "roundtrip": { + "p50": 232.41600394248962, + "p90": 241.85599386692047, + "p95": 244.09599602222443, + "p99": 253.56799364089966 + }, + "isolatedSum": { + "p50": 257.88799673318863, + "p90": 273.53599667549133, + "p95": 281.66399896144867, + "p99": 293.08800399303436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5f46826a", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da", + "colorKey": "gb300_3e2f6cc2", + "comparisonKey": "e01503a2038b1a60", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:19.271869+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + 
"routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "39778bd75f046da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 101.40799731016159, + "p90": 115.61600118875504, + "p95": 120.7360029220581, + "p99": 143.5839980840683 + }, + "combine": { + "p50": 82.75199681520462, + "p90": 88.35200220346451, + "p95": 91.64799749851227, + "p99": 101.18400305509567 + }, + "roundtrip": { + "p50": 156.99200332164764, + "p90": 170.78399658203125, + "p95": 173.69599640369415, + "p99": 194.33599710464478 + }, + "isolatedSum": { + "p50": 184.1599941253662, + "p90": 203.96800339221954, + "p95": 212.38400042057037, + "p99": 244.76800113916397 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 100.44799745082855, + "p90": 114.656001329422, + "p95": 121.34400010108948, + "p99": 137.02400028705597 + }, + "combine": { + "p50": 85.05599945783615, + "p90": 89.56799656152725, + "p95": 93.63199770450592, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 159.58400070667267, + "p90": 171.29600048065186, + "p95": 174.40000176429749, + "p99": 183.87199938297272 + }, + "isolatedSum": { + "p50": 185.5039969086647, + "p90": 204.22399789094925, + "p95": 214.9759978055954, + "p99": 239.48799818754196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 100.63999891281128, + "p90": 114.9120032787323, + "p95": 118.84800344705582, + "p99": 135.0719928741455 + }, + "combine": { + "p50": 87.13600039482117, + "p90": 93.53599697351456, + "p95": 97.28000313043594, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 162.432000041008, + "p90": 174.5920032262802, + "p95": 178.01600694656372, + "p99": 190.94400107860565 + }, + "isolatedSum": { + "p50": 187.77599930763245, + "p90": 208.44800025224686, + "p95": 216.12800657749176, + "p99": 239.23199623823166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 103.00800204277039, + "p90": 116.70400202274323, + "p95": 122.36800044775009, + "p99": 131.3920021057129 + }, + "combine": { + "p50": 88.60799670219421, + "p90": 97.47199714183807, + "p95": 100.54399818181992, + "p99": 113.69600147008896 + }, + "roundtrip": { + "p50": 165.53600132465363, + 
"p90": 177.3120015859604, + "p95": 180.67200481891632, + "p99": 187.00799345970154 + }, + "isolatedSum": { + "p50": 191.6159987449646, + "p90": 214.1759991645813, + "p95": 222.91199862957, + "p99": 245.08800357580185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 103.29599678516388, + "p90": 116.06399714946747, + "p95": 122.11199849843979, + "p99": 130.2720010280609 + }, + "combine": { + "p50": 89.28000181913376, + "p90": 98.81599992513657, + "p95": 100.99200159311295, + "p99": 111.13599687814713 + }, + "roundtrip": { + "p50": 166.75199568271637, + "p90": 178.6240041255951, + "p95": 184.28799510002136, + "p99": 190.97599387168884 + }, + "isolatedSum": { + "p50": 192.57599860429764, + "p90": 214.87999707460403, + "p95": 223.10400009155273, + "p99": 241.40799790620804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 105.79200088977814, + "p90": 119.80800330638885, + "p95": 123.96799772977829, + "p99": 138.11199367046356 + }, + "combine": { + "p50": 95.83999961614609, + "p90": 102.52799838781357, + "p95": 107.87200182676315, + "p99": 111.90400272607803 + }, + "roundtrip": { + "p50": 171.07200622558594, + "p90": 183.07200074195862, + "p95": 188.25599551200867, + "p99": 203.42400670051575 + }, + "isolatedSum": { + "p50": 201.63200050592422, + "p90": 222.33600169420242, + "p95": 231.83999955654144, + "p99": 250.0159963965416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + 
"recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 115.9679964184761, + "p90": 126.01600587368011, + "p95": 130.5599957704544, + "p99": 159.39199924468994 + }, + "combine": { + "p50": 110.01600325107574, + "p90": 115.1999980211258, + "p95": 119.77600306272507, + "p99": 133.66399705410004 + }, + "roundtrip": { + "p50": 192.89599359035492, + "p90": 202.27199792861938, + "p95": 206.40000700950623, + "p99": 226.20800137519836 + }, + "isolatedSum": { + "p50": 225.98399966955185, + "p90": 241.2160038948059, + "p95": 250.33599883317947, + "p99": 293.05599629879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.55199587345123, + "p90": 141.76000654697418, + "p95": 144.6080058813095, + "p99": 153.3759981393814 + }, + "combine": { + "p50": 127.03999876976013, + "p90": 135.19999384880066, + "p95": 136.86400651931763, + "p99": 146.464005112648 + }, + "roundtrip": { + "p50": 228.70400547981262, + "p90": 237.92000114917755, + "p95": 240.447998046875, + "p99": 246.14399671554565 + }, + "isolatedSum": { + "p50": 258.59199464321136, + "p90": 276.96000039577484, + "p95": 281.47201240062714, + "p99": 299.8400032520294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a17a440", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717", + "colorKey": "gb300_edb03f57", + "comparisonKey": 
"06c53defb4a320c7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:12.175309+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a3b13bb200bb717", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 106.04800283908844, + "p90": 121.95199728012085, + "p95": 
126.62400305271149, + "p99": 135.13599336147308 + }, + "combine": { + "p50": 85.53600311279297, + "p90": 92.8959995508194, + "p95": 96.47999703884125, + "p99": 101.75999999046326 + }, + "roundtrip": { + "p50": 165.3120070695877, + "p90": 177.5359958410263, + "p95": 181.66400492191315, + "p99": 191.80800020694733 + }, + "isolatedSum": { + "p50": 191.5840059518814, + "p90": 214.84799683094025, + "p95": 223.10400009155273, + "p99": 236.89599335193634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 107.32799768447876, + "p90": 121.76000326871872, + "p95": 128.80000472068787, + "p99": 162.7199947834015 + }, + "combine": { + "p50": 85.91999858617783, + "p90": 93.44000369310379, + "p95": 97.34400361776352, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 166.20799899101257, + "p90": 181.34400248527527, + "p95": 190.8160001039505, + "p99": 230.81600666046143 + }, + "isolatedSum": { + "p50": 193.24799627065659, + "p90": 215.2000069618225, + "p95": 226.14400833845139, + "p99": 268.70399713516235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 108.76800119876862, + "p90": 121.8239963054657, + "p95": 127.87200510501862, + "p99": 144.3520039319992 + }, + "combine": { + "p50": 88.79999816417694, + "p90": 97.37599641084671, + "p95": 100.51199793815613, + "p99": 111.23199760913849 + }, + "roundtrip": { + "p50": 169.95200514793396, + "p90": 182.6239973306656, + "p95": 189.37599658966064, + "p99": 214.84799683094025 + }, + "isolatedSum": { + "p50": 197.56799936294556, 
+ "p90": 219.1999927163124, + "p95": 228.38400304317474, + "p99": 255.5840015411377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 108.31999778747559, + "p90": 123.36000055074692, + "p95": 128.51199507713318, + "p99": 139.8719996213913 + }, + "combine": { + "p50": 90.30400216579437, + "p90": 97.69599884748459, + "p95": 101.69599950313568, + "p99": 108.99200290441513 + }, + "roundtrip": { + "p50": 170.97599804401398, + "p90": 183.52000415325165, + "p95": 186.71999871730804, + "p99": 191.6159987449646 + }, + "isolatedSum": { + "p50": 198.62399995326996, + "p90": 221.0559993982315, + "p95": 230.20799458026886, + "p99": 248.86400252580643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 108.64000022411346, + "p90": 121.47200107574463, + "p95": 126.46399438381195, + "p99": 143.23200285434723 + }, + "combine": { + "p50": 91.42400324344635, + "p90": 98.43199700117111, + "p95": 100.3199964761734, + "p99": 105.43999820947647 + }, + "roundtrip": { + "p50": 171.29600048065186, + "p90": 183.52000415325165, + "p95": 188.25599551200867, + "p99": 197.76000082492828 + }, + "isolatedSum": { + "p50": 200.06400346755981, + "p90": 219.90399807691574, + "p95": 226.78399085998535, + "p99": 248.6720010638237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + 
"globalTokens": 256, + "dispatch": { + "p50": 108.96000266075134, + "p90": 122.36800044775009, + "p95": 125.91999769210815, + "p99": 139.0720009803772 + }, + "combine": { + "p50": 96.89600020647049, + "p90": 105.24799674749374, + "p95": 111.77600175142288, + "p99": 176.54399573802948 + }, + "roundtrip": { + "p50": 177.279993891716, + "p90": 191.23199582099915, + "p95": 196.16000354290009, + "p99": 246.49600684642792 + }, + "isolatedSum": { + "p50": 205.85600286722183, + "p90": 227.61599719524384, + "p95": 237.69599944353104, + "p99": 315.6159967184067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 118.30399930477142, + "p90": 130.3360015153885, + "p95": 136.4160031080246, + "p99": 148.44800531864166 + }, + "combine": { + "p50": 110.68800091743469, + "p90": 118.04799735546112, + "p95": 120.28799951076508, + "p99": 125.5359947681427 + }, + "roundtrip": { + "p50": 200.28799772262573, + "p90": 211.74399554729462, + "p95": 215.42400121688843, + "p99": 224.09600019454956 + }, + "isolatedSum": { + "p50": 228.99200022220612, + "p90": 248.3839988708496, + "p95": 256.7040026187897, + "p99": 273.98400008678436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.1839940547943, + "p90": 147.5200057029724, + "p95": 150.81599354743958, + "p99": 163.4880006313324 + }, + "combine": { + "p50": 143.0400013923645, + "p90": 149.59999918937683, + "p95": 151.93599462509155, + "p99": 156.80000185966492 + }, + "roundtrip": { + "p50": 250.5280077457428, + 
"p90": 260.09601354599, + "p95": 263.96799087524414, + "p99": 269.9519991874695 + }, + "isolatedSum": { + "p50": 280.2239954471588, + "p90": 297.12000489234924, + "p95": 302.7519881725311, + "p99": 320.2880024909973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9b6bfb54", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b", + "colorKey": "gb300_2194b8a7", + "comparisonKey": "854c2ec54ecd8073", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:23.680026+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "ab982093c4eac2b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 106.75200074911118, + "p90": 123.45600128173828, + "p95": 132.25600123405457, + "p99": 176.12800002098083 + }, + "combine": { + "p50": 86.27200126647949, + "p90": 96.12800180912018, + "p95": 99.45599734783173, + "p99": 135.74400544166565 + }, + "roundtrip": { + "p50": 166.27199947834015, + "p90": 180.89599907398224, + "p95": 186.39999628067017, + "p99": 242.33600497245789 + }, + "isolatedSum": { + "p50": 193.02400201559067, + "p90": 219.58400309085846, + "p95": 231.7119985818863, + "p99": 311.8720054626465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 106.4319983124733, + "p90": 123.74400347471237, + "p95": 133.59999656677246, + "p99": 182.81599879264832 + }, + "combine": { + "p50": 86.04799956083298, + "p90": 97.72799909114838, + "p95": 110.01600325107574, + "p99": 149.47199821472168 + }, + "roundtrip": { + "p50": 169.08800601959229, + "p90": 182.11199343204498, + "p95": 189.7599995136261, + "p99": 250.62400102615356 + }, + "isolatedSum": { + "p50": 192.47999787330627, + "p90": 221.47200256586075, + "p95": 243.6159998178482, + "p99": 332.28799700737 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 106.1440035700798, + "p90": 121.0239976644516, + "p95": 127.71199643611908, + "p99": 171.1360067129135 + }, + "combine": { + "p50": 87.5839963555336, + "p90": 95.93600034713745, + "p95": 98.11200201511383, + "p99": 109.0880036354065 + }, + "roundtrip": { + "p50": 170.75200378894806, + "p90": 184.60799753665924, + "p95": 190.528005361557, + "p99": 226.30399465560913 + }, + "isolatedSum": { + "p50": 193.7279999256134, + "p90": 216.95999801158905, + "p95": 225.8239984512329, + "p99": 280.22401034832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 108.67200046777725, + "p90": 128.03199887275696, + "p95": 139.29599523544312, + "p99": 202.07999646663666 + }, + "combine": { + "p50": 91.2960022687912, + "p90": 99.64799880981445, + "p95": 104.41599786281586, + "p99": 147.35999703407288 + }, + "roundtrip": { + "p50": 174.27200078964233, + "p90": 189.5039975643158, + "p95": 200.41599869728088, + "p99": 242.75200068950653 + }, + "isolatedSum": { + "p50": 199.96800273656845, + "p90": 227.6799976825714, + "p95": 243.71199309825897, + "p99": 349.43999350070953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 109.56799983978271, + "p90": 127.07200646400452, + "p95": 
138.75199854373932, + "p99": 177.34399437904358 + }, + "combine": { + "p50": 90.68799763917923, + "p90": 99.93600100278854, + "p95": 103.39199751615524, + "p99": 138.94400000572205 + }, + "roundtrip": { + "p50": 174.6239960193634, + "p90": 188.35200369358063, + "p95": 193.1840032339096, + "p99": 239.48800563812256 + }, + "isolatedSum": { + "p50": 200.25599747896194, + "p90": 227.00800746679306, + "p95": 242.14399605989456, + "p99": 316.2879943847656 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 110.36799848079681, + "p90": 126.5919953584671, + "p95": 142.7839994430542, + "p99": 187.00799345970154 + }, + "combine": { + "p50": 97.15200215578079, + "p90": 105.69600015878677, + "p95": 110.27199774980545, + "p99": 135.3600025177002 + }, + "roundtrip": { + "p50": 181.15200102329254, + "p90": 197.2160041332245, + "p95": 214.75200355052948, + "p99": 256.3199996948242 + }, + "isolatedSum": { + "p50": 207.5200006365776, + "p90": 232.28799551725388, + "p95": 253.05599719285965, + "p99": 322.36799597740173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 118.8800036907196, + "p90": 131.45600259304047, + "p95": 138.91200721263885, + "p99": 179.36000227928162 + }, + "combine": { + "p50": 110.52799969911575, + "p90": 119.45600062608719, + "p95": 123.07199835777283, + "p99": 158.04800391197205 + }, + "roundtrip": { + "p50": 197.34400510787964, + "p90": 209.6319943666458, + "p95": 215.39199352264404, + "p99": 264.3199861049652 + }, + "isolatedSum": { + "p50": 
229.40800338983536, + "p90": 250.91200321912766, + "p95": 261.9840055704117, + "p99": 337.40800619125366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.53599607944489, + "p90": 144.48000490665436, + "p95": 151.90400183200836, + "p99": 185.85599958896637 + }, + "combine": { + "p50": 128.57599556446075, + "p90": 136.6720050573349, + "p95": 141.6960060596466, + "p99": 175.64800381660461 + }, + "roundtrip": { + "p50": 232.35200345516205, + "p90": 243.93600225448608, + "p95": 253.31199169158936, + "p99": 302.3360073566437 + }, + "isolatedSum": { + "p50": 262.11199164390564, + "p90": 281.15200996398926, + "p95": 293.60000789165497, + "p99": 361.504003405571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-45b3be70", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_359d9fe4", + "comparisonKey": "2166de0e49ceee0a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:15.070619+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 92.47999638319016, + "p90": 104.73600029945374, + "p95": 108.89600217342377, + "p99": 122.20799922943115 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 85.9839990735054, + "p95": 88.06400001049042, + "p99": 95.13600170612335 + }, + "roundtrip": { + "p50": 149.6960073709488, + "p90": 160.44799983501434, + "p95": 163.42400014400482, + "p99": 172.03199863433838 + }, + "isolatedSum": { + "p50": 170.27199268341064, + "p90": 190.71999937295914, + "p95": 196.96000218391418, + "p99": 217.3440009355545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + 
"recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 92.16000139713287, + "p90": 104.22399640083313, + "p95": 109.0880036354065, + "p99": 117.8240031003952 + }, + "combine": { + "p50": 81.91999793052673, + "p90": 86.81599795818329, + "p95": 89.40800279378891, + "p99": 99.61599856615067 + }, + "roundtrip": { + "p50": 152.319997549057, + "p90": 163.13600540161133, + "p95": 167.13599860668182, + "p99": 180.86400628089905 + }, + "isolatedSum": { + "p50": 174.0799993276596, + "p90": 191.03999435901642, + "p95": 198.4960064291954, + "p99": 217.44000166654587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 94.65599805116653, + "p90": 108.60799998044968, + "p95": 113.47199976444244, + "p99": 158.4639996290207 + }, + "combine": { + "p50": 84.19200032949448, + "p90": 87.71199733018875, + "p95": 89.1840010881424, + "p99": 99.07200187444687 + }, + "roundtrip": { + "p50": 154.04799580574036, + "p90": 166.17600619792938, + "p95": 169.63200271129608, + "p99": 194.59199905395508 + }, + "isolatedSum": { + "p50": 178.847998380661, + "p90": 196.31999731063843, + "p95": 202.65600085258484, + "p99": 257.53600150346756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.0080007314682, + "p90": 104.92800176143646, + "p95": 109.3439981341362, + "p99": 131.04000687599182 + }, + "combine": { + "p50": 85.31200140714645, + "p90": 89.37600255012512, + "p95": 
93.6959981918335, + "p99": 99.0080013871193 + }, + "roundtrip": { + "p50": 156.41599893569946, + "p90": 167.10400581359863, + "p95": 170.49600183963776, + "p99": 181.31199479103088 + }, + "isolatedSum": { + "p50": 180.32000213861465, + "p90": 194.30400431156158, + "p95": 203.0399963259697, + "p99": 230.04800826311111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.39200365543365, + "p90": 108.73600095510483, + "p95": 112.76800185441971, + "p99": 133.2480013370514 + }, + "combine": { + "p50": 86.2400010228157, + "p90": 90.62399715185165, + "p95": 96.0640013217926, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 157.53600001335144, + "p90": 169.24799978733063, + "p95": 172.44799435138702, + "p99": 184.03199315071106 + }, + "isolatedSum": { + "p50": 181.63200467824936, + "p90": 199.35999810695648, + "p95": 208.8320031762123, + "p99": 235.71199923753738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.16000270843506, + "p90": 110.49599945545197, + "p95": 113.6000007390976, + "p99": 118.52800101041794 + }, + "combine": { + "p50": 89.6959975361824, + "p90": 98.01600128412247, + "p95": 99.74399954080582, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 164.09599781036377, + "p90": 173.18400740623474, + "p95": 176.35199427604675, + "p99": 183.61599743366241 + }, + "isolatedSum": { + "p50": 189.85600024461746, + "p90": 208.51200073957443, + "p95": 213.3440002799034, + "p99": 226.24000161886215 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 109.8880022764206, + "p90": 119.71200257539749, + "p95": 124.89599734544754, + "p99": 133.59999656677246 + }, + "combine": { + "p50": 104.86400127410889, + "p90": 111.87200248241425, + "p95": 113.98400366306305, + "p99": 123.71200323104858 + }, + "roundtrip": { + "p50": 189.28000330924988, + "p90": 198.40000569820404, + "p95": 202.2079974412918, + "p99": 209.60000157356262 + }, + "isolatedSum": { + "p50": 214.75200355052948, + "p90": 231.58400505781174, + "p95": 238.8800010085106, + "p99": 257.31199979782104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.61600315570831, + "p90": 135.42400300502777, + "p95": 138.40000331401825, + "p99": 143.90400052070618 + }, + "combine": { + "p50": 136.73600554466248, + "p90": 143.64799857139587, + "p95": 146.27200365066528, + "p99": 148.99200201034546 + }, + "roundtrip": { + "p50": 238.39999735355377, + "p90": 245.66400051116943, + "p95": 247.3600059747696, + "p99": 254.17599081993103 + }, + "isolatedSum": { + "p50": 264.3520087003708, + "p90": 279.07200157642365, + "p95": 284.67200696468353, + "p99": 292.89600253105164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-235afbc1", + "identity": 
"gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_e82c0e0a", + "comparisonKey": "b03a85ca66452e3b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:26.691348+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.43999689817429, + "p90": 111.10399663448334, + "p95": 115.03999680280685, + "p99": 128.80000472068787 + }, + "combine": { + "p50": 83.3280012011528, + "p90": 88.16000074148178, + "p95": 90.27200192213058, + "p99": 96.51199728250504 + }, + "roundtrip": { + "p50": 156.99200332164764, + "p90": 167.93599724769592, + "p95": 171.07200622558594, + "p99": 180.7360053062439 + }, + "isolatedSum": { + "p50": 180.7679980993271, + "p90": 199.26399737596512, + "p95": 205.31199872493744, + "p99": 225.3120020031929 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.6079980134964, + "p90": 109.63200032711029, + "p95": 113.8560026884079, + "p99": 125.08800625801086 + }, + "combine": { + "p50": 84.63999629020691, + "p90": 89.6959975361824, + "p95": 92.8959995508194, + "p99": 99.39199686050415 + }, + "roundtrip": { + "p50": 158.39999914169312, + "p90": 170.68800330162048, + "p95": 174.43199455738068, + "p99": 182.52800405025482 + }, + "isolatedSum": { + "p50": 181.2479943037033, + "p90": 199.3279978632927, + "p95": 206.7520022392273, + "p99": 224.48000311851501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.79199957847595, + "p90": 110.944002866745, + "p95": 113.98400366306305, + "p99": 127.13600695133209 + }, + "combine": { + "p50": 87.10400015115738, + "p90": 93.05600076913834, + "p95": 96.92800045013428, + 
"p99": 101.47199779748917 + }, + "roundtrip": { + "p50": 161.95200383663177, + "p90": 173.98400604724884, + "p95": 178.71999740600586, + "p99": 189.34400379657745 + }, + "isolatedSum": { + "p50": 184.89599972963333, + "p90": 204.00000363588333, + "p95": 210.91200411319733, + "p99": 228.60800474882126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.88000041246414, + "p90": 111.84000223875046, + "p95": 115.77600240707397, + "p99": 123.29600006341934 + }, + "combine": { + "p50": 87.20000088214874, + "p90": 95.61599791049957, + "p95": 98.04800152778625, + "p99": 104.25599664449692 + }, + "roundtrip": { + "p50": 162.01600432395935, + "p90": 174.5920032262802, + "p95": 178.52799594402313, + "p99": 193.37600469589233 + }, + "isolatedSum": { + "p50": 186.08000129461288, + "p90": 207.45600014925003, + "p95": 213.82400393486023, + "p99": 227.55199670791626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 100.54399818181992, + "p90": 112.19199746847153, + "p95": 116.2559986114502, + "p99": 123.80799651145935 + }, + "combine": { + "p50": 88.35200220346451, + "p90": 96.83199971914291, + "p95": 99.48799759149551, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 166.6560024023056, + "p90": 176.41599476337433, + "p95": 180.7679980993271, + "p99": 191.48799777030945 + }, + "isolatedSum": { + "p50": 188.89600038528442, + "p90": 209.02399718761444, + "p95": 215.7439962029457, + "p99": 232.44799673557281 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.00000214576721, + "p90": 114.27199840545654, + "p95": 117.5680011510849, + "p99": 127.87200510501862 + }, + "combine": { + "p50": 95.71199864149094, + "p90": 100.92800110578537, + "p95": 102.94400155544281, + "p99": 111.455999314785 + }, + "roundtrip": { + "p50": 169.72799599170685, + "p90": 181.47200345993042, + "p95": 184.1920018196106, + "p99": 192.03199446201324 + }, + "isolatedSum": { + "p50": 199.71200078725815, + "p90": 215.1999995112419, + "p95": 220.5120027065277, + "p99": 239.32800441980362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.63200098276138, + "p90": 123.19999933242798, + "p95": 127.51999497413635, + "p99": 137.60000467300415 + }, + "combine": { + "p50": 110.52799969911575, + "p90": 115.64800143241882, + "p95": 120.31999975442886, + "p99": 129.98400628566742 + }, + "roundtrip": { + "p50": 194.07999515533447, + "p90": 203.10400426387787, + "p95": 206.59199357032776, + "p99": 214.52799439430237 + }, + "isolatedSum": { + "p50": 224.16000068187714, + "p90": 238.8480007648468, + "p95": 247.83999472856522, + "p99": 267.58401095867157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.7599971294403, + "p90": 139.13600146770477, + "p95": 
142.43200421333313, + "p99": 148.0959951877594 + }, + "combine": { + "p50": 126.0479986667633, + "p90": 134.5600038766861, + "p95": 136.35200262069702, + "p99": 139.90400731563568 + }, + "roundtrip": { + "p50": 228.19200158119202, + "p90": 237.66399919986725, + "p95": 241.43999814987183, + "p99": 249.5039999485016 + }, + "isolatedSum": { + "p50": 255.8079957962036, + "p90": 273.69600534439087, + "p95": 278.78400683403015, + "p99": 288.0000025033951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a3ee4493", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_e52484d0", + "comparisonKey": "b56d29b79739cdb2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:39.990797+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", 
+ "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.98400104045868, + "p90": 117.88800358772278, + "p95": 135.45599579811096, + "p99": 186.3359957933426 + }, + "combine": { + "p50": 83.03999900817871, + "p90": 112.5119999051094, + "p95": 127.77599692344666, + "p99": 146.91199362277985 + }, + "roundtrip": { + "p50": 159.0079963207245, + "p90": 199.2959976196289, + "p95": 220.44800221920013, + "p99": 281.47199749946594 + }, + "isolatedSum": { + "p50": 181.0240000486374, + "p90": 230.40000349283218, + "p95": 263.2319927215576, + "p99": 333.24798941612244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 100.22400319576263, + "p90": 123.96799772977829, + "p95": 153.05599570274353, + "p99": 203.5840004682541 + }, + "combine": { + "p50": 86.17600053548813, + "p90": 97.53599762916565, + "p95": 112.31999844312668, + "p99": 150.33599734306335 + }, + "roundtrip": { + "p50": 160.89600324630737, + "p90": 184.7040057182312, 
+ "p95": 224.12799298763275, + "p99": 277.44001150131226 + }, + "isolatedSum": { + "p50": 186.40000373125076, + "p90": 221.50399535894394, + "p95": 265.3759941458702, + "p99": 353.91999781131744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 100.92800110578537, + "p90": 133.05599987506866, + "p95": 153.6960005760193, + "p99": 208.28799903392792 + }, + "combine": { + "p50": 87.61599659919739, + "p90": 119.23199892044067, + "p95": 132.54399597644806, + "p99": 147.87200093269348 + }, + "roundtrip": { + "p50": 162.27200627326965, + "p90": 193.4719979763031, + "p95": 212.67199516296387, + "p99": 252.9599964618683 + }, + "isolatedSum": { + "p50": 188.54399770498276, + "p90": 252.28799879550934, + "p95": 286.23999655246735, + "p99": 356.1599999666214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 102.20800340175629, + "p90": 129.85600531101227, + "p95": 146.59200608730316, + "p99": 189.43999707698822 + }, + "combine": { + "p50": 88.83199840784073, + "p90": 121.24799937009811, + "p95": 133.82400572299957, + "p99": 151.96800231933594 + }, + "roundtrip": { + "p50": 166.52800142765045, + "p90": 204.0639966726303, + "p95": 223.07200729846954, + "p99": 284.12801027297974 + }, + "isolatedSum": { + "p50": 191.04000180959702, + "p90": 251.10400468111038, + "p95": 280.41601181030273, + "p99": 341.40799939632416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 102.7199998497963, + "p90": 137.2479945421219, + "p95": 152.67199277877808, + "p99": 192.671999335289 + }, + "combine": { + "p50": 88.48000317811966, + "p90": 101.75999999046326, + "p95": 121.47200107574463, + "p99": 148.44800531864166 + }, + "roundtrip": { + "p50": 166.143998503685, + "p90": 195.74399292469025, + "p95": 215.32799303531647, + "p99": 279.04000878334045 + }, + "isolatedSum": { + "p50": 191.20000302791595, + "p90": 239.00799453258514, + "p95": 274.1439938545227, + "p99": 341.12000465393066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 107.35999792814255, + "p90": 145.1520025730133, + "p95": 156.031996011734, + "p99": 213.79199624061584 + }, + "combine": { + "p50": 96.03200107812881, + "p90": 105.79200088977814, + "p95": 119.07199770212173, + "p99": 158.1439971923828 + }, + "roundtrip": { + "p50": 171.03999853134155, + "p90": 201.1519968509674, + "p95": 222.97599911689758, + "p99": 303.42400074005127 + }, + "isolatedSum": { + "p50": 203.39199900627136, + "p90": 250.94400346279144, + "p95": 275.10399371385574, + "p99": 371.93599343299866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 114.81600254774094, + "p90": 132.4159950017929, + "p95": 151.8400013446808, + "p99": 200.095996260643 + }, + "combine": { + "p50": 111.68000102043152, + "p90": 135.96799969673157, + "p95": 
149.98400211334229, + "p99": 175.04000663757324 + }, + "roundtrip": { + "p50": 193.05600225925446, + "p90": 221.21599316596985, + "p95": 239.1040027141571, + "p99": 284.2560112476349 + }, + "isolatedSum": { + "p50": 226.49600356817245, + "p90": 268.3839946985245, + "p95": 301.82400345802307, + "p99": 375.13600289821625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.26400113105774, + "p90": 156.12800419330597, + "p95": 177.37600207328796, + "p99": 201.6959935426712 + }, + "combine": { + "p50": 127.00800597667694, + "p90": 138.11199367046356, + "p95": 148.25600385665894, + "p99": 186.0480010509491 + }, + "roundtrip": { + "p50": 229.5680046081543, + "p90": 255.0399899482727, + "p95": 273.1199860572815, + "p99": 320.6399977207184 + }, + "isolatedSum": { + "p50": 258.2720071077347, + "p90": 294.23999786376953, + "p95": 325.6320059299469, + "p99": 387.7439945936203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60de05fc", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_2d2139e3", + "comparisonKey": "4db81f7021002deb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:03.007049+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.39200037717819, + "p90": 86.87999844551086, + "p95": 91.2960022687912, + "p99": 99.2640033364296 + }, + "combine": { + "p50": 85.79199761152267, + "p90": 90.36800265312195, + "p95": 94.40000355243683, + "p99": 104.96000200510025 + }, + "roundtrip": { + "p50": 140.19200205802917, + "p90": 151.58399939537048, + "p95": 156.54399991035461, + "p99": 168.5120016336441 + }, + "isolatedSum": { + "p50": 161.18399798870087, + "p90": 177.2480010986328, 
+ "p95": 185.69600582122803, + "p99": 204.22400534152985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.96799731254578, + "p90": 87.8399983048439, + "p95": 92.41600334644318, + "p99": 106.49599879980087 + }, + "combine": { + "p50": 84.60800349712372, + "p90": 88.79999816417694, + "p95": 92.57599711418152, + "p99": 99.64799880981445 + }, + "roundtrip": { + "p50": 139.16799426078796, + "p90": 151.39199793338776, + "p95": 155.83999454975128, + "p99": 164.35199975967407 + }, + "isolatedSum": { + "p50": 160.5760008096695, + "p90": 176.63999646902084, + "p95": 184.9920004606247, + "p99": 206.14399760961533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.7600035071373, + "p90": 89.1840010881424, + "p95": 94.59199756383896, + "p99": 119.9679970741272 + }, + "combine": { + "p50": 86.65599673986435, + "p90": 91.71199798583984, + "p95": 97.15200215578079, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 143.71199905872345, + "p90": 155.29599785804749, + "p95": 159.84000265598297, + "p99": 188.35200369358063 + }, + "isolatedSum": { + "p50": 164.41600024700165, + "p90": 180.89599907398224, + "p95": 191.74399971961975, + "p99": 225.3439947962761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
81.53600245714188, + "p90": 117.27999895811081, + "p95": 126.01600587368011, + "p99": 134.24000144004822 + }, + "combine": { + "p50": 87.39200234413147, + "p90": 92.41600334644318, + "p95": 95.29600292444229, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 145.31199634075165, + "p90": 157.82399475574493, + "p95": 161.6639941930771, + "p99": 170.20800709724426 + }, + "isolatedSum": { + "p50": 168.92800480127335, + "p90": 209.69600230455399, + "p95": 221.3120087981224, + "p99": 238.40000480413437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.75199681520462, + "p90": 92.32000261545181, + "p95": 96.83199971914291, + "p99": 118.46400052309036 + }, + "combine": { + "p50": 88.44800293445587, + "p90": 94.14400160312653, + "p95": 97.02400118112564, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 146.2080031633377, + "p90": 158.07999670505524, + "p95": 161.98399662971497, + "p99": 169.5680022239685 + }, + "isolatedSum": { + "p50": 171.1999997496605, + "p90": 186.46400421857834, + "p95": 193.85600090026855, + "p99": 222.6240038871765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.9839990735054, + "p90": 94.87999975681305, + "p95": 99.45599734783173, + "p99": 106.36799782514572 + }, + "combine": { + "p50": 95.51999717950821, + "p90": 101.27999633550644, + "p95": 104.06400263309479, + "p99": 112.12799698114395 + }, + "roundtrip": { + "p50": 151.0079950094223, + "p90": 162.23999857902527, + "p95": 166.81599617004395, + "p99": 
176.1920005083084 + }, + "isolatedSum": { + "p50": 181.5039962530136, + "p90": 196.1599960923195, + "p95": 203.5199999809265, + "p99": 218.49599480628967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.82399982213974, + "p90": 116.7680025100708, + "p95": 131.42399489879608, + "p99": 148.83199334144592 + }, + "combine": { + "p50": 108.73600095510483, + "p90": 113.8560026884079, + "p95": 116.15999788045883, + "p99": 139.55199718475342 + }, + "roundtrip": { + "p50": 175.74399709701538, + "p90": 184.03199315071106, + "p95": 186.65599822998047, + "p99": 196.9279944896698 + }, + "isolatedSum": { + "p50": 206.56000077724457, + "p90": 230.6240051984787, + "p95": 247.5839927792549, + "p99": 288.38399052619934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.19199746847153, + "p90": 120.64000219106674, + "p95": 125.50400197505951, + "p99": 133.15199315547943 + }, + "combine": { + "p50": 126.20800733566284, + "p90": 132.22399353981018, + "p95": 134.75200533866882, + "p99": 145.63199877738953 + }, + "roundtrip": { + "p50": 211.10400557518005, + "p90": 229.37600314617157, + "p95": 240.4160052537918, + "p99": 261.50399446487427 + }, + "isolatedSum": { + "p50": 238.40000480413437, + "p90": 252.86399573087692, + "p95": 260.25600731372833, + "p99": 278.78399193286896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bdf9903", + "identity": "gb300|deepep|v2|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_759801aa", + "comparisonKey": "133c9a9083da0033", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:05.257194+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + 
"createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 44.64000090956688, + "p90": 51.711998879909515, + "p95": 52.99200117588043, + "p99": 62.3680017888546 + }, + "combine": { + "p50": 39.36000168323517, + "p90": 46.46399989724159, + "p95": 47.775998711586, + "p99": 54.78399991989136 + }, + "roundtrip": { + "p50": 65.69600105285645, + "p90": 69.95200365781784, + "p95": 71.77600264549255, + "p99": 76.83199644088745 + }, + "isolatedSum": { + "p50": 84.00000259280205, + "p90": 98.17599877715111, + "p95": 100.76799988746643, + "p99": 117.15200170874596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 47.775998711586, + "p90": 52.38400027155876, + "p95": 53.66399884223938, + "p99": 60.06399914622307 + }, + "combine": { + "p50": 43.55200007557869, + "p90": 46.20800167322159, + "p95": 47.32799902558327, + "p99": 56.19199946522713 + }, + "roundtrip": { + "p50": 66.0799965262413, + "p90": 70.62400132417679, + "p95": 72.76800274848938, + "p99": 78.04799824953079 + }, + "isolatedSum": { + "p50": 91.32799878716469, + "p90": 98.59200194478035, + "p95": 100.99199786782265, + "p99": 116.2559986114502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 48.16000163555145, + "p90": 52.799999713897705, + "p95": 54.1439987719059, + "p99": 61.5679994225502 + }, + "combine": { + "p50": 43.487999588251114, + "p90": 46.33599892258644, + "p95": 47.71199822425842, + 
"p99": 55.776000022888184 + }, + "roundtrip": { + "p50": 66.01600348949432, + "p90": 70.0799971818924, + "p95": 72.12799787521362, + "p99": 78.14399898052216 + }, + "isolatedSum": { + "p50": 91.64800122380257, + "p90": 99.13599863648415, + "p95": 101.85599699616432, + "p99": 117.34399944543839 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 48.22399839758873, + "p90": 52.73599922657013, + "p95": 54.4000007212162, + "p99": 63.10400366783142 + }, + "combine": { + "p50": 43.55200007557869, + "p90": 46.560000628232956, + "p95": 47.90399968624115, + "p99": 56.89600110054016 + }, + "roundtrip": { + "p50": 66.56000018119812, + "p90": 71.45600020885468, + "p95": 73.11999797821045, + "p99": 78.65600287914276 + }, + "isolatedSum": { + "p50": 91.77599847316742, + "p90": 99.29599985480309, + "p95": 102.30400040745735, + "p99": 120.00000476837158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 50.175998359918594, + "p90": 53.63199859857559, + "p95": 56.63999915122986, + "p99": 65.92000275850296 + }, + "combine": { + "p50": 43.58400031924248, + "p90": 46.36799916625023, + "p95": 47.93599992990494, + "p99": 55.16799911856651 + }, + "roundtrip": { + "p50": 68.38399916887283, + "p90": 73.18399846553802, + "p95": 75.77600330114365, + "p99": 81.216000020504 + }, + "isolatedSum": { + "p50": 93.75999867916107, + "p90": 99.99999776482582, + "p95": 104.5759990811348, + "p99": 121.08800187706947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + 
"combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 51.61599814891815, + "p90": 54.59199845790863, + "p95": 57.08799883723259, + "p99": 64.12799656391144 + }, + "combine": { + "p50": 47.61600121855736, + "p90": 50.65599828958511, + "p95": 55.23199960589409, + "p99": 59.67999994754791 + }, + "roundtrip": { + "p50": 74.27199929952621, + "p90": 79.16799932718277, + "p95": 80.73599636554718, + "p99": 85.05599945783615 + }, + "isolatedSum": { + "p50": 99.23199936747551, + "p90": 105.24799674749374, + "p95": 112.31999844312668, + "p99": 123.80799651145935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 53.37600037455559, + "p90": 60.416001826524734, + "p95": 61.983998864889145, + "p99": 66.11199676990509 + }, + "combine": { + "p50": 53.63199859857559, + "p90": 58.559998869895935, + "p95": 60.67200005054474, + "p99": 68.80000233650208 + }, + "roundtrip": { + "p50": 86.07999980449677, + "p90": 90.59199690818787, + "p95": 92.6079973578453, + "p99": 98.33600372076035 + }, + "isolatedSum": { + "p50": 107.00799897313118, + "p90": 118.97600069642067, + "p95": 122.65599891543388, + "p99": 134.91199910640717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 65.18399715423584, + "p90": 68.70400160551071, + "p95": 72.1919983625412, + "p99": 79.99999821186066 + }, + "combine": { + 
"p50": 61.40799820423126, + "p90": 68.80000233650208, + "p95": 70.81600278615952, + "p99": 73.66400212049484 + }, + "roundtrip": { + "p50": 107.744000852108, + "p90": 112.28799819946289, + "p95": 114.33599889278412, + "p99": 117.72800236940384 + }, + "isolatedSum": { + "p50": 126.5919953584671, + "p90": 137.5040039420128, + "p95": 143.0080011487007, + "p99": 153.6640003323555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-790ff0ab", + "identity": "gb300|deepep|v2|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_975223f6", + "comparisonKey": "f5a29a751a3dd4e1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:20.713686+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": 
{ + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 49.40799996256828, + "p90": 53.18399891257286, + "p95": 56.12799897789955, + "p99": 65.60000032186508 + }, + "combine": { + "p50": 45.27999833226204, + "p90": 47.87199944257736, + "p95": 49.02400076389313, + "p99": 54.1439987719059 + }, + "roundtrip": { + "p50": 65.69600105285645, + "p90": 71.19999825954437, + "p95": 73.27999919652939, + "p99": 77.504001557827 + }, + "isolatedSum": { + "p50": 94.68799829483032, + "p90": 101.05599835515022, + "p95": 105.15199974179268, + "p99": 119.74399909377098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.66399818658829, + "p90": 53.119998425245285, + "p95": 56.384000927209854, + "p99": 63.45599889755249 + }, + "combine": { + "p50": 44.99199986457825, + "p90": 47.07200080156326, + "p95": 48.128001391887665, + "p99": 56.41600117087364 + }, + "roundtrip": { + "p50": 66.72000139951706, + "p90": 72.12799787521362, + "p95": 74.36800003051758, + "p99": 79.00799810886383 + }, + "isolatedSum": { + "p50": 94.65599805116653, + "p90": 100.19199922680855, + 
"p95": 104.51200231909752, + "p99": 119.87200006842613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 49.72799867391586, + "p90": 52.73599922657013, + "p95": 54.78399991989136, + "p99": 63.32799792289734 + }, + "combine": { + "p50": 45.05600035190582, + "p90": 47.07200080156326, + "p95": 48.48000034689903, + "p99": 55.00800162553787 + }, + "roundtrip": { + "p50": 66.3359984755516, + "p90": 72.09599763154984, + "p95": 74.81600344181061, + "p99": 83.45600217580795 + }, + "isolatedSum": { + "p50": 94.78399902582169, + "p90": 99.80800002813339, + "p95": 103.26400026679039, + "p99": 118.33599954843521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 50.4320003092289, + "p90": 53.31199988722801, + "p95": 55.96800148487091, + "p99": 63.90400230884552 + }, + "combine": { + "p50": 44.704001396894455, + "p90": 47.58400097489357, + "p95": 48.8319993019104, + "p99": 57.37600103020668 + }, + "roundtrip": { + "p50": 67.58400052785873, + "p90": 72.51200079917908, + "p95": 74.52800124883652, + "p99": 80.51200211048126 + }, + "isolatedSum": { + "p50": 95.13600170612335, + "p90": 100.89600086212158, + "p95": 104.80000078678131, + "p99": 121.2800033390522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
51.00800096988678, + "p90": 53.82400006055832, + "p95": 56.671999394893646, + "p99": 65.63200056552887 + }, + "combine": { + "p50": 45.504000037908554, + "p90": 47.648001462221146, + "p95": 48.70399832725525, + "p99": 56.73599988222122 + }, + "roundtrip": { + "p50": 69.40799951553345, + "p90": 74.81600344181061, + "p95": 76.7040029168129, + "p99": 81.63200318813324 + }, + "isolatedSum": { + "p50": 96.51200100779533, + "p90": 101.47200152277946, + "p95": 105.3759977221489, + "p99": 122.36800044775009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 51.29599943757057, + "p90": 54.496001452207565, + "p95": 59.4559982419014, + "p99": 64.00000303983688 + }, + "combine": { + "p50": 47.648001462221146, + "p90": 54.655998945236206, + "p95": 57.21599981188774, + "p99": 59.74400043487549 + }, + "roundtrip": { + "p50": 75.77600330114365, + "p90": 81.18399977684021, + "p95": 82.91199803352356, + "p99": 86.81599795818329 + }, + "isolatedSum": { + "p50": 98.94400089979172, + "p90": 109.15200039744377, + "p95": 116.67199805378914, + "p99": 123.74400347471237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 53.82400006055832, + "p90": 61.76000088453293, + "p95": 63.61600011587143, + "p99": 67.74400174617767 + }, + "combine": { + "p50": 56.86400085687637, + "p90": 59.4559982419014, + "p95": 60.896001756191254, + "p99": 69.11999732255936 + }, + "roundtrip": { + "p50": 87.80799806118011, + "p90": 93.34400296211243, + "p95": 95.2640026807785, + "p99": 100.63999891281128 
+ }, + "isolatedSum": { + "p50": 110.68800091743469, + "p90": 121.21599912643433, + "p95": 124.51200187206268, + "p99": 136.86399906873703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 64.86400216817856, + "p90": 72.31999933719635, + "p95": 74.07999783754349, + "p99": 83.8719978928566 + }, + "combine": { + "p50": 66.6240006685257, + "p90": 70.62400132417679, + "p95": 71.99999690055847, + "p99": 76.4160007238388 + }, + "roundtrip": { + "p50": 108.15999656915665, + "p90": 113.24799805879593, + "p95": 114.3999993801117, + "p99": 117.76000261306763 + }, + "isolatedSum": { + "p50": 131.48800283670425, + "p90": 142.94400066137314, + "p95": 146.07999473810196, + "p99": 160.2879986166954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dfdcf75f", + "identity": "gb300|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ebe68878aa18bb0", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "b05ff3512b24317c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:00.017664+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + 
"shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ebe68878aa18bb0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 400.64001083374023, + "p90": 431.64798617362976, + "p95": 437.47198581695557, + "p99": 451.200008392334 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 111.93600296974182, + "p95": 120.38400024175644, + "p99": 141.27999544143677 + }, + "roundtrip": { + "p50": 446.9119906425476, + "p90": 474.592000246048, + "p95": 482.015997171402, + "p99": 519.4879770278931 + }, + "isolatedSum": { + "p50": 474.14401173591614, + "p90": 543.5839891433716, + "p95": 557.855986058712, + "p99": 592.4800038337708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 0, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 389.5359933376312, + "p90": 423.2960045337677, + "p95": 432.15999007225037, + "p99": 460.7039988040924 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 104.38399761915207, + "p95": 113.56800049543381, + "p99": 137.92000710964203 + }, + "roundtrip": { + "p50": 433.6319863796234, + "p90": 472.83199429512024, + "p95": 479.0079891681671, + "p99": 509.5999836921692 + }, + "isolatedSum": { + "p50": 462.0479941368103, + "p90": 527.6800021529198, + "p95": 545.7279905676842, + "p99": 598.6240059137344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 398.4000086784363, + "p90": 438.30400705337524, + "p95": 447.9359984397888, + "p99": 462.97600865364075 + }, + "combine": { + "p50": 74.52800124883652, + "p90": 93.82399916648865, + "p95": 112.06399649381638, + "p99": 138.5280042886734 + }, + "roundtrip": { + "p50": 442.27200746536255, + "p90": 484.8000109195709, + "p95": 492.8320050239563, + "p99": 508.1920027732849 + }, + "isolatedSum": { + "p50": 472.9280099272728, + "p90": 532.1280062198639, + "p95": 559.9999949336052, + "p99": 601.5040129423141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 395.52000164985657, + "p90": 432.6080083847046, + "p95": 444.3199932575226, + "p99": 469.11999583244324 + }, + "combine": { + "p50": 76.51200145483017, + "p90": 107.07200318574905, + "p95": 120.70400267839432, + "p99": 147.16799557209015 + }, + 
"roundtrip": { + "p50": 446.5920031070709, + "p90": 482.1760058403015, + "p95": 488.3520007133484, + "p99": 513.4400129318237 + }, + "isolatedSum": { + "p50": 472.03200310468674, + "p90": 539.6800115704536, + "p95": 565.0239959359169, + "p99": 616.2879914045334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 397.0879912376404, + "p90": 429.24800515174866, + "p95": 435.36001443862915, + "p99": 457.98400044441223 + }, + "combine": { + "p50": 77.27999985218048, + "p90": 110.01600325107574, + "p95": 124.64000284671783, + "p99": 145.1839953660965 + }, + "roundtrip": { + "p50": 448.15999269485474, + "p90": 481.1840057373047, + "p95": 489.3440008163452, + "p99": 526.8480181694031 + }, + "isolatedSum": { + "p50": 474.36799108982086, + "p90": 539.2640084028244, + "p95": 560.000017285347, + "p99": 603.1679958105087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 390.5920088291168, + "p90": 424.54400658607483, + "p95": 435.42400002479553, + "p99": 469.02400255203247 + }, + "combine": { + "p50": 80.06399869918823, + "p90": 88.95999938249588, + "p95": 91.51999652385712, + "p99": 101.9200012087822 + }, + "roundtrip": { + "p50": 440.5759871006012, + "p90": 476.9600033760071, + "p95": 484.76800322532654, + "p99": 499.424010515213 + }, + "isolatedSum": { + "p50": 470.65600752830505, + "p90": 513.5040059685707, + "p95": 526.9439965486526, + "p99": 570.9440037608147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 
11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 400.12800693511963, + "p90": 434.59200859069824, + "p95": 443.77601146698, + "p99": 456.928014755249 + }, + "combine": { + "p50": 91.16800129413605, + "p90": 131.071999669075, + "p95": 146.97599411010742, + "p99": 157.69599378108978 + }, + "roundtrip": { + "p50": 460.06399393081665, + "p90": 500.5760192871094, + "p95": 509.92000102996826, + "p99": 532.0000052452087 + }, + "isolatedSum": { + "p50": 491.2960082292557, + "p90": 565.6640082597733, + "p95": 590.7520055770874, + "p99": 614.6240085363388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 399.23200011253357, + "p90": 441.47199392318726, + "p95": 455.1680088043213, + "p99": 472.4479913711548 + }, + "combine": { + "p50": 107.80800133943558, + "p90": 135.45599579811096, + "p95": 146.30399644374847, + "p99": 156.47999942302704 + }, + "roundtrip": { + "p50": 474.65598583221436, + "p90": 508.9600086212158, + "p95": 515.5199766159058, + "p99": 542.4960255622864 + }, + "isolatedSum": { + "p50": 507.04000145196915, + "p90": 576.9279897212982, + "p95": 601.4720052480698, + "p99": 628.9279907941818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e9a3eea", + "identity": "gb300|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||13e2b193b87a112", + "colorKey": "gb300_8db1bd7e", + 
"comparisonKey": "94574e0eaf2d6f5b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:04.979254+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "13e2b193b87a112", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 423.2639968395233, + "p90": 448.5119879245758, + "p95": 453.66400480270386, 
+ "p99": 478.5600006580353 + }, + "combine": { + "p50": 71.9359964132309, + "p90": 81.88799768686295, + "p95": 86.04799956083298, + "p99": 111.51999980211258 + }, + "roundtrip": { + "p50": 478.94400358200073, + "p90": 502.1759867668152, + "p95": 508.7680220603943, + "p99": 541.1520004272461 + }, + "isolatedSum": { + "p50": 495.1999932527542, + "p90": 530.3999856114388, + "p95": 539.7120043635368, + "p99": 590.0800004601479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 437.6960098743439, + "p90": 457.8559994697571, + "p95": 464.5119905471802, + "p99": 490.07999897003174 + }, + "combine": { + "p50": 72.12799787521362, + "p90": 101.43999755382538, + "p95": 115.10399729013443, + "p99": 142.2719955444336 + }, + "roundtrip": { + "p50": 487.8079891204834, + "p90": 513.3119821548462, + "p95": 522.7839946746826, + "p99": 546.9440221786499 + }, + "isolatedSum": { + "p50": 509.8240077495575, + "p90": 559.2959970235825, + "p95": 579.6159878373146, + "p99": 632.3519945144653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 421.56800627708435, + "p90": 448.1920003890991, + "p95": 453.5680115222931, + "p99": 483.3599925041199 + }, + "combine": { + "p50": 73.79200309515, + "p90": 98.84800016880035, + "p95": 118.1119978427887, + "p99": 145.85599303245544 + }, + "roundtrip": { + "p50": 474.7200012207031, + "p90": 500.63997507095337, + "p95": 509.5360279083252, + "p99": 533.1519842147827 + }, + "isolatedSum": { + "p50": 495.36000937223434, + "p90": 547.0400005578995, + "p95": 
571.6800093650818, + "p99": 629.2159855365753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 434.5279932022095, + "p90": 453.3120095729828, + "p95": 463.80800008773804, + "p99": 487.2319996356964 + }, + "combine": { + "p50": 76.80000364780426, + "p90": 104.92800176143646, + "p95": 120.38400024175644, + "p99": 142.87999272346497 + }, + "roundtrip": { + "p50": 485.6640100479126, + "p90": 507.968008518219, + "p95": 521.9519734382629, + "p99": 541.5040254592896 + }, + "isolatedSum": { + "p50": 511.32799685001373, + "p90": 558.2400113344193, + "p95": 584.1920003294945, + "p99": 630.1119923591614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 423.64799976348877, + "p90": 451.7120122909546, + "p95": 461.216002702713, + "p99": 494.9119985103607 + }, + "combine": { + "p50": 76.86399668455124, + "p90": 92.16000139713287, + "p95": 110.23999750614166, + "p99": 145.4399973154068 + }, + "roundtrip": { + "p50": 481.31200671195984, + "p90": 510.0160241127014, + "p95": 522.0159888267517, + "p99": 546.2719798088074 + }, + "isolatedSum": { + "p50": 500.51199644804, + "p90": 543.8720136880875, + "p95": 571.4560002088547, + "p99": 640.3519958257675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 
427.90400981903076, + "p90": 450.655996799469, + "p95": 458.8800072669983, + "p99": 479.2320132255554 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 105.31199723482132, + "p95": 122.81599640846252, + "p99": 146.88000082969666 + }, + "roundtrip": { + "p50": 481.56800866127014, + "p90": 508.512020111084, + "p95": 516.9280171394348, + "p99": 538.4640097618103 + }, + "isolatedSum": { + "p50": 509.44001227617264, + "p90": 555.9679940342903, + "p95": 581.6960036754608, + "p99": 626.1120140552521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 419.1359877586365, + "p90": 449.18400049209595, + "p95": 460.57599782943726, + "p99": 502.6559829711914 + }, + "combine": { + "p50": 96.51199728250504, + "p90": 135.42400300502777, + "p95": 146.7200070619583, + "p99": 156.19200468063354 + }, + "roundtrip": { + "p50": 488.48000168800354, + "p90": 515.5839920043945, + "p95": 526.4000296592712, + "p99": 560.5760216712952 + }, + "isolatedSum": { + "p50": 515.6479850411415, + "p90": 584.6080034971237, + "p95": 607.2960048913956, + "p99": 658.847987651825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 425.28000473976135, + "p90": 451.4879882335663, + "p95": 459.26401019096375, + "p99": 486.2079918384552 + }, + "combine": { + "p50": 112.09599673748016, + "p90": 127.13600695133209, + "p95": 142.33599603176117, + "p99": 165.43999314308167 + }, + "roundtrip": { + "p50": 512.8639936447144, + "p90": 541.5999889373779, + "p95": 559.7119927406311, + "p99": 
584.8000049591064 + }, + "isolatedSum": { + "p50": 537.3760014772415, + "p90": 578.6239951848984, + "p95": 601.6000062227249, + "p99": 651.6479849815369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c55b87ac", + "identity": "gb300|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "7903d1bbd54c43ac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:13.491962+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 424.2880046367645, + "p90": 449.8879909515381, + "p95": 454.6239972114563, + "p99": 472.9920029640198 + }, + "combine": { + "p50": 77.05599814653397, + "p90": 104.5759990811348, + "p95": 116.92799627780914, + "p99": 139.80799913406372 + }, + "roundtrip": { + "p50": 478.68800163269043, + "p90": 499.7760057449341, + "p95": 506.24001026153564, + "p99": 519.0719962120056 + }, + "isolatedSum": { + "p50": 501.3440027832985, + "p90": 554.4639900326729, + "p95": 571.5519934892654, + "p99": 612.8000020980835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 424.3519902229309, + "p90": 444.5439875125885, + "p95": 450.97601413726807, + "p99": 464.32000398635864 + }, + "combine": { + "p50": 75.93599706888199, + "p90": 82.11199939250946, + "p95": 84.57600325345993, + "p99": 90.55999666452408 + }, + "roundtrip": { + "p50": 474.17598962783813, + "p90": 495.2000081539154, + "p95": 500.70399045944214, + "p99": 512.1920108795166 + }, + "isolatedSum": { + "p50": 500.2879872918129, + "p90": 526.655986905098, + "p95": 535.552017390728, + "p99": 554.8800006508827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + 
"stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 431.4559996128082, + "p90": 457.8239917755127, + "p95": 468.7039852142334, + "p99": 501.7920136451721 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 104.19200360774994, + "p95": 115.64800143241882, + "p99": 125.11999905109406 + }, + "roundtrip": { + "p50": 478.87998819351196, + "p90": 502.8799772262573, + "p95": 516.0319805145264, + "p99": 545.632004737854 + }, + "isolatedSum": { + "p50": 510.1440027356148, + "p90": 562.0159953832626, + "p95": 584.3519866466522, + "p99": 626.9120126962662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 426.1760115623474, + "p90": 448.63998889923096, + "p95": 454.0160000324249, + "p99": 475.39201378822327 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 86.27200126647949, + "p95": 89.40800279378891, + "p99": 98.43199700117111 + }, + "roundtrip": { + "p50": 475.0399887561798, + "p90": 495.58401107788086, + "p95": 501.8560290336609, + "p99": 537.4079942703247 + }, + "isolatedSum": { + "p50": 505.8880150318146, + "p90": 534.9119901657104, + "p95": 543.4240028262138, + "p99": 573.8240107893944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 435.2959990501404, + "p90": 472.51200675964355, + "p95": 512.7679705619812, + "p99": 627.1359920501709 + }, + "combine": { + "p50": 85.9839990735054, + "p90": 134.0160071849823, + "p95": 146.36799693107605, + "p99": 
173.37599396705627 + }, + "roundtrip": { + "p50": 487.36000061035156, + "p90": 522.9439735412598, + "p95": 557.8240156173706, + "p99": 695.2639818191528 + }, + "isolatedSum": { + "p50": 521.2799981236458, + "p90": 606.5280139446259, + "p95": 659.1359674930573, + "p99": 800.5119860172272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 424.0320026874542, + "p90": 446.30399346351624, + "p95": 452.38399505615234, + "p99": 469.34399008750916 + }, + "combine": { + "p50": 86.04799956083298, + "p90": 93.28000247478485, + "p95": 96.63999825716019, + "p99": 109.95200276374817 + }, + "roundtrip": { + "p50": 482.14399814605713, + "p90": 502.4319887161255, + "p95": 509.37598943710327, + "p99": 524.0319967269897 + }, + "isolatedSum": { + "p50": 510.0800022482872, + "p90": 539.5839959383011, + "p95": 549.0239933133125, + "p99": 579.2959928512573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 429.53601479530334, + "p90": 454.78400588035583, + "p95": 462.46400475502014, + "p99": 496.96001410484314 + }, + "combine": { + "p50": 99.67999905347824, + "p90": 125.40799379348755, + "p95": 133.7919980287552, + "p99": 149.6639996767044 + }, + "roundtrip": { + "p50": 499.424010515213, + "p90": 522.1120119094849, + "p95": 535.8399748802185, + "p99": 567.0400261878967 + }, + "isolatedSum": { + "p50": 529.2160138487816, + "p90": 580.1919996738434, + "p95": 596.2560027837753, + "p99": 646.6240137815475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + 
"combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 432.6080083847046, + "p90": 467.16800332069397, + "p95": 484.19201374053955, + "p99": 616.0320043563843 + }, + "combine": { + "p50": 127.10399925708771, + "p90": 153.24799716472626, + "p95": 162.27200627326965, + "p99": 201.02399587631226 + }, + "roundtrip": { + "p50": 525.5680084228516, + "p90": 567.8719878196716, + "p95": 605.0879955291748, + "p99": 726.9120216369629 + }, + "isolatedSum": { + "p50": 559.7120076417923, + "p90": 620.4160004854202, + "p95": 646.4640200138092, + "p99": 817.0560002326965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6c0f7cfd", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_33bbf042", + "comparisonKey": "03176b8619c645a6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:06.239833+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.18400239944458, + "p90": 110.6560006737709, + "p95": 117.24799871444702, + "p99": 128.35200130939484 + }, + "combine": { + "p50": 77.37600058317184, + "p90": 81.4720019698143, + "p95": 84.73599702119827, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 257.85601139068604, + "p90": 279.29601073265076, + "p95": 285.2480113506317, + "p99": 300.4159927368164 + }, + "isolatedSum": { + "p50": 174.56000298261642, + "p90": 192.1280026435852, + "p95": 201.9839957356453, + "p99": 225.8239984512329 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.47999703884125, + "p90": 111.00800335407257, + "p95": 
117.79200285673141, + "p99": 132.35199451446533 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 83.48800241947174, + "p95": 86.07999980449677, + "p99": 94.97600048780441 + }, + "roundtrip": { + "p50": 264.73599672317505, + "p90": 282.75200724601746, + "p95": 287.4560058116913, + "p99": 331.7759931087494 + }, + "isolatedSum": { + "p50": 175.23200064897537, + "p90": 194.4960057735443, + "p95": 203.87200266122818, + "p99": 227.32799500226974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.28000313043594, + "p90": 110.88000237941742, + "p95": 116.12799763679504, + "p99": 129.69599664211273 + }, + "combine": { + "p50": 79.8719972372055, + "p90": 84.32000130414963, + "p95": 86.59200370311737, + "p99": 101.50399804115295 + }, + "roundtrip": { + "p50": 261.34398579597473, + "p90": 280.5440127849579, + "p95": 285.8560085296631, + "p99": 297.0240116119385 + }, + "isolatedSum": { + "p50": 177.15200036764145, + "p90": 195.20000368356705, + "p95": 202.72000133991241, + "p99": 231.1999946832657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.79999947547913, + "p90": 113.47199976444244, + "p95": 116.83200299739838, + "p99": 127.13600695133209 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 85.34400165081024, + "p95": 88.76799792051315, + "p99": 96.47999703884125 + }, + "roundtrip": { + "p50": 265.6320035457611, + "p90": 283.9680016040802, + "p95": 290.367990732193, + "p99": 316.9279992580414 + }, + "isolatedSum": { + "p50": 178.40000241994858, + 
"p90": 198.81600141525269, + "p95": 205.60000091791153, + "p99": 223.61600399017334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.48799693584442, + "p90": 111.55200004577637, + "p95": 117.5680011510849, + "p99": 132.76800513267517 + }, + "combine": { + "p50": 82.75199681520462, + "p90": 87.36000210046768, + "p95": 91.51999652385712, + "p99": 99.87200051546097 + }, + "roundtrip": { + "p50": 261.50399446487427, + "p90": 280.70399165153503, + "p95": 285.8240008354187, + "p99": 315.90399146080017 + }, + "isolatedSum": { + "p50": 178.23999375104904, + "p90": 198.91200214624405, + "p95": 209.08799767494202, + "p99": 232.64000564813614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.08800166845322, + "p90": 111.29599809646606, + "p95": 115.00799655914307, + "p99": 125.791996717453 + }, + "combine": { + "p50": 87.96799927949905, + "p90": 92.67199784517288, + "p95": 95.10400146245956, + "p99": 102.94400155544281 + }, + "roundtrip": { + "p50": 271.263986825943, + "p90": 289.5680069923401, + "p95": 293.92001032829285, + "p99": 307.2960078716278 + }, + "isolatedSum": { + "p50": 185.05600094795227, + "p90": 203.96799594163895, + "p95": 210.11199802160263, + "p99": 228.7359982728958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + 
"globalTokens": 512, + "dispatch": { + "p50": 102.11200267076492, + "p90": 112.96000331640244, + "p95": 117.5680011510849, + "p99": 128.63999605178833 + }, + "combine": { + "p50": 101.3759970664978, + "p90": 106.23999685049057, + "p95": 109.31199789047241, + "p99": 128.89599800109863 + }, + "roundtrip": { + "p50": 284.31999683380127, + "p90": 300.25601387023926, + "p95": 303.99999022483826, + "p99": 319.2639946937561 + }, + "isolatedSum": { + "p50": 203.48799973726273, + "p90": 219.200000166893, + "p95": 226.8799990415573, + "p99": 257.53599405288696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.76000195741653, + "p90": 122.11199849843979, + "p95": 126.71999633312225, + "p99": 134.24000144004822 + }, + "combine": { + "p50": 121.40800058841705, + "p90": 126.97599828243256, + "p95": 131.6480040550232, + "p99": 139.23199474811554 + }, + "roundtrip": { + "p50": 309.4080090522766, + "p90": 324.8960077762604, + "p95": 330.4319977760315, + "p99": 342.6240086555481 + }, + "isolatedSum": { + "p50": 235.1680025458336, + "p90": 249.08799678087234, + "p95": 258.36800038814545, + "p99": 273.47199618816376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d8af479", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "13ad5a079de40fdc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:16.433783+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 470.36799788475037, + "p90": 491.4880096912384, + "p95": 497.6319968700409, + "p99": 524.6719717979431 + }, + "combine": { + "p50": 81.24800026416779, + "p90": 86.5280032157898, + "p95": 91.0400003194809, + "p99": 101.08800232410431 + 
}, + "roundtrip": { + "p50": 528.9599895477295, + "p90": 549.4719743728638, + "p95": 556.6719770431519, + "p99": 570.0160264968872 + }, + "isolatedSum": { + "p50": 551.6159981489182, + "p90": 578.0160129070282, + "p95": 588.6719971895218, + "p99": 625.7599741220474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 469.9519872665405, + "p90": 494.1439926624298, + "p95": 503.4880042076111, + "p99": 534.6239805221558 + }, + "combine": { + "p50": 80.48000186681747, + "p90": 85.02399921417236, + "p95": 88.48000317811966, + "p99": 95.07200121879578 + }, + "roundtrip": { + "p50": 528.4799933433533, + "p90": 550.6240129470825, + "p95": 558.3040118217468, + "p99": 606.0159802436829 + }, + "isolatedSum": { + "p50": 550.431989133358, + "p90": 579.1679918766022, + "p95": 591.9680073857307, + "p99": 629.6959817409515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 464.89599347114563, + "p90": 487.64801025390625, + "p95": 494.2080080509186, + "p99": 526.5600085258484 + }, + "combine": { + "p50": 83.23200047016144, + "p90": 88.25600147247314, + "p95": 92.03200042247772, + "p99": 99.87200051546097 + }, + "roundtrip": { + "p50": 529.8879742622375, + "p90": 552.2559881210327, + "p95": 560.1599812507629, + "p99": 622.6879954338074 + }, + "isolatedSum": { + "p50": 548.1279939413071, + "p90": 575.9040117263794, + "p95": 586.2400084733963, + "p99": 626.4320090413094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 
5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 472.4479913711548, + "p90": 494.84801292419434, + "p95": 500.7680058479309, + "p99": 519.9040174484253 + }, + "combine": { + "p50": 84.89599823951721, + "p90": 90.65599739551544, + "p95": 94.08000111579895, + "p99": 99.96800124645233 + }, + "roundtrip": { + "p50": 532.9920053482056, + "p90": 551.904022693634, + "p95": 558.784008026123, + "p99": 573.6640095710754 + }, + "isolatedSum": { + "p50": 557.343989610672, + "p90": 585.5040103197098, + "p95": 594.8480069637299, + "p99": 619.8720186948776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 474.62400794029236, + "p90": 497.2800016403198, + "p95": 504.35197353363037, + "p99": 534.6559882164001 + }, + "combine": { + "p50": 86.5280032157898, + "p90": 92.32000261545181, + "p95": 95.96800059080124, + "p99": 102.55999863147736 + }, + "roundtrip": { + "p50": 535.5839729309082, + "p90": 558.2720041275024, + "p95": 563.1999969482422, + "p99": 583.0399990081787 + }, + "isolatedSum": { + "p50": 561.1520111560822, + "p90": 589.6000042557716, + "p95": 600.3199741244316, + "p99": 637.2159868478775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 471.8720018863678, + "p90": 495.743989944458, + "p95": 503.167986869812, + "p99": 542.0799851417542 + }, + "combine": { + "p50": 91.51999652385712, + "p90": 97.120001912117, + "p95": 
100.70399940013885, + "p99": 110.36799848079681 + }, + "roundtrip": { + "p50": 535.1359844207764, + "p90": 552.191972732544, + "p95": 559.7440004348755, + "p99": 576.960027217865 + }, + "isolatedSum": { + "p50": 563.3919984102249, + "p90": 592.863991856575, + "p95": 603.8719862699509, + "p99": 652.447983622551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 463.8400077819824, + "p90": 490.4319941997528, + "p95": 496.19200825691223, + "p99": 530.8799743652344 + }, + "combine": { + "p50": 104.60799932479858, + "p90": 108.64000022411346, + "p95": 111.80800199508667, + "p99": 118.8800036907196 + }, + "roundtrip": { + "p50": 538.8799905776978, + "p90": 561.9519948959351, + "p95": 567.6800012588501, + "p99": 611.6480231285095 + }, + "isolatedSum": { + "p50": 568.448007106781, + "p90": 599.0719944238663, + "p95": 608.0000102519989, + "p99": 649.759978055954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 474.4639992713928, + "p90": 496.41600251197815, + "p95": 503.32802534103394, + "p99": 529.0240049362183 + }, + "combine": { + "p50": 124.60800260305405, + "p90": 130.49599528312683, + "p95": 133.15199315547943, + "p99": 140.32000303268433 + }, + "roundtrip": { + "p50": 566.3359761238098, + "p90": 583.2639932632446, + "p95": 588.1279706954956, + "p99": 630.1440000534058 + }, + "isolatedSum": { + "p50": 599.0720018744469, + "p90": 626.911997795105, + "p95": 636.4800184965134, + "p99": 669.3440079689026 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ef7e3b72", + "identity": "gb300|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||6d507ec2ec8998f", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "3e6519cb4130ddf6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:43.591650+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6d507ec2ec8998f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 438.9120042324066, + "p90": 466.7840003967285, + "p95": 472.83199429512024, + "p99": 500.44798851013184 + }, + "combine": { + "p50": 76.83199644088745, + "p90": 83.55200290679932, + "p95": 87.26400136947632, + "p99": 95.29600292444229 + }, + "roundtrip": { + "p50": 495.13599276542664, + "p90": 521.6320157051086, + "p95": 526.4639854431152, + "p99": 552.1280169487 + }, + "isolatedSum": { + "p50": 515.7440006732941, + "p90": 550.3360033035278, + "p95": 560.0959956645966, + "p99": 595.7439914345741 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 454.71999049186707, + "p90": 475.93599557876587, + "p95": 483.0079972743988, + "p99": 536.5440249443054 + }, + "combine": { + "p50": 79.26400005817413, + "p90": 86.65599673986435, + "p95": 90.52799642086029, + "p99": 94.33600306510925 + }, + "roundtrip": { + "p50": 504.89598512649536, + "p90": 524.7359871864319, + "p95": 530.9119820594788, + "p99": 547.2959876060486 + }, + "isolatedSum": { + "p50": 533.9839905500412, + "p90": 562.5919923186302, + "p95": 573.5359936952591, + "p99": 630.8800280094147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 429.1520118713379, + "p90": 458.3039879798889, + "p95": 
467.4240052700043, + "p99": 493.0559992790222 + }, + "combine": { + "p50": 79.96799796819687, + "p90": 86.14400029182434, + "p95": 90.52799642086029, + "p99": 96.63999825716019 + }, + "roundtrip": { + "p50": 483.5839867591858, + "p90": 510.8799934387207, + "p95": 520.5119848251343, + "p99": 542.6880121231079 + }, + "isolatedSum": { + "p50": 509.12000983953476, + "p90": 544.4479882717133, + "p95": 557.9520016908646, + "p99": 589.6959975361824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 453.47198843955994, + "p90": 472.6719856262207, + "p95": 478.7519872188568, + "p99": 521.1520195007324 + }, + "combine": { + "p50": 83.80799740552902, + "p90": 90.65599739551544, + "p95": 95.77599912881851, + "p99": 100.19200295209885 + }, + "roundtrip": { + "p50": 511.9360089302063, + "p90": 529.5040011405945, + "p95": 533.5999727249146, + "p99": 545.1520085334778 + }, + "isolatedSum": { + "p50": 537.279985845089, + "p90": 563.3279830217361, + "p95": 574.5279863476753, + "p99": 621.3440224528313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 452.09598541259766, + "p90": 484.44798588752747, + "p95": 586.5600109100342, + "p99": 1059.1039657592773 + }, + "combine": { + "p50": 86.62399649620056, + "p90": 100.38399696350098, + "p95": 138.3039951324463, + "p99": 793.4399843215942 + }, + "roundtrip": { + "p50": 502.8480291366577, + "p90": 533.1199765205383, + "p95": 573.1199979782104, + "p99": 685.7920289039612 + }, + "isolatedSum": { + "p50": 538.7199819087982, + "p90": 
584.8319828510284, + "p95": 724.8640060424805, + "p99": 1852.5439500808716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 451.9360065460205, + "p90": 476.3199985027313, + "p95": 514.9440169334412, + "p99": 617.0560121536255 + }, + "combine": { + "p50": 91.2960022687912, + "p90": 99.71199929714203, + "p95": 105.82400113344193, + "p99": 147.23199605941772 + }, + "roundtrip": { + "p50": 516.9919729232788, + "p90": 540.6720042228699, + "p95": 553.056001663208, + "p99": 678.879976272583 + }, + "isolatedSum": { + "p50": 543.2320088148117, + "p90": 576.0319977998734, + "p95": 620.7680180668831, + "p99": 764.2880082130432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 446.0799992084503, + "p90": 473.60000014305115, + "p95": 482.84798860549927, + "p99": 496.16000056266785 + }, + "combine": { + "p50": 104.51199859380722, + "p90": 111.10399663448334, + "p95": 114.1119971871376, + "p99": 119.9679970741272 + }, + "roundtrip": { + "p50": 524.4160294532776, + "p90": 547.3920106887817, + "p95": 553.3120036125183, + "p99": 594.8160290718079 + }, + "isolatedSum": { + "p50": 550.5919978022575, + "p90": 584.7039967775345, + "p95": 596.9599857926369, + "p99": 616.127997636795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 451.7439901828766, + "p90": 477.9840111732483, + "p95": 490.30399322509766, + "p99": 578.2719850540161 + }, + "combine": { + "p50": 123.87199699878693, + "p90": 129.60000336170197, + "p95": 133.18400084972382, + "p99": 139.3599957227707 + }, + "roundtrip": { + "p50": 555.6480288505554, + "p90": 582.207977771759, + "p95": 591.7760133743286, + "p99": 717.087984085083 + }, + "isolatedSum": { + "p50": 575.6159871816635, + "p90": 607.5840145349503, + "p95": 623.4879940748215, + "p99": 717.6319807767868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a3a2305", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_4e6a59ba", + "comparisonKey": "a6bb3b0c93895cdf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:39.702631+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 
20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 84.3840017914772, + "p90": 105.79200088977814, + "p95": 118.40000003576279, + "p99": 147.0080018043518 + }, + "combine": { + "p50": 80.09599894285202, + "p90": 107.35999792814255, + "p95": 121.05599790811539, + "p99": 147.87200093269348 + }, + "roundtrip": { + "p50": 245.63199281692505, + "p90": 273.6000120639801, + "p95": 289.2799973487854, + "p99": 326.78401470184326 + }, + "isolatedSum": { + "p50": 164.48000073432922, + "p90": 213.15199881792068, + "p95": 239.45599794387817, + "p99": 294.8800027370453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 83.45600217580795, + "p90": 96.99200093746185, + "p95": 102.39999741315842, + "p99": 121.63200229406357 + }, + "combine": { + "p50": 80.35200089216232, + "p90": 85.85599809885025, + "p95": 89.56799656152725, + "p99": 101.08800232410431 + }, + "roundtrip": { + "p50": 
246.62399291992188, + "p90": 264.67201113700867, + "p95": 275.4879891872406, + "p99": 335.80800890922546 + }, + "isolatedSum": { + "p50": 163.80800306797028, + "p90": 182.8479990363121, + "p95": 191.96799397468567, + "p99": 222.72000461816788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 84.32000130414963, + "p90": 107.51999914646149, + "p95": 122.30399996042252, + "p99": 151.96800231933594 + }, + "combine": { + "p50": 81.98399841785431, + "p90": 97.6639986038208, + "p95": 115.87200313806534, + "p99": 144.48000490665436 + }, + "roundtrip": { + "p50": 248.6719936132431, + "p90": 277.50399708747864, + "p95": 291.6159927845001, + "p99": 335.4560136795044 + }, + "isolatedSum": { + "p50": 166.30399972200394, + "p90": 205.1839977502823, + "p95": 238.17600309848785, + "p99": 296.4480072259903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 82.30400085449219, + "p90": 96.00000083446503, + "p95": 100.99200159311295, + "p99": 127.3919939994812 + }, + "combine": { + "p50": 84.1279998421669, + "p90": 90.20800143480301, + "p95": 94.36800330877304, + "p99": 121.0239976644516 + }, + "roundtrip": { + "p50": 252.03201174736023, + "p90": 273.3120024204254, + "p95": 282.8800082206726, + "p99": 326.07999444007874 + }, + "isolatedSum": { + "p50": 166.4320006966591, + "p90": 186.20800226926804, + "p95": 195.360004901886, + "p99": 248.4159916639328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + 
"recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 84.89599823951721, + "p90": 99.80800002813339, + "p95": 108.5439994931221, + "p99": 139.5840048789978 + }, + "combine": { + "p50": 86.56000345945358, + "p90": 94.87999975681305, + "p95": 97.98400104045868, + "p99": 139.96799290180206 + }, + "roundtrip": { + "p50": 253.4399926662445, + "p90": 275.7439911365509, + "p95": 289.4720137119293, + "p99": 336.5760147571564 + }, + "isolatedSum": { + "p50": 171.4560016989708, + "p90": 194.68799978494644, + "p95": 206.52800053358078, + "p99": 279.55199778079987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.37600189447403, + "p90": 98.27200323343277, + "p95": 103.39199751615524, + "p99": 115.64800143241882 + }, + "combine": { + "p50": 90.97599983215332, + "p90": 98.11200201511383, + "p95": 101.95200145244598, + "p99": 116.03199690580368 + }, + "roundtrip": { + "p50": 254.59200143814087, + "p90": 272.41599559783936, + "p95": 279.7439992427826, + "p99": 297.12000489234924 + }, + "isolatedSum": { + "p50": 176.35200172662735, + "p90": 196.3840052485466, + "p95": 205.34399896860123, + "p99": 231.6799983382225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 91.39200299978256, + "p90": 102.24000364542007, + "p95": 112.0000034570694, + "p99": 148.28799664974213 + }, + "combine": { + "p50": 103.74400019645691, + "p90": 112.31999844312668, + 
"p95": 118.1119978427887, + "p99": 160.25599837303162 + }, + "roundtrip": { + "p50": 269.0880000591278, + "p90": 288.4159982204437, + "p95": 295.48799991607666, + "p99": 339.80798721313477 + }, + "isolatedSum": { + "p50": 195.13600319623947, + "p90": 214.56000208854675, + "p95": 230.1120012998581, + "p99": 308.54399502277374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.94400155544281, + "p90": 111.26399785280228, + "p95": 116.83200299739838, + "p99": 138.047993183136 + }, + "combine": { + "p50": 123.3920007944107, + "p90": 128.86400520801544, + "p95": 132.38400220870972, + "p99": 142.59199798107147 + }, + "roundtrip": { + "p50": 293.3439910411835, + "p90": 309.28000807762146, + "p95": 314.84800577163696, + "p99": 328.19199562072754 + }, + "isolatedSum": { + "p50": 226.33600234985352, + "p90": 240.12800306081772, + "p95": 249.2160052061081, + "p99": 280.63999116420746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-151e99ca", + "identity": "gb300|deepep|v2|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_314e52ef", + "comparisonKey": "3fdc715119e92c2d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:52.271129+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 44.38399896025658, + "p90": 55.10399863123894, + "p95": 65.72800129652023, + "p99": 107.04000294208527 + }, + "combine": { + "p50": 57.37600103020668, + "p90": 68.09599697589874, + "p95": 71.58400118350983, + "p99": 90.97599983215332 + }, + "roundtrip": { + "p50": 1529.2160511016846, + "p90": 1562.3680353164673, + "p95": 1579.3919563293457, + "p99": 4602.784156799316 + }, + "isolatedSum": { + "p50": 101.75999999046326, + "p90": 123.19999560713768, + "p95": 137.31200248003006, + 
"p99": 198.0160027742386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 44.76799815893173, + "p90": 67.1359971165657, + "p95": 70.88000327348709, + "p99": 103.42399775981903 + }, + "combine": { + "p50": 55.615998804569244, + "p90": 68.60800087451935, + "p95": 73.72800260782242, + "p99": 77.44000107049942 + }, + "roundtrip": { + "p50": 1526.8800258636475, + "p90": 1552.9279708862305, + "p95": 1564.0640258789062, + "p99": 1583.2959413528442 + }, + "isolatedSum": { + "p50": 100.38399696350098, + "p90": 135.74399799108505, + "p95": 144.6080058813095, + "p99": 180.86399883031845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 46.23999819159508, + "p90": 66.3679987192154, + "p95": 74.33599978685379, + "p99": 104.38399761915207 + }, + "combine": { + "p50": 58.720000088214874, + "p90": 71.84000313282013, + "p95": 75.23199915885925, + "p99": 84.99199897050858 + }, + "roundtrip": { + "p50": 1535.3599786758423, + "p90": 1554.3999671936035, + "p95": 1564.6400451660156, + "p99": 1583.456039428711 + }, + "isolatedSum": { + "p50": 104.95999827980995, + "p90": 138.20800185203552, + "p95": 149.56799894571304, + "p99": 189.37599658966064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 44.28799822926521, + "p90": 
49.536000937223434, + "p95": 53.47200110554695, + "p99": 58.6559996008873 + }, + "combine": { + "p50": 56.992001831531525, + "p90": 65.11999666690826, + "p95": 68.1919977068901, + "p99": 72.1919983625412 + }, + "roundtrip": { + "p50": 1528.0959606170654, + "p90": 1538.4000539779663, + "p95": 1541.983962059021, + "p99": 1568.735957145691 + }, + "isolatedSum": { + "p50": 101.28000006079674, + "p90": 114.6559976041317, + "p95": 121.66399881243706, + "p99": 130.8479979634285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 44.704001396894455, + "p90": 54.88000065088272, + "p95": 61.43999844789505, + "p99": 100.28800368309021 + }, + "combine": { + "p50": 57.56799876689911, + "p90": 69.18399780988693, + "p95": 72.12799787521362, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 1531.2000513076782, + "p90": 1547.1680164337158, + "p95": 1555.8719635009766, + "p99": 1579.4559717178345 + }, + "isolatedSum": { + "p50": 102.27200016379356, + "p90": 124.06399846076965, + "p95": 133.56799632310867, + "p99": 178.94400656223297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.89599913358688, + "p90": 52.57600173354149, + "p95": 56.41600117087364, + "p99": 96.16000205278397 + }, + "combine": { + "p50": 59.42400172352791, + "p90": 69.66400146484375, + "p95": 73.34399968385696, + "p99": 82.94399827718735 + }, + "roundtrip": { + "p50": 1531.3600301742554, + "p90": 1545.7600355148315, + "p95": 1550.3040552139282, + "p99": 1569.7280168533325 + }, + 
"isolatedSum": { + "p50": 104.32000085711479, + "p90": 122.24000319838524, + "p95": 129.7600008547306, + "p99": 179.1040003299713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 46.112000942230225, + "p90": 50.36799982190132, + "p95": 54.30399999022484, + "p99": 59.67999994754791 + }, + "combine": { + "p50": 61.055999249219894, + "p90": 67.64800101518631, + "p95": 70.20799815654755, + "p99": 75.42400062084198 + }, + "roundtrip": { + "p50": 1539.1360521316528, + "p90": 1550.4319667816162, + "p95": 1554.3999671936035, + "p99": 1565.1839971542358 + }, + "isolatedSum": { + "p50": 107.16800019145012, + "p90": 118.01600083708763, + "p95": 124.51199814677238, + "p99": 135.1040005683899 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 54.496001452207565, + "p90": 62.6240000128746, + "p95": 65.76000154018402, + "p99": 86.17600053548813 + }, + "combine": { + "p50": 74.81600344181061, + "p90": 84.3840017914772, + "p95": 87.55200356245041, + "p99": 95.8079993724823 + }, + "roundtrip": { + "p50": 1556.447982788086, + "p90": 1573.7600326538086, + "p95": 1588.2560014724731, + "p99": 1605.88800907135 + }, + "isolatedSum": { + "p50": 129.31200489401817, + "p90": 147.0080018043518, + "p95": 153.31200510263443, + "p99": 181.98399990797043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + } + ] + }, + { + "id": "cx-82003616", + "identity": "gb300|deepep|v2|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_2ccf2ebb", + "comparisonKey": "971292115fbba672", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:25.482287+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 42.399998754262924, + "p90": 47.32799902558327, + "p95": 50.144001841545105, + "p99": 56.60799890756607 + }, + "combine": { + "p50": 51.231998950242996, + "p90": 55.67999929189682, + "p95": 57.472001761198044, + "p99": 62.68800050020218 + }, + "roundtrip": { + "p50": 1522.1439599990845, + "p90": 1526.6879796981812, + "p95": 1528.5760164260864, + "p99": 1531.5519571304321 + }, + "isolatedSum": { + "p50": 93.63199770450592, + "p90": 103.00799831748009, + "p95": 107.61600360274315, + "p99": 119.29599940776825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 43.007999658584595, + "p90": 47.87199944257736, + "p95": 50.624001771211624, + "p99": 57.53599852323532 + }, + "combine": { + "p50": 52.38400027155876, + "p90": 56.2559999525547, + "p95": 58.14399942755699, + "p99": 60.447998344898224 + }, + "roundtrip": { + "p50": 1524.448037147522, + "p90": 1528.864026069641, + "p95": 1530.8799743652344, + "p99": 1533.8239669799805 + }, + "isolatedSum": { + "p50": 95.39199993014336, + "p90": 104.12799939513206, + "p95": 108.76800119876862, + "p99": 117.98399686813354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.65600070357323, + "p90": 49.247998744249344, + "p95": 51.711998879909515, + "p99": 57.151999324560165 + }, + "combine": { + "p50": 54.4000007212162, + "p90": 59.90400165319443, + "p95": 62.20800057053566, + "p99": 66.68800115585327 + }, 
+ "roundtrip": { + "p50": 1525.215983390808, + "p90": 1533.8239669799805, + "p95": 1547.7440357208252, + "p99": 1581.3119411468506 + }, + "isolatedSum": { + "p50": 97.05600142478943, + "p90": 109.15200039744377, + "p95": 113.91999945044518, + "p99": 123.84000048041344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 43.807998299598694, + "p90": 49.15200173854828, + "p95": 50.84799975156784, + "p99": 65.2799978852272 + }, + "combine": { + "p50": 55.743999779224396, + "p90": 65.24799764156342, + "p95": 74.75200295448303, + "p99": 264.8639976978302 + }, + "roundtrip": { + "p50": 1524.7360467910767, + "p90": 1529.1839838027954, + "p95": 1531.0720205307007, + "p99": 1535.3599786758423 + }, + "isolatedSum": { + "p50": 99.55199807882309, + "p90": 114.3999993801117, + "p95": 125.60000270605087, + "p99": 330.1439955830574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 42.7200011909008, + "p90": 48.16000163555145, + "p95": 51.96800082921982, + "p99": 58.17599967122078 + }, + "combine": { + "p50": 56.12799897789955, + "p90": 63.48799914121628, + "p95": 66.20799750089645, + "p99": 75.00799745321274 + }, + "roundtrip": { + "p50": 1526.9440412521362, + "p90": 1535.6800556182861, + "p95": 1545.2799797058105, + "p99": 1565.343976020813 + }, + "isolatedSum": { + "p50": 98.84800016880035, + "p90": 111.64800077676773, + "p95": 118.17599833011627, + "p99": 133.18399712443352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 
9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.544000178575516, + "p90": 50.144001841545105, + "p95": 52.5440014898777, + "p99": 93.50399672985077 + }, + "combine": { + "p50": 57.37600103020668, + "p90": 65.76000154018402, + "p95": 68.7360018491745, + "p99": 75.80800354480743 + }, + "roundtrip": { + "p50": 1530.3679704666138, + "p90": 1539.3919944763184, + "p95": 1543.776035308838, + "p99": 1567.1679973602295 + }, + "isolatedSum": { + "p50": 101.9200012087822, + "p90": 115.90400338172913, + "p95": 121.2800033390522, + "p99": 169.3120002746582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 45.66400125622749, + "p90": 53.50400134921074, + "p95": 56.384000927209854, + "p99": 60.47999858856201 + }, + "combine": { + "p50": 59.61599946022034, + "p90": 64.67200070619583, + "p95": 66.3359984755516, + "p99": 69.63200122117996 + }, + "roundtrip": { + "p50": 1534.2400074005127, + "p90": 1539.423942565918, + "p95": 1542.0479774475098, + "p99": 1549.4400262832642 + }, + "isolatedSum": { + "p50": 105.28000071644783, + "p90": 118.17600205540657, + "p95": 122.71999940276146, + "p99": 130.11199980974197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 52.89600044488907, + "p90": 57.312000542879105, + "p95": 62.431998550891876, + "p99": 101.82400047779083 + }, + "combine": { + "p50": 
71.00799679756165, + "p90": 81.05599880218506, + "p95": 86.71999722719193, + "p99": 120.95999717712402 + }, + "roundtrip": { + "p50": 1551.8399477005005, + "p90": 1557.3439598083496, + "p95": 1567.5519704818726, + "p99": 1597.1200466156006 + }, + "isolatedSum": { + "p50": 123.90399724245071, + "p90": 138.36799934506416, + "p95": 149.1519957780838, + "p99": 222.78399765491486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-016c372a", + "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||d27781632f6e008", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "618aec2a1583ad18", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:55.957329+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { 
+ "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "d27781632f6e008", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 110.36799848079681, + "p90": 117.0239970088005, + "p95": 120.51200121641159, + "p99": 129.82399761676788 + }, + "combine": { + "p50": 82.56000280380249, + "p90": 86.11200004816055, + "p95": 88.54400366544724, + "p99": 95.36000341176987 + }, + "roundtrip": { + "p50": 165.3439998626709, + "p90": 173.8560050725937, + "p95": 175.77600479125977, + "p99": 182.78400599956512 + }, + "isolatedSum": { + "p50": 192.9280012845993, + "p90": 203.13599705696106, + "p95": 209.05600488185883, + "p99": 225.18400102853775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 15294464, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 136.76799833774567, + "p90": 144.51199769973755, + "p95": 147.5840061903, + "p99": 153.76000106334686 + }, + "combine": { + "p50": 102.46399790048599, + "p90": 108.67200046777725, + "p95": 110.07999628782272, + "p99": 114.17599767446518 + }, + "roundtrip": { + "p50": 211.45600080490112, + "p90": 219.67999637126923, + "p95": 222.17600047588348, + "p99": 227.48799622058868 + }, + "isolatedSum": { + "p50": 239.23199623823166, + 
"p90": 253.1839981675148, + "p95": 257.6640024781227, + "p99": 267.93599873781204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 30384128, + "combineLogicalBytes": 30384128, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 194.815993309021, + "p90": 201.02399587631226, + "p95": 203.39199900627136, + "p99": 209.6640020608902 + }, + "combine": { + "p50": 144.16000247001648, + "p90": 148.51200580596924, + "p95": 150.9760022163391, + "p99": 156.2879979610443 + }, + "roundtrip": { + "p50": 309.6959888935089, + "p90": 317.82400608062744, + "p95": 321.696013212204, + "p99": 330.81600069999695 + }, + "isolatedSum": { + "p50": 338.9759957790375, + "p90": 349.5360016822815, + "p95": 354.3680012226105, + "p99": 365.9520000219345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 60743680, + "combineLogicalBytes": 60743680, + "fanoutMean": 3.62060546875, + "recvTokensMax": 1865, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 296.7360019683838, + "p90": 304.6720027923584, + "p95": 307.42400884628296, + "p99": 314.4640028476715 + }, + "combine": { + "p50": 219.2319929599762, + "p90": 224.35200214385986, + "p95": 228.5120040178299, + "p99": 233.11999440193176 + }, + "roundtrip": { + "p50": 495.39199471473694, + "p90": 503.55201959609985, + "p95": 506.24001026153564, + "p99": 510.72001457214355 + }, + "isolatedSum": { + "p50": 515.96799492836, + "p90": 529.0240049362183, + "p95": 535.9360128641129, + "p99": 547.5839972496033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 121716736, + "combineLogicalBytes": 121716736, + "fanoutMean": 3.62744140625, + "recvTokensMax": 3730, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 505.6319832801819, + "p90": 513.3439898490906, + "p95": 516.1600112915039, + "p99": 522.1760272979736 + }, + "combine": { + "p50": 474.14401173591614, + "p90": 481.2160134315491, + "p95": 483.3280146121979, + "p99": 490.911990404129 + }, + "roundtrip": { + "p50": 926.2400269508362, + "p90": 935.7759952545166, + "p95": 939.7439956665039, + "p99": 946.6879963874817 + }, + "isolatedSum": { + "p50": 979.775995016098, + "p90": 994.5600032806396, + "p95": 999.4880259037018, + "p99": 1013.0880177021027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 243236864, + "combineLogicalBytes": 243236864, + "fanoutMean": 3.62451171875, + "recvTokensMax": 7446, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 937.8880262374878, + "p90": 945.0560212135315, + "p95": 948.1279850006104, + "p99": 952.5439739227295 + }, + "combine": { + "p50": 862.2080087661743, + "p90": 873.2799887657166, + "p95": 874.6240139007568, + "p99": 882.4639916419983 + }, + "roundtrip": { + "p50": 1809.664011001587, + "p90": 1819.808006286621, + "p95": 1822.3999738693237, + "p99": 1828.4800052642822 + }, + "isolatedSum": { + "p50": 1800.096035003662, + "p90": 1818.336009979248, + "p95": 1822.7519989013672, + "p99": 1835.0079655647278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 486342656, + "combineLogicalBytes": 486342656, + "fanoutMean": 3.62353515625, + "recvTokensMax": 14871, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fcf0f3be", + "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||6a3023945a551d7", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "0e795f2033818840", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:47.224751+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6a3023945a551d7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 109.43999886512756, + "p90": 115.26399850845337, + "p95": 120.03199756145477, + "p99": 131.1360001564026 + }, + "combine": { + "p50": 85.50400286912918, + "p90": 90.65599739551544, + "p95": 
93.21600198745728, + "p99": 97.43999689817429 + }, + "roundtrip": { + "p50": 171.2000072002411, + "p90": 176.9919991493225, + "p95": 180.7039976119995, + "p99": 187.6160055398941 + }, + "isolatedSum": { + "p50": 194.94400173425674, + "p90": 205.9199959039688, + "p95": 213.24799954891205, + "p99": 228.57599705457687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19097600, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 137.05599308013916, + "p90": 144.6399986743927, + "p95": 147.23199605941772, + "p99": 154.6880006790161 + }, + "combine": { + "p50": 107.45599865913391, + "p90": 111.10399663448334, + "p95": 112.99200356006622, + "p99": 121.79200351238251 + }, + "roundtrip": { + "p50": 221.37600183486938, + "p90": 228.35199534893036, + "p95": 232.41600394248962, + "p99": 238.39999735355377 + }, + "isolatedSum": { + "p50": 244.51199173927307, + "p90": 255.74399530887604, + "p95": 260.22399961948395, + "p99": 276.4800041913986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 37980160, + "combineLogicalBytes": 37980160, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 196.25599682331085, + "p90": 202.87999510765076, + "p95": 205.50400018692017, + "p99": 215.36000072956085 + }, + "combine": { + "p50": 148.3519971370697, + "p90": 155.2640050649643, + "p95": 157.95199573040009, + "p99": 162.81600296497345 + }, + "roundtrip": { + "p50": 320.99199295043945, + "p90": 328.0960023403168, + "p95": 330.3680121898651, + "p99": 336.9919955730438 + }, + "isolatedSum": { + "p50": 344.60799396038055, + "p90": 358.14400017261505, + "p95": 363.45599591732025, + "p99": 378.1760036945343 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 75776000, + "combineLogicalBytes": 75776000, + "fanoutMean": 3.61328125, + "recvTokensMax": 1867, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 304.4799864292145, + "p90": 312.4479949474335, + "p95": 315.2320086956024, + "p99": 320.51199674606323 + }, + "combine": { + "p50": 257.0880055427551, + "p90": 264.1279995441437, + "p95": 267.7440047264099, + "p99": 272.0319926738739 + }, + "roundtrip": { + "p50": 527.3920297622681, + "p90": 537.5679731369019, + "p95": 541.4080023765564, + "p99": 549.95197057724 + }, + "isolatedSum": { + "p50": 561.5679919719696, + "p90": 576.5759944915771, + "p95": 582.9760134220123, + "p99": 592.5439894199371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 152074240, + "combineLogicalBytes": 152074240, + "fanoutMean": 3.625732421875, + "recvTokensMax": 3722, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 520.3520059585571, + "p90": 526.9119739532471, + "p95": 528.6399722099304, + "p99": 534.3999862670898 + }, + "combine": { + "p50": 493.9199984073639, + "p90": 501.21599435806274, + "p95": 503.1359791755676, + "p99": 505.5999755859375 + }, + "roundtrip": { + "p50": 990.5279874801636, + "p90": 997.376024723053, + "p95": 1000.3839731216431, + "p99": 1006.7520141601562 + }, + "isolatedSum": { + "p50": 1014.272004365921, + "p90": 1028.1279683113098, + "p95": 1031.775951385498, + "p99": 1039.9999618530273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 304117760, + "combineLogicalBytes": 304117760, + "fanoutMean": 3.6253662109375, + "recvTokensMax": 7453, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 962.7839922904968, + "p90": 
970.0800180435181, + "p95": 973.1519818305969, + "p99": 978.879988193512 + }, + "combine": { + "p50": 939.5840167999268, + "p90": 947.8399753570557, + "p95": 949.2800235748291, + "p99": 952.4160027503967 + }, + "roundtrip": { + "p50": 1873.952031135559, + "p90": 1881.2799453735352, + "p95": 1883.8080167770386, + "p99": 1890.1760578155518 + }, + "isolatedSum": { + "p50": 1902.3680090904236, + "p90": 1917.9199934005737, + "p95": 1922.432005405426, + "p99": 1931.2959909439087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 608358400, + "combineLogicalBytes": 608358400, + "fanoutMean": 3.6260986328125, + "recvTokensMax": 14884, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b2e34e61", + "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "94ce9dbc926f2634", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:39.012700+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 115.58400094509125, + "p90": 124.57600235939026, + "p95": 127.93600559234619, + "p99": 134.62400436401367 + }, + "combine": { + "p50": 94.4959968328476, + "p90": 98.84800016880035, + "p95": 100.12800246477127, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 183.84000658988953, + "p90": 191.93600118160248, + "p95": 194.68800723552704, + "p99": 201.9840031862259 + }, + "isolatedSum": { + "p50": 210.07999777793884, + "p90": 223.4240025281906, + "p95": 228.06400805711746, + "p99": 240.00000208616257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22941696, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 146.97599411010742, + "p90": 153.888002038002, + "p95": 155.74400126934052, + "p99": 163.55200111865997 + }, + "combine": { + "p50": 113.43999952077866, + "p90": 121.11999839544296, + "p95": 122.3360002040863, + "p99": 130.36799430847168 + }, + "roundtrip": { + "p50": 232.28800296783447, + "p90": 
240.1600033044815, + "p95": 242.49599874019623, + "p99": 251.13600492477417 + }, + "isolatedSum": { + "p50": 260.4159936308861, + "p90": 275.008000433445, + "p95": 278.0800014734268, + "p99": 293.91999542713165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45649920, + "combineLogicalBytes": 45649920, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 201.7280012369156, + "p90": 209.72800254821777, + "p95": 211.87199652194977, + "p99": 220.22399306297302 + }, + "combine": { + "p50": 157.79200196266174, + "p90": 161.9199961423874, + "p95": 163.5199934244156, + "p99": 169.3439930677414 + }, + "roundtrip": { + "p50": 335.07201075553894, + "p90": 343.3600068092346, + "p95": 345.984011888504, + "p99": 352.80001163482666 + }, + "isolatedSum": { + "p50": 359.52000319957733, + "p90": 371.64799869060516, + "p95": 375.39198994636536, + "p99": 389.5679861307144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91066368, + "combineLogicalBytes": 91066368, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 311.5200102329254, + "p90": 318.9440071582794, + "p95": 323.0719864368439, + "p99": 331.712007522583 + }, + "combine": { + "p50": 287.200003862381, + "p90": 295.1039969921112, + "p95": 297.34399914741516, + "p99": 300.7360100746155 + }, + "roundtrip": { + "p50": 559.6160292625427, + "p90": 569.2160129547119, + "p95": 572.1279978752136, + "p99": 577.5039792060852 + }, + "isolatedSum": { + "p50": 598.7200140953064, + "p90": 614.0480041503906, + "p95": 620.415985584259, + "p99": 632.4480175971985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 182059008, + "combineLogicalBytes": 182059008, + "fanoutMean": 3.6171875, + 
"recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 533.6639881134033, + "p90": 540.7040119171143, + "p95": 543.3279871940613, + "p99": 549.7279763221741 + }, + "combine": { + "p50": 510.49602031707764, + "p90": 517.9839730262756, + "p95": 519.1680192947388, + "p99": 523.5840082168579 + }, + "roundtrip": { + "p50": 1017.1200037002563, + "p90": 1024.3840217590332, + "p95": 1027.3280143737793, + "p99": 1031.4559936523438 + }, + "isolatedSum": { + "p50": 1044.160008430481, + "p90": 1058.68798494339, + "p95": 1062.4960064888, + "p99": 1073.311984539032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 364449792, + "combineLogicalBytes": 364449792, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 978.1439900398254, + "p90": 984.2240214347839, + "p95": 986.8159890174866, + "p99": 992.9919838905334 + }, + "combine": { + "p50": 951.9360065460205, + "p90": 960.4480266571045, + "p95": 961.9839787483215, + "p99": 964.0640020370483 + }, + "roundtrip": { + "p50": 1905.7600498199463, + "p90": 1913.4399890899658, + "p95": 1917.2799587249756, + "p99": 1924.3520498275757 + }, + "isolatedSum": { + "p50": 1930.079996585846, + "p90": 1944.6720480918884, + "p95": 1948.799967765808, + "p99": 1957.0559859275818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 727953408, + "combineLogicalBytes": 727953408, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e27d68c", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_abf250ec", + "comparisonKey": 
"6d96f58d256fd850", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:29.475161+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 116.86400324106216, + "p90": 125.18399953842163, + "p95": 128.7360042333603, + "p99": 
135.26399433612823 + }, + "combine": { + "p50": 97.98400104045868, + "p90": 106.36799782514572, + "p95": 107.87200182676315, + "p99": 116.06399714946747 + }, + "roundtrip": { + "p50": 189.7280067205429, + "p90": 196.9279944896698, + "p95": 199.0399956703186, + "p99": 205.21600544452667 + }, + "isolatedSum": { + "p50": 214.84800428152084, + "p90": 231.55199736356735, + "p95": 236.60800606012344, + "p99": 251.3279914855957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 146.88000082969666, + "p90": 152.3520052433014, + "p95": 153.82400155067444, + "p99": 158.62399339675903 + }, + "combine": { + "p50": 119.80800330638885, + "p90": 123.64800274372101, + "p95": 125.82400441169739, + "p99": 130.94399869441986 + }, + "roundtrip": { + "p50": 241.60000681877136, + "p90": 249.1839975118637, + "p95": 251.96799635887146, + "p99": 255.64798712730408 + }, + "isolatedSum": { + "p50": 266.6880041360855, + "p90": 276.0000079870224, + "p95": 279.6480059623718, + "p99": 289.5679920911789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.31199872493744, + "p90": 213.4079933166504, + "p95": 216.12800657749176, + "p99": 220.22399306297302 + }, + "combine": { + "p50": 165.3120070695877, + "p90": 171.23199999332428, + "p95": 172.992005944252, + "p99": 179.77599799633026 + }, + "roundtrip": { + "p50": 342.46399998664856, + "p90": 349.7920036315918, + "p95": 352.3840010166168, + "p99": 357.88801312446594 + }, + "isolatedSum": { + "p50": 370.62400579452515, 
+ "p90": 384.63999330997467, + "p95": 389.1200125217438, + "p99": 399.9999910593033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 312.1280074119568, + "p90": 320.0640082359314, + "p95": 323.199987411499, + "p99": 329.47200536727905 + }, + "combine": { + "p50": 305.11999130249023, + "p90": 309.56798791885376, + "p95": 311.5200102329254, + "p99": 318.91199946403503 + }, + "roundtrip": { + "p50": 588.9599919319153, + "p90": 597.0240235328674, + "p95": 599.4560122489929, + "p99": 606.4959764480591 + }, + "isolatedSum": { + "p50": 617.247998714447, + "p90": 629.6319961547852, + "p95": 634.7199976444244, + "p99": 648.3840048313141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 543.0399775505066, + "p90": 550.6560206413269, + "p95": 552.7679920196533, + "p99": 557.919979095459 + }, + "combine": { + "p50": 525.4080295562744, + "p90": 530.2079916000366, + "p95": 531.6799879074097, + "p99": 536.4800095558167 + }, + "roundtrip": { + "p50": 1043.7439680099487, + "p90": 1052.7039766311646, + "p95": 1055.0719499588013, + "p99": 1062.9440546035767 + }, + "isolatedSum": { + "p50": 1068.448007106781, + "p90": 1080.8640122413635, + "p95": 1084.447979927063, + "p99": 1094.3999886512756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 998.4639883041382, + "p90": 1003.8399696350098, + "p95": 1006.1440467834473, + "p99": 1010.3039741516113 + }, + "combine": { + "p50": 972.4479913711548, + "p90": 980.288028717041, + "p95": 982.7520251274109, + "p99": 991.0079836845398 + }, + "roundtrip": { + "p50": 1945.0880289077759, + "p90": 1952.6400566101074, + "p95": 1955.3920030593872, + "p99": 1983.6479425430298 + }, + "isolatedSum": { + "p50": 1970.911979675293, + "p90": 1984.1279983520508, + "p95": 1988.8960719108582, + "p99": 2001.3119578361511 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8370c04", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "4c16ffc6f2edd9c7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:15.799249+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 114.17599767446518, + "p90": 121.63200229406357, + "p95": 124.76799637079239, + "p99": 130.0799995660782 + }, + "combine": { + "p50": 97.75999933481216, + "p90": 105.43999820947647, + "p95": 107.29599744081497, + "p99": 111.10399663448334 + }, + "roundtrip": { + "p50": 188.38399648666382, + "p90": 195.0400024652481, + "p95": 198.08000326156616, + "p99": 203.77600193023682 + }, + "isolatedSum": { + "p50": 211.93599700927734, + "p90": 227.07200050354004, + "p95": 232.06399381160736, + "p99": 241.18399620056152 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 146.27200365066528, + "p90": 153.08800339698792, + "p95": 156.2879979610443, + "p99": 164.35199975967407 + }, + "combine": { + "p50": 119.23199892044067, + "p90": 123.29600006341934, + "p95": 
125.76000392436981, + "p99": 131.99999928474426 + }, + "roundtrip": { + "p50": 239.3919974565506, + "p90": 246.94399535655975, + "p95": 250.5280077457428, + "p99": 254.91198897361755 + }, + "isolatedSum": { + "p50": 265.50400257110596, + "p90": 276.38400346040726, + "p95": 282.0480018854141, + "p99": 296.35199904441833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 203.99999618530273, + "p90": 211.90400421619415, + "p95": 214.23999965190887, + "p99": 220.22399306297302 + }, + "combine": { + "p50": 166.1120057106018, + "p90": 170.27199268341064, + "p95": 171.9679981470108, + "p99": 177.3120015859604 + }, + "roundtrip": { + "p50": 341.43999218940735, + "p90": 349.11999106407166, + "p95": 351.99999809265137, + "p99": 356.8960130214691 + }, + "isolatedSum": { + "p50": 370.11200189590454, + "p90": 382.1759968996048, + "p95": 386.2079977989197, + "p99": 397.5359946489334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 310.4639947414398, + "p90": 319.8719918727875, + "p95": 322.36799597740173, + "p99": 328.7679851055145 + }, + "combine": { + "p50": 304.80000376701355, + "p90": 309.6640110015869, + "p95": 311.74400448799133, + "p99": 318.33600997924805 + }, + "roundtrip": { + "p50": 586.463987827301, + "p90": 594.7200059890747, + "p95": 597.2480177879333, + "p99": 603.0399799346924 + }, + "isolatedSum": { + "p50": 615.2639985084534, + "p90": 629.5360028743744, + "p95": 634.1120004653931, + "p99": 647.1039950847626 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 541.5040254592896, + "p90": 549.3760108947754, + "p95": 551.7439842224121, + "p99": 558.3360195159912 + }, + "combine": { + "p50": 525.37602186203, + "p90": 530.4960012435913, + "p95": 532.0640206336975, + "p99": 538.0160212516785 + }, + "roundtrip": { + "p50": 1041.7280197143555, + "p90": 1048.9280223846436, + "p95": 1051.2959957122803, + "p99": 1056.3839673995972 + }, + "isolatedSum": { + "p50": 1066.8800473213196, + "p90": 1079.8720121383667, + "p95": 1083.8080048561096, + "p99": 1096.3520407676697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 998.2720017433167, + "p90": 1003.775954246521, + "p95": 1005.5999755859375, + "p99": 1009.6960067749023 + }, + "combine": { + "p50": 971.8719720840454, + "p90": 979.7760248184204, + "p95": 982.7200174331665, + "p99": 988.5759949684143 + }, + "roundtrip": { + "p50": 1945.4400539398193, + "p90": 1952.9919624328613, + "p95": 1955.1359415054321, + "p99": 1961.4399671554565 + }, + "isolatedSum": { + "p50": 1970.143973827362, + "p90": 1983.5519790649414, + "p95": 1988.319993019104, + "p99": 1998.2720017433167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-50c2a856", + "identity": 
"gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||b208ea04b16e80b", + "colorKey": "gb300_4be7cae4", + "comparisonKey": "bbad58fb1bf1d1ea", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:06.027393+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "b208ea04b16e80b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + 
{ + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.84800344705582, + "p90": 127.23200023174286, + "p95": 129.2160004377365, + "p99": 133.98399949073792 + }, + "combine": { + "p50": 98.65599870681763, + "p90": 105.02400249242783, + "p95": 106.59199953079224, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 191.83999300003052, + "p90": 199.13600385189056, + "p95": 202.78400182724, + "p99": 209.9519968032837 + }, + "isolatedSum": { + "p50": 217.50400215387344, + "p90": 232.25600272417068, + "p95": 235.80799996852875, + "p99": 249.82400238513947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26621952, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 149.34399724006653, + "p90": 155.5200070142746, + "p95": 157.95199573040009, + "p99": 166.78400337696075 + }, + "combine": { + "p50": 119.23199892044067, + "p90": 123.83999675512314, + "p95": 126.78399682044983, + "p99": 131.48799538612366 + }, + "roundtrip": { + "p50": 244.9920028448105, + "p90": 252.00000405311584, + "p95": 254.2400062084198, + "p99": 260.3200078010559 + }, + "isolatedSum": { + "p50": 268.5759961605072, + "p90": 279.36000376939774, + "p95": 284.7359925508499, + "p99": 298.2719987630844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53329920, + "combineLogicalBytes": 53329920, + "fanoutMean": 3.6328125, + "recvTokensMax": 944, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 207.58399367332458, + "p90": 215.71199595928192, + "p95": 218.01599860191345, + "p99": 223.83999824523926 + }, + "combine": { + "p50": 167.1999990940094, + "p90": 171.55200242996216, + "p95": 173.6000031232834, + "p99": 179.00800704956055 + }, + "roundtrip": { + 
"p50": 348.1599986553192, + "p90": 356.00000619888306, + "p95": 358.65598917007446, + "p99": 365.63199758529663 + }, + "isolatedSum": { + "p50": 374.783992767334, + "p90": 387.2639983892441, + "p95": 391.61600172519684, + "p99": 402.8480052947998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106530816, + "combineLogicalBytes": 106530816, + "fanoutMean": 3.62841796875, + "recvTokensMax": 1882, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 315.10400772094727, + "p90": 323.2640027999878, + "p95": 325.9199857711792, + "p99": 332.96000957489014 + }, + "combine": { + "p50": 304.86398935317993, + "p90": 309.4399869441986, + "p95": 311.1039996147156, + "p99": 315.90399146080017 + }, + "roundtrip": { + "p50": 590.5920267105103, + "p90": 597.4400043487549, + "p95": 600.1600027084351, + "p99": 604.4800281524658 + }, + "isolatedSum": { + "p50": 619.9679970741272, + "p90": 632.7039897441864, + "p95": 637.0239853858948, + "p99": 648.8640010356903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212631552, + "combineLogicalBytes": 212631552, + "fanoutMean": 3.62109375, + "recvTokensMax": 3729, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 542.7520275115967, + "p90": 550.4000186920166, + "p95": 553.1520247459412, + "p99": 559.1679811477661 + }, + "combine": { + "p50": 524.5440006256104, + "p90": 529.9199819564819, + "p95": 531.8719744682312, + "p99": 536.8319749832153 + }, + "roundtrip": { + "p50": 1042.1119928359985, + "p90": 1049.3119955062866, + "p95": 1051.2959957122803, + "p99": 1056.447982788086 + }, + "isolatedSum": { + "p50": 1067.296028137207, + "p90": 1080.3200006484985, + "p95": 1085.0239992141724, + "p99": 1095.9999561309814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424632320, + 
"combineLogicalBytes": 424632320, + "fanoutMean": 3.61572265625, + "recvTokensMax": 7430, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1001.0240077972412, + "p90": 1007.3920488357544, + "p95": 1009.0559720993042, + "p99": 1011.680006980896 + }, + "combine": { + "p50": 971.9359874725342, + "p90": 980.6079864501953, + "p95": 982.6880097389221, + "p99": 985.3119850158691 + }, + "roundtrip": { + "p50": 1951.7760276794434, + "p90": 1959.61594581604, + "p95": 1962.5600576400757, + "p99": 1968.6720371246338 + }, + "isolatedSum": { + "p50": 1972.9599952697754, + "p90": 1988.0000352859497, + "p95": 1991.7439818382263, + "p99": 1996.9919919967651 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 848074752, + "combineLogicalBytes": 848074752, + "fanoutMean": 3.61065673828125, + "recvTokensMax": 14815, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-71a5d543", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|prefill|normal|none|none|0|tuned||1104ab83732593b", + "colorKey": "gb300_76c0d0f4", + "comparisonKey": "7244f39eb87e6023", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:02.735201+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1104ab83732593b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 122.23999947309494, + "p90": 131.48799538612366, + "p95": 134.3040019273758, + "p99": 139.67999815940857 + }, + "combine": { + "p50": 101.98400169610977, + "p90": 109.50399935245514, + "p95": 111.68000102043152, + "p99": 119.35999989509583 + }, + "roundtrip": { + "p50": 196.25599682331085, + "p90": 204.03200387954712, + "p95": 206.7199945449829, + "p99": 213.85599672794342 + }, + "isolatedSum": { + "p50": 224.2240011692047, + "p90": 240.9919947385788, + "p95": 245.9840029478073, + "p99": 259.0399980545044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 
153.18399667739868, + "p90": 159.93599593639374, + "p95": 162.6880019903183, + "p99": 169.0559983253479 + }, + "combine": { + "p50": 122.079998254776, + "p90": 126.78399682044983, + "p95": 130.0799995660782, + "p99": 134.33599472045898 + }, + "roundtrip": { + "p50": 248.35200607776642, + "p90": 255.42399287223816, + "p95": 258.9440047740936, + "p99": 265.4399871826172 + }, + "isolatedSum": { + "p50": 275.2639949321747, + "p90": 286.71999275684357, + "p95": 292.7680015563965, + "p99": 303.3919930458069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 4, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 211.35999262332916, + "p90": 217.98400580883026, + "p95": 220.5439954996109, + "p99": 225.8560061454773 + }, + "combine": { + "p50": 189.53600525856018, + "p90": 197.24799692630768, + "p95": 198.65599274635315, + "p99": 205.76000213623047 + }, + "roundtrip": { + "p50": 356.3840091228485, + "p90": 364.44801092147827, + "p95": 367.5839900970459, + "p99": 372.19199538230896 + }, + "isolatedSum": { + "p50": 400.89599788188934, + "p90": 415.23200273513794, + "p95": 419.19998824596405, + "p99": 431.61600828170776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 4, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 334.6560001373291, + "p90": 342.303991317749, + "p95": 345.40799260139465, + "p99": 350.71998834609985 + }, + "combine": { + "p50": 324.8960077762604, + "p90": 331.87198638916016, + "p95": 333.3120048046112, + "p99": 336.0320031642914 + }, + "roundtrip": { + "p50": 630.1440000534058, + "p90": 637.9520297050476, + "p95": 639.8400068283081, + "p99": 
647.3600268363953 + }, + "isolatedSum": { + "p50": 659.5520079135895, + "p90": 674.1759777069092, + "p95": 678.7199974060059, + "p99": 686.7519915103912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 4, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 574.5279788970947, + "p90": 582.751989364624, + "p95": 585.6000185012817, + "p99": 595.1679944992065 + }, + "combine": { + "p50": 563.4239912033081, + "p90": 568.3199763298035, + "p95": 569.9520111083984, + "p99": 575.1039981842041 + }, + "roundtrip": { + "p50": 1106.2079668045044, + "p90": 1114.016056060791, + "p95": 1116.4480447769165, + "p99": 1126.1759996414185 + }, + "isolatedSum": { + "p50": 1137.9519701004028, + "p90": 1151.0719656944275, + "p95": 1155.5520296096802, + "p99": 1170.2719926834106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 4, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1060.863971710205, + "p90": 1067.7759647369385, + "p95": 1070.304036140442, + "p99": 1075.711965560913 + }, + "combine": { + "p50": 1046.6879606246948, + "p90": 1050.9120225906372, + "p95": 1052.9279708862305, + "p99": 1057.8880310058594 + }, + "roundtrip": { + "p50": 2079.040050506592, + "p90": 2086.24005317688, + "p95": 2088.6080265045166, + "p99": 2096.575975418091 + }, + "isolatedSum": { + "p50": 2107.5519323349, + "p90": 2118.6879873275757, + "p95": 2123.2320070266724, + "p99": 2133.5999965667725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 4, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e85db3cb", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|prefill|normal|none|none|0|tuned||e15d35cfeaea91f", + "colorKey": "gb300_2da51caf", + "comparisonKey": "612fe64be08398a2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:19.301086+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e15d35cfeaea91f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 98.84800016880035, + "p90": 106.23999685049057, + "p95": 113.11999708414078, + "p99": 121.98399752378464 + }, + "combine": { + "p50": 79.03999835252762, + "p90": 83.3280012011528, + "p95": 84.73599702119827, + "p99": 89.72799777984619 + }, + "roundtrip": { + "p50": 151.07199549674988, + "p90": 161.72799468040466, + "p95": 164.89599645137787, + "p99": 173.5360026359558 + }, + "isolatedSum": { + "p50": 177.88799852132797, + "p90": 189.56799805164337, + "p95": 197.85599410533905, + "p99": 211.71199530363083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 126.14400684833527, + "p90": 142.94399321079254, + "p95": 156.70399367809296, + "p99": 185.2159947156906 + }, + "combine": { + "p50": 118.81600320339203, + "p90": 122.6240023970604, + "p95": 123.99999797344208, + "p99": 134.75200533866882 + }, + "roundtrip": { + "p50": 219.61599588394165, + "p90": 228.19200158119202, + "p95": 230.78399896621704, + "p99": 239.26399648189545 + }, + "isolatedSum": { + "p50": 244.9600100517273, + "p90": 265.56799560785294, + "p95": 280.70399165153503, + "p99": 319.96800005435944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 253.63200902938843, + "p90": 262.08001375198364, + "p95": 264.22399282455444, + "p99": 
269.72800493240356 + }, + "combine": { + "p50": 238.78400027751923, + "p90": 244.35199797153473, + "p95": 246.0159957408905, + "p99": 250.88000297546387 + }, + "roundtrip": { + "p50": 477.3760139942169, + "p90": 483.93601179122925, + "p95": 486.27200722694397, + "p99": 491.93599820137024 + }, + "isolatedSum": { + "p50": 492.41600930690765, + "p90": 506.4320117235184, + "p95": 510.23998856544495, + "p99": 520.6080079078674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7d6ecde9", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|prefill|normal|none|none|0|tuned||33484f7e5b87248", + "colorKey": "gb300_22122c9a", + "comparisonKey": "a353b19e46e17b61", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:12.732568+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "33484f7e5b87248", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 99.74399954080582, + "p90": 109.15199667215347, + "p95": 112.47999966144562, + "p99": 118.97599697113037 + }, + "combine": { + "p50": 87.13600039482117, + "p90": 95.29600292444229, + "p95": 97.79199957847595, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 164.22399878501892, + "p90": 171.87200486660004, + "p95": 174.04800653457642, + "p99": 181.60000443458557 + }, + "isolatedSum": { + "p50": 186.87999993562698, + "p90": 204.44799959659576, + "p95": 210.27199923992157, + "p99": 222.4319949746132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 125.34399330615997, + "p90": 133.15199315547943, + "p95": 135.83999872207642, + "p99": 143.0400013923645 + }, + "combine": { + "p50": 106.39999806880951, + "p90": 110.33599823713303, + "p95": 113.02399635314941, + "p99": 121.79200351238251 + }, + "roundtrip": { + "p50": 208.639994263649, + "p90": 215.45599400997162, + "p95": 
218.27200055122375, + "p99": 225.92000663280487 + }, + "isolatedSum": { + "p50": 231.74399137496948, + "p90": 243.48799139261246, + "p95": 248.86399507522583, + "p99": 264.832004904747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22020096, + "combineLogicalBytes": 22020096, + "fanoutMean": 1.5, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 150.2400040626526, + "p90": 158.4639996290207, + "p95": 161.21600568294525, + "p99": 167.4879938364029 + }, + "combine": { + "p50": 131.3920021057129, + "p90": 135.19999384880066, + "p95": 137.472003698349, + "p99": 141.79199934005737 + }, + "roundtrip": { + "p50": 259.2960000038147, + "p90": 266.04801416397095, + "p95": 269.72800493240356, + "p99": 275.61599016189575 + }, + "isolatedSum": { + "p50": 281.6320061683655, + "p90": 293.66399347782135, + "p95": 298.68800938129425, + "p99": 309.27999317646027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44040192, + "combineLogicalBytes": 44040192, + "fanoutMean": 1.5, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 225.8560061454773, + "p90": 233.91999304294586, + "p95": 236.03199422359467, + "p99": 241.4720058441162 + }, + "combine": { + "p50": 180.63999712467194, + "p90": 185.56800484657288, + "p95": 190.23999571800232, + "p99": 195.0400024652481 + }, + "roundtrip": { + "p50": 383.04001092910767, + "p90": 390.81600308418274, + "p95": 394.4000005722046, + "p99": 403.2000005245209 + }, + "isolatedSum": { + "p50": 406.49600327014923, + "p90": 419.48799788951874, + "p95": 426.271989941597, + "p99": 436.5120083093643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 88080384, + "combineLogicalBytes": 88080384, + "fanoutMean": 1.5, + "recvTokensMax": 2048, + "stragglerRank": 1, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 350.20801424980164, + "p90": 357.695996761322, + "p95": 360.9279990196228, + "p99": 366.2720024585724 + }, + "combine": { + "p50": 327.87200808525085, + "p90": 332.2240114212036, + "p95": 334.49599146842957, + "p99": 341.95199608802795 + }, + "roundtrip": { + "p50": 661.5039706230164, + "p90": 669.5039868354797, + "p95": 672.7679967880249, + "p99": 679.4880032539368 + }, + "isolatedSum": { + "p50": 678.0800223350525, + "p90": 689.9200081825256, + "p95": 695.4239904880524, + "p99": 708.2239985466003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 176160768, + "combineLogicalBytes": 176160768, + "fanoutMean": 1.5, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 656.4800143241882, + "p90": 663.2639765739441, + "p95": 665.6320095062256, + "p99": 671.3280081748962 + }, + "combine": { + "p50": 562.9119873046875, + "p90": 567.2640204429626, + "p95": 568.6399936676025, + "p99": 575.5519866943359 + }, + "roundtrip": { + "p50": 1194.591999053955, + "p90": 1202.49605178833, + "p95": 1204.6400308609009, + "p99": 1210.976004600525 + }, + "isolatedSum": { + "p50": 1219.3920016288757, + "p90": 1230.5279970169067, + "p95": 1234.2720031738281, + "p99": 1246.8799948692322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352321536, + "combineLogicalBytes": 352321536, + "fanoutMean": 1.5, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dbedde91", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|prefill|normal|none|none|0|tuned||b8e52e92c6d3379", + "colorKey": "gb300_7e1244f6", + "comparisonKey": "05a8e4a0c7cbf29e", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:10:28.822106+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "b8e52e92c6d3379", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.8800036907196, + "p90": 126.49600207805634, + "p95": 129.34400141239166, + "p99": 137.85600662231445 + }, + 
"combine": { + "p50": 98.39999675750732, + "p90": 105.79200088977814, + "p95": 107.61599987745285, + "p99": 111.29599809646606 + }, + "roundtrip": { + "p50": 192.32000410556793, + "p90": 200.57600736618042, + "p95": 202.7519941329956, + "p99": 208.0959975719452 + }, + "isolatedSum": { + "p50": 217.28000044822693, + "p90": 232.28800296783447, + "p95": 236.9600012898445, + "p99": 249.15200471878052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 147.2640037536621, + "p90": 153.31199765205383, + "p95": 157.6319932937622, + "p99": 163.2000058889389 + }, + "combine": { + "p50": 119.9679970741272, + "p90": 123.61600250005722, + "p95": 124.9919980764389, + "p99": 132.1599930524826 + }, + "roundtrip": { + "p50": 242.5920069217682, + "p90": 249.59999322891235, + "p95": 252.128005027771, + "p99": 256.7040026187897 + }, + "isolatedSum": { + "p50": 267.2320008277893, + "p90": 276.92800015211105, + "p95": 282.6239913702011, + "p99": 295.3599989414215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53100544, + "combineLogicalBytes": 53100544, + "fanoutMean": 3.6171875, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.34400641918182, + "p90": 213.15200626850128, + "p95": 216.38399362564087, + "p99": 220.44800221920013 + }, + "combine": { + "p50": 167.77600347995758, + "p90": 172.2559928894043, + "p95": 173.72800409793854, + "p99": 177.85599827766418 + }, + "roundtrip": { + "p50": 349.92000460624695, + "p90": 357.695996761322, + "p95": 359.8720133304596, + "p99": 364.6399974822998 + }, + "isolatedSum": { + "p50": 373.1200098991394, + "p90": 385.4079991579056, + "p95": 
390.1119977235794, + "p99": 398.3040004968643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106373120, + "combineLogicalBytes": 106373120, + "fanoutMean": 3.623046875, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 322.7519989013672, + "p90": 329.4079899787903, + "p95": 331.9680094718933, + "p99": 339.2319977283478 + }, + "combine": { + "p50": 321.9519853591919, + "p90": 330.3999900817871, + "p95": 332.3200047016144, + "p99": 338.81598711013794 + }, + "roundtrip": { + "p50": 621.7600107192993, + "p90": 628.5439729690552, + "p95": 630.623996257782, + "p99": 636.7999911308289 + }, + "isolatedSum": { + "p50": 644.7039842605591, + "p90": 659.8079800605774, + "p95": 664.2880141735077, + "p99": 678.0479848384857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 559.6799850463867, + "p90": 566.7200088500977, + "p95": 569.4720149040222, + "p99": 574.783980846405 + }, + "combine": { + "p50": 552.9279708862305, + "p90": 556.5440058708191, + "p95": 559.328019618988, + "p99": 565.2160048484802 + }, + "roundtrip": { + "p50": 1091.007947921753, + "p90": 1098.5280275344849, + "p95": 1101.1199951171875, + "p99": 1112.3199462890625 + }, + "isolatedSum": { + "p50": 1112.6079559326172, + "p90": 1123.2640147209167, + "p95": 1128.8000345230103, + "p99": 1139.9999856948853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423958528, + "combineLogicalBytes": 423958528, + "fanoutMean": 3.6099853515625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, 
+ "dispatch": { + "p50": 1039.3279790878296, + "p90": 1046.3680028915405, + "p95": 1048.6079454421997, + "p99": 1054.1759729385376 + }, + "combine": { + "p50": 1034.5920324325562, + "p90": 1039.680004119873, + "p95": 1043.9679622650146, + "p99": 1046.7840433120728 + }, + "roundtrip": { + "p50": 2058.2399368286133, + "p90": 2064.255952835083, + "p95": 2066.1439895629883, + "p99": 2071.3279247283936 + }, + "isolatedSum": { + "p50": 2073.9200115203857, + "p90": 2086.0480070114136, + "p95": 2092.5759077072144, + "p99": 2100.9600162506104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847988736, + "combineLogicalBytes": 847988736, + "fanoutMean": 3.61029052734375, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-30487892", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|prefill|normal|none|none|0|tuned||5f9878f45872329", + "colorKey": "gb300_822be538", + "comparisonKey": "56bac01ccf9784c2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:55.665808+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "5f9878f45872329", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.358123779296875, + "eplbImbalanceAfter": 1.000026818477746, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.6800023317337, + "p90": 128.38399410247803, + "p95": 131.74399733543396, + "p99": 144.6080058813095 + }, + "combine": { + "p50": 98.49599748849869, + "p90": 105.76000064611435, + "p95": 107.4879989027977, + "p99": 110.46399921178818 + }, + "roundtrip": { + "p50": 192.6400065422058, + "p90": 200.83199441432953, + "p95": 203.5519927740097, + "p99": 209.31200683116913 + }, + "isolatedSum": { + "p50": 218.1759998202324, + "p90": 234.14399474859238, + "p95": 239.23199623823166, + "p99": 255.0720050930977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26664960, + "combineLogicalBytes": 26664960, + "fanoutMean": 3.6328125, + "recvTokensMax": 472, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 151.2320041656494, + "p90": 178.75200510025024, + "p95": 187.3600035905838, + "p99": 199.3280053138733 + }, + "combine": { + "p50": 121.31199985742569, 
+ "p90": 147.39200472831726, + "p95": 157.75999426841736, + "p99": 185.248002409935 + }, + "roundtrip": { + "p50": 247.48800694942474, + "p90": 276.89599990844727, + "p95": 291.9040024280548, + "p99": 308.4160089492798 + }, + "isolatedSum": { + "p50": 272.5440040230751, + "p90": 326.1440098285675, + "p95": 345.11999785900116, + "p99": 384.5760077238083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53143552, + "combineLogicalBytes": 53143552, + "fanoutMean": 3.6201171875, + "recvTokensMax": 946, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 209.88799631595612, + "p90": 229.0239930152893, + "p95": 236.28799617290497, + "p99": 264.0959918498993 + }, + "combine": { + "p50": 169.8240041732788, + "p90": 209.6640020608902, + "p95": 230.46399652957916, + "p99": 233.88800024986267 + }, + "roundtrip": { + "p50": 349.7599959373474, + "p90": 378.52799892425537, + "p95": 390.3360068798065, + "p99": 400.2560079097748 + }, + "isolatedSum": { + "p50": 379.7120004892349, + "p90": 438.6879950761795, + "p95": 466.75199270248413, + "p99": 497.98399209976196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106258432, + "combineLogicalBytes": 106258432, + "fanoutMean": 3.619140625, + "recvTokensMax": 1861, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 317.1840012073517, + "p90": 344.0319895744324, + "p95": 355.4239869117737, + "p99": 379.7760009765625 + }, + "combine": { + "p50": 305.7279884815216, + "p90": 330.01598715782166, + "p95": 342.6559865474701, + "p99": 355.77601194381714 + }, + "roundtrip": { + "p50": 591.3919806480408, + "p90": 609.824001789093, + "p95": 635.6800198554993, + "p99": 652.6079773902893 + }, + "isolatedSum": { + "p50": 622.9119896888733, + "p90": 674.047976732254, + "p95": 698.0799734592438, + "p99": 
735.5520129203796 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212645888, + "combineLogicalBytes": 212645888, + "fanoutMean": 3.621337890625, + "recvTokensMax": 3730, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 545.7280278205872, + "p90": 564.8319721221924, + "p95": 576.3840079307556, + "p99": 590.4960036277771 + }, + "combine": { + "p50": 527.9679894447327, + "p90": 544.0959930419922, + "p95": 564.5440220832825, + "p99": 585.4079723358154 + }, + "roundtrip": { + "p50": 1045.6639528274536, + "p90": 1061.4720582962036, + "p95": 1074.112057685852, + "p99": 1095.7759618759155 + }, + "isolatedSum": { + "p50": 1073.6960172653198, + "p90": 1108.9279651641846, + "p95": 1140.928030014038, + "p99": 1175.9039759635925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424775680, + "combineLogicalBytes": 424775680, + "fanoutMean": 3.616943359375, + "recvTokensMax": 7429, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1003.4879446029663, + "p90": 1020.1599597930908, + "p95": 1045.4399585723877, + "p99": 1063.9679431915283 + }, + "combine": { + "p50": 972.000002861023, + "p90": 985.5679869651794, + "p95": 999.5200037956238, + "p99": 1022.4959850311279 + }, + "roundtrip": { + "p50": 1952.895998954773, + "p90": 1977.2160053253174, + "p95": 1989.4720315933228, + "p99": 2010.495901107788 + }, + "isolatedSum": { + "p50": 1975.4879474639893, + "p90": 2005.7279467582703, + "p95": 2044.9599623680115, + "p99": 2086.4639282226562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 848547840, + "combineLogicalBytes": 848547840, + "fanoutMean": 3.6126708984375, + "recvTokensMax": 14823, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c847bc6d", + "identity": 
"gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|prefill|normal|none|none|0|tuned||ed21345b2de53e0", + "colorKey": "gb300_75bb6e82", + "comparisonKey": "8c431e019a8e66fe", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:52.298195+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ed21345b2de53e0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.003448486328125, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.76000261306763, + "p90": 126.43200159072876, + "p95": 130.52800297737122, + "p99": 137.05599308013916 + }, + "combine": { + "p50": 98.52799773216248, + "p90": 104.16000336408615, + "p95": 107.10400342941284, + "p99": 113.3119985461235 + }, + "roundtrip": { + "p50": 191.23199582099915, + "p90": 199.13600385189056, + "p95": 203.3279985189438, + "p99": 212.351992726326 + }, + "isolatedSum": { + "p50": 216.2880003452301, + "p90": 230.5920049548149, + "p95": 237.63200640678406, + "p99": 250.36799162626266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 464, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.6400067806244, + "p90": 154.9759954214096, + "p95": 158.01599621772766, + "p99": 164.73600268363953 + }, + "combine": { + "p50": 119.58400160074234, + "p90": 123.32800030708313, + "p95": 125.37600100040436, + "p99": 130.40000200271606 + }, + "roundtrip": { + "p50": 241.98399484157562, + "p90": 249.56800043582916, + "p95": 253.28001379966736, + "p99": 259.5840096473694 + }, + "isolatedSum": { + "p50": 268.22400838136673, + "p90": 278.30399572849274, + "p95": 283.391997218132, + "p99": 295.1360046863556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52570112, + "combineLogicalBytes": 52570112, + "fanoutMean": 3.5810546875, + "recvTokensMax": 924, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 207.10399746894836, + "p90": 215.36000072956085, + "p95": 217.8879976272583, + "p99": 223.13599288463593 + }, + "combine": { + "p50": 164.2879992723465, + "p90": 171.55200242996216, + "p95": 
173.66400361061096, + "p99": 181.5679967403412 + }, + "roundtrip": { + "p50": 345.08800506591797, + "p90": 353.34399342536926, + "p95": 355.55198788642883, + "p99": 362.43200302124023 + }, + "isolatedSum": { + "p50": 371.39199674129486, + "p90": 386.912003159523, + "p95": 391.55200123786926, + "p99": 404.7039896249771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105613312, + "combineLogicalBytes": 105613312, + "fanoutMean": 3.59716796875, + "recvTokensMax": 1860, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 316.1599934101105, + "p90": 324.22399520874023, + "p95": 326.81599259376526, + "p99": 332.09601044654846 + }, + "combine": { + "p50": 307.99999833106995, + "p90": 313.9519989490509, + "p95": 317.1519935131073, + "p99": 323.64800572395325 + }, + "roundtrip": { + "p50": 597.2480177879333, + "p90": 605.7599782943726, + "p95": 608.7039709091187, + "p99": 613.4719848632812 + }, + "isolatedSum": { + "p50": 624.1599917411804, + "p90": 638.1759941577911, + "p95": 643.9679861068726, + "p99": 655.7440161705017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211843072, + "combineLogicalBytes": 211843072, + "fanoutMean": 3.607666015625, + "recvTokensMax": 3714, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 539.903998374939, + "p90": 548.8319993019104, + "p95": 552.0640015602112, + "p99": 556.8320155143738 + }, + "combine": { + "p50": 522.8800177574158, + "p90": 529.7920107841492, + "p95": 531.328022480011, + "p99": 534.6879959106445 + }, + "roundtrip": { + "p50": 1038.0159616470337, + "p90": 1045.4399585723877, + "p95": 1047.935962677002, + "p99": 1052.4159669876099 + }, + "isolatedSum": { + "p50": 1062.7840161323547, + "p90": 1078.6240100860596, + "p95": 1083.3920240402222, + "p99": 1091.5200114250183 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 423198720, + "combineLogicalBytes": 423198720, + "fanoutMean": 3.603515625, + "recvTokensMax": 7400, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 995.3920245170593, + "p90": 1002.4960041046143, + "p95": 1004.8320293426514, + "p99": 1010.5279684066772 + }, + "combine": { + "p50": 972.3520278930664, + "p90": 978.7200093269348, + "p95": 983.2959771156311, + "p99": 988.5119795799255 + }, + "roundtrip": { + "p50": 1942.8160190582275, + "p90": 1952.1600008010864, + "p95": 1954.8799991607666, + "p99": 1962.4639749526978 + }, + "isolatedSum": { + "p50": 1967.7440524101257, + "p90": 1981.216013431549, + "p95": 1988.1280064582825, + "p99": 1999.0399479866028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 846024704, + "combineLogicalBytes": 846024704, + "fanoutMean": 3.6019287109375, + "recvTokensMax": 14796, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb8c4ed1", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_4b5c9507", + "comparisonKey": "e274642f2bf057b9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:42.755350+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", 
+ "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 114.88000303506851, + "p90": 122.079998254776, + "p95": 127.51999497413635, + "p99": 134.2719942331314 + }, + "combine": { + "p50": 97.08800166845322, + "p90": 100.76799988746643, + "p95": 103.80800068378448, + "p99": 110.49599945545197 + }, + "roundtrip": { + "p50": 186.94399297237396, + "p90": 194.5600062608719, + "p95": 196.86399400234222, + "p99": 203.87199521064758 + }, + "isolatedSum": { + "p50": 211.96800470352173, + "p90": 222.84799814224243, + "p95": 231.32799565792084, + "p99": 244.76799368858337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + 
"globalTokens": 1024, + "dispatch": { + "p50": 143.96800100803375, + "p90": 151.0079950094223, + "p95": 154.27200496196747, + "p99": 162.4639928340912 + }, + "combine": { + "p50": 119.32799965143204, + "p90": 123.61600250005722, + "p95": 125.59999525547028, + "p99": 134.46399569511414 + }, + "roundtrip": { + "p50": 238.304004073143, + "p90": 245.5040067434311, + "p95": 248.06399643421173, + "p99": 252.99200415611267 + }, + "isolatedSum": { + "p50": 263.2960006594658, + "p90": 274.6239975094795, + "p95": 279.87200021743774, + "p99": 296.9279885292053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 200.76799392700195, + "p90": 209.53600108623505, + "p95": 211.45600080490112, + "p99": 217.75999665260315 + }, + "combine": { + "p50": 161.3759994506836, + "p90": 169.66399550437927, + "p95": 172.2240000963211, + "p99": 177.59999632835388 + }, + "roundtrip": { + "p50": 338.4320139884949, + "p90": 345.69600224494934, + "p95": 348.9600121974945, + "p99": 357.12000727653503 + }, + "isolatedSum": { + "p50": 362.14399337768555, + "p90": 379.1999965906143, + "p95": 383.68000090122223, + "p99": 395.35999298095703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 312.51201033592224, + "p90": 319.64799761772156, + "p95": 322.04800844192505, + "p99": 329.6639919281006 + }, + "combine": { + "p50": 320.16000151634216, + "p90": 327.67999172210693, + "p95": 330.55999875068665, + "p99": 334.01599526405334 + }, + "roundtrip": { + "p50": 605.6320071220398, 
+ "p90": 614.080011844635, + "p95": 617.5680160522461, + "p99": 626.5280246734619 + }, + "isolatedSum": { + "p50": 632.6720118522644, + "p90": 647.3279893398285, + "p95": 652.6080071926117, + "p99": 663.6799871921539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 535.7120037078857, + "p90": 543.5839891433716, + "p95": 546.6560125350952, + "p99": 552.8320074081421 + }, + "combine": { + "p50": 548.2879877090454, + "p90": 554.7199845314026, + "p95": 556.1599731445312, + "p99": 562.2079968452454 + }, + "roundtrip": { + "p50": 1069.6320533752441, + "p90": 1078.752040863037, + "p95": 1081.6960334777832, + "p99": 1088.1919860839844 + }, + "isolatedSum": { + "p50": 1083.9999914169312, + "p90": 1098.3039736747742, + "p95": 1102.8159856796265, + "p99": 1115.0400042533875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1021.6959714889526, + "p90": 1032.0320129394531, + "p95": 1034.9760055541992, + "p99": 1041.3440465927124 + }, + "combine": { + "p50": 1019.8719501495361, + "p90": 1023.8720178604126, + "p95": 1025.5680084228516, + "p99": 1031.5519571304321 + }, + "roundtrip": { + "p50": 2010.3039741516113, + "p90": 2020.927906036377, + "p95": 2024.832010269165, + "p99": 2034.4319343566895 + }, + "isolatedSum": { + "p50": 2041.5679216384888, + "p90": 2055.9040307998657, + "p95": 2060.544013977051, + "p99": 2072.8960037231445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + 
"fanoutMean": 2.5145263671875, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb75b3df", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|prefill|normal|none|none|0|tuned||25840dd8241ba10", + "colorKey": "gb300_bc29f115", + "comparisonKey": "c4057732c5be72d2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:05.521276+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "25840dd8241ba10", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", 
+ "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 109.8880022764206, + "p90": 117.72800236940384, + "p95": 121.2799996137619, + "p99": 129.7599971294403 + }, + "combine": { + "p50": 94.97600048780441, + "p90": 99.13600236177444, + "p95": 101.53599828481674, + "p99": 106.46399855613708 + }, + "roundtrip": { + "p50": 180.25599420070648, + "p90": 188.38399648666382, + "p95": 190.46400487422943, + "p99": 197.28000462055206 + }, + "isolatedSum": { + "p50": 204.864002764225, + "p90": 216.86400473117828, + "p95": 222.81599789857864, + "p99": 236.2239956855774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 131.45600259304047, + "p90": 138.0160003900528, + "p95": 141.92000031471252, + "p99": 149.31200444698334 + }, + "combine": { + "p50": 112.09599673748016, + "p90": 120.06399780511856, + "p95": 121.79200351238251, + "p99": 125.44000148773193 + }, + "roundtrip": { + "p50": 219.55199539661407, + "p90": 228.2239943742752, + "p95": 230.71999847888947, + "p99": 241.40800535678864 + }, + "isolatedSum": { + "p50": 243.55199933052063, + "p90": 258.07999819517136, + "p95": 263.71200382709503, + "p99": 274.75200593471527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18335744, + "combineLogicalBytes": 18335744, + "fanoutMean": 1.2490234375, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 176.41599476337433, + "p90": 
184.76800620555878, + "p95": 186.97600066661835, + "p99": 192.89599359035492 + }, + "combine": { + "p50": 156.70399367809296, + "p90": 160.67199409008026, + "p95": 162.4000072479248, + "p99": 169.08800601959229 + }, + "roundtrip": { + "p50": 309.1520071029663, + "p90": 317.31200218200684, + "p95": 320.92800736427307, + "p99": 327.2320032119751 + }, + "isolatedSum": { + "p50": 333.1199884414673, + "p90": 345.44000029563904, + "p95": 349.37600791454315, + "p99": 361.9839996099472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36900864, + "combineLogicalBytes": 36900864, + "fanoutMean": 1.2568359375, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 272.8320062160492, + "p90": 280.2560031414032, + "p95": 283.07199478149414, + "p99": 288.5119915008545 + }, + "combine": { + "p50": 281.3760042190552, + "p90": 290.17600417137146, + "p95": 292.7359938621521, + "p99": 296.00000381469727 + }, + "roundtrip": { + "p50": 545.1200008392334, + "p90": 554.144024848938, + "p95": 557.4079751968384, + "p99": 563.7440085411072 + }, + "isolatedSum": { + "p50": 554.2080104351044, + "p90": 570.4320073127747, + "p95": 575.8079886436462, + "p99": 584.5119953155518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73730048, + "combineLogicalBytes": 73730048, + "fanoutMean": 1.255615234375, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 456.7039906978607, + "p90": 464.9600088596344, + "p95": 467.3919975757599, + "p99": 471.0080027580261 + }, + "combine": { + "p50": 564.4159913063049, + "p90": 568.2240128517151, + "p95": 570.5919861793518, + "p99": 576.1280059814453 + }, + "roundtrip": { + "p50": 1038.6559963226318, + "p90": 1047.2639799118042, + "p95": 1050.8160591125488, + "p99": 
1057.7280521392822 + }, + "isolatedSum": { + "p50": 1021.1199820041656, + "p90": 1033.1840217113495, + "p95": 1037.9839837551117, + "p99": 1047.1360087394714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147746816, + "combineLogicalBytes": 147746816, + "fanoutMean": 1.258056640625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 944.703996181488, + "p90": 952.895998954773, + "p95": 956.0319781303406, + "p99": 961.1520171165466 + }, + "combine": { + "p50": 1036.0640287399292, + "p90": 1044.160008430481, + "p95": 1046.0799932479858, + "p99": 1048.8959550857544 + }, + "roundtrip": { + "p50": 1959.6480131149292, + "p90": 1967.8720235824585, + "p95": 1970.6239700317383, + "p99": 1972.8959798812866 + }, + "isolatedSum": { + "p50": 1980.7680249214172, + "p90": 1997.056007385254, + "p95": 2002.1119713783264, + "p99": 2010.047972202301 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295278592, + "combineLogicalBytes": 295278592, + "fanoutMean": 1.25714111328125, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26d00eb5", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|prefill|normal|none|none|0|tuned||cabb28c468fd7cf", + "colorKey": "gb300_fd039f89", + "comparisonKey": "cd736a9dc93d86f8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:31.718483+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + 
"label": "GB300 EP4 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cabb28c468fd7cf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86505126953125, + "eplbImbalanceAfter": 1.0000149681454613, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 120.06399780511856, + "p90": 129.4720023870468, + "p95": 133.2480013370514, + "p99": 138.94400000572205 + }, + "combine": { + "p50": 99.04000163078308, + "p90": 105.66399991512299, + "p95": 108.44799876213074, + "p99": 114.88000303506851 + }, + "roundtrip": { + "p50": 191.5840059518814, + "p90": 199.072003364563, + "p95": 201.60000026226044, + "p99": 207.48800039291382 + }, + "isolatedSum": { + "p50": 219.10399943590164, + "p90": 235.1360023021698, + "p95": 241.69600009918213, + "p99": 253.82400304079056 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 25862144, + "combineLogicalBytes": 25862144, + "fanoutMean": 3.5234375, + "recvTokensMax": 457, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.92800152301788, + "p90": 157.27999806404114, + "p95": 159.84000265598297, + "p99": 164.60800170898438 + }, + "combine": { + "p50": 121.5360015630722, + "p90": 125.63200294971466, + "p95": 129.12000715732574, + "p99": 135.23200154304504 + }, + "roundtrip": { + "p50": 242.43199825286865, + "p90": 250.75200200080872, + "p95": 253.4399926662445, + "p99": 259.48798656463623 + }, + "isolatedSum": { + "p50": 270.4640030860901, + "p90": 282.9120010137558, + "p95": 288.9600098133087, + "p99": 299.8400032520294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 51509248, + "combineLogicalBytes": 51509248, + "fanoutMean": 3.5087890625, + "recvTokensMax": 914, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.79199492931366, + "p90": 213.44000101089478, + "p95": 216.86400473117828, + "p99": 227.6799976825714 + }, + "combine": { + "p50": 164.22399878501892, + "p90": 170.78399658203125, + "p95": 172.35200107097626, + "p99": 176.32000148296356 + }, + "roundtrip": { + "p50": 342.8800106048584, + "p90": 350.5600094795227, + "p95": 353.37600111961365, + "p99": 357.5359880924225 + }, + "isolatedSum": { + "p50": 370.0159937143326, + "p90": 384.223997592926, + "p95": 389.21600580215454, + "p99": 403.999999165535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 102688768, + "combineLogicalBytes": 102688768, + "fanoutMean": 3.49755859375, + "recvTokensMax": 1817, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 312.608003616333, + "p90": 319.8080062866211, 
+ "p95": 322.4000036716461, + "p99": 326.880007982254 + }, + "combine": { + "p50": 304.4480085372925, + "p90": 308.9280128479004, + "p95": 310.91201305389404, + "p99": 315.8079981803894 + }, + "roundtrip": { + "p50": 586.3040089607239, + "p90": 594.111979007721, + "p95": 596.9280004501343, + "p99": 601.472020149231 + }, + "isolatedSum": { + "p50": 617.0560121536255, + "p90": 628.7360191345215, + "p95": 633.3120167255402, + "p99": 642.6880061626434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 205520896, + "combineLogicalBytes": 205520896, + "fanoutMean": 3.5, + "recvTokensMax": 3657, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 537.4720096588135, + "p90": 544.8960065841675, + "p95": 546.4959740638733, + "p99": 551.967978477478 + }, + "combine": { + "p50": 516.6400074958801, + "p90": 523.3920216560364, + "p95": 526.2079834938049, + "p99": 529.088020324707 + }, + "roundtrip": { + "p50": 1021.9839811325073, + "p90": 1030.4960012435913, + "p95": 1035.5199575424194, + "p99": 1064.4160509109497 + }, + "isolatedSum": { + "p50": 1054.1120171546936, + "p90": 1068.2880282402039, + "p95": 1072.7039575576782, + "p99": 1081.055998802185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 412016640, + "combineLogicalBytes": 412016640, + "fanoutMean": 3.50830078125, + "recvTokensMax": 7329, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 980.1279902458191, + "p90": 985.5359792709351, + "p95": 987.3600006103516, + "p99": 992.0960068702698 + }, + "combine": { + "p50": 959.3920111656189, + "p90": 965.7279849052429, + "p95": 968.9599871635437, + "p99": 974.3679761886597 + }, + "roundtrip": { + "p50": 1919.7440147399902, + "p90": 1927.9359579086304, + "p95": 1930.5280447006226, + "p99": 1935.871958732605 + }, + "isolatedSum": { + 
"p50": 1939.520001411438, + "p90": 1951.263964176178, + "p95": 1956.3199877738953, + "p99": 1966.4639830589294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 824119296, + "combineLogicalBytes": 824119296, + "fanoutMean": 3.5086669921875, + "recvTokensMax": 14713, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a210e5a", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|prefill|normal|none|none|0|tuned||370c8dd16f08e2c", + "colorKey": "gb300_82cf5a40", + "comparisonKey": "2f1df15ebd58ee83", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:08.059250+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "370c8dd16f08e2c", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.4240003824234, + "p90": 127.03999876976013, + "p95": 129.40800189971924, + "p99": 138.11199367046356 + }, + "combine": { + "p50": 98.24000298976898, + "p90": 102.88000106811523, + "p95": 106.20799660682678, + "p99": 112.15999722480774 + }, + "roundtrip": { + "p50": 192.28799641132355, + "p90": 199.072003364563, + "p95": 201.53599977493286, + "p99": 208.70399475097656 + }, + "isolatedSum": { + "p50": 217.66400337219238, + "p90": 229.91999983787537, + "p95": 235.61599850654602, + "p99": 250.2719908952713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 507, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.41599762439728, + "p90": 154.2080044746399, + "p95": 156.6080003976822, + "p99": 161.3440066576004 + }, + "combine": { + "p50": 119.87199634313583, + "p90": 123.96799772977829, + "p95": 124.92799758911133, + "p99": 130.36799430847168 + }, + "roundtrip": { + "p50": 242.65600740909576, + "p90": 249.6960014104843, + "p95": 251.8399953842163, + "p99": 256.9600045681 + }, + "isolatedSum": { + "p50": 268.2879939675331, + "p90": 278.1760022044182, + "p95": 281.5359979867935, + "p99": 291.7120009660721 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49057792, + "combineLogicalBytes": 49057792, + "fanoutMean": 3.341796875, 
+ "recvTokensMax": 1018, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 204.19199764728546, + "p90": 211.4879935979843, + "p95": 213.82400393486023, + "p99": 219.7439968585968 + }, + "combine": { + "p50": 167.77600347995758, + "p90": 173.0239987373352, + "p95": 173.98400604724884, + "p99": 178.43200266361237 + }, + "roundtrip": { + "p50": 346.24001383781433, + "p90": 353.7600040435791, + "p95": 356.1600148677826, + "p99": 364.25599455833435 + }, + "isolatedSum": { + "p50": 371.96800112724304, + "p90": 384.5119923353195, + "p95": 387.80800998210907, + "p99": 398.17599952220917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 98344960, + "combineLogicalBytes": 98344960, + "fanoutMean": 3.349609375, + "recvTokensMax": 2039, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 322.01600074768066, + "p90": 329.6000063419342, + "p95": 332.73598551750183, + "p99": 339.58399295806885 + }, + "combine": { + "p50": 321.0879862308502, + "p90": 326.7199993133545, + "p95": 329.6639919281006, + "p99": 333.6000144481659 + }, + "roundtrip": { + "p50": 615.9359812736511, + "p90": 626.5919804573059, + "p95": 630.0479769706726, + "p99": 636.6080045700073 + }, + "isolatedSum": { + "p50": 643.1039869785309, + "p90": 656.3200056552887, + "p95": 662.3999774456024, + "p99": 673.1840074062347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 196704256, + "combineLogicalBytes": 196704256, + "fanoutMean": 3.349853515625, + "recvTokensMax": 4074, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 555.0079941749573, + "p90": 562.0160102844238, + "p95": 563.9680027961731, + "p99": 568.4800148010254 + }, + "combine": { + "p50": 547.9999780654907, + 
"p90": 555.2319884300232, + "p95": 556.6400289535522, + "p99": 561.0560178756714 + }, + "roundtrip": { + "p50": 1081.4399719238281, + "p90": 1088.2879495620728, + "p95": 1092.4479961395264, + "p99": 1100.1280546188354 + }, + "isolatedSum": { + "p50": 1103.007972240448, + "p90": 1117.247998714447, + "p95": 1120.6080317497253, + "p99": 1129.5360326766968 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 393351168, + "combineLogicalBytes": 393351168, + "fanoutMean": 3.349365234375, + "recvTokensMax": 8147, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1027.616024017334, + "p90": 1034.335970878601, + "p95": 1037.2799634933472, + "p99": 1043.0400371551514 + }, + "combine": { + "p50": 1030.6240320205688, + "p90": 1036.352038383484, + "p95": 1037.5360250473022, + "p99": 1043.9679622650146 + }, + "roundtrip": { + "p50": 2040.7359600067139, + "p90": 2047.6479530334473, + "p95": 2050.3358840942383, + "p99": 2054.464101791382 + }, + "isolatedSum": { + "p50": 2058.240056037903, + "p90": 2070.688009262085, + "p95": 2074.8159885406494, + "p99": 2087.007999420166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 785469440, + "combineLogicalBytes": 785469440, + "fanoutMean": 3.3441162109375, + "recvTokensMax": 16298, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a7163bd", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|prefill|normal|none|none|0|tuned||624fdceae193d94", + "colorKey": "gb300_77edcf0e", + "comparisonKey": "459c853c5c937418", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:22.256404+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "624fdceae193d94", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.888397216796875, + "eplbImbalanceAfter": 1.00013427734375, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.59200149774551, + "p90": 127.51999497413635, + "p95": 130.40000200271606, + "p99": 135.3279948234558 + }, + "combine": { + "p50": 98.39999675750732, + "p90": 105.66399991512299, + "p95": 107.96800255775452, + "p99": 113.53600025177002 + }, + "roundtrip": { + "p50": 191.23199582099915, + "p90": 199.42399859428406, + "p95": 
202.2079974412918, + "p99": 207.519993185997 + }, + "isolatedSum": { + "p50": 216.99199825525284, + "p90": 233.18399488925934, + "p95": 238.36800456047058, + "p99": 248.86399507522583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 463, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.54399859905243, + "p90": 155.2640050649643, + "p95": 157.21599757671356, + "p99": 164.57599401474 + }, + "combine": { + "p50": 119.07199770212173, + "p90": 122.94399738311768, + "p95": 125.31200051307678, + "p99": 135.0719928741455 + }, + "roundtrip": { + "p50": 243.16799640655518, + "p90": 250.75200200080872, + "p95": 254.62400913238525, + "p99": 260.2880001068115 + }, + "isolatedSum": { + "p50": 267.61599630117416, + "p90": 278.20800244808197, + "p95": 282.52799808979034, + "p99": 299.6479868888855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52441088, + "combineLogicalBytes": 52441088, + "fanoutMean": 3.572265625, + "recvTokensMax": 924, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 206.2080055475235, + "p90": 213.69600296020508, + "p95": 216.73600375652313, + "p99": 222.3680019378662 + }, + "combine": { + "p50": 166.97600483894348, + "p90": 171.87200486660004, + "p95": 173.37599396705627, + "p99": 179.23200130462646 + }, + "roundtrip": { + "p50": 346.8160033226013, + "p90": 354.8159897327423, + "p95": 357.4399948120117, + "p99": 363.20000886917114 + }, + "isolatedSum": { + "p50": 373.184010386467, + "p90": 385.5680078268051, + "p95": 390.1119977235794, + "p99": 401.6000032424927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105670656, + "combineLogicalBytes": 105670656, + "fanoutMean": 3.59912109375, + "recvTokensMax": 
1859, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 314.14398550987244, + "p90": 321.79200649261475, + "p95": 325.21599531173706, + "p99": 332.3200047016144 + }, + "combine": { + "p50": 303.9360046386719, + "p90": 308.51200222969055, + "p95": 309.9200129508972, + "p99": 316.1279857158661 + }, + "roundtrip": { + "p50": 593.0879712104797, + "p90": 601.0559797286987, + "p95": 603.488028049469, + "p99": 607.5519919395447 + }, + "isolatedSum": { + "p50": 618.0799901485443, + "p90": 630.3040087223053, + "p95": 635.1360082626343, + "p99": 648.4479904174805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211527680, + "combineLogicalBytes": 211527680, + "fanoutMean": 3.602294921875, + "recvTokensMax": 3708, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 541.8559908866882, + "p90": 550.1760244369507, + "p95": 552.8320074081421, + "p99": 557.856023311615 + }, + "combine": { + "p50": 526.4319777488708, + "p90": 531.3599705696106, + "p95": 533.8879823684692, + "p99": 539.5519733428955 + }, + "roundtrip": { + "p50": 1042.4959659576416, + "p90": 1051.3919591903687, + "p95": 1054.368019104004, + "p99": 1061.3759756088257 + }, + "isolatedSum": { + "p50": 1068.287968635559, + "p90": 1081.5359950065613, + "p95": 1086.7199897766113, + "p99": 1097.4079966545105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423284736, + "combineLogicalBytes": 423284736, + "fanoutMean": 3.604248046875, + "recvTokensMax": 7436, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1000.607967376709, + "p90": 1005.952000617981, + "p95": 1008.2880258560181, + "p99": 1014.0160322189331 + }, + "combine": { + "p50": 971.5200066566467, + "p90": 
981.0879826545715, + "p95": 983.4880232810974, + "p99": 992.9919838905334 + }, + "roundtrip": { + "p50": 1944.767951965332, + "p90": 1953.4720182418823, + "p95": 1956.447958946228, + "p99": 1961.4720344543457 + }, + "isolatedSum": { + "p50": 1972.1279740333557, + "p90": 1987.0399832725525, + "p95": 1991.7760491371155, + "p99": 2007.0080161094666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847745024, + "combineLogicalBytes": 847745024, + "fanoutMean": 3.6092529296875, + "recvTokensMax": 14866, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-37c7854f", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_566ad107", + "comparisonKey": "00028f30de921d11", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:54.274583+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": 
false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 110.68800091743469, + "p90": 118.8800036907196, + "p95": 122.5920021533966, + "p99": 127.6479959487915 + }, + "combine": { + "p50": 95.36000341176987, + "p90": 99.07200187444687, + "p95": 103.00800204277039, + "p99": 108.51199924945831 + }, + "roundtrip": { + "p50": 184.67199802398682, + "p90": 191.55199825763702, + "p95": 194.5600062608719, + "p99": 199.35999810695648 + }, + "isolatedSum": { + "p50": 206.04800432920456, + "p90": 217.95200556516647, + "p95": 225.600004196167, + "p99": 236.15999519824982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 142.11200177669525, + "p90": 149.50400590896606, + "p95": 152.0639955997467, + "p99": 157.47199952602386 + }, + "combine": { + "p50": 116.41599982976913, + "p90": 121.05599790811539, + "p95": 122.30399996042252, + "p99": 129.08799946308136 + }, + "roundtrip": { + "p50": 236.25600337982178, + "p90": 243.3280050754547, + "p95": 245.7599937915802, + "p99": 251.80798768997192 + }, + "isolatedSum": { + 
"p50": 258.5280016064644, + "p90": 270.56000381708145, + "p95": 274.3679955601692, + "p99": 286.5599989891052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 197.66399264335632, + "p90": 205.28000593185425, + "p95": 207.48800039291382, + "p99": 214.08000588417053 + }, + "combine": { + "p50": 159.64800119400024, + "p90": 167.39200055599213, + "p95": 168.92799735069275, + "p99": 172.5119948387146 + }, + "roundtrip": { + "p50": 335.61599254608154, + "p90": 343.6160087585449, + "p95": 346.46400809288025, + "p99": 351.0400056838989 + }, + "isolatedSum": { + "p50": 357.31199383735657, + "p90": 372.6720064878464, + "p95": 376.41599774360657, + "p99": 386.59200072288513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 309.7600042819977, + "p90": 316.76799058914185, + "p95": 319.39199566841125, + "p99": 324.16000962257385 + }, + "combine": { + "p50": 318.4320032596588, + "p90": 325.53601264953613, + "p95": 328.5120129585266, + "p99": 331.87198638916016 + }, + "roundtrip": { + "p50": 602.8159856796265, + "p90": 611.2959980964661, + "p95": 614.3360137939453, + "p99": 619.2640066146851 + }, + "isolatedSum": { + "p50": 628.1920075416565, + "p90": 642.304003238678, + "p95": 647.9040086269379, + "p99": 656.031996011734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 531.4559936523438, + "p90": 538.6239886283875, + "p95": 540.5439734458923, + "p99": 545.6960201263428 + }, + "combine": { + "p50": 544.8639988899231, + "p90": 552.3520112037659, + "p95": 553.6640286445618, + "p99": 558.463990688324 + }, + "roundtrip": { + "p50": 1067.4560070037842, + "p90": 1075.6800174713135, + "p95": 1078.3679485321045, + "p99": 1082.1759700775146 + }, + "isolatedSum": { + "p50": 1076.3199925422668, + "p90": 1090.9759998321533, + "p95": 1094.208002090454, + "p99": 1104.1600108146667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1020.1599597930908, + "p90": 1032.3840379714966, + "p95": 1035.3920459747314, + "p99": 1046.94402217865 + }, + "combine": { + "p50": 1017.3439979553223, + "p90": 1021.5040445327759, + "p95": 1022.4959850311279, + "p99": 1027.7119874954224 + }, + "roundtrip": { + "p50": 2007.9679489135742, + "p90": 2019.7439193725586, + "p95": 2023.360013961792, + "p99": 2035.2959632873535 + }, + "isolatedSum": { + "p50": 2037.503957748413, + "p90": 2053.8880825042725, + "p95": 2057.8880310058594, + "p99": 2074.6560096740723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-84981a63", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_7a5ea657", + "comparisonKey": "5cf4c0bd3e5f6983", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:13:08.582410+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.45600062608719, + "p90": 127.58399546146393, + "p95": 
130.0159990787506, + "p99": 136.51199638843536 + }, + "combine": { + "p50": 98.78399968147278, + "p90": 106.23999685049057, + "p95": 108.70400071144104, + "p99": 113.76000195741653 + }, + "roundtrip": { + "p50": 192.1280026435852, + "p90": 198.97599518299103, + "p95": 202.17600464820862, + "p99": 207.8399956226349 + }, + "isolatedSum": { + "p50": 218.24000030755997, + "p90": 233.8239923119545, + "p95": 238.71999979019165, + "p99": 250.2719983458519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 463, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.6400067806244, + "p90": 155.16799688339233, + "p95": 157.9200029373169, + "p99": 162.84799575805664 + }, + "combine": { + "p50": 120.25599926710129, + "p90": 123.9359974861145, + "p95": 126.20800733566284, + "p99": 133.91999900341034 + }, + "roundtrip": { + "p50": 242.49599874019623, + "p90": 250.46399235725403, + "p95": 253.66398692131042, + "p99": 261.50399446487427 + }, + "isolatedSum": { + "p50": 268.8960060477257, + "p90": 279.10399436950684, + "p95": 284.12801027297974, + "p99": 296.767994761467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 918, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 206.56000077724457, + "p90": 214.33599293231964, + "p95": 216.99200570583344, + "p99": 224.89599883556366 + }, + "combine": { + "p50": 166.27199947834015, + "p90": 171.9679981470108, + "p95": 173.567995429039, + "p99": 179.51999604701996 + }, + "roundtrip": { + "p50": 345.40799260139465, + "p90": 352.9919981956482, + "p95": 356.3520014286041, + "p99": 364.6079897880554 + }, + "isolatedSum": { 
+ "p50": 372.8320002555847, + "p90": 386.30399107933044, + "p95": 390.56000113487244, + "p99": 404.4159948825836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 1841, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 315.2639865875244, + "p90": 321.9519853591919, + "p95": 324.6079981327057, + "p99": 330.04799485206604 + }, + "combine": { + "p50": 305.7279884815216, + "p90": 310.2720081806183, + "p95": 312.73600459098816, + "p99": 317.9199993610382 + }, + "roundtrip": { + "p50": 589.631974697113, + "p90": 596.9600081443787, + "p95": 599.7120141983032, + "p99": 604.416012763977 + }, + "isolatedSum": { + "p50": 620.991975069046, + "p90": 632.2239935398102, + "p95": 637.3440027236938, + "p99": 647.9679942131042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 3712, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 539.0400290489197, + "p90": 547.2639799118042, + "p95": 549.4400262832642, + "p99": 555.3920269012451 + }, + "combine": { + "p50": 522.0800042152405, + "p90": 529.6639800071716, + "p95": 531.1040282249451, + "p99": 536.4480018615723 + }, + "roundtrip": { + "p50": 1033.8560342788696, + "p90": 1041.2479639053345, + "p95": 1044.927954673767, + "p99": 1057.4079751968384 + }, + "isolatedSum": { + "p50": 1061.1200332641602, + "p90": 1076.9279599189758, + "p95": 1080.5440545082092, + "p99": 1091.8400287628174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 7407, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 995.4239726066589, + "p90": 1003.4879446029663, + "p95": 1006.3040256500244, + "p99": 1013.2800340652466 + }, + "combine": { + "p50": 971.4879989624023, + "p90": 977.8879880905151, + "p95": 981.98401927948, + "p99": 988.1600141525269 + }, + "roundtrip": { + "p50": 1943.5839653015137, + "p90": 1951.8719911575317, + "p95": 1954.367995262146, + "p99": 1959.6799612045288 + }, + "isolatedSum": { + "p50": 1966.9119715690613, + "p90": 1981.3759326934814, + "p95": 1988.2880449295044, + "p99": 2001.4400482177734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 14839, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1e01e1e3", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_f7e2f257", + "comparisonKey": "43d999badc304691", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:35.958723+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, 
+ "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 117.95199662446976, + "p90": 126.08000636100769, + "p95": 128.28800082206726, + "p99": 133.82400572299957 + }, + "combine": { + "p50": 99.39199686050415, + "p90": 105.72800040245056, + "p95": 108.12799632549286, + "p99": 112.60800063610077 + }, + "roundtrip": { + "p50": 191.42399728298187, + "p90": 198.08000326156616, + "p95": 200.54399967193604, + "p99": 207.45599269866943 + }, + "isolatedSum": { + "p50": 217.3439934849739, + "p90": 231.80800676345825, + "p95": 236.41599714756012, + "p99": 246.43200635910034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 463, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.0640023946762, + "p90": 153.9520025253296, + "p95": 155.64799308776855, + "p99": 158.4320068359375 + }, + 
"combine": { + "p50": 119.6800023317337, + "p90": 124.03199821710587, + "p95": 125.2480000257492, + "p99": 133.760005235672 + }, + "roundtrip": { + "p50": 241.43999814987183, + "p90": 249.2160052061081, + "p95": 252.22399830818176, + "p99": 258.87998938560486 + }, + "isolatedSum": { + "p50": 267.7440047264099, + "p90": 277.98400074243546, + "p95": 280.89599311351776, + "p99": 292.1920120716095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 918, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 204.8960030078888, + "p90": 212.351992726326, + "p95": 215.26400744915009, + "p99": 220.22399306297302 + }, + "combine": { + "p50": 164.92800414562225, + "p90": 172.41600155830383, + "p95": 173.98400604724884, + "p99": 179.29600179195404 + }, + "roundtrip": { + "p50": 344.57600116729736, + "p90": 352.512001991272, + "p95": 355.679988861084, + "p99": 361.952006816864 + }, + "isolatedSum": { + "p50": 369.82400715351105, + "p90": 384.7679942846298, + "p95": 389.2480134963989, + "p99": 399.51999485492706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 1841, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 313.27998638153076, + "p90": 319.8719918727875, + "p95": 322.52800464630127, + "p99": 327.9680013656616 + }, + "combine": { + "p50": 305.6640028953552, + "p90": 310.4639947414398, + "p95": 311.74400448799133, + "p99": 316.73601269721985 + }, + "roundtrip": { + "p50": 588.9599919319153, + "p90": 596.3199734687805, + "p95": 598.1760025024414, + "p99": 606.112003326416 + }, + "isolatedSum": { + "p50": 618.943989276886, + "p90": 630.3359866142273, + 
"p95": 634.2720091342926, + "p99": 644.7040140628815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 3712, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 538.4640097618103, + "p90": 545.9200143814087, + "p95": 548.7679839134216, + "p99": 554.2719960212708 + }, + "combine": { + "p50": 521.2159752845764, + "p90": 529.0560126304626, + "p95": 530.8480262756348, + "p99": 537.0240211486816 + }, + "roundtrip": { + "p50": 1033.728003501892, + "p90": 1041.1200523376465, + "p95": 1043.776035308838, + "p99": 1051.0720014572144 + }, + "isolatedSum": { + "p50": 1059.6799850463867, + "p90": 1074.9760270118713, + "p95": 1079.6160101890564, + "p99": 1091.2960171699524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 7407, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 995.3920245170593, + "p90": 1002.6240348815918, + "p95": 1005.5999755859375, + "p99": 1010.1759433746338 + }, + "combine": { + "p50": 972.000002861023, + "p90": 978.111982345581, + "p95": 982.047975063324, + "p99": 990.015983581543 + }, + "roundtrip": { + "p50": 1944.2559480667114, + "p90": 1951.5199661254883, + "p95": 1953.8559913635254, + "p99": 1957.8239917755127 + }, + "isolatedSum": { + "p50": 1967.3920273780823, + "p90": 1980.7360172271729, + "p95": 1987.6479506492615, + "p99": 2000.1919269561768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 14839, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-e427ce90", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_960df5b0", + "comparisonKey": "74fe6b1985d1b008", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:52.596697+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 104.16000336408615, + "p90": 111.77600175142288, + "p95": 114.62400108575821, + "p99": 127.10399925708771 + }, + "combine": { + "p50": 98.9760011434555, + "p90": 107.55199939012527, + "p95": 110.01600325107574, + "p99": 119.32799965143204 + }, + "roundtrip": { + "p50": 177.3120015859604, + "p90": 185.63200533390045, + "p95": 188.03200125694275, + "p99": 197.66399264335632 + }, + "isolatedSum": { + "p50": 203.13600450754166, + "p90": 219.32800114154816, + "p95": 224.64000433683395, + "p99": 246.43199890851974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 134.62400436401367, + "p90": 164.12800550460815, + "p95": 183.32800269126892, + "p99": 194.43200528621674 + }, + "combine": { + "p50": 121.08799815177917, + "p90": 146.62399888038635, + "p95": 154.62400019168854, + "p99": 171.36000096797943 + }, + "roundtrip": { + "p50": 227.4560034275055, + "p90": 240.35200476646423, + "p95": 251.67998671531677, + "p99": 281.18398785591125 + }, + "isolatedSum": { + "p50": 255.71200251579285, + "p90": 310.7520043849945, + "p95": 337.95200288295746, + "p99": 365.79200625419617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 193.34399700164795, + "p90": 206.7839950323105, + "p95": 222.49600291252136, + "p99": 251.45599246025085 + }, + "combine": { + "p50": 169.855996966362, + "p90": 206.2399983406067, + "p95": 
218.49599480628967, + "p99": 236.35199666023254 + }, + "roundtrip": { + "p50": 331.0079872608185, + "p90": 348.5119938850403, + "p95": 359.42399501800537, + "p99": 397.43998646736145 + }, + "isolatedSum": { + "p50": 363.19999396800995, + "p90": 413.0239933729172, + "p95": 440.99199771881104, + "p99": 487.8079891204834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 300.2240061759949, + "p90": 311.5839958190918, + "p95": 314.7520124912262, + "p99": 321.3759958744049 + }, + "combine": { + "p50": 306.40000104904175, + "p90": 319.90399956703186, + "p95": 334.879994392395, + "p99": 349.40800070762634 + }, + "roundtrip": { + "p50": 577.7279734611511, + "p90": 592.415988445282, + "p95": 616.4159774780273, + "p99": 637.1200084686279 + }, + "isolatedSum": { + "p50": 606.6240072250366, + "p90": 631.4879953861237, + "p95": 649.6320068836212, + "p99": 670.7839965820312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 528.1919836997986, + "p90": 535.1679921150208, + "p95": 538.0480289459229, + "p99": 544.9920296669006 + }, + "combine": { + "p50": 526.4639854431152, + "p90": 531.6479802131653, + "p95": 533.02401304245, + "p99": 539.4560098648071 + }, + "roundtrip": { + "p50": 1029.5039415359497, + "p90": 1037.8559827804565, + "p95": 1040.8960580825806, + "p99": 1049.407958984375 + }, + "isolatedSum": { + "p50": 1054.6559691429138, + "p90": 1066.815972328186, + "p95": 1071.0720419883728, + "p99": 1084.4480395317078 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 975.9680032730103, + "p90": 1011.6159915924072, + "p95": 1022.5280523300171, + "p99": 1043.776035308838 + }, + "combine": { + "p50": 974.399983882904, + "p90": 1003.9039850234985, + "p95": 1020.0639963150024, + "p99": 1035.5839729309082 + }, + "roundtrip": { + "p50": 1926.1759519577026, + "p90": 1957.9520225524902, + "p95": 1968.0960178375244, + "p99": 1986.7199659347534 + }, + "isolatedSum": { + "p50": 1950.3679871559143, + "p90": 2015.5199766159058, + "p95": 2042.5920486450195, + "p99": 2079.360008239746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ce501f59", + "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||d27781632f6e008", + "colorKey": "gb300_759e5033", + "comparisonKey": "a92c7f6b9510263e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:33.975891+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "d27781632f6e008", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 333.18400382995605, + "p90": 351.3599932193756, + "p95": 355.00800609588623, + "p99": 362.7519905567169 + }, + "combine": { + "p50": 76.9599974155426, + "p90": 80.99199831485748, + "p95": 82.75199681520462, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 385.4080140590668, + "p90": 402.24000811576843, + "p95": 406.14399313926697, + "p99": 415.6480133533478 + }, + "isolatedSum": { + "p50": 410.14400124549866, + "p90": 432.3519915342331, + "p95": 437.76000291109085, + "p99": 451.4879882335663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7647232, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + 
"globalTokens": 1024, + "dispatch": { + "p50": 335.07201075553894, + "p90": 349.40800070762634, + "p95": 352.25600004196167, + "p99": 361.60001158714294 + }, + "combine": { + "p50": 96.70399874448776, + "p90": 100.76799988746643, + "p95": 102.75200009346008, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 407.80800580978394, + "p90": 423.13599586486816, + "p95": 426.94398760795593, + "p99": 440.95999002456665 + }, + "isolatedSum": { + "p50": 431.7760095000267, + "p90": 450.1760005950928, + "p95": 455.00800013542175, + "p99": 469.31201219558716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 15192064, + "combineLogicalBytes": 30384128, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 400.31999349594116, + "p90": 414.17598724365234, + "p95": 418.2719886302948, + "p99": 431.0719966888428 + }, + "combine": { + "p50": 137.15200126171112, + "p90": 141.184002161026, + "p95": 143.327996134758, + "p99": 149.75999295711517 + }, + "roundtrip": { + "p50": 519.5519924163818, + "p90": 532.2880148887634, + "p95": 535.7440114021301, + "p99": 541.4720177650452 + }, + "isolatedSum": { + "p50": 537.4719947576523, + "p90": 555.3599894046783, + "p95": 561.5999847650528, + "p99": 580.831989645958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 30371840, + "combineLogicalBytes": 60743680, + "fanoutMean": 3.62060546875, + "recvTokensMax": 1865, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 539.6479964256287, + "p90": 552.5439977645874, + "p95": 556.7039847373962, + "p99": 563.5200142860413 + }, + "combine": { + "p50": 214.4320011138916, + "p90": 219.13599967956543, + "p95": 220.8320051431656, + "p99": 228.19200158119202 + }, + "roundtrip": { + "p50": 746.8479871749878, + "p90": 
760.479986667633, + "p95": 763.9679908752441, + "p99": 770.5280184745789 + }, + "isolatedSum": { + "p50": 754.0799975395203, + "p90": 771.6799974441528, + "p95": 777.5359898805618, + "p99": 791.7120158672333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 60858368, + "combineLogicalBytes": 121716736, + "fanoutMean": 3.62744140625, + "recvTokensMax": 3730, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 837.7599716186523, + "p90": 848.0640053749084, + "p95": 851.0400056838989, + "p99": 856.9920063018799 + }, + "combine": { + "p50": 470.0160026550293, + "p90": 476.25601291656494, + "p95": 479.13599014282227, + "p99": 485.6320023536682 + }, + "roundtrip": { + "p50": 1260.8000040054321, + "p90": 1276.319980621338, + "p95": 1281.7280292510986, + "p99": 1294.3999767303467 + }, + "isolatedSum": { + "p50": 1307.7759742736816, + "p90": 1324.3200182914734, + "p95": 1330.1759958267212, + "p99": 1342.624008655548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 121618432, + "combineLogicalBytes": 243236864, + "fanoutMean": 3.62451171875, + "recvTokensMax": 7446, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1442.7200555801392, + "p90": 1448.896050453186, + "p95": 1451.6160488128662, + "p99": 1459.5520496368408 + }, + "combine": { + "p50": 860.0320219993591, + "p90": 870.0799942016602, + "p95": 873.5359907150269, + "p99": 879.6160221099854 + }, + "roundtrip": { + "p50": 2287.872076034546, + "p90": 2297.2159385681152, + "p95": 2300.9281158447266, + "p99": 2310.62388420105 + }, + "isolatedSum": { + "p50": 2302.7520775794983, + "p90": 2318.976044654846, + "p95": 2325.152039527893, + "p99": 2339.168071746826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 243171328, + "combineLogicalBytes": 486342656, + "fanoutMean": 
3.62353515625, + "recvTokensMax": 14871, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cc0359bb", + "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||6a3023945a551d7", + "colorKey": "gb300_759e5033", + "comparisonKey": "9bc78852791254cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:24.610638+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6a3023945a551d7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 344.63998675346375, + "p90": 366.04800820350647, + "p95": 373.1200098991394, + "p99": 385.4080140590668 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 113.72800171375275, + "p95": 131.99999928474426, + "p99": 147.64800667762756 + }, + "roundtrip": { + "p50": 399.29598569869995, + "p90": 420.1279878616333, + "p95": 425.79200863838196, + "p99": 446.55999541282654 + }, + "isolatedSum": { + "p50": 425.34399032592773, + "p90": 479.7760099172592, + "p95": 505.12000918388367, + "p99": 533.0560207366943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9548800, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 344.7040021419525, + "p90": 359.360009431839, + "p95": 363.9039993286133, + "p99": 371.64801359176636 + }, + "combine": { + "p50": 101.40799731016159, + "p90": 108.96000266075134, + "p95": 114.84800279140472, + "p99": 143.71199905872345 + }, + "roundtrip": { + "p50": 423.7760007381439, + "p90": 438.3679926395416, + "p95": 442.84799695014954, + "p99": 451.10398530960083 + }, + "isolatedSum": { + "p50": 446.1119994521141, + "p90": 468.32001209259033, + "p95": 478.752002120018, + "p99": 515.3600126504898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18990080, + "combineLogicalBytes": 37980160, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 413.6959910392761, + "p90": 427.13600397109985, + "p95": 430.2079975605011, + "p99": 
441.2800073623657 + }, + "combine": { + "p50": 143.90400052070618, + "p90": 153.72799336910248, + "p95": 158.9760035276413, + "p99": 167.55199432373047 + }, + "roundtrip": { + "p50": 540.0639772415161, + "p90": 556.2880039215088, + "p95": 560.6399774551392, + "p99": 581.5359950065613 + }, + "isolatedSum": { + "p50": 557.5999915599823, + "p90": 580.8639973402023, + "p95": 589.1840010881424, + "p99": 608.8320016860962 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 37888000, + "combineLogicalBytes": 75776000, + "fanoutMean": 3.61328125, + "recvTokensMax": 1867, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 575.4879713058472, + "p90": 588.2239937782288, + "p95": 592.8000211715698, + "p99": 597.5679755210876 + }, + "combine": { + "p50": 258.432000875473, + "p90": 266.4639949798584, + "p95": 274.0800082683563, + "p99": 313.76001238822937 + }, + "roundtrip": { + "p50": 819.648027420044, + "p90": 833.4400057792664, + "p95": 838.0159735679626, + "p99": 850.4319787025452 + }, + "isolatedSum": { + "p50": 833.9199721813202, + "p90": 854.6879887580872, + "p95": 866.8800294399261, + "p99": 911.327987909317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76037120, + "combineLogicalBytes": 152074240, + "fanoutMean": 3.625732421875, + "recvTokensMax": 3722, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 901.0879993438721, + "p90": 916.7680144309998, + "p95": 923.3599901199341, + "p99": 940.7680034637451 + }, + "combine": { + "p50": 492.92799830436707, + "p90": 530.784010887146, + "p95": 547.2959876060486, + "p99": 559.6160292625427 + }, + "roundtrip": { + "p50": 1363.8720512390137, + "p90": 1378.3999681472778, + "p95": 1382.912039756775, + "p99": 1392.6399946212769 + }, + "isolatedSum": { + "p50": 1394.0159976482391, + "p90": 
1447.5520253181458, + "p95": 1470.6559777259827, + "p99": 1500.3840327262878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 152058880, + "combineLogicalBytes": 304117760, + "fanoutMean": 3.6253662109375, + "recvTokensMax": 7453, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1591.647982597351, + "p90": 1612.064003944397, + "p95": 1622.5600242614746, + "p99": 1648.192048072815 + }, + "combine": { + "p50": 938.1759762763977, + "p90": 970.7840085029602, + "p95": 984.8639965057373, + "p99": 998.9439845085144 + }, + "roundtrip": { + "p50": 2491.7120933532715, + "p90": 2516.5441036224365, + "p95": 2534.0800285339355, + "p99": 2558.4959983825684 + }, + "isolatedSum": { + "p50": 2529.823958873749, + "p90": 2582.848012447357, + "p95": 2607.424020767212, + "p99": 2647.1360325813293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 304179200, + "combineLogicalBytes": 608358400, + "fanoutMean": 3.6260986328125, + "recvTokensMax": 14884, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-64b5874e", + "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_759e5033", + "comparisonKey": "3bb6dd1cab6a48d9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:16.081318+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 
8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 339.55198526382446, + "p90": 358.72000455856323, + "p95": 365.53600430488586, + "p99": 374.9760091304779 + }, + "combine": { + "p50": 89.40800279378891, + "p90": 93.56799721717834, + "p95": 95.2640026807785, + "p99": 100.99200159311295 + }, + "roundtrip": { + "p50": 401.98400616645813, + "p90": 418.5279905796051, + "p95": 422.7519929409027, + "p99": 453.21598649024963 + }, + "isolatedSum": { + "p50": 428.9599880576134, + "p90": 452.2880017757416, + "p95": 460.80000698566437, + "p99": 475.96801072359085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11470848, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 348.7359881401062, + "p90": 363.3599877357483, + "p95": 367.0400083065033, + "p99": 376.44800543785095 + }, + "combine": { + "p50": 109.37599837779999, + "p90": 113.34399878978729, + "p95": 115.26399850845337, + "p99": 125.21600723266602 + }, + "roundtrip": { + "p50": 436.67200207710266, + "p90": 450.27199387550354, + "p95": 452.57601141929626, + "p99": 462.8159999847412 + }, + "isolatedSum": { + "p50": 458.1119865179062, + "p90": 476.7039865255356, + "p95": 482.30400681495667, + "p99": 501.66401267051697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22824960, + "combineLogicalBytes": 45649920, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 427.839994430542, + "p90": 440.2559995651245, + "p95": 443.64801049232483, + "p99": 451.07200741767883 + }, + "combine": { + "p50": 152.6080071926117, + "p90": 156.25600516796112, + "p95": 158.04800391197205, + "p99": 162.6559942960739 + }, + "roundtrip": { + "p50": 566.0160183906555, + "p90": 580.0960063934326, + "p95": 582.912027835846, + "p99": 589.9839997291565 + }, + "isolatedSum": { + "p50": 580.4480016231537, + "p90": 596.5120047330856, + "p95": 601.6960144042969, + "p99": 613.7280017137527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45533184, + "combineLogicalBytes": 91066368, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 604.8640012741089, + "p90": 617.8879737854004, + "p95": 621.7280030250549, + "p99": 631.8079829216003 + }, + "combine": { + "p50": 287.1679961681366, + "p90": 291.7119860649109, + "p95": 293.5360074043274, + "p99": 
299.6479868888855 + }, + "roundtrip": { + "p50": 855.8400273323059, + "p90": 867.904007434845, + "p95": 870.6240057945251, + "p99": 879.1679739952087 + }, + "isolatedSum": { + "p50": 892.0319974422455, + "p90": 909.5999598503113, + "p95": 915.2640104293823, + "p99": 931.4559698104858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91029504, + "combineLogicalBytes": 182059008, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 952.4160027503967, + "p90": 965.6959772109985, + "p95": 970.2079892158508, + "p99": 980.8319807052612 + }, + "combine": { + "p50": 506.879985332489, + "p90": 512.5120282173157, + "p95": 514.7200226783752, + "p99": 518.176019191742 + }, + "roundtrip": { + "p50": 1430.1120042800903, + "p90": 1442.6239728927612, + "p95": 1446.9120502471924, + "p99": 1454.2080163955688 + }, + "isolatedSum": { + "p50": 1459.2959880828857, + "p90": 1478.2080054283142, + "p95": 1484.928011894226, + "p99": 1499.0079998970032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 182224896, + "combineLogicalBytes": 364449792, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1728.0960083007812, + "p90": 1734.8159551620483, + "p95": 1737.9200458526611, + "p99": 1742.3679828643799 + }, + "combine": { + "p50": 950.655996799469, + "p90": 956.1920166015625, + "p95": 958.4320187568665, + "p99": 963.6480212211609 + }, + "roundtrip": { + "p50": 2646.0800170898438, + "p90": 2652.928113937378, + "p95": 2655.3280353546143, + "p99": 2659.775972366333 + }, + "isolatedSum": { + "p50": 2678.7520051002502, + "p90": 2691.007971763611, + "p95": 2696.3520646095276, + "p99": 2706.0160040855408 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 363976704, + "combineLogicalBytes": 727953408, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-261cb80d", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_5fe8d497", + "comparisonKey": "b7fadd190a8840d0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:42.277477+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 108.76800119876862, + "p90": 114.59200084209442, + "p95": 118.20799857378006, + "p99": 125.02400577068329 + }, + "combine": { + "p50": 94.08000111579895, + "p90": 97.88800030946732, + "p95": 99.7759997844696, + "p99": 106.78400099277496 + }, + "roundtrip": { + "p50": 240.89600145816803, + "p90": 253.9519965648651, + "p95": 258.4640085697174, + "p99": 267.520010471344 + }, + "isolatedSum": { + "p50": 202.84800231456757, + "p90": 212.48000115156174, + "p95": 217.98399835824966, + "p99": 231.80800676345825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 135.68000495433807, + "p90": 141.66399836540222, + "p95": 145.02400159835815, + "p99": 151.36000514030457 + }, + "combine": { + "p50": 114.20799791812897, + "p90": 117.98399686813354, + "p95": 119.35999989509583, + "p99": 123.6800029873848 + }, + "roundtrip": { + "p50": 284.7999930381775, + "p90": 292.4160063266754, + "p95": 295.6799864768982, + "p99": 302.94400453567505 + }, + "isolatedSum": { + "p50": 249.88800287246704, + "p90": 259.64799523353577, + "p95": 264.384001493454, + "p99": 275.04000812768936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26629120, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 189.7599995136261, + "p90": 196.4160054922104, + "p95": 198.97599518299103, + "p99": 203.23200523853302 + }, + "combine": { + "p50": 163.16799819469452, + "p90": 168.89600455760956, + "p95": 171.7119961977005, + "p99": 177.7919977903366 + }, + "roundtrip": { + "p50": 427.10399627685547, + "p90": 434.62398648262024, + "p95": 437.1519982814789, + "p99": 441.8239891529083 + }, + "isolatedSum": { + "p50": 352.9279977083206, + "p90": 365.31201004981995, + "p95": 370.6879913806915, + "p99": 381.02400302886963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53122048, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 298.3680069446564, + "p90": 304.9600124359131, + "p95": 306.87999725341797, + "p99": 313.4399950504303 + }, + "combine": { + "p50": 300.2240061759949, + "p90": 305.08801341056824, + "p95": 307.8399896621704, + "p99": 318.4640109539032 + }, + "roundtrip": { + "p50": 736.2239956855774, + "p90": 742.8159713745117, + "p95": 745.5040216445923, + "p99": 754.368007183075 + }, + "isolatedSum": { + "p50": 598.5920131206512, + "p90": 610.0480258464813, + "p95": 614.7199869155884, + "p99": 631.9040060043335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106201088, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 507.00801610946655, + "p90": 513.4080052375793, + "p95": 516.5759921073914, + "p99": 523.8400101661682 + }, + "combine": { + "p50": 521.1840271949768, + "p90": 526.4000296592712, + "p95": 529.1200280189514, + "p99": 533.1199765205383 + }, + "roundtrip": { + 
"p50": 1338.4000062942505, + "p90": 1345.4400300979614, + "p95": 1348.8320112228394, + "p99": 1355.0080060958862 + }, + "isolatedSum": { + "p50": 1028.1920433044434, + "p90": 1039.8080348968506, + "p95": 1045.6960201263428, + "p99": 1056.9599866867065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212595712, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 940.2239918708801, + "p90": 945.1519846916199, + "p95": 947.4560022354126, + "p99": 952.895998954773 + }, + "combine": { + "p50": 968.06401014328, + "p90": 974.0480184555054, + "p95": 975.9359955787659, + "p99": 982.0160269737244 + }, + "roundtrip": { + "p50": 2517.5039768218994, + "p90": 2523.4880447387695, + "p95": 2526.20792388916, + "p99": 2533.440113067627 + }, + "isolatedSum": { + "p50": 1908.2880020141602, + "p90": 1919.2000031471252, + "p95": 1923.3919978141785, + "p99": 1934.9120259284973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424639488, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4475928d", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_759e5033", + "comparisonKey": "47f11aa239b010b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:36.792587+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 349.727988243103, + "p90": 368.1600093841553, + "p95": 373.63201379776, + "p99": 382.9439878463745 + }, + "combine": { + "p50": 95.39200365543365, + "p90": 99.7759997844696, + "p95": 101.34399682283401, + "p99": 107.68000036478043 + }, + "roundtrip": { + "p50": 419.16799545288086, + "p90": 434.688001871109, + "p95": 437.6960098743439, + "p99": 446.5920031070709 + }, + "isolatedSum": { + "p50": 445.1199918985367, + "p90": 467.9360091686249, + "p95": 474.976010620594, + "p99": 490.62398821115494 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 359.9039912223816, + "p90": 374.59200620651245, + "p95": 377.82400846481323, + "p99": 385.18399000167847 + }, + "combine": { + "p50": 115.93600362539291, + "p90": 120.19199877977371, + "p95": 121.63200229406357, + "p99": 130.0799995660782 + }, + "roundtrip": { + "p50": 458.8159918785095, + "p90": 474.016010761261, + "p95": 478.68800163269043, + "p99": 507.29602575302124 + }, + "isolatedSum": { + "p50": 475.8399948477745, + "p90": 494.78400498628616, + "p95": 499.4560107588768, + "p99": 515.2639895677567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26629120, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 444.92799043655396, + "p90": 458.8800072669983, + "p95": 462.0159864425659, + "p99": 465.66399931907654 + }, + "combine": { + "p50": 162.52799332141876, + "p90": 167.71200299263, + "p95": 169.15200650691986, + "p99": 173.6000031232834 + }, + "roundtrip": { + "p50": 602.2080183029175, + "p90": 614.4639849662781, + "p95": 617.3760294914246, + "p99": 622.1759915351868 + }, + "isolatedSum": { + "p50": 607.4559837579727, + "p90": 626.5920102596283, + "p95": 631.1679929494858, + "p99": 639.2640024423599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53122048, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 637.6320123672485, + "p90": 651.6479849815369, + 
"p95": 654.304027557373, + "p99": 660.6400012969971 + }, + "combine": { + "p50": 301.12001299858093, + "p90": 306.43200874328613, + "p95": 308.8639974594116, + "p99": 314.2080008983612 + }, + "roundtrip": { + "p50": 907.1040153503418, + "p90": 919.4560050964355, + "p95": 922.2080111503601, + "p99": 927.1039962768555 + }, + "isolatedSum": { + "p50": 938.7520253658295, + "p90": 958.079993724823, + "p95": 963.1680250167847, + "p99": 974.8480021953583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106201088, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 1003.7440061569214, + "p90": 1014.0800476074219, + "p95": 1018.01598072052, + "p99": 1025.215983390808 + }, + "combine": { + "p50": 522.271990776062, + "p90": 527.7119874954224, + "p95": 529.8240184783936, + "p99": 534.5919728279114 + }, + "roundtrip": { + "p50": 1497.7279901504517, + "p90": 1509.8880529403687, + "p95": 1514.3040418624878, + "p99": 1521.2160348892212 + }, + "isolatedSum": { + "p50": 1526.0159969329834, + "p90": 1541.7920351028442, + "p95": 1547.8399991989136, + "p99": 1559.8079562187195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212595712, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1861.2159490585327, + "p90": 1868.2559728622437, + "p95": 1870.6560134887695, + "p99": 1876.4480352401733 + }, + "combine": { + "p50": 966.5279984474182, + "p90": 973.0240106582642, + "p95": 975.6159782409668, + "p99": 980.0000190734863 + }, + "roundtrip": { + "p50": 2798.5599040985107, + "p90": 2805.311918258667, + "p95": 2807.7120780944824, + "p99": 2813.8558864593506 + }, + 
"isolatedSum": { + "p50": 2827.743947505951, + "p90": 2841.279983520508, + "p95": 2846.2719917297363, + "p99": 2856.4480543136597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424639488, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-75dc945b", + "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||b208ea04b16e80b", + "colorKey": "gb300_759e5033", + "comparisonKey": "2cfd7bad8d408468", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:43.034340+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "b208ea04b16e80b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 343.423992395401, + "p90": 360.6080114841461, + "p95": 365.4400110244751, + "p99": 375.64799189567566 + }, + "combine": { + "p50": 94.04800087213516, + "p90": 97.95200079679489, + "p95": 99.55199807882309, + "p99": 106.46399855613708 + }, + "roundtrip": { + "p50": 410.20798683166504, + "p90": 425.6640076637268, + "p95": 429.02401089668274, + "p99": 438.4639859199524 + }, + "isolatedSum": { + "p50": 437.47199326753616, + "p90": 458.560012280941, + "p95": 464.9920091032982, + "p99": 482.11199045181274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13310976, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 353.4719944000244, + "p90": 367.3279881477356, + "p95": 369.376003742218, + "p99": 378.9759874343872 + }, + "combine": { + "p50": 115.1999980211258, + "p90": 118.94399672746658, + "p95": 120.7360029220581, + "p99": 125.40799379348755 + }, + "roundtrip": { + "p50": 451.26399397850037, + "p90": 465.37598967552185, + "p95": 468.32001209259033, + "p99": 481.82401061058044 + }, + "isolatedSum": { + "p50": 468.6719924211502, + "p90": 486.2719848752022, + "p95": 490.1120066642761, + "p99": 504.38398122787476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26664960, + "combineLogicalBytes": 53329920, + "fanoutMean": 3.6328125, + "recvTokensMax": 944, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 439.8080110549927, + "p90": 453.5999894142151, + "p95": 456.7039906978607, + "p99": 462.2400104999542 + }, + "combine": { + "p50": 163.58399391174316, + "p90": 170.3999936580658, + "p95": 172.89599776268005, + "p99": 178.0800074338913 + }, + "roundtrip": { + "p50": 600.0319719314575, + "p90": 612.7679944038391, + "p95": 616.5120005607605, + "p99": 625.5999803543091 + }, + "isolatedSum": { + "p50": 603.3920049667358, + "p90": 623.9999830722809, + "p95": 629.5999884605408, + "p99": 640.3200179338455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53265408, + "combineLogicalBytes": 106530816, + "fanoutMean": 3.62841796875, + "recvTokensMax": 1882, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 630.6880116462708, + "p90": 643.9679861068726, + "p95": 648.1279730796814, + "p99": 658.7520241737366 + }, + "combine": { + "p50": 298.11200499534607, + "p90": 303.3919930458069, + "p95": 305.4719865322113, + "p99": 310.88000535964966 + }, + "roundtrip": { + "p50": 899.2319703102112, + "p90": 910.431981086731, + "p95": 913.919985294342, + "p99": 920.8959937095642 + }, + "isolatedSum": { + "p50": 928.8000166416168, + "p90": 947.3599791526794, + "p95": 953.5999596118927, + "p99": 969.6320295333862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106315776, + "combineLogicalBytes": 212631552, + "fanoutMean": 3.62109375, + "recvTokensMax": 3729, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 999.0400075912476, + "p90": 1009.8240375518799, + "p95": 1013.2479667663574, + "p99": 1023.4559774398804 + }, + "combine": { + "p50": 521.1520195007324, + "p90": 527.2639989852905, + "p95": 
529.0240049362183, + "p99": 531.8400263786316 + }, + "roundtrip": { + "p50": 1492.192029953003, + "p90": 1503.551959991455, + "p95": 1509.1840028762817, + "p99": 1522.4000215530396 + }, + "isolatedSum": { + "p50": 1520.19202709198, + "p90": 1537.0880365371704, + "p95": 1542.2719717025757, + "p99": 1555.296003818512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212316160, + "combineLogicalBytes": 424632320, + "fanoutMean": 3.61572265625, + "recvTokensMax": 7430, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1858.2079410552979, + "p90": 1864.3840551376343, + "p95": 1865.8239841461182, + "p99": 1870.2720403671265 + }, + "combine": { + "p50": 970.3999757766724, + "p90": 978.0480265617371, + "p95": 980.4159998893738, + "p99": 986.0159754753113 + }, + "roundtrip": { + "p50": 2800.544023513794, + "p90": 2807.487964630127, + "p95": 2809.6959590911865, + "p99": 2815.040111541748 + }, + "isolatedSum": { + "p50": 2828.60791683197, + "p90": 2842.4320816993713, + "p95": 2846.239984035492, + "p99": 2856.2880158424377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424037376, + "combineLogicalBytes": 848074752, + "fanoutMean": 3.61065673828125, + "recvTokensMax": 14815, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f62ed1a9", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_3c645a5f", + "comparisonKey": "c2aa708d34437526", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:06.991175+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 95.67999839782715, + "p90": 101.27999633550644, + "p95": 104.06400263309479, + "p99": 109.37599837779999 + }, + "combine": { + "p50": 95.45599669218063, + "p90": 99.10400211811066, + "p95": 101.24800354242325, + "p99": 106.88000172376633 + }, + "roundtrip": { + "p50": 229.88800704479218, + "p90": 241.69600009918213, + "p95": 244.73600089550018, + "p99": 252.73600220680237 + }, + "isolatedSum": { + "p50": 191.13599509000778, + "p90": 
200.3839984536171, + "p95": 205.31200617551804, + "p99": 216.25600010156631 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 122.94399738311768, + "p90": 128.28800082206726, + "p95": 130.75199723243713, + "p99": 135.77599823474884 + }, + "combine": { + "p50": 115.77600240707397, + "p90": 119.71200257539749, + "p95": 121.69600278139114, + "p99": 128.48000228405 + }, + "roundtrip": { + "p50": 272.352010011673, + "p90": 278.30401062965393, + "p95": 280.9920012950897, + "p99": 288.38399052619934 + }, + "isolatedSum": { + "p50": 238.71999979019165, + "p90": 248.00000339746475, + "p95": 252.44800001382828, + "p99": 264.2560005187988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26629120, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 177.08800733089447, + "p90": 182.81599879264832, + "p95": 185.63200533390045, + "p99": 191.3599967956543 + }, + "combine": { + "p50": 164.15999829769135, + "p90": 169.8240041732788, + "p95": 172.5119948387146, + "p99": 178.17600071430206 + }, + "roundtrip": { + "p50": 412.8960072994232, + "p90": 418.8160002231598, + "p95": 421.7279851436615, + "p99": 427.16801166534424 + }, + "isolatedSum": { + "p50": 341.2480056285858, + "p90": 352.6400029659271, + "p95": 358.14400017261505, + "p99": 369.53599750995636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53122048, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 284.4800055027008, + "p90": 290.1119887828827, + "p95": 292.03200340270996, + "p99": 297.08799719810486 + }, + "combine": { + "p50": 301.34400725364685, + "p90": 307.0720136165619, + "p95": 308.54400992393494, + "p99": 314.0160143375397 + }, + "roundtrip": { + "p50": 721.8239903450012, + "p90": 728.8640141487122, + "p95": 731.9999933242798, + "p99": 738.0160093307495 + }, + "isolatedSum": { + "p50": 585.8240127563477, + "p90": 597.1840023994446, + "p95": 600.5760133266449, + "p99": 611.1040115356445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106201088, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 494.52799558639526, + "p90": 500.67198276519775, + "p95": 503.1999945640564, + "p99": 511.6159915924072 + }, + "combine": { + "p50": 522.4000215530396, + "p90": 528.1599760055542, + "p95": 530.1759839057922, + "p99": 534.5600247383118 + }, + "roundtrip": { + "p50": 1324.671983718872, + "p90": 1331.5199613571167, + "p95": 1333.8559865951538, + "p99": 1337.8880023956299 + }, + "isolatedSum": { + "p50": 1016.9280171394348, + "p90": 1028.831958770752, + "p95": 1033.3759784698486, + "p99": 1046.176016330719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212595712, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 918.7840223312378, + "p90": 924.2240190505981, + "p95": 926.0159730911255, + "p99": 932.8320026397705 + }, + "combine": { + "p50": 968.288004398346, + "p90": 974.3040204048157, + "p95": 976.6719937324524, + "p99": 981.3439846038818 + }, + "roundtrip": { + "p50": 
2496.9279766082764, + "p90": 2503.0078887939453, + "p95": 2505.0559043884277, + "p99": 2510.3039741516113 + }, + "isolatedSum": { + "p50": 1887.0720267295837, + "p90": 1898.5280394554138, + "p95": 1902.6879668235779, + "p99": 1914.1759872436523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424639488, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6ea1d7bb", + "identity": "gb300|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||d27781632f6e008", + "colorKey": "gb300_11303bbb", + "comparisonKey": "1586ecd769b563f5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:37.725778+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + 
"traceSignature": "d27781632f6e008", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 112.22399771213531, + "p90": 120.41600048542023, + "p95": 125.08800625801086, + "p99": 151.64799988269806 + }, + "combine": { + "p50": 85.15200018882751, + "p90": 89.59999680519104, + "p95": 91.42400324344635, + "p99": 97.21600264310837 + }, + "roundtrip": { + "p50": 167.1680063009262, + "p90": 175.87199807167053, + "p95": 178.46399545669556, + "p99": 216.95999801158905 + }, + "isolatedSum": { + "p50": 197.37599790096283, + "p90": 210.01599729061127, + "p95": 216.51200950145721, + "p99": 248.86400252580643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 15294464, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 139.00800049304962, + "p90": 146.33600413799286, + "p95": 150.176003575325, + "p99": 158.49600732326508 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 111.07199639081955, + "p95": 114.07999694347382, + "p99": 118.59200149774551 + }, + "roundtrip": { + "p50": 213.82400393486023, + "p90": 220.47999501228333, + "p95": 222.71999716758728, + "p99": 229.34399545192719 + }, + "isolatedSum": { + "p50": 243.3599978685379, + "p90": 257.4080005288124, + "p95": 264.2560005187988, + "p99": 277.0880088210106 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 30384128, + "combineLogicalBytes": 30384128, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 196.86399400234222, + "p90": 204.12799715995789, + "p95": 208.3200067281723, + "p99": 216.63999557495117 + }, + "combine": { + "p50": 146.14400267601013, + "p90": 150.9760022163391, + "p95": 153.6960005760193, + "p99": 159.64800119400024 + }, + "roundtrip": { + "p50": 310.7199966907501, + "p90": 318.7519907951355, + "p95": 321.4080035686493, + "p99": 325.3760039806366 + }, + "isolatedSum": { + "p50": 343.00799667835236, + "p90": 355.103999376297, + "p95": 362.0160073041916, + "p99": 376.2879967689514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 60743680, + "combineLogicalBytes": 60743680, + "fanoutMean": 3.62060546875, + "recvTokensMax": 1865, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 298.8480031490326, + "p90": 305.7279884815216, + "p95": 309.3760013580322, + "p99": 321.1199939250946 + }, + "combine": { + "p50": 220.44800221920013, + "p90": 226.17599368095398, + "p95": 227.9359996318817, + "p99": 233.5360050201416 + }, + "roundtrip": { + "p50": 494.1760003566742, + "p90": 502.1119713783264, + "p95": 504.2880177497864, + "p99": 509.72801446914673 + }, + "isolatedSum": { + "p50": 519.2960053682327, + "p90": 531.9039821624756, + "p95": 537.3120009899139, + "p99": 554.6559989452362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 121716736, + "combineLogicalBytes": 121716736, + "fanoutMean": 3.62744140625, + "recvTokensMax": 3730, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 503.1359791755676, + "p90": 
511.7759704589844, + "p95": 514.3679976463318, + "p99": 520.031988620758 + }, + "combine": { + "p50": 469.05601024627686, + "p90": 477.60000824928284, + "p95": 480.22401332855225, + "p99": 484.9280118942261 + }, + "roundtrip": { + "p50": 915.4880046844482, + "p90": 925.4400134086609, + "p95": 929.6000003814697, + "p99": 939.1360282897949 + }, + "isolatedSum": { + "p50": 972.1919894218445, + "p90": 989.3759787082672, + "p95": 994.592010974884, + "p99": 1004.9600005149841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 243236864, + "combineLogicalBytes": 243236864, + "fanoutMean": 3.62451171875, + "recvTokensMax": 7446, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 928.7359714508057, + "p90": 936.191976070404, + "p95": 938.7199878692627, + "p99": 943.9039826393127 + }, + "combine": { + "p50": 843.7439799308777, + "p90": 854.3999791145325, + "p95": 858.1439852714539, + "p99": 865.2160167694092 + }, + "roundtrip": { + "p50": 1780.6719541549683, + "p90": 1791.3919687271118, + "p95": 1794.8800325393677, + "p99": 1802.5280237197876 + }, + "isolatedSum": { + "p50": 1772.4799513816833, + "p90": 1790.5919551849365, + "p95": 1796.8639731407166, + "p99": 1809.119999408722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 486342656, + "combineLogicalBytes": 486342656, + "fanoutMean": 3.62353515625, + "recvTokensMax": 14871, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-319b0a77", + "identity": "gb300|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||6a3023945a551d7", + "colorKey": "gb300_11303bbb", + "comparisonKey": "faea7dfea2c59392", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:47.516757+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": 
"deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6a3023945a551d7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 112.64000087976456, + "p90": 118.97599697113037, + "p95": 122.6240023970604, + "p99": 132.03200697898865 + }, + "combine": { + "p50": 88.60799670219421, + "p90": 95.61599791049957, + "p95": 97.15200215578079, + "p99": 102.39999741315842 + }, + "roundtrip": { + "p50": 172.95999825000763, + "p90": 
181.15200102329254, + "p95": 183.55199694633484, + "p99": 188.54400515556335 + }, + "isolatedSum": { + "p50": 201.24799758195877, + "p90": 214.59199488162994, + "p95": 219.7760045528412, + "p99": 234.43200439214706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19097600, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 140.32000303268433, + "p90": 152.99199521541595, + "p95": 178.78399789333344, + "p99": 197.60000705718994 + }, + "combine": { + "p50": 109.31199789047241, + "p90": 115.35999923944473, + "p95": 117.37599968910217, + "p99": 123.6800029873848 + }, + "roundtrip": { + "p50": 223.23200106620789, + "p90": 231.26399517059326, + "p95": 234.20800268650055, + "p99": 240.83200097084045 + }, + "isolatedSum": { + "p50": 249.63200092315674, + "p90": 268.3519944548607, + "p95": 296.1599975824356, + "p99": 321.28001004457474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 37980160, + "combineLogicalBytes": 37980160, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 198.5280066728592, + "p90": 206.11199736595154, + "p95": 210.04800498485565, + "p99": 215.16799926757812 + }, + "combine": { + "p50": 151.13599598407745, + "p90": 157.75999426841736, + "p95": 159.45599973201752, + "p99": 163.58399391174316 + }, + "roundtrip": { + "p50": 322.01600074768066, + "p90": 329.27998900413513, + "p95": 332.0640027523041, + "p99": 337.72799372673035 + }, + "isolatedSum": { + "p50": 349.66400265693665, + "p90": 363.8719916343689, + "p95": 369.50400471687317, + "p99": 378.7519931793213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 75776000, + "combineLogicalBytes": 75776000, + 
"fanoutMean": 3.61328125, + "recvTokensMax": 1867, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 303.8400113582611, + "p90": 311.93599104881287, + "p95": 313.79199028015137, + "p99": 318.6880052089691 + }, + "combine": { + "p50": 258.14399123191833, + "p90": 265.53601026535034, + "p95": 268.44799518585205, + "p99": 272.96000719070435 + }, + "roundtrip": { + "p50": 511.9680166244507, + "p90": 518.8159942626953, + "p95": 521.4719772338867, + "p99": 525.9519815444946 + }, + "isolatedSum": { + "p50": 561.9840025901794, + "p90": 577.4720013141632, + "p95": 582.2399854660034, + "p99": 591.6480123996735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 152074240, + "combineLogicalBytes": 152074240, + "fanoutMean": 3.625732421875, + "recvTokensMax": 3722, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 516.543984413147, + "p90": 524.3200063705444, + "p95": 526.0800123214722, + "p99": 532.5440168380737 + }, + "combine": { + "p50": 492.15999245643616, + "p90": 498.4639883041382, + "p95": 501.5680193901062, + "p99": 505.43999671936035 + }, + "roundtrip": { + "p50": 981.3119769096375, + "p90": 988.9280200004578, + "p95": 991.2959933280945, + "p99": 997.6959824562073 + }, + "isolatedSum": { + "p50": 1008.7039768695831, + "p90": 1022.7839946746826, + "p95": 1027.6480317115784, + "p99": 1037.984013557434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 304117760, + "combineLogicalBytes": 304117760, + "fanoutMean": 3.6253662109375, + "recvTokensMax": 7453, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 953.3119797706604, + "p90": 961.2799882888794, + "p95": 963.424026966095, + "p99": 969.8879718780518 + }, + "combine": { 
+ "p50": 932.7999949455261, + "p90": 937.6639723777771, + "p95": 939.1040205955505, + "p99": 943.615972995758 + }, + "roundtrip": { + "p50": 1854.464054107666, + "p90": 1863.0080223083496, + "p95": 1865.9839630126953, + "p99": 1870.3680038452148 + }, + "isolatedSum": { + "p50": 1886.1119747161865, + "p90": 1898.9439606666565, + "p95": 1902.5280475616455, + "p99": 1913.5039448738098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 608358400, + "combineLogicalBytes": 608358400, + "fanoutMean": 3.6260986328125, + "recvTokensMax": 14884, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-067181e4", + "identity": "gb300|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_11303bbb", + "comparisonKey": "a0fd1371aa28bc6e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:00.008856+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + 
}, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.77600306272507, + "p90": 128.76799702644348, + "p95": 132.1280002593994, + "p99": 138.59200477600098 + }, + "combine": { + "p50": 99.32799637317657, + "p90": 103.80800068378448, + "p95": 105.98400235176086, + "p99": 111.96800321340561 + }, + "roundtrip": { + "p50": 188.57599794864655, + "p90": 196.79999351501465, + "p95": 199.68000054359436, + "p99": 205.72799444198608 + }, + "isolatedSum": { + "p50": 219.10399943590164, + "p90": 232.57599771022797, + "p95": 238.11200261116028, + "p99": 250.56000798940659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22941696, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 150.2400040626526, + "p90": 158.33599865436554, + "p95": 161.43999993801117, + "p99": 167.10400581359863 + }, + "combine": { + "p50": 116.22399836778641, + "p90": 124.35200065374374, + "p95": 126.0479986667633, + "p99": 132.03200697898865 + }, + "roundtrip": { + "p50": 236.2239956855774, + "p90": 244.35199797153473, + "p95": 247.0719963312149, + "p99": 253.50400805473328 + }, + "isolatedSum": { + 
"p50": 266.464002430439, + "p90": 282.6879993081093, + "p95": 287.4879986047745, + "p99": 299.1360127925873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45649920, + "combineLogicalBytes": 45649920, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.88800311088562, + "p90": 214.56000208854675, + "p95": 218.33600103855133, + "p99": 224.0000069141388 + }, + "combine": { + "p50": 161.98399662971497, + "p90": 167.58400201797485, + "p95": 170.68800330162048, + "p99": 174.55999553203583 + }, + "roundtrip": { + "p50": 338.01600337028503, + "p90": 346.3039994239807, + "p95": 348.60798716545105, + "p99": 354.94399070739746 + }, + "isolatedSum": { + "p50": 367.8719997406006, + "p90": 382.1440041065216, + "p95": 389.0240043401718, + "p99": 398.5600024461746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91066368, + "combineLogicalBytes": 91066368, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 311.39200925827026, + "p90": 319.487988948822, + "p95": 322.7519989013672, + "p99": 330.81600069999695 + }, + "combine": { + "p50": 295.55198550224304, + "p90": 316.0640001296997, + "p95": 325.9519934654236, + "p99": 653.760015964508 + }, + "roundtrip": { + "p50": 560.6080293655396, + "p90": 570.4320073127747, + "p95": 573.5679864883423, + "p99": 582.431972026825 + }, + "isolatedSum": { + "p50": 606.9439947605133, + "p90": 635.5519890785217, + "p95": 648.7039923667908, + "p99": 984.576016664505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 182059008, + "combineLogicalBytes": 182059008, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 532.256007194519, + "p90": 541.7280197143555, + "p95": 545.2160239219666, + "p99": 549.1520166397095 + }, + "combine": { + "p50": 509.72801446914673, + "p90": 518.176019191742, + "p95": 519.8079943656921, + "p99": 522.7839946746826 + }, + "roundtrip": { + "p50": 1011.9680166244507, + "p90": 1020.0639963150024, + "p95": 1023.0720043182373, + "p99": 1027.9680490493774 + }, + "isolatedSum": { + "p50": 1041.9840216636658, + "p90": 1059.9040389060974, + "p95": 1065.0240182876587, + "p99": 1071.936011314392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 364449792, + "combineLogicalBytes": 364449792, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 973.7280011177063, + "p90": 980.9920191764832, + "p95": 983.9040040969849, + "p99": 990.7519817352295 + }, + "combine": { + "p50": 948.8639831542969, + "p90": 953.3119797706604, + "p95": 955.3279876708984, + "p99": 960.8960151672363 + }, + "roundtrip": { + "p50": 1889.7919654846191, + "p90": 1897.2480297088623, + "p95": 1899.6800184249878, + "p99": 1906.3999652862549 + }, + "isolatedSum": { + "p50": 1922.5919842720032, + "p90": 1934.3039989471436, + "p95": 1939.2319917678833, + "p99": 1951.6479969024658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 727953408, + "combineLogicalBytes": 727953408, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a80341d9", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_ffffac8f", + "comparisonKey": "7b4f85037cef6c22", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:29.173646+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 115.10399729013443, + "p90": 123.1359988451004, + "p95": 125.56800246238708, + "p99": 131.6159963607788 + }, + "combine": { + "p50": 100.28800368309021, + "p90": 104.86400127410889, 
+ "p95": 107.71200060844421, + "p99": 112.60800063610077 + }, + "roundtrip": { + "p50": 190.91199338436127, + "p90": 197.85599410533905, + "p95": 200.95999538898468, + "p99": 207.5520008802414 + }, + "isolatedSum": { + "p50": 215.39200097322464, + "p90": 228.0000001192093, + "p95": 233.2800030708313, + "p99": 244.22399699687958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.99200201034546, + "p90": 155.39200603961945, + "p95": 159.2639982700348, + "p99": 187.48800456523895 + }, + "combine": { + "p50": 122.6240023970604, + "p90": 126.62400305271149, + "p95": 130.3039938211441, + "p99": 168.16000640392303 + }, + "roundtrip": { + "p50": 243.20000410079956, + "p90": 251.0719895362854, + "p95": 254.7520101070404, + "p99": 291.6480004787445 + }, + "isolatedSum": { + "p50": 271.61600440740585, + "p90": 282.01600909233093, + "p95": 289.5679920911789, + "p99": 355.648010969162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 202.7519941329956, + "p90": 212.19199895858765, + "p95": 217.1200066804886, + "p99": 263.2319927215576 + }, + "combine": { + "p50": 164.8000031709671, + "p90": 173.6000031232834, + "p95": 176.4480024576187, + "p99": 188.4479969739914 + }, + "roundtrip": { + "p50": 343.07199716567993, + "p90": 352.8960049152374, + "p95": 356.7039966583252, + "p99": 391.7759954929352 + }, + "isolatedSum": { + "p50": 367.5519973039627, + "p90": 385.79200208187103, + "p95": 393.5680091381073, + "p99": 451.679989695549 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 313.05599212646484, + "p90": 321.8879997730255, + "p95": 325.0879943370819, + "p99": 355.8399975299835 + }, + "combine": { + "p50": 303.00799012184143, + "p90": 310.2720081806183, + "p95": 312.1280074119568, + "p99": 319.39199566841125 + }, + "roundtrip": { + "p50": 589.2800092697144, + "p90": 597.3759889602661, + "p95": 600.5759835243225, + "p99": 609.6000075340271 + }, + "isolatedSum": { + "p50": 616.0639822483063, + "p90": 632.1600079536438, + "p95": 637.2160017490387, + "p99": 675.2319931983948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 536.8000268936157, + "p90": 544.9280142784119, + "p95": 547.5519895553589, + "p99": 554.4639825820923 + }, + "combine": { + "p50": 520.6400156021118, + "p90": 527.9039740562439, + "p95": 530.5280089378357, + "p99": 533.7280035018921 + }, + "roundtrip": { + "p50": 1031.999945640564, + "p90": 1043.8719987869263, + "p95": 1052.1600246429443, + "p99": 1084.671974182129 + }, + "isolatedSum": { + "p50": 1057.4400424957275, + "p90": 1072.8319883346558, + "p95": 1078.0799984931946, + "p99": 1088.1919860839844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 987.1039986610413, + 
"p90": 991.9999837875366, + "p95": 993.9200282096863, + "p99": 1001.0240077972412 + }, + "combine": { + "p50": 962.9120230674744, + "p90": 967.8400158882141, + "p95": 972.5120067596436, + "p99": 975.4559993743896 + }, + "roundtrip": { + "p50": 1927.7759790420532, + "p90": 1935.5520009994507, + "p95": 1938.3679628372192, + "p99": 1943.6160326004028 + }, + "isolatedSum": { + "p50": 1950.0160217285156, + "p90": 1959.8399996757507, + "p95": 1966.4320349693298, + "p99": 1976.4800071716309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-52d90461", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_11303bbb", + "comparisonKey": "051cb8f1d23c5079", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:33.657491+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.30399930477142, + "p90": 126.17599964141846, + "p95": 128.92800569534302, + "p99": 133.27999413013458 + }, + "combine": { + "p50": 101.75999999046326, + "p90": 108.15999656915665, + "p95": 110.23999750614166, + "p99": 114.88000303506851 + }, + "roundtrip": { + "p50": 192.73599982261658, + "p90": 199.20000433921814, + "p95": 202.14399695396423, + "p99": 209.98400449752808 + }, + "isolatedSum": { + "p50": 220.06399929523468, + "p90": 234.3359962105751, + "p95": 239.16800320148468, + "p99": 248.1599971652031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 150.01599490642548, + "p90": 156.15999698638916, + "p95": 158.36800634860992, + "p99": 163.00800442695618 + }, + "combine": { + "p50": 123.1359988451004, + "p90": 127.93600559234619, + "p95": 130.94399869441986, + "p99": 138.0160003900528 + }, + "roundtrip": { + "p50": 244.06400322914124, + 
"p90": 250.68798661231995, + "p95": 253.1520128250122, + "p99": 260.0640058517456 + }, + "isolatedSum": { + "p50": 273.1519937515259, + "p90": 284.09600257873535, + "p95": 289.3120050430298, + "p99": 301.024004817009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.4080069065094, + "p90": 213.6639952659607, + "p95": 217.40800142288208, + "p99": 224.86400604248047 + }, + "combine": { + "p50": 167.55199432373047, + "p90": 173.47200214862823, + "p95": 175.00799894332886, + "p99": 178.97599935531616 + }, + "roundtrip": { + "p50": 345.2480137348175, + "p90": 352.8960049152374, + "p95": 355.80798983573914, + "p99": 360.7040047645569 + }, + "isolatedSum": { + "p50": 372.96000123023987, + "p90": 387.1359974145889, + "p95": 392.41600036621094, + "p99": 403.84000539779663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 314.5599961280823, + "p90": 321.6640055179596, + "p95": 324.5759904384613, + "p99": 329.02398705482483 + }, + "combine": { + "p50": 304.9280047416687, + "p90": 310.68798899650574, + "p95": 312.5759959220886, + "p99": 317.6639974117279 + }, + "roundtrip": { + "p50": 591.1359786987305, + "p90": 597.6639986038208, + "p95": 599.9680161476135, + "p99": 606.112003326416 + }, + "isolatedSum": { + "p50": 619.488000869751, + "p90": 632.3519945144653, + "p95": 637.1519863605499, + "p99": 646.6879844665527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 
3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 539.5839810371399, + "p90": 548.6720204353333, + "p95": 552.191972732544, + "p99": 570.7839727401733 + }, + "combine": { + "p50": 522.9439735412598, + "p90": 531.1679840087891, + "p95": 532.5440168380737, + "p99": 536.8319749832153 + }, + "roundtrip": { + "p50": 1033.7599515914917, + "p90": 1040.6080484390259, + "p95": 1043.5199737548828, + "p99": 1048.9280223846436 + }, + "isolatedSum": { + "p50": 1062.5279545783997, + "p90": 1079.8400044441223, + "p95": 1084.7359895706177, + "p99": 1107.6159477233887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 986.8159890174866, + "p90": 992.5119876861572, + "p95": 994.7519898414612, + "p99": 999.5840191841125 + }, + "combine": { + "p50": 963.5519981384277, + "p90": 969.9839949607849, + "p95": 972.8639721870422, + "p99": 975.6159782409668 + }, + "roundtrip": { + "p50": 1927.9040098190308, + "p90": 1934.3039989471436, + "p95": 1936.5119934082031, + "p99": 1940.991997718811 + }, + "isolatedSum": { + "p50": 1950.3679871559143, + "p90": 1962.4959826469421, + "p95": 1967.6159620285034, + "p99": 1975.1999974250793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17bb895b", + "identity": "gb300|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||b208ea04b16e80b", + "colorKey": "gb300_11303bbb", + 
"comparisonKey": "c272d1a5f5cc20a6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:31:29.957466+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "b208ea04b16e80b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.4240003824234, + "p90": 127.48800218105316, + "p95": 130.46400249004364, + 
"p99": 141.95199310779572 + }, + "combine": { + "p50": 100.60799866914749, + "p90": 105.8880016207695, + "p95": 108.31999778747559, + "p99": 112.76800185441971 + }, + "roundtrip": { + "p50": 194.20799612998962, + "p90": 200.76799392700195, + "p95": 203.5519927740097, + "p99": 208.67200195789337 + }, + "isolatedSum": { + "p50": 220.0319990515709, + "p90": 233.37600380182266, + "p95": 238.78400027751923, + "p99": 254.71999496221542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26621952, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 151.7760008573532, + "p90": 158.75199437141418, + "p95": 161.6320013999939, + "p99": 165.50399363040924 + }, + "combine": { + "p50": 123.52000176906586, + "p90": 127.77599692344666, + "p95": 131.16799294948578, + "p99": 136.31999492645264 + }, + "roundtrip": { + "p50": 247.8400021791458, + "p90": 255.42399287223816, + "p95": 258.91199707984924, + "p99": 262.4639868736267 + }, + "isolatedSum": { + "p50": 275.29600262641907, + "p90": 286.52799129486084, + "p95": 292.7999943494797, + "p99": 301.8239885568619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53329920, + "combineLogicalBytes": 53329920, + "fanoutMean": 3.6328125, + "recvTokensMax": 944, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 207.8399956226349, + "p90": 217.18400716781616, + "p95": 219.80799734592438, + "p99": 230.3999960422516 + }, + "combine": { + "p50": 168.06399822235107, + "p90": 174.9120056629181, + "p95": 176.15999281406403, + "p99": 181.18399381637573 + }, + "roundtrip": { + "p50": 348.7359881401062, + "p90": 356.1919927597046, + "p95": 359.3280017375946, + "p99": 363.5199964046478 + }, + "isolatedSum": { + "p50": 
375.90399384498596, + "p90": 392.09601283073425, + "p95": 395.9679901599884, + "p99": 411.5839898586273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106530816, + "combineLogicalBytes": 106530816, + "fanoutMean": 3.62841796875, + "recvTokensMax": 1882, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 317.31200218200684, + "p90": 323.61599802970886, + "p95": 326.07999444007874, + "p99": 332.63999223709106 + }, + "combine": { + "p50": 304.4480085372925, + "p90": 310.2079927921295, + "p95": 311.74400448799133, + "p99": 316.3839876651764 + }, + "roundtrip": { + "p50": 593.0560231208801, + "p90": 600.2560257911682, + "p95": 603.1680107116699, + "p99": 609.1840267181396 + }, + "isolatedSum": { + "p50": 621.7600107192993, + "p90": 633.8239908218384, + "p95": 637.8239989280701, + "p99": 649.0239799022675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212631552, + "combineLogicalBytes": 212631552, + "fanoutMean": 3.62109375, + "recvTokensMax": 3729, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 541.0240292549133, + "p90": 548.2239723205566, + "p95": 550.9439706802368, + "p99": 554.8480153083801 + }, + "combine": { + "p50": 522.1120119094849, + "p90": 529.9199819564819, + "p95": 531.7760109901428, + "p99": 534.5600247383118 + }, + "roundtrip": { + "p50": 1033.4080457687378, + "p90": 1041.5359735488892, + "p95": 1044.3840026855469, + "p99": 1047.7759838104248 + }, + "isolatedSum": { + "p50": 1063.1360411643982, + "p90": 1078.1439542770386, + "p95": 1082.7199816703796, + "p99": 1089.408040046692 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424632320, + "combineLogicalBytes": 424632320, + "fanoutMean": 3.61572265625, + "recvTokensMax": 7430, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 990.4320240020752, + "p90": 996.0640072822571, + "p95": 997.7920055389404, + "p99": 1003.5840272903442 + }, + "combine": { + "p50": 963.9999866485596, + "p90": 972.1919894218445, + "p95": 975.2640128135681, + "p99": 981.2800288200378 + }, + "roundtrip": { + "p50": 1931.4559698104858, + "p90": 1939.7759437561035, + "p95": 1942.8160190582275, + "p99": 1946.4000463485718 + }, + "isolatedSum": { + "p50": 1954.4320106506348, + "p90": 1968.2559967041016, + "p95": 1973.0560183525085, + "p99": 1984.864056110382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 848074752, + "combineLogicalBytes": 848074752, + "fanoutMean": 3.61065673828125, + "recvTokensMax": 14815, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f84d26f4", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|prefill|normal|none|none|0|tuned||1104ab83732593b", + "colorKey": "gb300_a16423a9", + "comparisonKey": "6df19bcba08f9c49", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:32.665024+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1104ab83732593b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 123.87199699878693, + "p90": 150.68799257278442, + "p95": 156.41599893569946, + "p99": 176.1920005083084 + }, + "combine": { + "p50": 107.13600367307663, + "p90": 146.65600657463074, + "p95": 153.50399911403656, + "p99": 160.64000129699707 + }, + "roundtrip": { + "p50": 194.91200149059296, + "p90": 211.2320065498352, + "p95": 228.44800353050232, + "p99": 248.9600032567978 + }, + "isolatedSum": { + "p50": 231.00800067186356, + "p90": 297.34399914741516, + "p95": 309.919998049736, + "p99": 336.8320018053055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 151.2320041656494, + "p90": 188.960000872612, + "p95": 197.76000082492828, + "p99": 209.9200040102005 + }, + "combine": { + "p50": 125.18399953842163, + "p90": 
148.0959951877594, + "p95": 152.3520052433014, + "p99": 186.75200641155243 + }, + "roundtrip": { + "p50": 246.59200012683868, + "p90": 274.944007396698, + "p95": 287.03999519348145, + "p99": 314.2080008983612 + }, + "isolatedSum": { + "p50": 276.41600370407104, + "p90": 337.0559960603714, + "p95": 350.1120060682297, + "p99": 396.67201042175293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 4, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 208.8959962129593, + "p90": 216.8319970369339, + "p95": 218.9120054244995, + "p99": 224.7679978609085 + }, + "combine": { + "p50": 183.67999792099, + "p90": 189.37599658966064, + "p95": 193.9840018749237, + "p99": 200.1280039548874 + }, + "roundtrip": { + "p50": 354.20799255371094, + "p90": 362.65599727630615, + "p95": 364.8959994316101, + "p99": 370.7840144634247 + }, + "isolatedSum": { + "p50": 392.5759941339493, + "p90": 406.20799362659454, + "p95": 412.8960072994232, + "p99": 424.8960018157959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 4, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 326.1759877204895, + "p90": 350.75199604034424, + "p95": 370.49600481987, + "p99": 391.58400893211365 + }, + "combine": { + "p50": 323.61599802970886, + "p90": 335.4879915714264, + "p95": 354.2720079421997, + "p99": 380.16000390052795 + }, + "roundtrip": { + "p50": 625.6960034370422, + "p90": 642.8160071372986, + "p95": 654.528021812439, + "p99": 676.256000995636 + }, + "isolatedSum": { + "p50": 649.7919857501984, + "p90": 686.2399876117706, + "p95": 724.7680127620697, + "p99": 771.7440128326416 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 4, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 568.3839917182922, + "p90": 583.0399990081787, + "p95": 596.2240099906921, + "p99": 633.4720253944397 + }, + "combine": { + "p50": 558.0160021781921, + "p90": 582.2719931602478, + "p95": 592.7680134773254, + "p99": 607.1360111236572 + }, + "roundtrip": { + "p50": 1098.431944847107, + "p90": 1122.048020362854, + "p95": 1134.1760158538818, + "p99": 1156.224012374878 + }, + "isolatedSum": { + "p50": 1126.3999938964844, + "p90": 1165.3119921684265, + "p95": 1188.9920234680176, + "p99": 1240.608036518097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 4, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1049.0880012512207, + "p90": 1073.15194606781, + "p95": 1086.8159532546997, + "p99": 1109.503984451294 + }, + "combine": { + "p50": 1036.4480018615723, + "p90": 1059.4559907913208, + "p95": 1071.0079669952393, + "p99": 1093.4720039367676 + }, + "roundtrip": { + "p50": 2057.5039386749268, + "p90": 2073.6639499664307, + "p95": 2091.3920402526855, + "p99": 2111.999988555908 + }, + "isolatedSum": { + "p50": 2085.536003112793, + "p90": 2132.607936859131, + "p95": 2157.823920249939, + "p99": 2202.9759883880615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 4, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02e2b9e6", + "identity": 
"gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|prefill|normal|none|none|0|tuned||e15d35cfeaea91f", + "colorKey": "gb300_4f334ae0", + "comparisonKey": "21beabbe10dfb404", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:58.131559+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e15d35cfeaea91f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 102.55999863147736, + "p90": 110.30399799346924, + "p95": 115.03999680280685, + "p99": 121.31199985742569 + }, + "combine": { + "p50": 80.89599758386612, + "p90": 87.52000331878662, + "p95": 88.86399865150452, + "p99": 99.45599734783173 + }, + "roundtrip": { + "p50": 154.52800691127777, + "p90": 162.33600676059723, + "p95": 165.75999557971954, + "p99": 174.17599260807037 + }, + "isolatedSum": { + "p50": 183.45599621534348, + "p90": 197.82400131225586, + "p95": 203.90399545431137, + "p99": 220.76799720525742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 128.12800705432892, + "p90": 136.1600011587143, + "p95": 140.1280015707016, + "p99": 144.80000734329224 + }, + "combine": { + "p50": 123.58400225639343, + "p90": 127.68000364303589, + "p95": 130.72000443935394, + "p99": 137.60000467300415 + }, + "roundtrip": { + "p50": 223.7440049648285, + "p90": 231.77599906921387, + "p95": 234.592005610466, + "p99": 239.58399891853333 + }, + "isolatedSum": { + "p50": 251.71200931072235, + "p90": 263.8400048017502, + "p95": 270.84800601005554, + "p99": 282.4000120162964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 254.36800718307495, + "p90": 261.9839906692505, + "p95": 264.41600918769836, + "p99": 268.95999908447266 + }, + "combine": { + "p50": 245.69599330425262, + "p90": 249.91999566555023, + 
"p95": 251.74400210380554, + "p99": 258.4640085697174 + }, + "roundtrip": { + "p50": 478.94400358200073, + "p90": 485.02400517463684, + "p95": 487.2640073299408, + "p99": 491.8079972267151 + }, + "isolatedSum": { + "p50": 500.0640004873276, + "p90": 511.9039863348007, + "p95": 516.1600112915039, + "p99": 527.4240076541901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b0cfbda4", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|prefill|normal|none|none|0|tuned||33484f7e5b87248", + "colorKey": "gb300_05480265", + "comparisonKey": "8ed31938685a8c64", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:38:37.230110+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "33484f7e5b87248", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 101.27999633550644, + "p90": 110.36799848079681, + "p95": 114.3679991364479, + "p99": 122.91199713945389 + }, + "combine": { + "p50": 88.32000195980072, + "p90": 96.44799679517746, + "p95": 98.24000298976898, + "p99": 103.29599678516388 + }, + "roundtrip": { + "p50": 164.8319959640503, + "p90": 173.75999689102173, + "p95": 177.7919977903366, + "p99": 184.76800620555878 + }, + "isolatedSum": { + "p50": 189.59999829530716, + "p90": 206.81599527597427, + "p95": 212.6080021262169, + "p99": 226.20799392461777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 125.21600723266602, + "p90": 133.4719955921173, + "p95": 137.11999356746674, + "p99": 145.28000354766846 + }, + "combine": { + "p50": 108.25599730014801, + "p90": 112.15999722480774, + "p95": 113.82400244474411, + "p99": 120.19199877977371 + }, + "roundtrip": { + "p50": 209.53600108623505, + "p90": 218.75199675559998, + "p95": 222.4320024251938, + "p99": 230.75200617313385 + }, + "isolatedSum": { + "p50": 233.47200453281403, + "p90": 
245.63199281692505, + "p95": 250.94399601221085, + "p99": 265.47200232744217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22020096, + "combineLogicalBytes": 22020096, + "fanoutMean": 1.5, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 150.39999783039093, + "p90": 158.24000537395477, + "p95": 161.40800714492798, + "p99": 169.18399930000305 + }, + "combine": { + "p50": 133.59999656677246, + "p90": 138.72000575065613, + "p95": 142.2400027513504, + "p99": 147.64800667762756 + }, + "roundtrip": { + "p50": 260.6079876422882, + "p90": 268.8960134983063, + "p95": 271.4560031890869, + "p99": 276.8639922142029 + }, + "isolatedSum": { + "p50": 283.9999943971634, + "p90": 296.9600111246109, + "p95": 303.6480098962784, + "p99": 316.8320059776306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44040192, + "combineLogicalBytes": 44040192, + "fanoutMean": 1.5, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 223.80800545215607, + "p90": 231.1999946832657, + "p95": 234.43199694156647, + "p99": 240.447998046875 + }, + "combine": { + "p50": 182.5920045375824, + "p90": 187.55200505256653, + "p95": 189.2479956150055, + "p99": 195.77600061893463 + }, + "roundtrip": { + "p50": 385.15201210975647, + "p90": 394.3679928779602, + "p95": 396.1600065231323, + "p99": 402.14401483535767 + }, + "isolatedSum": { + "p50": 406.40000998973846, + "p90": 418.7519997358322, + "p95": 423.67999255657196, + "p99": 436.22399866580963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 88080384, + "combineLogicalBytes": 88080384, + "fanoutMean": 1.5, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
8192, + "dispatch": { + "p50": 345.08800506591797, + "p90": 353.4719944000244, + "p95": 355.9679985046387, + "p99": 363.48798871040344 + }, + "combine": { + "p50": 330.4319977760315, + "p90": 335.5199992656708, + "p95": 337.66400814056396, + "p99": 344.0319895744324 + }, + "roundtrip": { + "p50": 658.079981803894, + "p90": 665.503978729248, + "p95": 667.8720116615295, + "p99": 673.2479929924011 + }, + "isolatedSum": { + "p50": 675.5200028419495, + "p90": 688.9919936656952, + "p95": 693.6320066452026, + "p99": 707.5199782848358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 176160768, + "combineLogicalBytes": 176160768, + "fanoutMean": 1.5, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 647.0400094985962, + "p90": 653.760015964508, + "p95": 655.6800007820129, + "p99": 661.5999937057495 + }, + "combine": { + "p50": 556.6080212593079, + "p90": 565.343976020813, + "p95": 567.1679973602295, + "p99": 570.7520246505737 + }, + "roundtrip": { + "p50": 1180.4159879684448, + "p90": 1188.3519887924194, + "p95": 1190.9120082855225, + "p99": 1195.8080530166626 + }, + "isolatedSum": { + "p50": 1203.648030757904, + "p90": 1219.103991985321, + "p95": 1222.8479981422424, + "p99": 1232.3520183563232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352321536, + "combineLogicalBytes": 352321536, + "fanoutMean": 1.5, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-13f7c26e", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|prefill|normal|none|none|0|tuned||b8e52e92c6d3379", + "colorKey": "gb300_5ef5ae4f", + "comparisonKey": "77027917f60eba91", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:34.595809+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "b8e52e92c6d3379", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.27199906110764, + "p90": 125.5359947681427, + "p95": 129.2160004377365, + "p99": 136.22400164604187 + }, + "combine": { + "p50": 101.02400183677673, + "p90": 106.01600259542465, + "p95": 109.50399935245514, + "p99": 
115.64800143241882 + }, + "roundtrip": { + "p50": 191.52000546455383, + "p90": 199.74400103092194, + "p95": 203.77600193023682, + "p99": 257.6960027217865 + }, + "isolatedSum": { + "p50": 219.29600089788437, + "p90": 231.55199736356735, + "p95": 238.71999979019165, + "p99": 251.8720030784607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.5760062932968, + "p90": 154.59200739860535, + "p95": 157.6319932937622, + "p99": 183.67999792099 + }, + "combine": { + "p50": 122.40000069141388, + "p90": 126.20800733566284, + "p95": 128.86400520801544, + "p99": 143.51999759674072 + }, + "roundtrip": { + "p50": 242.91199445724487, + "p90": 250.17601251602173, + "p95": 255.51998615264893, + "p99": 295.83999514579773 + }, + "isolatedSum": { + "p50": 270.9760069847107, + "p90": 280.8000147342682, + "p95": 286.49599850177765, + "p99": 327.1999955177307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53100544, + "combineLogicalBytes": 53100544, + "fanoutMean": 3.6171875, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 204.48000729084015, + "p90": 211.67999505996704, + "p95": 213.59999477863312, + "p99": 219.32800114154816 + }, + "combine": { + "p50": 166.36799275875092, + "p90": 173.75999689102173, + "p95": 175.90400576591492, + "p99": 184.1920018196106 + }, + "roundtrip": { + "p50": 349.37599301338196, + "p90": 356.9920063018799, + "p95": 359.6160113811493, + "p99": 366.239994764328 + }, + "isolatedSum": { + "p50": 370.84800004959106, + "p90": 385.43999195098877, + "p95": 389.50400054454803, + "p99": 403.52000296115875 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 106373120, + "combineLogicalBytes": 106373120, + "fanoutMean": 3.623046875, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 322.7199912071228, + "p90": 330.7519853115082, + "p95": 334.78400111198425, + "p99": 357.31199383735657 + }, + "combine": { + "p50": 321.696013212204, + "p90": 326.9439935684204, + "p95": 330.7200074195862, + "p99": 340.06398916244507 + }, + "roundtrip": { + "p50": 619.8400259017944, + "p90": 628.5439729690552, + "p95": 634.9120140075684, + "p99": 670.527994632721 + }, + "isolatedSum": { + "p50": 644.4160044193268, + "p90": 657.6959788799286, + "p95": 665.5040085315704, + "p99": 697.3759829998016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 558.2720041275024, + "p90": 566.5919780731201, + "p95": 569.2160129547119, + "p99": 582.3360085487366 + }, + "combine": { + "p50": 551.3920187950134, + "p90": 557.7920079231262, + "p95": 560.8639717102051, + "p99": 571.4880228042603 + }, + "roundtrip": { + "p50": 1083.7440490722656, + "p90": 1092.7040576934814, + "p95": 1097.4080562591553, + "p99": 1122.7519512176514 + }, + "isolatedSum": { + "p50": 1109.6640229225159, + "p90": 1124.3839859962463, + "p95": 1130.079984664917, + "p99": 1153.8240313529968 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423958528, + "combineLogicalBytes": 423958528, + "fanoutMean": 3.6099853515625, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1029.5679569244385, + "p90": 1037.0559692382812, + 
"p95": 1038.8799905776978, + "p99": 1041.983962059021 + }, + "combine": { + "p50": 1026.0159969329834, + "p90": 1034.208059310913, + "p95": 1036.352038383484, + "p99": 1042.8800582885742 + }, + "roundtrip": { + "p50": 2039.2959117889404, + "p90": 2050.1439571380615, + "p95": 2055.5200576782227, + "p99": 2066.3039684295654 + }, + "isolatedSum": { + "p50": 2055.583953857422, + "p90": 2071.2640285491943, + "p95": 2075.2320289611816, + "p99": 2084.864020347595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847988736, + "combineLogicalBytes": 847988736, + "fanoutMean": 3.61029052734375, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-97519a7b", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|prefill|normal|none|none|0|tuned||5f9878f45872329", + "colorKey": "gb300_43b106ef", + "comparisonKey": "6da7bf56cd762b8c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:42:28.438686+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 
20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "5f9878f45872329", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.358123779296875, + "eplbImbalanceAfter": 1.000026818477746, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 126.01600587368011, + "p90": 133.88800621032715, + "p95": 136.54400408267975, + "p99": 140.3840035200119 + }, + "combine": { + "p50": 100.47999769449234, + "p90": 108.5439994931221, + "p95": 109.95200276374817, + "p99": 114.94400352239609 + }, + "roundtrip": { + "p50": 197.28000462055206, + "p90": 205.9839963912964, + "p95": 209.1519981622696, + "p99": 213.50400149822235 + }, + "isolatedSum": { + "p50": 226.49600356817245, + "p90": 242.43200570344925, + "p95": 246.49600684642792, + "p99": 255.328007042408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26664960, + "combineLogicalBytes": 26664960, + "fanoutMean": 3.6328125, + "recvTokensMax": 472, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 152.12799608707428, + "p90": 160.60799360275269, + "p95": 163.39200735092163, + "p99": 168.57600212097168 + }, + "combine": { + "p50": 123.87199699878693, + "p90": 131.071999669075, + "p95": 133.31200182437897, + "p99": 
137.7599984407425 + }, + "roundtrip": { + "p50": 249.53599274158478, + "p90": 257.82400369644165, + "p95": 260.22401452064514, + "p99": 264.2880082130432 + }, + "isolatedSum": { + "p50": 275.9999930858612, + "p90": 291.6799932718277, + "p95": 296.7040091753006, + "p99": 306.3360005617142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53143552, + "combineLogicalBytes": 53143552, + "fanoutMean": 3.6201171875, + "recvTokensMax": 946, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 213.85599672794342, + "p90": 221.21599316596985, + "p95": 223.77599775791168, + "p99": 228.35199534893036 + }, + "combine": { + "p50": 170.71999609470367, + "p90": 175.48799514770508, + "p95": 179.1680008172989, + "p99": 183.71200561523438 + }, + "roundtrip": { + "p50": 352.7680039405823, + "p90": 360.1599931716919, + "p95": 362.2719943523407, + "p99": 368.4479892253876 + }, + "isolatedSum": { + "p50": 384.5759928226471, + "p90": 396.7039883136749, + "p95": 402.94399857521057, + "p99": 412.06400096416473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106258432, + "combineLogicalBytes": 106258432, + "fanoutMean": 3.619140625, + "recvTokensMax": 1861, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 317.9199993610382, + "p90": 325.6320059299469, + "p95": 327.7119994163513, + "p99": 332.38399028778076 + }, + "combine": { + "p50": 305.9839904308319, + "p90": 310.68798899650574, + "p95": 313.1519854068756, + "p99": 321.152001619339 + }, + "roundtrip": { + "p50": 594.6239829063416, + "p90": 602.0479798316956, + "p95": 604.416012763977, + "p99": 612.7679944038391 + }, + "isolatedSum": { + "p50": 623.9039897918701, + "p90": 636.3199949264526, + "p95": 640.8639848232269, + "p99": 653.5359919071198 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 212645888, + "combineLogicalBytes": 212645888, + "fanoutMean": 3.621337890625, + "recvTokensMax": 3730, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 544.543981552124, + "p90": 552.4160265922546, + "p95": 555.8720231056213, + "p99": 562.0160102844238 + }, + "combine": { + "p50": 523.5520005226135, + "p90": 531.1359763145447, + "p95": 532.4159860610962, + "p99": 536.0000133514404 + }, + "roundtrip": { + "p50": 1035.2319478988647, + "p90": 1043.9039468765259, + "p95": 1046.7519760131836, + "p99": 1050.9120225906372 + }, + "isolatedSum": { + "p50": 1068.0959820747375, + "p90": 1083.5520029067993, + "p95": 1088.2880091667175, + "p99": 1098.0160236358643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424775680, + "combineLogicalBytes": 424775680, + "fanoutMean": 3.616943359375, + "recvTokensMax": 7429, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 995.8400130271912, + "p90": 1005.6320428848267, + "p95": 1011.199951171875, + "p99": 1035.264015197754 + }, + "combine": { + "p50": 961.9519710540771, + "p90": 971.3280200958252, + "p95": 973.9519953727722, + "p99": 1001.471996307373 + }, + "roundtrip": { + "p50": 1930.8799505233765, + "p90": 1939.743995666504, + "p95": 2157.088041305542, + "p99": 2291.4559841156006 + }, + "isolatedSum": { + "p50": 1957.7919840812683, + "p90": 1976.9600629806519, + "p95": 1985.1519465446472, + "p99": 2036.736011505127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 848547840, + "combineLogicalBytes": 848547840, + "fanoutMean": 3.6126708984375, + "recvTokensMax": 14823, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7daef202", + "identity": 
"gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|prefill|normal|none|none|0|tuned||ed21345b2de53e0", + "colorKey": "gb300_339552af", + "comparisonKey": "8a501f05e71a771e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:38:06.862419+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ed21345b2de53e0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.003448486328125, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + 
"sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.07999759912491, + "p90": 127.61600315570831, + "p95": 133.02400708198547, + "p99": 201.1519968509674 + }, + "combine": { + "p50": 100.00000149011612, + "p90": 105.53599894046783, + "p95": 109.0560033917427, + "p99": 144.41600441932678 + }, + "roundtrip": { + "p50": 191.55199825763702, + "p90": 198.7839937210083, + "p95": 201.50400698184967, + "p99": 246.7840015888214 + }, + "isolatedSum": { + "p50": 218.07999908924103, + "p90": 233.15200209617615, + "p95": 242.08001047372818, + "p99": 345.5680012702942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 464, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 149.82399344444275, + "p90": 154.7199934720993, + "p95": 158.01599621772766, + "p99": 162.59199380874634 + }, + "combine": { + "p50": 121.34400010108948, + "p90": 125.11999905109406, + "p95": 127.26399302482605, + "p99": 133.2480013370514 + }, + "roundtrip": { + "p50": 243.13600361347198, + "p90": 249.59999322891235, + "p95": 252.19199061393738, + "p99": 258.91199707984924 + }, + "isolatedSum": { + "p50": 271.1679935455322, + "p90": 279.83999252319336, + "p95": 285.2799892425537, + "p99": 295.83999514579773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52570112, + "combineLogicalBytes": 52570112, + "fanoutMean": 3.5810546875, + "recvTokensMax": 924, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 203.61599326133728, + "p90": 212.22400665283203, + "p95": 214.78399634361267, + "p99": 219.04000639915466 + }, + "combine": { + "p50": 165.43999314308167, + "p90": 172.60800302028656, + "p95": 
174.112007021904, + "p99": 178.24000120162964 + }, + "roundtrip": { + "p50": 343.7120020389557, + "p90": 351.6159951686859, + "p95": 353.69598865509033, + "p99": 358.7839901447296 + }, + "isolatedSum": { + "p50": 369.05598640441895, + "p90": 384.8320096731186, + "p95": 388.89600336551666, + "p99": 397.2800076007843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105613312, + "combineLogicalBytes": 105613312, + "fanoutMean": 3.59716796875, + "recvTokensMax": 1860, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 312.8960132598877, + "p90": 320.19200921058655, + "p95": 323.93598556518555, + "p99": 328.92799377441406 + }, + "combine": { + "p50": 303.3919930458069, + "p90": 309.7600042819977, + "p95": 311.5519881248474, + "p99": 318.11198592185974 + }, + "roundtrip": { + "p50": 591.1999940872192, + "p90": 599.1039872169495, + "p95": 601.6960144042969, + "p99": 609.6000075340271 + }, + "isolatedSum": { + "p50": 616.2880063056946, + "p90": 629.9520134925842, + "p95": 635.487973690033, + "p99": 647.0399796962738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211843072, + "combineLogicalBytes": 211843072, + "fanoutMean": 3.607666015625, + "recvTokensMax": 3714, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 536.1279845237732, + "p90": 544.0000295639038, + "p95": 548.3840107917786, + "p99": 606.4320206642151 + }, + "combine": { + "p50": 520.3840136528015, + "p90": 529.8240184783936, + "p95": 531.9039821624756, + "p99": 554.8800230026245 + }, + "roundtrip": { + "p50": 1029.0240049362183, + "p90": 1040.4800176620483, + "p95": 1046.7519760131836, + "p99": 1079.6480178833008 + }, + "isolatedSum": { + "p50": 1056.5119981765747, + "p90": 1073.8240480422974, + "p95": 1080.2879929542542, + "p99": 1161.3120436668396 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 423198720, + "combineLogicalBytes": 423198720, + "fanoutMean": 3.603515625, + "recvTokensMax": 7400, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 984.7360253334045, + "p90": 990.2399778366089, + "p95": 991.5199875831604, + "p99": 995.7119822502136 + }, + "combine": { + "p50": 962.8159999847412, + "p90": 971.1040258407593, + "p95": 972.8959798812866, + "p99": 978.7840247154236 + }, + "roundtrip": { + "p50": 1924.6400594711304, + "p90": 1933.2159757614136, + "p95": 1935.0080490112305, + "p99": 1940.4480457305908 + }, + "isolatedSum": { + "p50": 1947.5520253181458, + "p90": 1961.3440036773682, + "p95": 1964.415967464447, + "p99": 1974.4960069656372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 846024704, + "combineLogicalBytes": 846024704, + "fanoutMean": 3.6019287109375, + "recvTokensMax": 14796, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3883163e", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_b554fd9a", + "comparisonKey": "2cf9572474a551e4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:30.106440+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", 
+ "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 114.72000181674957, + "p90": 124.25599992275238, + "p95": 128.22400033473969, + "p99": 163.10399770736694 + }, + "combine": { + "p50": 97.59999811649323, + "p90": 106.81600123643875, + "p95": 112.09599673748016, + "p99": 147.07200229167938 + }, + "roundtrip": { + "p50": 189.88800048828125, + "p90": 201.9840031862259, + "p95": 216.06400609016418, + "p99": 256.6080093383789 + }, + "isolatedSum": { + "p50": 212.3199999332428, + "p90": 231.07200115919113, + "p95": 240.31999707221985, + "p99": 310.1759999990463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + 
"globalTokens": 1024, + "dispatch": { + "p50": 143.96800100803375, + "p90": 153.18399667739868, + "p95": 157.3760062456131, + "p99": 181.98400735855103 + }, + "combine": { + "p50": 120.57600170373917, + "p90": 127.29600071907043, + "p95": 133.82400572299957, + "p99": 157.6640009880066 + }, + "roundtrip": { + "p50": 239.1359955072403, + "p90": 247.0400035381317, + "p95": 250.4960000514984, + "p99": 277.5680124759674 + }, + "isolatedSum": { + "p50": 264.5440027117729, + "p90": 280.4799973964691, + "p95": 291.20001196861267, + "p99": 339.6480083465576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 201.9840031862259, + "p90": 214.01600539684296, + "p95": 224.99200701713562, + "p99": 260.51199436187744 + }, + "combine": { + "p50": 163.35999965667725, + "p90": 171.83999717235565, + "p95": 177.50400304794312, + "p99": 208.95999670028687 + }, + "roundtrip": { + "p50": 339.87200260162354, + "p90": 351.3599932193756, + "p95": 359.20000076293945, + "p99": 390.4320001602173 + }, + "isolatedSum": { + "p50": 365.34400284290314, + "p90": 385.8560025691986, + "p95": 402.49601006507874, + "p99": 469.4719910621643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 311.39200925827026, + "p90": 320.95998525619507, + "p95": 324.6079981327057, + "p99": 361.7280125617981 + }, + "combine": { + "p50": 320.41600346565247, + "p90": 330.04799485206604, + "p95": 332.63999223709106, + "p99": 353.05601358413696 + }, + "roundtrip": { + "p50": 605.791985988617, + 
"p90": 617.8879737854004, + "p95": 624.8319745063782, + "p99": 646.8799710273743 + }, + "isolatedSum": { + "p50": 631.8080127239227, + "p90": 651.0079801082611, + "p95": 657.2479903697968, + "p99": 714.7840261459351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 530.8480262756348, + "p90": 540.3839945793152, + "p95": 546.9440221786499, + "p99": 578.8800120353699 + }, + "combine": { + "p50": 546.9120144844055, + "p90": 555.1679730415344, + "p95": 562.5280141830444, + "p99": 587.0400071144104 + }, + "roundtrip": { + "p50": 1062.78395652771, + "p90": 1076.8959522247314, + "p95": 1084.5439434051514, + "p99": 1107.3280572891235 + }, + "isolatedSum": { + "p50": 1077.7600407600403, + "p90": 1095.5519676208496, + "p95": 1109.4720363616943, + "p99": 1165.9200191497803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1013.1200551986694, + "p90": 1025.8879661560059, + "p95": 1030.1439762115479, + "p99": 1037.824034690857 + }, + "combine": { + "p50": 1010.1759433746338, + "p90": 1021.8240022659302, + "p95": 1030.0480127334595, + "p99": 1044.4480180740356 + }, + "roundtrip": { + "p50": 2004.2240619659424, + "p90": 2021.183967590332, + "p95": 2027.008056640625, + "p99": 2041.8241024017334 + }, + "isolatedSum": { + "p50": 2023.2959985733032, + "p90": 2047.711968421936, + "p95": 2060.1919889450073, + "p99": 2082.2720527648926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + 
"fanoutMean": 2.5145263671875, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7a34d2bc", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|prefill|normal|none|none|0|tuned||25840dd8241ba10", + "colorKey": "gb300_b0a58d70", + "comparisonKey": "a428eb9c5701b22d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:01.780567+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "25840dd8241ba10", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 110.944002866745, + "p90": 118.56000125408173, + "p95": 122.72000312805176, + "p99": 157.151997089386 + }, + "combine": { + "p50": 97.08800166845322, + "p90": 100.89600086212158, + "p95": 104.47999835014343, + "p99": 136.4160031080246 + }, + "roundtrip": { + "p50": 180.41600286960602, + "p90": 188.57599794864655, + "p95": 190.97599387168884, + "p99": 217.18400716781616 + }, + "isolatedSum": { + "p50": 208.0320045351982, + "p90": 219.4560021162033, + "p95": 227.2000014781952, + "p99": 293.5680001974106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 131.9040060043335, + "p90": 140.32000303268433, + "p95": 144.19199526309967, + "p99": 166.59200191497803 + }, + "combine": { + "p50": 113.88800293207169, + "p90": 121.47200107574463, + "p95": 122.8799968957901, + "p99": 128.57599556446075 + }, + "roundtrip": { + "p50": 220.44800221920013, + "p90": 229.8240065574646, + "p95": 233.5679978132248, + "p99": 268.12800765037537 + }, + "isolatedSum": { + "p50": 245.79200893640518, + "p90": 261.79200410842896, + "p95": 267.07199215888977, + "p99": 295.1679974794388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18335744, + "combineLogicalBytes": 18335744, + "fanoutMean": 1.2490234375, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 
175.10400712490082, + "p90": 183.20000171661377, + "p95": 186.0159933567047, + "p99": 194.04800236225128 + }, + "combine": { + "p50": 158.33599865436554, + "p90": 162.52799332141876, + "p95": 164.000004529953, + "p99": 169.3120002746582 + }, + "roundtrip": { + "p50": 309.1840147972107, + "p90": 317.24798679351807, + "p95": 321.6640055179596, + "p99": 345.984011888504 + }, + "isolatedSum": { + "p50": 333.44000577926636, + "p90": 345.72799503803253, + "p95": 350.0159978866577, + "p99": 363.3600026369095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36900864, + "combineLogicalBytes": 36900864, + "fanoutMean": 1.2568359375, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 272.0000147819519, + "p90": 280.7680070400238, + "p95": 285.98400950431824, + "p99": 319.5840120315552 + }, + "combine": { + "p50": 296.1600124835968, + "p90": 305.1519989967346, + "p95": 308.03200602531433, + "p99": 314.84800577163696 + }, + "roundtrip": { + "p50": 538.0480289459229, + "p90": 550.1760244369507, + "p95": 554.3040037155151, + "p99": 585.5039954185486 + }, + "isolatedSum": { + "p50": 568.1600272655487, + "p90": 585.9200060367584, + "p95": 594.0160155296326, + "p99": 634.4320178031921 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73730048, + "combineLogicalBytes": 73730048, + "fanoutMean": 1.255615234375, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 456.4479887485504, + "p90": 463.80800008773804, + "p95": 466.5600061416626, + "p99": 476.79999470710754 + }, + "combine": { + "p50": 561.1199736595154, + "p90": 568.3199763298035, + "p95": 572.0959901809692, + "p99": 587.3919725418091 + }, + "roundtrip": { + "p50": 1029.919981956482, + "p90": 1039.5519733428955, + "p95": 1046.336054801941, 
+ "p99": 1066.912055015564 + }, + "isolatedSum": { + "p50": 1017.5679624080658, + "p90": 1032.1279764175415, + "p95": 1038.6559963226318, + "p99": 1064.1919672489166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147746816, + "combineLogicalBytes": 147746816, + "fanoutMean": 1.258056640625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 943.0080056190491, + "p90": 949.8879909515381, + "p95": 952.2240161895752, + "p99": 958.7519764900208 + }, + "combine": { + "p50": 1030.6559801101685, + "p90": 1043.2640314102173, + "p95": 1050.0160455703735, + "p99": 1069.983959197998 + }, + "roundtrip": { + "p50": 1940.7680034637451, + "p90": 1952.448010444641, + "p95": 1957.535982131958, + "p99": 1977.3759841918945 + }, + "isolatedSum": { + "p50": 1973.6639857292175, + "p90": 1993.1520223617554, + "p95": 2002.2400617599487, + "p99": 2028.7359356880188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295278592, + "combineLogicalBytes": 295278592, + "fanoutMean": 1.25714111328125, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-027788f0", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|prefill|normal|none|none|0|tuned||cabb28c468fd7cf", + "colorKey": "gb300_40a5347e", + "comparisonKey": "5244009dbffea1d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:54.680088+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 
4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cabb28c468fd7cf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86505126953125, + "eplbImbalanceAfter": 1.0000149681454613, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 116.12799763679504, + "p90": 127.55200266838074, + "p95": 149.53599870204926, + "p99": 182.68799781799316 + }, + "combine": { + "p50": 100.19200295209885, + "p90": 106.33599758148193, + "p95": 112.0000034570694, + "p99": 159.32799875736237 + }, + "roundtrip": { + "p50": 189.63199853897095, + "p90": 200.15999674797058, + "p95": 220.64000368118286, + "p99": 252.28801369667053 + }, + "isolatedSum": { + "p50": 216.3200005888939, + "p90": 233.88800024986267, + "p95": 261.53600215911865, + "p99": 342.01599657535553 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 25862144, + "combineLogicalBytes": 25862144, + "fanoutMean": 3.5234375, + "recvTokensMax": 457, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 146.62399888038635, + "p90": 153.9520025253296, + "p95": 157.82399475574493, + "p99": 168.19199919700623 + }, + "combine": { + "p50": 121.8239963054657, + "p90": 126.75200402736664, + "p95": 129.63199615478516, + "p99": 153.50399911403656 + }, + "roundtrip": { + "p50": 241.56799912452698, + "p90": 249.11999702453613, + "p95": 252.9599964618683, + "p99": 272.7999985218048 + }, + "isolatedSum": { + "p50": 268.44799518585205, + "p90": 280.7040065526962, + "p95": 287.4559909105301, + "p99": 321.6959983110428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 51509248, + "combineLogicalBytes": 51509248, + "fanoutMean": 3.5087890625, + "recvTokensMax": 914, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 202.2079974412918, + "p90": 210.4959934949875, + "p95": 216.15999937057495, + "p99": 253.4720003604889 + }, + "combine": { + "p50": 163.64799439907074, + "p90": 171.00800573825836, + "p95": 174.20800030231476, + "p99": 209.34399962425232 + }, + "roundtrip": { + "p50": 340.1600122451782, + "p90": 347.9039967060089, + "p95": 353.34399342536926, + "p99": 378.6559998989105 + }, + "isolatedSum": { + "p50": 365.85599184036255, + "p90": 381.50399923324585, + "p95": 390.3679996728897, + "p99": 462.8159999847412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 102688768, + "combineLogicalBytes": 102688768, + "fanoutMean": 3.49755859375, + "recvTokensMax": 1817, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 310.016006231308, + 
"p90": 319.9999928474426, + "p95": 325.3760039806366, + "p99": 362.17600107192993 + }, + "combine": { + "p50": 300.28799176216125, + "p90": 308.7039887905121, + "p95": 311.2959861755371, + "p99": 336.09598875045776 + }, + "roundtrip": { + "p50": 580.2559852600098, + "p90": 590.3040170669556, + "p95": 599.5839834213257, + "p99": 636.031985282898 + }, + "isolatedSum": { + "p50": 610.3039979934692, + "p90": 628.7039816379547, + "p95": 636.6719901561737, + "p99": 698.2719898223877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 205520896, + "combineLogicalBytes": 205520896, + "fanoutMean": 3.5, + "recvTokensMax": 3657, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 527.2639989852905, + "p90": 540.992021560669, + "p95": 553.6320209503174, + "p99": 581.7599892616272 + }, + "combine": { + "p50": 513.6319994926453, + "p90": 522.7519869804382, + "p95": 530.5600166320801, + "p99": 552.7359843254089 + }, + "roundtrip": { + "p50": 1014.3040418624878, + "p90": 1029.3439626693726, + "p95": 1037.0880365371704, + "p99": 1069.2479610443115 + }, + "isolatedSum": { + "p50": 1040.8959984779358, + "p90": 1063.7440085411072, + "p95": 1084.1920375823975, + "p99": 1134.4959735870361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 412016640, + "combineLogicalBytes": 412016640, + "fanoutMean": 3.50830078125, + "recvTokensMax": 7329, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 965.8240079879761, + "p90": 975.4559993743896, + "p95": 979.7760248184204, + "p99": 1012.3519897460938 + }, + "combine": { + "p50": 948.7040042877197, + "p90": 955.9999704360962, + "p95": 961.6000056266785, + "p99": 977.4720072746277 + }, + "roundtrip": { + "p50": 1895.616054534912, + "p90": 1906.1119556427002, + "p95": 1916.3199663162231, + "p99": 
1999.2320537567139 + }, + "isolatedSum": { + "p50": 1914.5280122756958, + "p90": 1931.4559698104858, + "p95": 1941.3760304450989, + "p99": 1989.8239970207214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 824119296, + "combineLogicalBytes": 824119296, + "fanoutMean": 3.5086669921875, + "recvTokensMax": 14713, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-35912227", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|prefill|normal|none|none|0|tuned||370c8dd16f08e2c", + "colorKey": "gb300_95d14aab", + "comparisonKey": "869fdb7d47d70d7f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:39:58.152712+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "370c8dd16f08e2c", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 119.9679970741272, + "p90": 129.63199615478516, + "p95": 142.94399321079254, + "p99": 183.80799889564514 + }, + "combine": { + "p50": 100.0640019774437, + "p90": 106.91200196743011, + "p95": 109.72800105810165, + "p99": 156.70399367809296 + }, + "roundtrip": { + "p50": 194.20799612998962, + "p90": 205.9199959039688, + "p95": 225.24799406528473, + "p99": 278.11199426651 + }, + "isolatedSum": { + "p50": 220.0319990515709, + "p90": 236.54399812221527, + "p95": 252.6719942688942, + "p99": 340.5119925737381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 507, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 147.039994597435, + "p90": 153.3759981393814, + "p95": 156.63999319076538, + "p99": 164.19200599193573 + }, + "combine": { + "p50": 122.3360002040863, + "p90": 126.27199292182922, + "p95": 128.54400277137756, + "p99": 134.5600038766861 + }, + "roundtrip": { + "p50": 244.25600469112396, + "p90": 251.55198574066162, + "p95": 255.16799092292786, + "p99": 262.7840042114258 + }, + "isolatedSum": { + "p50": 269.3759948015213, + "p90": 279.64799106121063, + "p95": 285.18399596214294, + "p99": 298.7520098686218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49057792, + "combineLogicalBytes": 
49057792, + "fanoutMean": 3.341796875, + "recvTokensMax": 1018, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.02400398254395, + "p90": 214.91199731826782, + "p95": 221.88800573349, + "p99": 262.7519965171814 + }, + "combine": { + "p50": 166.36799275875092, + "p90": 173.5360026359558, + "p95": 174.8799979686737, + "p99": 181.7599982023239 + }, + "roundtrip": { + "p50": 347.3280072212219, + "p90": 356.1600148677826, + "p95": 359.1040074825287, + "p99": 367.74399876594543 + }, + "isolatedSum": { + "p50": 371.39199674129486, + "p90": 388.44799995422363, + "p95": 396.7680037021637, + "p99": 444.5119947195053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 98344960, + "combineLogicalBytes": 98344960, + "fanoutMean": 3.349609375, + "recvTokensMax": 2039, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 317.984014749527, + "p90": 325.53601264953613, + "p95": 327.42398977279663, + "p99": 332.12798833847046 + }, + "combine": { + "p50": 321.02400064468384, + "p90": 325.56799054145813, + "p95": 327.67999172210693, + "p99": 333.21601152420044 + }, + "roundtrip": { + "p50": 614.3680214881897, + "p90": 623.744010925293, + "p95": 626.1439919471741, + "p99": 631.6800117492676 + }, + "isolatedSum": { + "p50": 639.0080153942108, + "p90": 651.1040031909943, + "p95": 655.1039814949036, + "p99": 665.3439998626709 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 196704256, + "combineLogicalBytes": 196704256, + "fanoutMean": 3.349853515625, + "recvTokensMax": 4074, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 552.2879958152771, + "p90": 561.6959929466248, + "p95": 567.520022392273, + "p99": 597.8879928588867 + }, + 
"combine": { + "p50": 546.9759702682495, + "p90": 555.7439923286438, + "p95": 557.6639771461487, + "p99": 570.8159804344177 + }, + "roundtrip": { + "p50": 1073.855996131897, + "p90": 1085.536003112793, + "p95": 1092.352032661438, + "p99": 1110.2080345153809 + }, + "isolatedSum": { + "p50": 1099.2639660835266, + "p90": 1117.4399852752686, + "p95": 1125.1839995384216, + "p99": 1168.7039732933044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 393351168, + "combineLogicalBytes": 393351168, + "fanoutMean": 3.349365234375, + "recvTokensMax": 8147, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1025.696039199829, + "p90": 1034.3040227890015, + "p95": 1036.6079807281494, + "p99": 1057.4079751968384 + }, + "combine": { + "p50": 1025.5999565124512, + "p90": 1034.9760055541992, + "p95": 1038.912057876587, + "p99": 1061.311960220337 + }, + "roundtrip": { + "p50": 2025.439977645874, + "p90": 2039.5519733428955, + "p95": 2047.8720664978027, + "p99": 2071.1679458618164 + }, + "isolatedSum": { + "p50": 2051.2959957122803, + "p90": 2069.2800283432007, + "p95": 2075.5200386047363, + "p99": 2118.7199354171753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 785469440, + "combineLogicalBytes": 785469440, + "fanoutMean": 3.3441162109375, + "recvTokensMax": 16298, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a07d5946", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|prefill|normal|none|none|0|tuned||624fdceae193d94", + "colorKey": "gb300_f6eb4093", + "comparisonKey": "b99f7d558b6c368b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:40:16.855043+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "624fdceae193d94", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.888397216796875, + "eplbImbalanceAfter": 1.00013427734375, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.1119978427887, + "p90": 126.49600207805634, + "p95": 129.98400628566742, + "p99": 138.33600282669067 + }, + "combine": { + "p50": 100.5759984254837, + "p90": 107.32799768447876, + "p95": 109.50399935245514, + "p99": 113.69600147008896 + }, + "roundtrip": { + "p50": 192.6400065422058, + 
"p90": 199.42399859428406, + "p95": 201.7280012369156, + "p99": 208.22399854660034 + }, + "isolatedSum": { + "p50": 218.6879962682724, + "p90": 233.8239997625351, + "p95": 239.48800563812256, + "p99": 252.03200429677963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 463, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 150.07999539375305, + "p90": 156.51200711727142, + "p95": 158.91200304031372, + "p99": 164.44799304008484 + }, + "combine": { + "p50": 122.01599776744843, + "p90": 126.39999389648438, + "p95": 127.96799838542938, + "p99": 132.7359974384308 + }, + "roundtrip": { + "p50": 244.159996509552, + "p90": 251.16801261901855, + "p95": 254.91198897361755, + "p99": 261.7279887199402 + }, + "isolatedSum": { + "p50": 272.0959931612015, + "p90": 282.9120010137558, + "p95": 286.8800014257431, + "p99": 297.1839904785156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52441088, + "combineLogicalBytes": 52441088, + "fanoutMean": 3.572265625, + "recvTokensMax": 924, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.24799823760986, + "p90": 214.1440063714981, + "p95": 216.63999557495117, + "p99": 222.6240038871765 + }, + "combine": { + "p50": 166.01599752902985, + "p90": 173.8239973783493, + "p95": 175.135999917984, + "p99": 181.0240000486374 + }, + "roundtrip": { + "p50": 346.46400809288025, + "p90": 353.983998298645, + "p95": 357.2480082511902, + "p99": 367.35999584198 + }, + "isolatedSum": { + "p50": 371.2639957666397, + "p90": 387.9680037498474, + "p95": 391.7759954929352, + "p99": 403.6480039358139 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105670656, + "combineLogicalBytes": 105670656, + "fanoutMean": 
3.59912109375, + "recvTokensMax": 1859, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 313.3760094642639, + "p90": 319.2000091075897, + "p95": 321.152001619339, + "p99": 325.0240087509155 + }, + "combine": { + "p50": 300.5119860172272, + "p90": 308.4479868412018, + "p95": 310.1760149002075, + "p99": 315.0399923324585 + }, + "roundtrip": { + "p50": 588.4799957275391, + "p90": 596.0320234298706, + "p95": 597.7280139923096, + "p99": 605.247974395752 + }, + "isolatedSum": { + "p50": 613.8879954814911, + "p90": 627.6479959487915, + "p95": 631.3280165195465, + "p99": 640.064001083374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211527680, + "combineLogicalBytes": 211527680, + "fanoutMean": 3.602294921875, + "recvTokensMax": 3708, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 537.4400019645691, + "p90": 543.7440276145935, + "p95": 546.1120009422302, + "p99": 549.4080185890198 + }, + "combine": { + "p50": 521.2799906730652, + "p90": 529.6000242233276, + "p95": 531.391978263855, + "p99": 535.103976726532 + }, + "roundtrip": { + "p50": 1032.7999591827393, + "p90": 1040.063977241516, + "p95": 1041.9520139694214, + "p99": 1048.4479665756226 + }, + "isolatedSum": { + "p50": 1058.7199926376343, + "p90": 1073.3440518379211, + "p95": 1077.5039792060852, + "p99": 1084.5119953155518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423284736, + "combineLogicalBytes": 423284736, + "fanoutMean": 3.604248046875, + "recvTokensMax": 7436, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 989.9839758872986, + "p90": 998.6879825592041, + "p95": 1000.6719827651978, + "p99": 1004.2239427566528 + }, + "combine": { + "p50": 
962.88001537323, + "p90": 971.9039797782898, + "p95": 974.0480184555054, + "p99": 977.5999784469604 + }, + "roundtrip": { + "p50": 1930.5599927902222, + "p90": 1939.7120475769043, + "p95": 1943.8719749450684, + "p99": 1949.280023574829 + }, + "isolatedSum": { + "p50": 1952.8639912605286, + "p90": 1970.591962337494, + "p95": 1974.7200012207031, + "p99": 1981.8239212036133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847745024, + "combineLogicalBytes": 847745024, + "fanoutMean": 3.6092529296875, + "recvTokensMax": 14866, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3e96c39e", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_f231b710", + "comparisonKey": "2a2625b79fdfc40c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:03.667985+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": 
false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 113.98400366306305, + "p90": 124.4800016283989, + "p95": 136.73600554466248, + "p99": 182.8799992799759 + }, + "combine": { + "p50": 97.63199836015701, + "p90": 102.30399668216705, + "p95": 106.88000172376633, + "p99": 136.03200018405914 + }, + "roundtrip": { + "p50": 188.4479969739914, + "p90": 199.2959976196289, + "p95": 208.12800526618958, + "p99": 251.90401077270508 + }, + "isolatedSum": { + "p50": 211.61600202322006, + "p90": 226.78399831056595, + "p95": 243.6160072684288, + "p99": 318.91199946403503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 142.4960047006607, + "p90": 149.9200016260147, + "p95": 151.61600708961487, + "p99": 158.36800634860992 + }, + "combine": { + "p50": 119.99999731779099, + "p90": 123.83999675512314, + "p95": 125.2480000257492, + "p99": 129.82399761676788 + }, + "roundtrip": { + "p50": 237.8239929676056, + "p90": 244.89599466323853, + "p95": 248.35200607776642, + "p99": 
253.24800610542297 + }, + "isolatedSum": { + "p50": 262.4960020184517, + "p90": 273.75999838113785, + "p95": 276.8640071153641, + "p99": 288.1920039653778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 199.52000677585602, + "p90": 207.5520008802414, + "p95": 210.55999398231506, + "p99": 230.84799945354462 + }, + "combine": { + "p50": 161.6320013999939, + "p90": 169.8240041732788, + "p95": 171.9360053539276, + "p99": 178.0479997396469 + }, + "roundtrip": { + "p50": 337.7920091152191, + "p90": 347.51999378204346, + "p95": 354.94399070739746, + "p99": 384.6080005168915 + }, + "isolatedSum": { + "p50": 361.1520081758499, + "p90": 377.3760050535202, + "p95": 382.4959993362427, + "p99": 408.8959991931915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 309.9839985370636, + "p90": 320.0959861278534, + "p95": 332.89599418640137, + "p99": 373.6959993839264 + }, + "combine": { + "p50": 319.90399956703186, + "p90": 328.15998792648315, + "p95": 332.2240114212036, + "p99": 350.14399886131287 + }, + "roundtrip": { + "p50": 604.2559742927551, + "p90": 613.7279868125916, + "p95": 616.703987121582, + "p99": 626.7840266227722 + }, + "isolatedSum": { + "p50": 629.8879981040955, + "p90": 648.2559740543365, + "p95": 665.120005607605, + "p99": 723.8399982452393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4096, + "stragglerRank": 0, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 530.6239724159241, + "p90": 539.0080213546753, + "p95": 545.1520085334778, + "p99": 583.4879875183105 + }, + "combine": { + "p50": 544.48002576828, + "p90": 553.1520247459412, + "p95": 555.2639961242676, + "p99": 565.0240182876587 + }, + "roundtrip": { + "p50": 1060.6720447540283, + "p90": 1071.552038192749, + "p95": 1075.584053993225, + "p99": 1100.2559661865234 + }, + "isolatedSum": { + "p50": 1075.103998184204, + "p90": 1092.1600461006165, + "p95": 1100.4160046577454, + "p99": 1148.5120058059692 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1015.8400535583496, + "p90": 1028.3839702606201, + "p95": 1033.7599515914917, + "p99": 1054.368019104004 + }, + "combine": { + "p50": 1009.6640586853027, + "p90": 1019.711971282959, + "p95": 1023.8720178604126, + "p99": 1047.0720529556274 + }, + "roundtrip": { + "p50": 2003.8719177246094, + "p90": 2023.0720043182373, + "p95": 2027.359962463379, + "p99": 2047.1038818359375 + }, + "isolatedSum": { + "p50": 2025.5041122436523, + "p90": 2048.095941543579, + "p95": 2057.6319694519043, + "p99": 2101.4400720596313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bc25e158", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_19caa41e", + "comparisonKey": "489a675fb6011ebc", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:22.584181+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.33599954843521, + "p90": 
126.01600587368011, + "p95": 128.51199507713318, + "p99": 132.54399597644806 + }, + "combine": { + "p50": 100.47999769449234, + "p90": 106.46399855613708, + "p95": 108.2879975438118, + "p99": 113.34399878978729 + }, + "roundtrip": { + "p50": 193.05600225925446, + "p90": 199.45600628852844, + "p95": 201.9840031862259, + "p99": 207.519993185997 + }, + "isolatedSum": { + "p50": 218.81599724292755, + "p90": 232.4800044298172, + "p95": 236.79999262094498, + "p99": 245.88799476623535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 463, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 148.67199957370758, + "p90": 154.52800691127777, + "p95": 156.2879979610443, + "p99": 160.12799739837646 + }, + "combine": { + "p50": 122.01599776744843, + "p90": 126.0479986667633, + "p95": 127.71199643611908, + "p99": 133.7919980287552 + }, + "roundtrip": { + "p50": 243.3280050754547, + "p90": 251.13600492477417, + "p95": 253.4399926662445, + "p99": 260.6399953365326 + }, + "isolatedSum": { + "p50": 270.687997341156, + "p90": 280.5760055780411, + "p95": 283.9999943971634, + "p99": 293.91999542713165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 918, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 203.48800718784332, + "p90": 211.32799983024597, + "p95": 214.1759991645813, + "p99": 218.59200298786163 + }, + "combine": { + "p50": 166.75199568271637, + "p90": 172.92800545692444, + "p95": 174.6560037136078, + "p99": 176.83200538158417 + }, + "roundtrip": { + "p50": 344.7679877281189, + "p90": 352.03200578689575, + "p95": 354.2720079421997, + "p99": 
358.91199111938477 + }, + "isolatedSum": { + "p50": 370.2400028705597, + "p90": 384.2560052871704, + "p95": 388.8320028781891, + "p99": 395.4240083694458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 1841, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 313.56799602508545, + "p90": 319.4560110569, + "p95": 321.5680122375488, + "p99": 327.36000418663025 + }, + "combine": { + "p50": 302.3679852485657, + "p90": 308.896005153656, + "p95": 310.62400341033936, + "p99": 315.8400058746338 + }, + "roundtrip": { + "p50": 586.5920186042786, + "p90": 593.9199924468994, + "p95": 596.4800119400024, + "p99": 603.6800146102905 + }, + "isolatedSum": { + "p50": 615.9359812736511, + "p90": 628.352016210556, + "p95": 632.1920156478882, + "p99": 643.200010061264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 3712, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 534.9119901657104, + "p90": 541.7280197143555, + "p95": 545.2479720115662, + "p99": 552.4160265922546 + }, + "combine": { + "p50": 518.8480019569397, + "p90": 526.3040065765381, + "p95": 529.3440222740173, + "p99": 535.2640151977539 + }, + "roundtrip": { + "p50": 1024.6080160140991, + "p90": 1033.0239534378052, + "p95": 1036.0959768295288, + "p99": 1047.327995300293 + }, + "isolatedSum": { + "p50": 1053.7599921226501, + "p90": 1068.0320262908936, + "p95": 1074.5919942855835, + "p99": 1087.6800417900085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 7407, + "stragglerRank": 0, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 985.5679869651794, + "p90": 990.4639720916748, + "p95": 992.3520088195801, + "p99": 997.5680112838745 + }, + "combine": { + "p50": 961.1520171165466, + "p90": 966.7199850082397, + "p95": 971.3280200958252, + "p99": 975.4559993743896 + }, + "roundtrip": { + "p50": 1921.0879802703857, + "p90": 1929.1199445724487, + "p95": 1931.6799640655518, + "p99": 2063.3599758148193 + }, + "isolatedSum": { + "p50": 1946.720004081726, + "p90": 1957.1839570999146, + "p95": 1963.6800289154053, + "p99": 1973.0240106582642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 14839, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-99d1a86e", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_f0245a54", + "comparisonKey": "658e55f4cd96097c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:39:10.375956+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 118.14399808645248, + "p90": 127.51999497413635, + "p95": 130.3360015153885, + "p99": 134.65599715709686 + }, + "combine": { + "p50": 100.67199915647507, + "p90": 108.47999900579453, + "p95": 111.32799834012985, + "p99": 116.86400324106216 + }, + "roundtrip": { + "p50": 191.80800020694733, + "p90": 199.48799908161163, + "p95": 203.71200144290924, + "p99": 214.04799818992615 + }, + "isolatedSum": { + "p50": 218.81599724292755, + "p90": 235.99999397993088, + "p95": 241.66399985551834, + "p99": 251.52000039815903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 463, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 149.05600249767303, + "p90": 156.92800283432007, + "p95": 
160.0639969110489, + "p99": 183.52000415325165 + }, + "combine": { + "p50": 123.07199835777283, + "p90": 128.06400656700134, + "p95": 133.31200182437897, + "p99": 159.61599349975586 + }, + "roundtrip": { + "p50": 244.06400322914124, + "p90": 252.25600600242615, + "p95": 259.20000672340393, + "p99": 298.2720136642456 + }, + "isolatedSum": { + "p50": 272.12800085544586, + "p90": 284.9920094013214, + "p95": 293.37599873542786, + "p99": 343.1359976530075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 918, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 205.08800446987152, + "p90": 215.32799303531647, + "p95": 218.81599724292755, + "p99": 256.25601410865784 + }, + "combine": { + "p50": 167.07199811935425, + "p90": 174.8799979686737, + "p95": 177.05599963665009, + "p99": 198.11199605464935 + }, + "roundtrip": { + "p50": 344.92799639701843, + "p90": 353.0240058898926, + "p95": 356.86400532722473, + "p99": 366.4320111274719 + }, + "isolatedSum": { + "p50": 372.16000258922577, + "p90": 390.2079910039902, + "p95": 395.87199687957764, + "p99": 454.3680101633072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 1841, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 312.3199939727783, + "p90": 319.0079927444458, + "p95": 321.82401418685913, + "p99": 328.15998792648315 + }, + "combine": { + "p50": 304.22401428222656, + "p90": 310.8159899711609, + "p95": 312.79999017715454, + "p99": 319.0079927444458 + }, + "roundtrip": { + "p50": 587.9999995231628, + "p90": 595.3599810600281, + "p95": 597.823977470398, + "p99": 602.9760241508484 + }, + 
"isolatedSum": { + "p50": 616.5440082550049, + "p90": 629.8239827156067, + "p95": 634.6240043640137, + "p99": 647.167980670929 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 3712, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 535.2960228919983, + "p90": 542.3039793968201, + "p95": 544.6400046348572, + "p99": 551.1360168457031 + }, + "combine": { + "p50": 519.4560289382935, + "p90": 527.072012424469, + "p95": 529.5040011405945, + "p99": 532.0960283279419 + }, + "roundtrip": { + "p50": 1025.2480506896973, + "p90": 1032.9279899597168, + "p95": 1035.6800556182861, + "p99": 1040.9599542617798 + }, + "isolatedSum": { + "p50": 1054.7520518302917, + "p90": 1069.375991821289, + "p95": 1074.1440057754517, + "p99": 1083.232045173645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 7407, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 986.0479831695557, + "p90": 991.6800260543823, + "p95": 993.7919974327087, + "p99": 1000.8640289306641 + }, + "combine": { + "p50": 962.88001537323, + "p90": 969.6959853172302, + "p95": 972.2239971160889, + "p99": 975.6479859352112 + }, + "roundtrip": { + "p50": 1924.1600036621094, + "p90": 1934.048056602478, + "p95": 1938.3039474487305, + "p99": 1954.9440145492554 + }, + "isolatedSum": { + "p50": 1948.9279985427856, + "p90": 1961.3760113716125, + "p95": 1966.0159945487976, + "p99": 1976.5120148658752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 14839, + "stragglerRank": 1, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a28ecbd", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_30ab8c37", + "comparisonKey": "3ce8376ced887a90", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:01.220197+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 103.74400019645691, + "p90": 110.11199653148651, + "p95": 112.67200112342834, + "p99": 116.60800129175186 + }, + "combine": { + "p50": 100.03200173377991, + "p90": 108.09600353240967, + "p95": 109.76000130176544, + "p99": 115.1999980211258 + }, + "roundtrip": { + "p50": 175.99999904632568, + "p90": 183.1039935350418, + "p95": 186.17600202560425, + "p99": 191.39200448989868 + }, + "isolatedSum": { + "p50": 203.77600193023682, + "p90": 218.20800006389618, + "p95": 222.4320024251938, + "p99": 231.80799931287766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 133.15199315547943, + "p90": 138.68799805641174, + "p95": 141.85599982738495, + "p99": 149.63200688362122 + }, + "combine": { + "p50": 121.18399888277054, + "p90": 124.92799758911133, + "p95": 127.61600315570831, + "p99": 132.38400220870972 + }, + "roundtrip": { + "p50": 226.9439995288849, + "p90": 235.03999412059784, + "p95": 237.56800591945648, + "p99": 245.7599937915802 + }, + "isolatedSum": { + "p50": 254.33599203824997, + "p90": 263.61599564552307, + "p95": 269.47200298309326, + "p99": 282.01600909233093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 190.91199338436127, + "p90": 197.53600656986237, + "p95": 
198.81600141525269, + "p99": 205.05599677562714 + }, + "combine": { + "p50": 167.52000153064728, + "p90": 171.90399765968323, + "p95": 173.69599640369415, + "p99": 178.6240041255951 + }, + "roundtrip": { + "p50": 329.3119966983795, + "p90": 335.58401465415955, + "p95": 337.95198798179626, + "p99": 343.4560000896454 + }, + "isolatedSum": { + "p50": 358.43199491500854, + "p90": 369.4400042295456, + "p95": 372.51199781894684, + "p99": 383.68000090122223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 299.26401376724243, + "p90": 307.2960078716278, + "p95": 309.7600042819977, + "p99": 318.1439936161041 + }, + "combine": { + "p50": 304.6720027923584, + "p90": 309.63200330734253, + "p95": 311.3279938697815, + "p99": 317.1199858188629 + }, + "roundtrip": { + "p50": 574.9760270118713, + "p90": 582.207977771759, + "p95": 584.6080183982849, + "p99": 589.6000266075134 + }, + "isolatedSum": { + "p50": 603.9360165596008, + "p90": 616.9280111789703, + "p95": 621.0879981517792, + "p99": 635.263979434967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 523.8400101661682, + "p90": 531.2640070915222, + "p95": 534.496009349823, + "p99": 540.6399965286255 + }, + "combine": { + "p50": 521.6320157051086, + "p90": 529.5680165290833, + "p95": 531.4239859580994, + "p99": 535.0080132484436 + }, + "roundtrip": { + "p50": 1019.0080404281616, + "p90": 1026.8160104751587, + "p95": 1029.8559665679932, + "p99": 1035.4880094528198 + }, + "isolatedSum": { + 
"p50": 1045.4720258712769, + "p90": 1060.8320236206055, + "p95": 1065.9199953079224, + "p99": 1075.648009777069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 965.2479887008667, + "p90": 972.7680087089539, + "p95": 975.8080244064331, + "p99": 981.4720153808594 + }, + "combine": { + "p50": 962.2399806976318, + "p90": 970.304012298584, + "p95": 972.0960259437561, + "p99": 975.0720262527466 + }, + "roundtrip": { + "p50": 1907.5839519500732, + "p90": 1915.6160354614258, + "p95": 1918.2720184326172, + "p99": 1925.055980682373 + }, + "isolatedSum": { + "p50": 1927.4879693984985, + "p90": 1943.0720210075378, + "p95": 1947.9040503501892, + "p99": 1956.544041633606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b7a0bec2", + "identity": "gb300|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||d27781632f6e008", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "d4036e968a88701e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:05.663962+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "Qwen3.5", + 
"shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "d27781632f6e008", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 350.271999835968, + "p90": 367.8719997406006, + "p95": 374.208003282547, + "p99": 398.6560106277466 + }, + "combine": { + "p50": 78.65600287914276, + "p90": 82.49600231647491, + "p95": 84.41600203514099, + "p99": 91.74399822950363 + }, + "roundtrip": { + "p50": 402.94399857521057, + "p90": 421.37598991394043, + "p95": 425.9200096130371, + "p99": 434.59200859069824 + }, + "isolatedSum": { + "p50": 428.9280027151108, + "p90": 450.3680020570755, + "p95": 458.624005317688, + "p99": 490.4000088572502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7647232, + "combineLogicalBytes": 15294464, + "fanoutMean": 3.646484375, + "recvTokensMax": 473, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 353.92001271247864, + "p90": 371.39201164245605, + "p95": 377.75999307632446, + "p99": 388.51198554039 + }, + "combine": { + "p50": 97.85600006580353, + "p90": 101.79200023412704, + "p95": 103.87200117111206, + "p99": 111.13599687814713 + }, + "roundtrip": { + "p50": 425.9839951992035, + "p90": 443.1680142879486, + "p95": 448.06399941444397, + "p99": 460.00000834465027 + }, + "isolatedSum": { + "p50": 451.77601277828217, + "p90": 473.1840118765831, + "p95": 481.6319942474365, + "p99": 499.64798241853714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 15192064, + "combineLogicalBytes": 30384128, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 413.85599970817566, + "p90": 428.99200320243835, + "p95": 432.3520064353943, + "p99": 443.0080056190491 + }, + "combine": { + "p50": 138.20800185203552, + "p90": 142.59199798107147, + "p95": 144.16000247001648, + "p99": 149.85600113868713 + }, + "roundtrip": { + "p50": 531.9679975509644, + "p90": 545.7599759101868, + "p95": 550.4000186920166, + "p99": 558.0160021781921 + }, + "isolatedSum": { + "p50": 552.0640015602112, + "p90": 571.5840011835098, + "p95": 576.5120089054108, + "p99": 592.8640067577362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 30371840, + "combineLogicalBytes": 60743680, + "fanoutMean": 3.62060546875, + "recvTokensMax": 1865, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 550.3680109977722, + "p90": 565.280020236969, + "p95": 568.8319802284241, + "p99": 575.1039981842041 + }, + "combine": { + "p50": 214.91199731826782, + "p90": 219.200000166893, + 
"p95": 220.89600563049316, + "p99": 224.8000055551529 + }, + "roundtrip": { + "p50": 762.2079849243164, + "p90": 777.2160172462463, + "p95": 781.7280292510986, + "p99": 792.639970779419 + }, + "isolatedSum": { + "p50": 765.28000831604, + "p90": 784.480020403862, + "p95": 789.7279858589172, + "p99": 799.904003739357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 60858368, + "combineLogicalBytes": 121716736, + "fanoutMean": 3.62744140625, + "recvTokensMax": 3730, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 846.1120128631592, + "p90": 858.6239814758301, + "p95": 862.8159761428833, + "p99": 869.0239787101746 + }, + "combine": { + "p50": 463.0720019340515, + "p90": 470.2399969100952, + "p95": 472.6400077342987, + "p99": 477.05599665641785 + }, + "roundtrip": { + "p50": 1266.9119834899902, + "p90": 1284.0319871902466, + "p95": 1287.3599529266357, + "p99": 1296.8319654464722 + }, + "isolatedSum": { + "p50": 1309.1840147972107, + "p90": 1328.8639783859253, + "p95": 1335.455983877182, + "p99": 1346.0799753665924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 121618432, + "combineLogicalBytes": 243236864, + "fanoutMean": 3.62451171875, + "recvTokensMax": 7446, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1438.912034034729, + "p90": 1451.9360065460205, + "p95": 1456.7680358886719, + "p99": 1486.6559505462646 + }, + "combine": { + "p50": 841.6000008583069, + "p90": 851.9039750099182, + "p95": 855.3280234336853, + "p99": 861.1840009689331 + }, + "roundtrip": { + "p50": 2267.3919200897217, + "p90": 2280.895948410034, + "p95": 2284.991979598999, + "p99": 2297.4400520324707 + }, + "isolatedSum": { + "p50": 2280.512034893036, + "p90": 2303.8399815559387, + "p95": 2312.096059322357, + "p99": 2347.8399515151978 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 243171328, + "combineLogicalBytes": 486342656, + "fanoutMean": 3.62353515625, + "recvTokensMax": 14871, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e4b39ac", + "identity": "gb300|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||6a3023945a551d7", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "20156941b17f5f56", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:16.173634+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6a3023945a551d7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 319.2000091075897, + "p90": 338.01600337028503, + "p95": 344.543993473053, + "p99": 364.54400420188904 + }, + "combine": { + "p50": 82.78399705886841, + "p90": 115.61600118875504, + "p95": 133.53599607944489, + "p99": 152.3520052433014 + }, + "roundtrip": { + "p50": 377.75999307632446, + "p90": 397.024005651474, + "p95": 401.8239974975586, + "p99": 419.5840060710907 + }, + "isolatedSum": { + "p50": 401.98400616645813, + "p90": 453.63200455904007, + "p95": 478.07998955249786, + "p99": 516.8960094451904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9548800, + "combineLogicalBytes": 19097600, + "fanoutMean": 3.642578125, + "recvTokensMax": 471, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 327.42398977279663, + "p90": 345.15199065208435, + "p95": 362.3040020465851, + "p99": 431.2959909439087 + }, + "combine": { + "p50": 103.7760004401207, + "p90": 113.69600147008896, + "p95": 117.27999895811081, + "p99": 131.00799918174744 + }, + "roundtrip": { + "p50": 406.5600037574768, + "p90": 421.7599928379059, + "p95": 436.19200587272644, + "p99": 516.864001750946 + }, + "isolatedSum": { + "p50": 431.1999902129173, + "p90": 458.8479921221733, + "p95": 479.5840010046959, + "p99": 562.3039901256561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18990080, + "combineLogicalBytes": 37980160, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 399.6799886226654, + "p90": 413.8239920139313, + "p95": 418.65599155426025, + "p99": 462.46400475502014 + }, + "combine": { + "p50": 144.83200013637543, + "p90": 149.21599626541138, + "p95": 151.7760008573532, + "p99": 166.24000668525696 + }, + "roundtrip": { + "p50": 525.1839756965637, + "p90": 540.7040119171143, + "p95": 547.7120280265808, + "p99": 610.4639768600464 + }, + "isolatedSum": { + "p50": 544.5119887590408, + "p90": 563.0399882793427, + "p95": 570.4319924116135, + "p99": 628.7040114402771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 37888000, + "combineLogicalBytes": 75776000, + "fanoutMean": 3.61328125, + "recvTokensMax": 1867, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 559.0400099754333, + "p90": 574.2080211639404, + "p95": 582.2399854660034, + "p99": 643.1999802589417 + }, + "combine": { + "p50": 257.63198733329773, + "p90": 264.8000121116638, + "p95": 267.5839960575104, + "p99": 272.19200134277344 + }, + "roundtrip": { + "p50": 798.8799810409546, + "p90": 809.8239898681641, + "p95": 813.7279748916626, + "p99": 820.0640082359314 + }, + "isolatedSum": { + "p50": 816.6719973087311, + "p90": 839.0080332756042, + "p95": 849.8239815235138, + "p99": 915.3919816017151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76037120, + "combineLogicalBytes": 152074240, + "fanoutMean": 3.625732421875, + "recvTokensMax": 3722, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 879.9039721488953, + "p90": 889.7600173950195, + "p95": 893.0559754371643, + "p99": 900.767982006073 + }, + "combine": { + "p50": 486.1440062522888, + "p90": 496.6079890727997, + "p95": 513.3119821548462, + "p99": 544.704020023346 + }, + "roundtrip": { + "p50": 
1340.0319814682007, + "p90": 1355.9679985046387, + "p95": 1368.5760498046875, + "p99": 1397.760033607483 + }, + "isolatedSum": { + "p50": 1366.047978401184, + "p90": 1386.3680064678192, + "p95": 1406.3679575920105, + "p99": 1445.472002029419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 152058880, + "combineLogicalBytes": 304117760, + "fanoutMean": 3.6253662109375, + "recvTokensMax": 7453, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1579.2319774627686, + "p90": 1587.9679918289185, + "p95": 1590.9759998321533, + "p99": 1599.1359949111938 + }, + "combine": { + "p50": 925.5359768867493, + "p90": 930.5920004844666, + "p95": 932.2559833526611, + "p99": 935.4559779167175 + }, + "roundtrip": { + "p50": 2468.5120582580566, + "p90": 2477.344036102295, + "p95": 2480.191946029663, + "p99": 2488.3201122283936 + }, + "isolatedSum": { + "p50": 2504.767954349518, + "p90": 2518.559992313385, + "p95": 2523.2319831848145, + "p99": 2534.5919728279114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 304179200, + "combineLogicalBytes": 608358400, + "fanoutMean": 3.6260986328125, + "recvTokensMax": 14884, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ac1bf56d", + "identity": "gb300|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "9ee84c24ef4594d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:26.115278+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 337.2479975223541, + "p90": 354.52800989151, + "p95": 358.4960103034973, + "p99": 370.62400579452515 + }, + "combine": { + "p50": 91.77599847316742, + "p90": 95.32800316810608, + "p95": 96.99200093746185, + "p99": 102.04800218343735 + }, + "roundtrip": { + "p50": 404.06399965286255, + "p90": 420.51199078559875, + "p95": 423.8080084323883, + "p99": 429.85600233078003 + }, + "isolatedSum": { + "p50": 429.02399599552155, + "p90": 449.8560130596161, + "p95": 455.48801124095917, + "p99": 472.6720079779625 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 11470848, + "combineLogicalBytes": 22941696, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 346.72001004219055, + "p90": 367.5839900970459, + "p95": 392.5119936466217, + "p99": 417.08800196647644 + }, + "combine": { + "p50": 112.57600039243698, + "p90": 145.34400403499603, + "p95": 151.19999647140503, + "p99": 168.2880073785782 + }, + "roundtrip": { + "p50": 435.9999895095825, + "p90": 466.3679897785187, + "p95": 487.67998814582825, + "p99": 515.1360034942627 + }, + "isolatedSum": { + "p50": 459.29601043462753, + "p90": 512.9279941320419, + "p95": 543.7119901180267, + "p99": 585.3760093450546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22824960, + "combineLogicalBytes": 45649920, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 428.76800894737244, + "p90": 461.7919921875, + "p95": 478.0159890651703, + "p99": 518.2399749755859 + }, + "combine": { + "p50": 155.90399503707886, + "p90": 182.3039948940277, + "p95": 192.35199689865112, + "p99": 219.42399442195892 + }, + "roundtrip": { + "p50": 567.0400261878967, + "p90": 595.2640175819397, + "p95": 613.9839887619019, + "p99": 647.711992263794 + }, + "isolatedSum": { + "p50": 584.6720039844513, + "p90": 644.0959870815277, + "p95": 670.3679859638214, + "p99": 737.6639693975449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45533184, + "combineLogicalBytes": 91066368, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 602.4960279464722, + "p90": 
617.5680160522461, + "p95": 622.5280165672302, + "p99": 642.8160071372986 + }, + "combine": { + "p50": 288.4480059146881, + "p90": 314.7200047969818, + "p95": 326.78401470184326, + "p99": 350.816011428833 + }, + "roundtrip": { + "p50": 855.9359908103943, + "p90": 871.6480135917664, + "p95": 883.8719725608826, + "p99": 903.0719995498657 + }, + "isolatedSum": { + "p50": 890.9440338611603, + "p90": 932.2880208492279, + "p95": 949.3120312690735, + "p99": 993.6320185661316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91029504, + "combineLogicalBytes": 182059008, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 941.536009311676, + "p90": 952.9920220375061, + "p95": 955.7120203971863, + "p99": 963.5199904441833 + }, + "combine": { + "p50": 503.55201959609985, + "p90": 509.3119740486145, + "p95": 510.9120011329651, + "p99": 513.4080052375793 + }, + "roundtrip": { + "p50": 1414.5599603652954, + "p90": 1427.456021308899, + "p95": 1431.1679601669312, + "p99": 1439.039945602417 + }, + "isolatedSum": { + "p50": 1445.0880289077759, + "p90": 1462.3039960861206, + "p95": 1466.6240215301514, + "p99": 1476.9279956817627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 182224896, + "combineLogicalBytes": 364449792, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1719.7760343551636, + "p90": 1744.9599504470825, + "p95": 1758.9759826660156, + "p99": 1784.5760583877563 + }, + "combine": { + "p50": 943.7119960784912, + "p90": 966.1440253257751, + "p95": 983.1680059432983, + "p99": 1002.6559829711914 + }, + "roundtrip": { + "p50": 2628.0319690704346, + "p90": 2644.831895828247, + "p95": 2659.7440242767334, + "p99": 
2681.1840534210205 + }, + "isolatedSum": { + "p50": 2663.488030433655, + "p90": 2711.1039757728577, + "p95": 2742.143988609314, + "p99": 2787.2320413589478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 363976704, + "combineLogicalBytes": 727953408, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1e5f3082", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_6b2cb596", + "comparisonKey": "41e3b49944131d9c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:08.953726+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 108.92800241708755, + "p90": 114.46399986743927, + "p95": 118.07999759912491, + "p99": 124.1919994354248 + }, + "combine": { + "p50": 95.58399766683578, + "p90": 99.71199929714203, + "p95": 102.27199643850327, + "p99": 107.39199817180634 + }, + "roundtrip": { + "p50": 245.85600197315216, + "p90": 257.4720084667206, + "p95": 262.04800605773926, + "p99": 268.5759961605072 + }, + "isolatedSum": { + "p50": 204.51200008392334, + "p90": 214.1759991645813, + "p95": 220.35199403762817, + "p99": 231.58399760723114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 135.8720064163208, + "p90": 143.77599954605103, + "p95": 150.2079963684082, + "p99": 176.4480024576187 + }, + "combine": { + "p50": 116.7680025100708, + "p90": 121.50400131940842, + "p95": 124.89599734544754, + "p99": 153.888002038002 + }, + "roundtrip": { + "p50": 284.960001707077, + "p90": 293.63200068473816, + "p95": 299.3279993534088, + "p99": 345.2480137348175 + }, + "isolatedSum": { + "p50": 252.6400089263916, + "p90": 265.28000086545944, + "p95": 275.10399371385574, + "p99": 330.3360044956207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26629120, + "combineLogicalBytes": 53258240, + 
"fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 191.0720020532608, + "p90": 197.88800179958344, + "p95": 199.90399479866028, + "p99": 210.7200026512146 + }, + "combine": { + "p50": 162.91199624538422, + "p90": 181.66400492191315, + "p95": 184.79999899864197, + "p99": 188.22400271892548 + }, + "roundtrip": { + "p50": 428.51200699806213, + "p90": 435.61598658561707, + "p95": 438.04800510406494, + "p99": 441.5999948978424 + }, + "isolatedSum": { + "p50": 353.983998298645, + "p90": 379.5520067214966, + "p95": 384.70399379730225, + "p99": 398.9440053701401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53122048, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 297.40801453590393, + "p90": 304.83201146125793, + "p95": 307.2960078716278, + "p99": 332.35201239585876 + }, + "combine": { + "p50": 300.86401104927063, + "p90": 307.45598673820496, + "p95": 311.5200102329254, + "p99": 334.6239924430847 + }, + "roundtrip": { + "p50": 733.4079742431641, + "p90": 741.5040135383606, + "p95": 750.4000067710876, + "p99": 775.3919959068298 + }, + "isolatedSum": { + "p50": 598.2720255851746, + "p90": 612.2879981994629, + "p95": 618.8160181045532, + "p99": 666.9760048389435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106201088, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 504.7039985656738, + "p90": 512.4160051345825, + "p95": 519.0719962120056, + "p99": 541.6960120201111 + }, + "combine": { + 
"p50": 517.4720287322998, + "p90": 523.7119793891907, + "p95": 527.2639989852905, + "p99": 533.7280035018921 + }, + "roundtrip": { + "p50": 1330.0479650497437, + "p90": 1337.9520177841187, + "p95": 1340.5760526657104, + "p99": 1352.1920442581177 + }, + "isolatedSum": { + "p50": 1022.1760272979736, + "p90": 1036.1279845237732, + "p95": 1046.3359951972961, + "p99": 1075.4240155220032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212595712, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 928.384006023407, + "p90": 933.6320161819458, + "p95": 935.4239702224731, + "p99": 940.5760169029236 + }, + "combine": { + "p50": 959.9679708480835, + "p90": 965.9839868545532, + "p95": 967.4239754676819, + "p99": 971.9359874725342 + }, + "roundtrip": { + "p50": 2495.8720207214355, + "p90": 2501.983880996704, + "p95": 2503.999948501587, + "p99": 2512.0959281921387 + }, + "isolatedSum": { + "p50": 1888.3519768714905, + "p90": 1899.616003036499, + "p95": 1902.847945690155, + "p99": 1912.5120043754578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424639488, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6ad2b94e", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "92a296c344418f84", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:20.982389+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 332.4800133705139, + "p90": 349.1840064525604, + "p95": 352.54400968551636, + "p99": 363.23198676109314 + }, + "combine": { + "p50": 95.36000341176987, + "p90": 99.16800260543823, + "p95": 100.99200159311295, + "p99": 109.21599715948105 + }, + "roundtrip": { + "p50": 403.2000005245209, + "p90": 418.68799924850464, + "p95": 423.1039881706238, + "p99": 439.2319917678833 + }, + "isolatedSum": { + 
"p50": 427.8400167822838, + "p90": 448.35200905799866, + "p95": 453.5360112786293, + "p99": 472.4479839205742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 345.2160060405731, + "p90": 359.8720133304596, + "p95": 365.4080033302307, + "p99": 441.6640102863312 + }, + "combine": { + "p50": 115.9679964184761, + "p90": 119.99999731779099, + "p95": 121.76000326871872, + "p99": 128.09599936008453 + }, + "roundtrip": { + "p50": 443.87200474739075, + "p90": 457.8239917755127, + "p95": 461.9840085506439, + "p99": 497.50399589538574 + }, + "isolatedSum": { + "p50": 461.1840024590492, + "p90": 479.8720106482506, + "p95": 487.16800659894943, + "p99": 569.7600096464157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26629120, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 429.56799268722534, + "p90": 442.3680007457733, + "p95": 447.3919868469238, + "p99": 510.49602031707764 + }, + "combine": { + "p50": 160.51200032234192, + "p90": 165.56799411773682, + "p95": 167.7439957857132, + "p99": 174.23999309539795 + }, + "roundtrip": { + "p50": 585.6639742851257, + "p90": 598.3359813690186, + "p95": 602.7519702911377, + "p99": 663.5199785232544 + }, + "isolatedSum": { + "p50": 590.0799930095673, + "p90": 607.9359948635101, + "p95": 615.135982632637, + "p99": 684.7360134124756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53122048, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 622.7200031280518, + "p90": 634.2080235481262, + "p95": 637.2799873352051, + "p99": 644.0640091896057 + }, + "combine": { + "p50": 297.8239953517914, + "p90": 302.7839958667755, + "p95": 305.05600571632385, + "p99": 310.43198704719543 + }, + "roundtrip": { + "p50": 889.7600173950195, + "p90": 903.0399918556213, + "p95": 911.1040234565735, + "p99": 954.5599818229675 + }, + "isolatedSum": { + "p50": 920.5439984798431, + "p90": 936.9920194149017, + "p95": 942.3359930515289, + "p99": 954.4959962368011 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106201088, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 986.2719774246216, + "p90": 998.4959959983826, + "p95": 1003.3919811248779, + "p99": 1077.50403881073 + }, + "combine": { + "p50": 517.2160267829895, + "p90": 523.4559774398804, + "p95": 526.2079834938049, + "p99": 537.5360250473022 + }, + "roundtrip": { + "p50": 1473.9840030670166, + "p90": 1484.4160079956055, + "p95": 1489.5039796829224, + "p99": 1555.2959442138672 + }, + "isolatedSum": { + "p50": 1503.488004207611, + "p90": 1521.951973438263, + "p95": 1529.5999646186829, + "p99": 1615.0400638580322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212595712, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1847.6159572601318, + "p90": 1853.5679578781128, + "p95": 1855.296015739441, + "p99": 1859.7760200500488 + }, + "combine": { + "p50": 960.0319862365723, + "p90": 966.1440253257751, + "p95": 968.8000082969666, + "p99": 990.1120066642761 + }, + "roundtrip": 
{ + "p50": 2776.1600017547607, + "p90": 2783.071994781494, + "p95": 2785.8879566192627, + "p99": 2794.111967086792 + }, + "isolatedSum": { + "p50": 2807.647943496704, + "p90": 2819.711983203888, + "p95": 2824.0960240364075, + "p99": 2849.888026714325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424639488, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-29347570", + "identity": "gb300|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|4|prefill|normal|none|none|0|tuned||b208ea04b16e80b", + "colorKey": "gb300_237a6c3a", + "comparisonKey": "c7b542cf648d8b37", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:56.355847+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + 
"traceSignature": "b208ea04b16e80b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 350.5280017852783, + "p90": 378.1760036945343, + "p95": 392.4480080604553, + "p99": 413.567990064621 + }, + "combine": { + "p50": 99.13600236177444, + "p90": 144.48000490665436, + "p95": 150.62400698661804, + "p99": 157.95199573040009 + }, + "roundtrip": { + "p50": 420.0960099697113, + "p90": 440.6079947948456, + "p95": 454.23999428749084, + "p99": 486.1440062522888 + }, + "isolatedSum": { + "p50": 449.66400414705276, + "p90": 522.6560086011887, + "p95": 543.0720150470734, + "p99": 571.5199857950211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13310976, + "combineLogicalBytes": 26621952, + "fanoutMean": 3.626953125, + "recvTokensMax": 475, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 365.9200072288513, + "p90": 415.16798734664917, + "p95": 424.80000853538513, + "p99": 495.2319860458374 + }, + "combine": { + "p50": 118.40000003576279, + "p90": 122.65600264072418, + "p95": 124.95999783277512, + "p99": 131.80799782276154 + }, + "roundtrip": { + "p50": 454.4000029563904, + "p90": 471.3599979877472, + "p95": 479.74398732185364, + "p99": 602.8479933738708 + }, + "isolatedSum": { + "p50": 484.3200072646141, + "p90": 537.8239899873734, + "p95": 549.7600063681602, + "p99": 627.0399838685989 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 26664960, + "combineLogicalBytes": 53329920, + "fanoutMean": 3.6328125, + "recvTokensMax": 944, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 449.1199851036072, + "p90": 487.64801025390625, + "p95": 496.70401215553284, + "p99": 567.1679973602295 + }, + "combine": { + "p50": 163.4880006313324, + "p90": 169.40799355506897, + "p95": 172.41600155830383, + "p99": 180.67200481891632 + }, + "roundtrip": { + "p50": 604.2559742927551, + "p90": 657.9520106315613, + "p95": 671.2639927864075, + "p99": 717.1519994735718 + }, + "isolatedSum": { + "p50": 612.6079857349396, + "p90": 657.0560038089752, + "p95": 669.1200137138367, + "p99": 747.8400021791458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53265408, + "combineLogicalBytes": 106530816, + "fanoutMean": 3.62841796875, + "recvTokensMax": 1882, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 632.1280002593994, + "p90": 675.0079989433289, + "p95": 701.2479901313782, + "p99": 736.8000149726868 + }, + "combine": { + "p50": 301.9840121269226, + "p90": 310.7199966907501, + "p95": 314.5279884338379, + "p99": 334.49599146842957 + }, + "roundtrip": { + "p50": 895.8719968795776, + "p90": 908.4799885749817, + "p95": 913.6319756507874, + "p99": 929.6000003814697 + }, + "isolatedSum": { + "p50": 934.112012386322, + "p90": 985.727995634079, + "p95": 1015.7759785652161, + "p99": 1071.2960064411163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106315776, + "combineLogicalBytes": 212631552, + "fanoutMean": 3.62109375, + "recvTokensMax": 3729, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 998.4639883041382, + "p90": 1048.1280088424683, + "p95": 
1054.6879768371582, + "p99": 1063.040018081665 + }, + "combine": { + "p50": 518.6880230903625, + "p90": 524.2879986763, + "p95": 526.0800123214722, + "p99": 529.1200280189514 + }, + "roundtrip": { + "p50": 1483.7119579315186, + "p90": 1545.3439950942993, + "p95": 1552.4159669876099, + "p99": 1562.399983406067 + }, + "isolatedSum": { + "p50": 1517.1520113945007, + "p90": 1572.4160075187683, + "p95": 1580.7679891586304, + "p99": 1592.1600461006165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212316160, + "combineLogicalBytes": 424632320, + "fanoutMean": 3.61572265625, + "recvTokensMax": 7430, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1847.6799726486206, + "p90": 1854.848027229309, + "p95": 1858.3359718322754, + "p99": 1865.18394947052 + }, + "combine": { + "p50": 960.3840112686157, + "p90": 968.608021736145, + "p95": 970.5920219421387, + "p99": 973.5040068626404 + }, + "roundtrip": { + "p50": 2774.463891983032, + "p90": 2785.504102706909, + "p95": 2801.215887069702, + "p99": 2830.2719593048096 + }, + "isolatedSum": { + "p50": 2808.0639839172363, + "p90": 2823.456048965454, + "p95": 2828.927993774414, + "p99": 2838.6879563331604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424037376, + "combineLogicalBytes": 848074752, + "fanoutMean": 3.61065673828125, + "recvTokensMax": 14815, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b9e3c26a", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_b8a1aafe", + "comparisonKey": "8ba2075bde645a01", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:41.462187+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep", + 
"phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 97.28000313043594, + "p90": 103.7760004401207, + "p95": 106.81600123643875, + "p99": 114.52800035476685 + }, + "combine": { + "p50": 96.89600020647049, + "p90": 101.21600329875946, + "p95": 103.61599922180176, + "p99": 109.27999764680862 + }, + "roundtrip": { + "p50": 232.83199965953827, + "p90": 
244.54399943351746, + "p95": 247.39199876785278, + "p99": 253.4720003604889 + }, + "isolatedSum": { + "p50": 194.17600333690643, + "p90": 204.99200373888016, + "p95": 210.4320004582405, + "p99": 223.80799800157547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 123.45600128173828, + "p90": 129.15199995040894, + "p95": 131.48799538612366, + "p99": 138.33600282669067 + }, + "combine": { + "p50": 117.91999638080597, + "p90": 122.65600264072418, + "p95": 124.35200065374374, + "p99": 128.9599984884262 + }, + "roundtrip": { + "p50": 272.3200023174286, + "p90": 278.75199913978577, + "p95": 282.368004322052, + "p99": 290.49599170684814 + }, + "isolatedSum": { + "p50": 241.37599766254425, + "p90": 251.80800259113312, + "p95": 255.8399960398674, + "p99": 267.2960013151169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26629120, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 177.824005484581, + "p90": 183.71200561523438, + "p95": 185.7919991016388, + "p99": 191.13600254058838 + }, + "combine": { + "p50": 163.55200111865997, + "p90": 180.92800676822662, + "p95": 185.05600094795227, + "p99": 191.71200692653656 + }, + "roundtrip": { + "p50": 413.88800740242004, + "p90": 420.0960099697113, + "p95": 422.432005405426, + "p99": 427.839994430542 + }, + "isolatedSum": { + "p50": 341.37600660324097, + "p90": 364.640012383461, + "p95": 370.84800004959106, + "p99": 382.84800946712494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53122048, + "combineLogicalBytes": 106244096, + "fanoutMean": 
3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 284.7679853439331, + "p90": 291.23198986053467, + "p95": 293.3439910411835, + "p99": 299.3600070476532 + }, + "combine": { + "p50": 301.05599761009216, + "p90": 306.2399923801422, + "p95": 308.4160089492798, + "p99": 311.8079900741577 + }, + "roundtrip": { + "p50": 719.1359996795654, + "p90": 727.2319793701172, + "p95": 730.5279970169067, + "p99": 737.8559708595276 + }, + "isolatedSum": { + "p50": 585.8239829540253, + "p90": 597.4719822406769, + "p95": 601.7599999904633, + "p99": 611.1679971218109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106201088, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 491.07199907302856, + "p90": 497.98399209976196, + "p95": 500.3200173377991, + "p99": 505.9199929237366 + }, + "combine": { + "p50": 518.0799961090088, + "p90": 523.9359736442566, + "p95": 525.6320238113403, + "p99": 532.1919918060303 + }, + "roundtrip": { + "p50": 1317.0239925384521, + "p90": 1323.1359720230103, + "p95": 1324.895977973938, + "p99": 1330.0800323486328 + }, + "isolatedSum": { + "p50": 1009.1519951820374, + "p90": 1021.9199657440186, + "p95": 1025.9520411491394, + "p99": 1038.1119847297668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212595712, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 910.8160138130188, + "p90": 917.5040125846863, + "p95": 920.7040071487427, + "p99": 926.3039827346802 + }, + "combine": { + "p50": 
959.4879746437073, + "p90": 965.9839868545532, + "p95": 968.8959717750549, + "p99": 979.8399806022644 + }, + "roundtrip": { + "p50": 2478.368043899536, + "p90": 2484.19189453125, + "p95": 2485.759973526001, + "p99": 2489.8879528045654 + }, + "isolatedSum": { + "p50": 1870.303988456726, + "p90": 1883.4879994392395, + "p95": 1889.5999789237976, + "p99": 1906.1439633369446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424639488, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7e3a1c52", + "identity": "gb300|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "c1d0b67251736b2c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:49.752985+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03f98832f76b043", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.72000181674957, + "p90": 125.31200051307678, + "p95": 129.82399761676788, + "p99": 139.3599957227707 + }, + "combine": { + "p50": 106.75200074911118, + "p90": 111.77600175142288, + "p95": 115.9679964184761, + "p99": 124.22399967908859 + }, + "roundtrip": { + "p50": 193.31200420856476, + "p90": 204.25599813461304, + "p95": 210.9760046005249, + "p99": 243.0720031261444 + }, + "isolatedSum": { + "p50": 221.47200256586075, + "p90": 237.08800226449966, + "p95": 245.791994035244, + "p99": 263.5839954018593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.5840061903, + "p90": 159.39199924468994, + "p95": 165.0879979133606, + "p99": 199.5840072631836 + }, + "combine": { + "p50": 144.67200636863708, + "p90": 150.30400454998016, + "p95": 157.24800527095795, + "p99": 183.9359998703003 + }, + "roundtrip": { + "p50": 262.36799359321594, + "p90": 272.2879946231842, + "p95": 278.4000039100647, + "p99": 333.75999331474304 + }, + "isolatedSum": { + "p50": 
292.2560125589371, + "p90": 309.6960037946701, + "p95": 322.33600318431854, + "p99": 383.5200071334839 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 197.28000462055206, + "p90": 208.00000429153442, + "p95": 212.67199516296387, + "p99": 257.31199979782104 + }, + "combine": { + "p50": 212.96000480651855, + "p90": 220.73599696159363, + "p95": 222.88000583648682, + "p99": 251.52000784873962 + }, + "roundtrip": { + "p50": 381.632000207901, + "p90": 391.5199935436249, + "p95": 396.8319892883301, + "p99": 440.2239918708801 + }, + "isolatedSum": { + "p50": 410.2400094270706, + "p90": 428.73600125312805, + "p95": 435.5520009994507, + "p99": 508.83200764656067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 297.5040078163147, + "p90": 309.05601382255554, + "p95": 315.2320086956024, + "p99": 345.5680012702942 + }, + "combine": { + "p50": 380.8319866657257, + "p90": 391.1040127277374, + "p95": 393.5360014438629, + "p99": 425.6959855556488 + }, + "roundtrip": { + "p50": 616.864025592804, + "p90": 628.2240152359009, + "p95": 635.1040005683899, + "p99": 690.7200217247009 + }, + "isolatedSum": { + "p50": 678.3359944820404, + "p90": 700.160026550293, + "p95": 708.7680101394653, + "p99": 771.263986825943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 508.7360143661499, + "p90": 520.9280252456665, + "p95": 527.1679759025574, + "p99": 561.7600083351135 + }, + "combine": { + "p50": 801.4079928398132, + "p90": 811.0719919204712, + "p95": 813.2799863815308, + "p99": 835.1039886474609 + }, + "roundtrip": { + "p50": 1276.0319709777832, + "p90": 1288.8000011444092, + "p95": 1293.727993965149, + "p99": 1317.952036857605 + }, + "isolatedSum": { + "p50": 1310.1440072059631, + "p90": 1332.0000171661377, + "p95": 1340.4479622840881, + "p99": 1396.8639969825745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 933.6320161819458, + "p90": 948.5440254211426, + "p95": 956.5119743347168, + "p99": 971.3919758796692 + }, + "combine": { + "p50": 1506.592035293579, + "p90": 1515.712022781372, + "p95": 1517.8879499435425, + "p99": 1525.3759622573853 + }, + "roundtrip": { + "p50": 2405.503988265991, + "p90": 2421.95200920105, + "p95": 2426.815986633301, + "p99": 2460.479974746704 + }, + "isolatedSum": { + "p50": 2440.224051475525, + "p90": 2464.2560482025146, + "p95": 2474.3999242782593, + "p99": 2496.7679381370544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0457a436", + "identity": "gb300|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "e008e386a7e2bc41", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:50.640273+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a9df48e6438e77a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.98399752378464, + "p90": 132.4480026960373, + "p95": 143.99999380111694, + "p99": 183.96799266338348 + }, + "combine": { + "p50": 112.5119999051094, + "p90": 121.0239976644516, + 
"p95": 125.40799379348755, + "p99": 159.19999778270721 + }, + "roundtrip": { + "p50": 205.63200116157532, + "p90": 216.44799411296844, + "p95": 224.70399737358093, + "p99": 262.719988822937 + }, + "isolatedSum": { + "p50": 234.49599742889404, + "p90": 253.4720003604889, + "p95": 269.4079875946045, + "p99": 343.1679904460907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.59999918937683, + "p90": 158.65600109100342, + "p95": 163.5199934244156, + "p99": 172.70399630069733 + }, + "combine": { + "p50": 149.31200444698334, + "p90": 157.85600244998932, + "p95": 160.12799739837646, + "p99": 181.0240000486374 + }, + "roundtrip": { + "p50": 274.52799677848816, + "p90": 283.26401114463806, + "p95": 286.75198554992676, + "p99": 315.5519962310791 + }, + "isolatedSum": { + "p50": 298.91200363636017, + "p90": 316.51200354099274, + "p95": 323.64799082279205, + "p99": 353.7279963493347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.2959989309311, + "p90": 215.61600267887115, + "p95": 218.62399578094482, + "p99": 233.5679978132248 + }, + "combine": { + "p50": 242.46400594711304, + "p90": 249.59999322891235, + "p95": 254.04798984527588, + "p99": 258.4640085697174 + }, + "roundtrip": { + "p50": 402.46400237083435, + "p90": 410.71999073028564, + "p95": 414.91198539733887, + "p99": 429.9199879169464 + }, + "isolatedSum": { + "p50": 449.7600048780441, + "p90": 465.2159959077835, + "p95": 472.6719856262207, + "p99": 492.0320063829422 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 320.8639919757843, + "p90": 329.3760120868683, + "p95": 332.28799700737, + "p99": 364.9600148200989 + }, + "combine": { + "p50": 465.4400050640106, + "p90": 474.68799352645874, + "p95": 476.6719937324524, + "p99": 489.50400948524475 + }, + "roundtrip": { + "p50": 716.1920070648193, + "p90": 725.9200215339661, + "p95": 729.3440103530884, + "p99": 757.2159767150879 + }, + "isolatedSum": { + "p50": 786.3039970397949, + "p90": 804.064005613327, + "p95": 808.9599907398224, + "p99": 854.4640243053436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 554.8160076141357, + "p90": 564.0000104904175, + "p95": 568.2560205459595, + "p99": 578.1440138816833 + }, + "combine": { + "p50": 833.9840173721313, + "p90": 843.9040184020996, + "p95": 846.7199802398682, + "p99": 873.3119964599609 + }, + "roundtrip": { + "p50": 1354.4319868087769, + "p90": 1364.6399974822998, + "p95": 1367.9360151290894, + "p99": 1380.8000087738037 + }, + "isolatedSum": { + "p50": 1388.800024986267, + "p90": 1407.904028892517, + "p95": 1414.9760007858276, + "p99": 1451.4560103416443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1027.2959470748901, + "p90": 1040.4160022735596, + "p95": 1046.1119413375854, + "p99": 1070.8160400390625 + }, + "combine": { + "p50": 1572.4480152130127, + "p90": 1581.5999507904053, + "p95": 1583.9680433273315, + "p99": 1609.663963317871 + }, + "roundtrip": { + "p50": 2552.1280765533447, + "p90": 2564.5759105682373, + "p95": 2568.4800148010254, + "p99": 2591.1359786987305 + }, + "isolatedSum": { + "p50": 2599.743962287903, + "p90": 2622.015953063965, + "p95": 2630.079984664917, + "p99": 2680.4800033569336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-529dcc68", + "identity": "gb300|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "0958f6765b1be546", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:51.203047+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.80000340938568, + "p90": 129.88799810409546, + "p95": 134.36800241470337, + "p99": 151.19999647140503 + }, + "combine": { + "p50": 120.09599804878235, + "p90": 125.31200051307678, + "p95": 132.86399841308594, + "p99": 166.62399470806122 + }, + "roundtrip": { + "p50": 212.16000616550446, + "p90": 221.53599560260773, + "p95": 224.41600263118744, + "p99": 265.9519910812378 + }, + "isolatedSum": { + "p50": 240.89600145816803, + "p90": 255.19999861717224, + "p95": 267.2320008277893, + "p99": 317.82399117946625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 153.43999862670898, + "p90": 163.10399770736694, + "p95": 166.59200191497803, + "p99": 174.6560037136078 + }, + "combine": { + "p50": 159.10400450229645, + "p90": 166.04800522327423, + "p95": 169.24799978733063, + "p99": 173.0560064315796 + }, + "roundtrip": { + "p50": 
289.08801078796387, + "p90": 297.63200879096985, + "p95": 300.3840148448944, + "p99": 309.6959888935089 + }, + "isolatedSum": { + "p50": 312.54400312900543, + "p90": 329.1520029306412, + "p95": 335.84000170230865, + "p99": 347.7120101451874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 222.24000096321106, + "p90": 245.56800723075867, + "p95": 261.6960108280182, + "p99": 282.04798698425293 + }, + "combine": { + "p50": 258.04799795150757, + "p90": 268.3199942111969, + "p95": 270.6559896469116, + "p99": 284.2879891395569 + }, + "roundtrip": { + "p50": 424.67200756073, + "p90": 433.3760142326355, + "p95": 436.5760087966919, + "p99": 468.1600034236908 + }, + "isolatedSum": { + "p50": 480.2879989147186, + "p90": 513.8880014419556, + "p95": 532.3520004749298, + "p99": 566.3359761238098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 344.7040021419525, + "p90": 356.1280071735382, + "p95": 362.43200302124023, + "p99": 389.98401165008545 + }, + "combine": { + "p50": 471.8720018863678, + "p90": 480.1279902458191, + "p95": 482.40000009536743, + "p99": 518.3680057525635 + }, + "roundtrip": { + "p50": 779.20001745224, + "p90": 786.9120240211487, + "p95": 790.6559705734253, + "p99": 805.5999875068665 + }, + "isolatedSum": { + "p50": 816.5760040283203, + "p90": 836.2559974193573, + "p95": 844.8320031166077, + "p99": 908.3520174026489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 
534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 597.5040197372437, + "p90": 611.9679808616638, + "p95": 632.0639848709106, + "p99": 651.4559984207153 + }, + "combine": { + "p50": 850.6559729576111, + "p90": 882.0160031318665, + "p95": 894.1439986228943, + "p99": 907.9040288925171 + }, + "roundtrip": { + "p50": 1417.9840087890625, + "p90": 1443.7119960784912, + "p95": 1455.072045326233, + "p99": 1472.864031791687 + }, + "isolatedSum": { + "p50": 1448.1599926948547, + "p90": 1493.9839839935303, + "p95": 1526.207983493805, + "p99": 1559.3600273132324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1100.640058517456, + "p90": 1109.8560094833374, + "p95": 1112.768054008484, + "p99": 1120.255947113037 + }, + "combine": { + "p50": 1595.296025276184, + "p90": 1602.0480394363403, + "p95": 1607.2319746017456, + "p99": 1637.0879411697388 + }, + "roundtrip": { + "p50": 2655.2000045776367, + "p90": 2665.95196723938, + "p95": 2668.9279079437256, + "p99": 2677.664041519165 + }, + "isolatedSum": { + "p50": 2695.93608379364, + "p90": 2711.9040489196777, + "p95": 2720.0000286102295, + "p99": 2757.343888282776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8b502a1", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + 
"colorKey": "gb300_74218200", + "comparisonKey": "2870a44c1f8d758b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:29.014758+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.16799974441528, + "p90": 
141.63200557231903, + "p95": 150.30400454998016, + "p99": 187.04000115394592 + }, + "combine": { + "p50": 130.78400492668152, + "p90": 145.60000598430634, + "p95": 158.33599865436554, + "p99": 220.60799598693848 + }, + "roundtrip": { + "p50": 229.37600314617157, + "p90": 245.37600576877594, + "p95": 257.9840123653412, + "p99": 289.66400027275085 + }, + "isolatedSum": { + "p50": 257.9520046710968, + "p90": 287.23201155662537, + "p95": 308.6400032043457, + "p99": 407.6479971408844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 167.90400445461273, + "p90": 193.9840018749237, + "p95": 206.94400370121002, + "p99": 230.880007147789 + }, + "combine": { + "p50": 169.08800601959229, + "p90": 184.1599941253662, + "p95": 204.92799580097198, + "p99": 241.7919933795929 + }, + "roundtrip": { + "p50": 303.5840094089508, + "p90": 319.2639946937561, + "p95": 333.5359990596771, + "p99": 372.51201272010803 + }, + "isolatedSum": { + "p50": 336.992010474205, + "p90": 378.1439960002899, + "p95": 411.871999502182, + "p99": 472.6720005273819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.94399404525757, + "p90": 272.5439965724945, + "p95": 282.81599283218384, + "p99": 303.0720055103302 + }, + "combine": { + "p50": 293.15200448036194, + "p90": 316.22400879859924, + "p95": 326.880007982254, + "p99": 348.1599986553192 + }, + "roundtrip": { + "p50": 475.23200511932373, + "p90": 491.93599820137024, + "p95": 502.4319887161255, + "p99": 546.1440086364746 
+ }, + "isolatedSum": { + "p50": 532.0959985256195, + "p90": 588.7680053710938, + "p95": 609.6960008144379, + "p99": 651.2320041656494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 364.5760118961334, + "p90": 387.84000277519226, + "p95": 396.5440094470978, + "p99": 426.30401253700256 + }, + "combine": { + "p50": 490.30399322509766, + "p90": 504.06402349472046, + "p95": 515.8720016479492, + "p99": 542.5919890403748 + }, + "roundtrip": { + "p50": 822.9119777679443, + "p90": 852.5760173797607, + "p95": 866.6880130767822, + "p99": 892.8319811820984 + }, + "isolatedSum": { + "p50": 854.8800051212311, + "p90": 891.9040262699127, + "p95": 912.416011095047, + "p99": 968.8960015773773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 625.0560283660889, + "p90": 646.4639902114868, + "p95": 660.1920127868652, + "p99": 1122.6880550384521 + }, + "combine": { + "p50": 881.8560242652893, + "p90": 898.7200260162354, + "p95": 908.6719751358032, + "p99": 932.2559833526611 + }, + "roundtrip": { + "p50": 1472.8319644927979, + "p90": 1490.880012512207, + "p95": 1501.7919540405273, + "p99": 1525.7279872894287 + }, + "isolatedSum": { + "p50": 1506.9120526313782, + "p90": 1545.1840162277222, + "p95": 1568.8639879226685, + "p99": 2054.9440383911133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1139.296054840088, + "p90": 1168.287992477417, + "p95": 1180.3200244903564, + "p99": 1200.4799842834473 + }, + "combine": { + "p50": 1625.8879899978638, + "p90": 1637.4720335006714, + "p95": 1642.4000263214111, + "p99": 1670.6559658050537 + }, + "roundtrip": { + "p50": 2740.864038467407, + "p90": 2752.351999282837, + "p95": 2764.224052429199, + "p99": 2813.119888305664 + }, + "isolatedSum": { + "p50": 2765.1840448379517, + "p90": 2805.7600259780884, + "p95": 2822.7200508117676, + "p99": 2871.135950088501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ba4423d9", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "02cbe60d1402a41c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:32.397659+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.69600343704224, + "p90": 136.03200018405914, + "p95": 140.70400595664978, + "p99": 147.32800424098969 + }, + "combine": { + "p50": 124.41600114107132, + "p90": 131.71200454235077, + "p95": 133.7279975414276, + "p99": 142.87999272346497 + }, + "roundtrip": { + "p50": 224.5440036058426, + "p90": 233.08800160884857, + "p95": 237.0239943265915, + "p99": 245.728000998497 + }, + "isolatedSum": { + "p50": 250.11200457811356, + "p90": 267.7440047264099, + "p95": 274.4320034980774, + "p99": 290.20799696445465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.19200599193573, + "p90": 174.78400468826294, + "p95": 181.536003947258, + "p99": 214.65599536895752 + }, + "combine": { + "p50": 
169.3439930677414, + "p90": 178.6240041255951, + "p95": 181.08800053596497, + "p99": 197.85599410533905 + }, + "roundtrip": { + "p50": 304.25599217414856, + "p90": 317.53599643707275, + "p95": 322.6880133152008, + "p99": 348.6720025539398 + }, + "isolatedSum": { + "p50": 333.5359990596771, + "p90": 353.40800881385803, + "p95": 362.62400448322296, + "p99": 412.51198947429657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 235.61599850654602, + "p90": 247.0719963312149, + "p95": 251.13600492477417, + "p99": 257.4720084667206 + }, + "combine": { + "p50": 290.3040051460266, + "p90": 294.97599601745605, + "p95": 298.3039915561676, + "p99": 304.1599988937378 + }, + "roundtrip": { + "p50": 472.06398844718933, + "p90": 483.487993478775, + "p95": 488.95999789237976, + "p99": 494.3999946117401 + }, + "isolatedSum": { + "p50": 525.9200036525726, + "p90": 542.047992348671, + "p95": 549.4399964809418, + "p99": 561.6320073604584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 363.5840117931366, + "p90": 372.70399928092957, + "p95": 377.1840035915375, + "p99": 407.61598944664 + }, + "combine": { + "p50": 490.30399322509766, + "p90": 498.78400564193726, + "p95": 501.24800205230713, + "p99": 509.5999836921692 + }, + "roundtrip": { + "p50": 824.4479894638062, + "p90": 843.9040184020996, + "p95": 864.512026309967, + "p99": 892.799973487854 + }, + "isolatedSum": { + "p50": 853.8880050182343, + "p90": 871.4880049228668, + "p95": 
878.4320056438446, + "p99": 917.2159731388092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 622.3679780960083, + "p90": 631.9040060043335, + "p95": 634.6880197525024, + "p99": 646.3680267333984 + }, + "combine": { + "p50": 875.104010105133, + "p90": 884.223997592926, + "p95": 887.0400190353394, + "p99": 897.4400162696838 + }, + "roundtrip": { + "p50": 1467.6159620285034, + "p90": 1477.0879745483398, + "p95": 1480.1599979400635, + "p99": 1488.800048828125 + }, + "isolatedSum": { + "p50": 1497.4719882011414, + "p90": 1516.1280035972595, + "p95": 1521.7280387878418, + "p99": 1543.8080430030823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1142.2079801559448, + "p90": 1161.5999937057495, + "p95": 1176.8640279769897, + "p99": 1198.0160474777222 + }, + "combine": { + "p50": 1626.688003540039, + "p90": 1639.0399932861328, + "p95": 1651.3279676437378, + "p99": 1681.8560361862183 + }, + "roundtrip": { + "p50": 2740.4160499572754, + "p90": 2749.3441104888916, + "p95": 2753.632068634033, + "p99": 2762.6559734344482 + }, + "isolatedSum": { + "p50": 2768.895983695984, + "p90": 2800.6399869918823, + "p95": 2828.1919956207275, + "p99": 2879.8720836639404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-fdc9a7c6", + "identity": "gb300|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb300_b97bfb88", + "comparisonKey": "1dc982986bf98728", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:52.294733+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.88000500202179, + "p90": 138.11199367046356, + "p95": 142.33599603176117, + "p99": 161.40800714492798 + }, + "combine": { + "p50": 126.65599584579468, + "p90": 131.26400113105774, + "p95": 133.215993642807, + "p99": 139.55199718475342 + }, + "roundtrip": { + "p50": 224.95999932289124, + "p90": 239.23200368881226, + "p95": 265.1199996471405, + "p99": 281.5679907798767 + }, + "isolatedSum": { + "p50": 253.53600084781647, + "p90": 269.3759948015213, + "p95": 275.5519896745682, + "p99": 300.9600043296814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.8000031709671, + "p90": 175.23199319839478, + "p95": 179.83999848365784, + "p99": 199.42399859428406 + }, + "combine": { + "p50": 168.16000640392303, + "p90": 175.3920018672943, + "p95": 178.3359944820404, + "p99": 182.8799992799759 + }, + "roundtrip": { + "p50": 302.7839958667755, + "p90": 311.6160035133362, + "p95": 315.4880106449127, + "p99": 327.2320032119751 + }, + "isolatedSum": { + "p50": 332.96000957489014, + "p90": 350.6239950656891, + "p95": 358.17599296569824, + "p99": 382.30399787425995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.70399868488312, + "p90": 240.6720072031021, + "p95": 244.09599602222443, + "p99": 253.4080147743225 + }, + "combine": { + "p50": 287.32800483703613, + "p90": 293.2800054550171, + "p95": 
294.9120104312897, + "p99": 301.2480139732361 + }, + "roundtrip": { + "p50": 465.88799357414246, + "p90": 476.4479994773865, + "p95": 480.2879989147186, + "p99": 495.2319860458374 + }, + "isolatedSum": { + "p50": 520.0320035219193, + "p90": 533.9520126581192, + "p95": 539.0080064535141, + "p99": 554.6560287475586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 361.1840009689331, + "p90": 369.1520094871521, + "p95": 372.19199538230896, + "p99": 379.1680037975311 + }, + "combine": { + "p50": 490.33600091934204, + "p90": 498.4639883041382, + "p95": 500.67198276519775, + "p99": 502.9119849205017 + }, + "roundtrip": { + "p50": 819.8400139808655, + "p90": 827.7440071105957, + "p95": 832.7999711036682, + "p99": 840.7359719276428 + }, + "isolatedSum": { + "p50": 851.5200018882751, + "p90": 867.6159977912903, + "p95": 872.8639781475067, + "p99": 882.0799887180328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 613.6959791183472, + "p90": 622.3359704017639, + "p95": 627.6159882545471, + "p99": 653.6319851875305 + }, + "combine": { + "p50": 866.3359880447388, + "p90": 874.3680119514465, + "p95": 878.2719969749451, + "p99": 898.9120125770569 + }, + "roundtrip": { + "p50": 1449.24795627594, + "p90": 1458.5280418395996, + "p95": 1462.7840518951416, + "p99": 1470.4639911651611 + }, + "isolatedSum": { + "p50": 1480.031967163086, + "p90": 1496.7039823532104, + "p95": 1505.8879852294922, + "p99": 1552.5439977645874 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1129.6639442443848, + "p90": 1136.064052581787, + "p95": 1137.887954711914, + "p99": 1143.455982208252 + }, + "combine": { + "p50": 1608.6080074310303, + "p90": 1616.6720390319824, + "p95": 1619.711995124817, + "p99": 1625.5680322647095 + }, + "roundtrip": { + "p50": 2708.224058151245, + "p90": 2717.600107192993, + "p95": 2721.2159633636475, + "p99": 2729.1839122772217 + }, + "isolatedSum": { + "p50": 2738.271951675415, + "p90": 2752.7360916137695, + "p95": 2757.599949836731, + "p99": 2769.0240144729614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a70c693", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||03799dfc4e73d7f", + "colorKey": "gb300_d4c8afb8", + "comparisonKey": "7c5d1ae307d82bca", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:10.133290+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + 
"routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03799dfc4e73d7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 141.12000167369843, + "p90": 152.12799608707428, + "p95": 158.11200439929962, + "p99": 172.28800058364868 + }, + "combine": { + "p50": 146.14400267601013, + "p90": 151.90400183200836, + "p95": 155.83999454975128, + "p99": 162.78399527072906 + }, + "roundtrip": { + "p50": 259.0720057487488, + "p90": 268.8960134983063, + "p95": 274.1119861602783, + "p99": 288.06400299072266 + }, + "isolatedSum": { + "p50": 287.26400434970856, + "p90": 304.03199791908264, + "p95": 313.9519989490509, + "p99": 335.07199585437775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 189.11999464035034, + "p90": 198.94400238990784, + "p95": 202.4960070848465, + "p99": 210.68799495697021 + }, + "combine": { + "p50": 206.7839950323105, + "p90": 213.56800198554993, + "p95": 217.50399470329285, + "p99": 223.13599288463593 + }, + "roundtrip": { + "p50": 359.45600271224976, + "p90": 370.11200189590454, + "p95": 373.1519877910614, + "p99": 392.4480080604553 + }, + "isolatedSum": { + "p50": 395.9039896726608, + "p90": 412.51200437545776, + "p95": 420.00000178813934, + "p99": 433.82398784160614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 278.9120078086853, + "p90": 290.23998975753784, + "p95": 294.07998919487, + "p99": 338.3359909057617 + }, + "combine": { + "p50": 367.13600158691406, + "p90": 372.44799733161926, + "p95": 375.0399947166443, + "p99": 381.632000207901 + }, + "roundtrip": { + "p50": 605.7599782943726, + "p90": 615.5200004577637, + "p95": 619.5840239524841, + "p99": 653.7920236587524 + }, + "isolatedSum": { + "p50": 646.0480093955994, + "p90": 662.6879870891571, + "p95": 669.1199839115143, + "p99": 719.9679911136627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 451.58401131629944, + "p90": 464.2240107059479, + "p95": 473.7600088119507, + "p99": 498.84799122810364 + }, + "combine": { + "p50": 638.592004776001, + "p90": 644.8320150375366, + "p95": 648.0000019073486, + "p99": 655.2960276603699 + }, + "roundtrip": { + "p50": 1060.1919889450073, + 
"p90": 1070.8160400390625, + "p95": 1074.8800039291382, + "p99": 1084.6400260925293 + }, + "isolatedSum": { + "p50": 1090.1760160923004, + "p90": 1109.0560257434845, + "p95": 1121.7600107192993, + "p99": 1154.1440188884735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 808.5439801216125, + "p90": 824.1919875144958, + "p95": 829.0240168571472, + "p99": 845.9200263023376 + }, + "combine": { + "p50": 1193.1840181350708, + "p90": 1199.903964996338, + "p95": 1202.49605178833, + "p99": 1216.863989830017 + }, + "roundtrip": { + "p50": 1971.8400239944458, + "p90": 1989.0880584716797, + "p95": 1996.0639476776123, + "p99": 2021.3439464569092 + }, + "isolatedSum": { + "p50": 2001.7279982566833, + "p90": 2024.0959525108337, + "p95": 2031.5200686454773, + "p99": 2062.7840161323547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1545.7919836044312, + "p90": 1569.4719552993774, + "p95": 1573.5679864883423, + "p99": 1583.3920240402222 + }, + "combine": { + "p50": 2282.20796585083, + "p90": 2290.2400493621826, + "p95": 2296.2560653686523, + "p99": 2318.23992729187 + }, + "roundtrip": { + "p50": 3798.4960079193115, + "p90": 3822.335958480835, + "p95": 3827.199935913086, + "p99": 3847.935914993286 + }, + "isolatedSum": { + "p50": 3827.9999494552612, + "p90": 3859.71200466156, + "p95": 3869.8240518569946, + "p99": 3901.6319513320923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, 
+ "recvTokensMax": 32768, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5f85a462", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c", + "colorKey": "gb300_f163949b", + "comparisonKey": "255dfa9bd7173c73", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:27.381650+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7f1ea4cf569d12c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.85600072145462, + "p90": 111.32799834012985, + "p95": 115.87200313806534, + "p99": 127.13600695133209 + }, + "combine": { + "p50": 82.87999778985977, + "p90": 85.95199882984161, + "p95": 90.08000046014786, + "p99": 97.24800288677216 + }, + "roundtrip": { + "p50": 156.44800662994385, + "p90": 166.143998503685, + "p95": 170.43200135231018, + "p99": 183.55199694633484 + }, + "isolatedSum": { + "p50": 184.7359985113144, + "p90": 197.27999716997147, + "p95": 205.9520035982132, + "p99": 224.38400983810425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 122.36800044775009, + "p90": 132.9279989004135, + "p95": 137.28000223636627, + "p99": 148.47999811172485 + }, + "combine": { + "p50": 123.87199699878693, + "p90": 131.77600502967834, + "p95": 133.85599851608276, + "p99": 146.11199498176575 + }, + "roundtrip": { + "p50": 221.343994140625, + "p90": 229.312002658844, + "p95": 232.7679991722107, + "p99": 239.68000710010529 + }, + "isolatedSum": { + "p50": 246.23999744653702, + "p90": 264.70400393009186, + "p95": 271.13600075244904, + "p99": 294.5919930934906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 210.27199923992157, + "p90": 
218.33600103855133, + "p95": 221.37600183486938, + "p99": 233.024001121521 + }, + "combine": { + "p50": 256.3839852809906, + "p90": 261.6960108280182, + "p95": 265.02400636672974, + "p99": 268.8960134983063 + }, + "roundtrip": { + "p50": 448.35200905799866, + "p90": 455.00800013542175, + "p95": 457.72799849510193, + "p99": 471.3920056819916 + }, + "isolatedSum": { + "p50": 466.65598452091217, + "p90": 480.0320118665695, + "p95": 486.4000082015991, + "p99": 501.92001461982727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ba690ae0", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405", + "colorKey": "gb300_c93e2296", + "comparisonKey": "2ca11a784293be10", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:35.469128+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 
152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7ac30b0a39b1405", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.84000158309937, + "p90": 119.03999745845795, + "p95": 125.76000392436981, + "p99": 157.27999806404114 + }, + "combine": { + "p50": 97.15200215578079, + "p90": 104.38399761915207, + "p95": 107.51999914646149, + "p99": 113.34399878978729 + }, + "roundtrip": { + "p50": 176.86399817466736, + "p90": 187.23200261592865, + "p95": 191.3280040025711, + "p99": 213.56800198554993 + }, + "isolatedSum": { + "p50": 204.99200373888016, + "p90": 223.42399507761002, + "p95": 233.2800030708313, + "p99": 270.62399685382843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 126.68800354003906, + "p90": 135.903999209404, + "p95": 139.74399864673615, + "p99": 147.10399508476257 + }, + "combine": { + "p50": 120.60800194740295, + "p90": 125.63200294971466, + "p95": 130.91200590133667, + "p99": 163.68000209331512 + }, + "roundtrip": { + "p50": 
217.92000532150269, + "p90": 227.48799622058868, + "p95": 231.48800432682037, + "p99": 246.94399535655975 + }, + "isolatedSum": { + "p50": 247.29600548744202, + "p90": 261.53600215911865, + "p95": 270.6560045480728, + "p99": 310.7839971780777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 160.73599457740784, + "p90": 170.52799463272095, + "p95": 173.95199835300446, + "p99": 179.80800569057465 + }, + "combine": { + "p50": 155.42399883270264, + "p90": 160.7999950647354, + "p95": 162.88000345230103, + "p99": 170.49600183963776 + }, + "roundtrip": { + "p50": 287.48801350593567, + "p90": 296.03201150894165, + "p95": 299.19999837875366, + "p99": 319.7120130062103 + }, + "isolatedSum": { + "p50": 316.1599934101105, + "p90": 331.32798969745636, + "p95": 336.8320018053055, + "p99": 350.3040075302124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 231.26399517059326, + "p90": 240.1600033044815, + "p95": 245.34399807453156, + "p99": 286.3680124282837 + }, + "combine": { + "p50": 280.64000606536865, + "p90": 285.66399216651917, + "p95": 288.89599442481995, + "p99": 298.68799448013306 + }, + "roundtrip": { + "p50": 480.4159998893738, + "p90": 491.2000000476837, + "p95": 494.7519898414612, + "p99": 510.1119875907898 + }, + "isolatedSum": { + "p50": 511.9040012359619, + "p90": 525.8239954710007, + "p95": 534.2399924993515, + "p99": 585.0560069084167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + 
"fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 370.04798650741577, + "p90": 379.7760009765625, + "p95": 384.0000033378601, + "p99": 402.6240110397339 + }, + "combine": { + "p50": 492.73601174354553, + "p90": 500.70399045944214, + "p95": 503.07202339172363, + "p99": 508.38398933410645 + }, + "roundtrip": { + "p50": 836.575984954834, + "p90": 845.3119993209839, + "p95": 847.7759957313538, + "p99": 871.3279962539673 + }, + "isolatedSum": { + "p50": 862.7839982509613, + "p90": 880.4799914360046, + "p95": 887.0720267295837, + "p99": 911.0080003738403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 665.4080152511597, + "p90": 676.3200163841248, + "p95": 682.2720170021057, + "p99": 701.0560035705566 + }, + "combine": { + "p50": 893.9200043678284, + "p90": 900.223970413208, + "p95": 905.1200151443481, + "p99": 912.6399755477905 + }, + "roundtrip": { + "p50": 1529.0240049362183, + "p90": 1539.29603099823, + "p95": 1544.6720123291016, + "p99": 1576.159954071045 + }, + "isolatedSum": { + "p50": 1559.328019618988, + "p90": 1576.5439867973328, + "p95": 1587.3920321464539, + "p99": 1613.6959791183472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60625fb3", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||e3707ddc343088b", + "colorKey": "gb300_440d13a2", + "comparisonKey": 
"e5ffbcf044278e38", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:44.455283+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e3707ddc343088b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.5599957704544, + "p90": 139.74399864673615, + 
"p95": 145.60000598430634, + "p99": 154.94400262832642 + }, + "combine": { + "p50": 136.3839954137802, + "p90": 145.37599682807922, + "p95": 146.94400131702423, + "p99": 156.47999942302704 + }, + "roundtrip": { + "p50": 243.48799884319305, + "p90": 251.71199440956116, + "p95": 254.97600436210632, + "p99": 259.99999046325684 + }, + "isolatedSum": { + "p50": 266.9439911842346, + "p90": 285.11999547481537, + "p95": 292.54400730133057, + "p99": 311.42400205135345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 171.26399278640747, + "p90": 180.4800033569336, + "p95": 184.57600474357605, + "p99": 200.6720006465912 + }, + "combine": { + "p50": 192.35199689865112, + "p90": 197.6960003376007, + "p95": 200.19200444221497, + "p99": 209.60000157356262 + }, + "roundtrip": { + "p50": 331.4880132675171, + "p90": 339.4240140914917, + "p95": 342.46399998664856, + "p99": 354.17601466178894 + }, + "isolatedSum": { + "p50": 363.6159896850586, + "p90": 378.1760036945343, + "p95": 384.768009185791, + "p99": 410.2720022201538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 253.37600708007812, + "p90": 261.8879973888397, + "p95": 266.2400007247925, + "p99": 274.3360102176666 + }, + "combine": { + "p50": 358.0799996852875, + "p90": 366.36799573898315, + "p95": 368.1600093841553, + "p99": 371.39201164245605 + }, + "roundtrip": { + "p50": 574.5599865913391, + "p90": 584.9279761314392, + "p95": 589.6000266075134, + "p99": 596.7680215835571 + }, + 
"isolatedSum": { + "p50": 611.4560067653656, + "p90": 628.2559931278229, + "p95": 634.4000101089478, + "p99": 645.7280218601227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 407.23198652267456, + "p90": 416.28798842430115, + "p95": 419.48801279067993, + "p99": 428.47999930381775 + }, + "combine": { + "p50": 615.6799793243408, + "p90": 623.3919858932495, + "p95": 624.895989894867, + "p99": 628.6720037460327 + }, + "roundtrip": { + "p50": 998.6240267753601, + "p90": 1006.0160160064697, + "p95": 1008.1919431686401, + "p99": 1014.847993850708 + }, + "isolatedSum": { + "p50": 1022.9119658470154, + "p90": 1039.6799743175507, + "p95": 1044.3840026855469, + "p99": 1057.1520030498505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 719.871997833252, + "p90": 728.4160256385803, + "p95": 732.4479818344116, + "p99": 741.599977016449 + }, + "combine": { + "p50": 1152.9279947280884, + "p90": 1157.8559875488281, + "p95": 1158.8480472564697, + "p99": 1165.4080152511597 + }, + "roundtrip": { + "p50": 1844.7999954223633, + "p90": 1853.0880212783813, + "p95": 1856.063961982727, + "p99": 1867.583990097046 + }, + "isolatedSum": { + "p50": 1872.7999925613403, + "p90": 1886.2720131874084, + "p95": 1891.2960290908813, + "p99": 1907.0079922676086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 2, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1392.4800157546997, + "p90": 1403.2319784164429, + "p95": 1407.7759981155396, + "p99": 1422.592043876648 + }, + "combine": { + "p50": 2208.3520889282227, + "p90": 2214.2720222473145, + "p95": 2215.872049331665, + "p99": 2222.752094268799 + }, + "roundtrip": { + "p50": 3574.944019317627, + "p90": 3582.848072052002, + "p95": 3585.2160453796387, + "p99": 3590.5919075012207 + }, + "isolatedSum": { + "p50": 3600.8321046829224, + "p90": 3617.5040006637573, + "p95": 3623.6480474472046, + "p99": 3645.344138145447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5f0c4166", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add", + "colorKey": "gb300_87f4d4ec", + "comparisonKey": "6a684c17508b8933", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:53.698331+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + 
"kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6248b19ef786add", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.11999905109406, + "p90": 135.96799969673157, + "p95": 145.08800208568573, + "p99": 194.14399564266205 + }, + "combine": { + "p50": 123.71200323104858, + "p90": 133.08799266815186, + "p95": 136.7039978504181, + "p99": 170.78399658203125 + }, + "roundtrip": { + "p50": 223.7119972705841, + "p90": 233.8239997625351, + "p95": 243.26400458812714, + "p99": 286.75198554992676 + }, + "isolatedSum": { + "p50": 248.83200228214264, + "p90": 269.0559923648834, + "p95": 281.7919999361038, + "p99": 364.9279922246933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.10399770736694, + "p90": 
173.75999689102173, + "p95": 177.98399925231934, + "p99": 216.0319983959198 + }, + "combine": { + "p50": 169.50400173664093, + "p90": 174.81599748134613, + "p95": 179.87200617790222, + "p99": 196.73599302768707 + }, + "roundtrip": { + "p50": 305.85598945617676, + "p90": 316.4159953594208, + "p95": 322.1760094165802, + "p99": 373.56799840927124 + }, + "isolatedSum": { + "p50": 332.6079994440079, + "p90": 348.57599437236786, + "p95": 357.85600543022156, + "p99": 412.7679914236069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.58399760723114, + "p90": 241.43999814987183, + "p95": 247.23200500011444, + "p99": 288.86398673057556 + }, + "combine": { + "p50": 284.0000092983246, + "p90": 294.49599981307983, + "p95": 298.6240088939667, + "p99": 340.2880132198334 + }, + "roundtrip": { + "p50": 463.45600485801697, + "p90": 475.5840003490448, + "p95": 483.5200011730194, + "p99": 539.7760272026062 + }, + "isolatedSum": { + "p50": 515.5840069055557, + "p90": 535.9359979629517, + "p95": 545.8560138940811, + "p99": 629.1519999504089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 360.57600378990173, + "p90": 369.2159950733185, + "p95": 376.99198722839355, + "p99": 413.05598616600037 + }, + "combine": { + "p50": 492.3520088195801, + "p90": 503.167986869812, + "p95": 513.759970664978, + "p99": 553.2159805297852 + }, + "roundtrip": { + "p50": 826.9439935684204, + "p90": 854.1759848594666, + "p95": 861.6639971733093, + "p99": 
2114.3040657043457 + }, + "isolatedSum": { + "p50": 852.9280126094818, + "p90": 872.3839819431305, + "p95": 890.7519578933716, + "p99": 966.2719666957855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 612.7679944038391, + "p90": 621.4079856872559, + "p95": 624.895989894867, + "p99": 646.016001701355 + }, + "combine": { + "p50": 866.4960265159607, + "p90": 873.9519715309143, + "p95": 875.5840063095093, + "p99": 899.9040126800537 + }, + "roundtrip": { + "p50": 1445.8240270614624, + "p90": 1456.5119743347168, + "p95": 1460.1600170135498, + "p99": 1479.9360036849976 + }, + "isolatedSum": { + "p50": 1479.2640209197998, + "p90": 1495.3599572181702, + "p95": 1500.4799962043762, + "p99": 1545.9200143814087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1135.9679698944092, + "p90": 1149.440050125122, + "p95": 1156.383991241455, + "p99": 8341.695785522461 + }, + "combine": { + "p50": 1609.984040260315, + "p90": 1615.9039735794067, + "p95": 1622.5919723510742, + "p99": 1676.416039466858 + }, + "roundtrip": { + "p50": 2715.61598777771, + "p90": 2725.44002532959, + "p95": 2729.599952697754, + "p99": 2762.2718811035156 + }, + "isolatedSum": { + "p50": 2745.952010154724, + "p90": 2765.344023704529, + "p95": 2778.9759635925293, + "p99": 10018.111824989319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a5f4e18", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286", + "colorKey": "gb300_8b7def4e", + "comparisonKey": "e430694c35257860", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:10.449100+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "291e5ce62735286", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + 
"id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.07200646400452, + "p90": 136.73600554466248, + "p95": 140.54399728775024, + "p99": 188.03200125694275 + }, + "combine": { + "p50": 124.38400089740753, + "p90": 133.44000279903412, + "p95": 135.3600025177002, + "p99": 143.77599954605103 + }, + "roundtrip": { + "p50": 225.63199698925018, + "p90": 235.52000522613525, + "p95": 238.97600173950195, + "p99": 244.25600469112396 + }, + "isolatedSum": { + "p50": 251.45600736141205, + "p90": 270.1760083436966, + "p95": 275.90399980545044, + "p99": 331.8080008029938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.71199488639832, + "p90": 172.63999581336975, + "p95": 177.69600450992584, + "p99": 187.19999492168427 + }, + "combine": { + "p50": 170.33599317073822, + "p90": 177.44000256061554, + "p95": 181.92000687122345, + "p99": 186.91200017929077 + }, + "roundtrip": { + "p50": 304.4480085372925, + "p90": 315.13598561286926, + "p95": 319.2639946937561, + "p99": 328.5439908504486 + }, + "isolatedSum": { + "p50": 334.04798805713654, + "p90": 350.0799983739853, + "p95": 359.6160113811493, + "p99": 374.11199510097504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 234.30399596691132, + "p90": 
243.1039959192276, + "p95": 246.68799340724945, + "p99": 254.7839879989624 + }, + "combine": { + "p50": 285.0239872932434, + "p90": 294.3359911441803, + "p95": 295.8720028400421, + "p99": 299.3920147418976 + }, + "roundtrip": { + "p50": 471.0719883441925, + "p90": 481.79200291633606, + "p95": 484.6400022506714, + "p99": 496.16000056266785 + }, + "isolatedSum": { + "p50": 519.3279832601547, + "p90": 537.4399870634079, + "p95": 542.5599962472916, + "p99": 554.17600274086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 360.4159951210022, + "p90": 371.2320029735565, + "p95": 376.96000933647156, + "p99": 413.7600064277649 + }, + "combine": { + "p50": 492.8320050239563, + "p90": 502.24000215530396, + "p95": 503.90398502349854, + "p99": 507.1359872817993 + }, + "roundtrip": { + "p50": 823.9359855651855, + "p90": 833.5040211677551, + "p95": 836.51202917099, + "p99": 844.3199992179871 + }, + "isolatedSum": { + "p50": 853.2480001449585, + "p90": 873.4720051288605, + "p95": 880.8639943599701, + "p99": 920.8959937095642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 611.840009689331, + "p90": 621.5360164642334, + "p95": 624.351978302002, + "p99": 636.3840103149414 + }, + "combine": { + "p50": 860.863983631134, + "p90": 866.6560053825378, + "p95": 870.5599904060364, + "p99": 874.4000196456909 + }, + "roundtrip": { + "p50": 1442.2080516815186, + "p90": 1451.8719911575317, + "p95": 1456.1280012130737, + "p99": 1462.6879692077637 + 
}, + "isolatedSum": { + "p50": 1472.703993320465, + "p90": 1488.1920218467712, + "p95": 1494.9119687080383, + "p99": 1510.7840299606323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1132.6719522476196, + "p90": 1139.9040222167969, + "p95": 1142.1120166778564, + "p99": 1151.7119407653809 + }, + "combine": { + "p50": 1624.2560148239136, + "p90": 1634.592056274414, + "p95": 1636.4799737930298, + "p99": 1643.5199975967407 + }, + "roundtrip": { + "p50": 2730.207920074463, + "p90": 2739.583969116211, + "p95": 2743.0078983306885, + "p99": 2749.1838932037354 + }, + "isolatedSum": { + "p50": 2756.927967071533, + "p90": 2774.496078491211, + "p95": 2778.5919904708862, + "p99": 2795.2319383621216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-939b56bc", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_b3a88763", + "comparisonKey": "92dc80df4affb401", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:08.163578+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · 
deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.2799928188324, + "p90": 134.5919966697693, + "p95": 140.73599874973297, + "p99": 176.64000391960144 + }, + "combine": { + "p50": 134.07999277114868, + "p90": 143.13599467277527, + "p95": 145.02400159835815, + "p99": 148.3200043439865 + }, + "roundtrip": { + "p50": 235.58400571346283, + "p90": 243.80800127983093, + "p95": 247.16800451278687, + "p99": 258.2719922065735 + }, + "isolatedSum": { + "p50": 259.3599855899811, + "p90": 277.72799134254456, + "p95": 285.7600003480911, + "p99": 324.96000826358795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + 
"fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.4880074262619, + "p90": 168.5439944267273, + "p95": 172.83199727535248, + "p99": 178.847998380661 + }, + "combine": { + "p50": 182.72000551223755, + "p90": 187.32799589633942, + "p95": 192.19200313091278, + "p99": 196.06399536132812 + }, + "roundtrip": { + "p50": 317.7280128002167, + "p90": 325.50400495529175, + "p95": 328.000009059906, + "p99": 336.5119993686676 + }, + "isolatedSum": { + "p50": 342.20801293849945, + "p90": 355.8719903230667, + "p95": 365.02400040626526, + "p99": 374.91199374198914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.29600286483765, + "p90": 240.25599658489227, + "p95": 242.88000166416168, + "p99": 250.8159875869751 + }, + "combine": { + "p50": 346.5920090675354, + "p90": 355.45599460601807, + "p95": 357.02401399612427, + "p99": 365.28000235557556 + }, + "roundtrip": { + "p50": 546.2719798088074, + "p90": 556.384027004242, + "p95": 559.2960119247437, + "p99": 567.903995513916 + }, + "isolatedSum": { + "p50": 577.888011932373, + "p90": 595.7119911909103, + "p95": 599.904015660286, + "p99": 616.0959899425507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 370.88000774383545, + "p90": 380.0640106201172, + "p95": 384.223997592926, + "p99": 391.7439877986908 + }, + "combine": { + 
"p50": 624.8000264167786, + "p90": 629.5679807662964, + "p95": 633.5359811782837, + "p99": 641.0560011863708 + }, + "roundtrip": { + "p50": 961.6000056266785, + "p90": 970.624029636383, + "p95": 973.7920165061951, + "p99": 980.1920056343079 + }, + "isolatedSum": { + "p50": 995.680034160614, + "p90": 1009.6319913864136, + "p95": 1017.7599787712097, + "p99": 1032.7999889850616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 691.9999718666077, + "p90": 708.4159851074219, + "p95": 716.6079878807068, + "p99": 17707.040786743164 + }, + "combine": { + "p50": 1136.1279487609863, + "p90": 1220.639944076538, + "p95": 1230.6239604949951, + "p99": 5635.072231292725 + }, + "roundtrip": { + "p50": 1798.8159656524658, + "p90": 1810.271978378296, + "p95": 1813.920021057129, + "p99": 1827.679991722107 + }, + "isolatedSum": { + "p50": 1828.127920627594, + "p90": 1929.05592918396, + "p95": 1947.231948375702, + "p99": 23342.11301803589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1344.1599607467651, + "p90": 1355.7440042495728, + "p95": 1358.8160276412964, + "p99": 1365.2160167694092 + }, + "combine": { + "p50": 2172.384023666382, + "p90": 2177.824020385742, + "p95": 2180.255889892578, + "p99": 2188.256025314331 + }, + "roundtrip": { + "p50": 3491.0080432891846, + "p90": 3503.200054168701, + "p95": 3507.3280334472656, + "p99": 3516.6079998016357 + }, + "isolatedSum": { + "p50": 3516.543984413147, + "p90": 3533.568024635315, + 
"p95": 3539.0719175338745, + "p99": 3553.4720420837402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fef7f804", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||8183e404f63b100", + "colorKey": "gb300_961589b9", + "comparisonKey": "796f3c416772b90a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:14.920558+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8183e404f63b100", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.52800101041794, + "p90": 128.31999361515045, + "p95": 133.31200182437897, + "p99": 141.9840008020401 + }, + "combine": { + "p50": 129.95199859142303, + "p90": 134.62400436401367, + "p95": 139.39200341701508, + "p99": 146.88000082969666 + }, + "roundtrip": { + "p50": 224.99200701713562, + "p90": 233.11999440193176, + "p95": 236.00000143051147, + "p99": 245.2480047941208 + }, + "isolatedSum": { + "p50": 248.47999960184097, + "p90": 262.9439979791641, + "p95": 272.70400524139404, + "p99": 288.86400163173676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 150.78400075435638, + "p90": 159.5200002193451, + "p95": 164.000004529953, + "p99": 194.36800479888916 + }, + "combine": { + "p50": 179.58399653434753, + "p90": 185.56800484657288, + "p95": 190.0160014629364, + "p99": 196.06399536132812 + }, + "roundtrip": { + "p50": 304.76799607276917, + "p90": 313.4079873561859, + "p95": 317.6960051059723, + "p99": 327.4880051612854 + }, + "isolatedSum": { + "p50": 330.3679972887039, + "p90": 345.08800506591797, + "p95": 354.0160059928894, + "p99": 390.4320001602173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 2, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 210.07999777793884, + "p90": 221.21599316596985, + "p95": 234.3360036611557, + "p99": 251.10399723052979 + }, + "combine": { + "p50": 305.759996175766, + "p90": 315.2959942817688, + "p95": 317.79199838638306, + "p99": 328.031986951828 + }, + "roundtrip": { + "p50": 495.35998702049255, + "p90": 506.46400451660156, + "p95": 509.69600677490234, + "p99": 522.0800042152405 + }, + "isolatedSum": { + "p50": 515.8399939537048, + "p90": 536.5119874477386, + "p95": 552.1280020475388, + "p99": 579.1359841823578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 334.6880078315735, + "p90": 345.34400701522827, + "p95": 351.20001435279846, + "p99": 374.87998604774475 + }, + "combine": { + "p50": 623.0720281600952, + "p90": 627.6800036430359, + "p95": 629.2160153388977, + "p99": 638.3360028266907 + }, + "roundtrip": { + "p50": 934.6240162849426, + "p90": 943.8400268554688, + "p95": 946.7840194702148, + "p99": 974.6559858322144 + }, + "isolatedSum": { + "p50": 957.7600359916687, + "p90": 973.0240106582642, + "p95": 980.4160296916962, + "p99": 1013.2159888744354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 608.5439920425415, + "p90": 622.2079992294312, + "p95": 628.1920075416565, + "p99": 653.4720063209534 + }, + "combine": { + "p50": 1166.4320230484009, + "p90": 1170.7520484924316, + "p95": 
1176.6719818115234, + "p99": 1193.824052810669 + }, + "roundtrip": { + "p50": 1720.9279537200928, + "p90": 1732.7359914779663, + "p95": 1736.2879514694214, + "p99": 1745.408058166504 + }, + "isolatedSum": { + "p50": 1774.9760150909424, + "p90": 1792.9600477218628, + "p95": 1804.86398935318, + "p99": 1847.2960591316223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1192.9600238800049, + "p90": 1206.0799598693848, + "p95": 1210.368037223816, + "p99": 1219.9039459228516 + }, + "combine": { + "p50": 2194.9760913848877, + "p90": 2202.0161151885986, + "p95": 2207.1681022644043, + "p99": 2214.495897293091 + }, + "roundtrip": { + "p50": 3330.9121131896973, + "p90": 3344.288110733032, + "p95": 3348.9279747009277, + "p99": 3360.383987426758 + }, + "isolatedSum": { + "p50": 3387.9361152648926, + "p90": 3408.0960750579834, + "p95": 3417.53613948822, + "p99": 3434.3998432159424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-37cf5d77", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72", + "colorKey": "gb300_db9a43b5", + "comparisonKey": "8cb163d8db9bc0c6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:24.614499+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0e6b07a25691d72", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.86399710178375, + "p90": 132.76800513267517, + "p95": 136.1600011587143, + "p99": 153.02400290966034 + }, + "combine": { + "p50": 122.5920021533966, + "p90": 130.72000443935394, + "p95": 132.57600367069244, + "p99": 137.472003698349 + }, + "roundtrip": { + "p50": 222.56000339984894, + "p90": 229.79199886322021, + "p95": 232.2240024805069, + "p99": 
237.8239929676056 + }, + "isolatedSum": { + "p50": 247.45599925518036, + "p90": 263.4880095720291, + "p95": 268.73600482940674, + "p99": 290.49600660800934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.4639928340912, + "p90": 171.83999717235565, + "p95": 175.77600479125977, + "p99": 210.87999641895294 + }, + "combine": { + "p50": 166.78400337696075, + "p90": 171.55200242996216, + "p95": 173.21600019931793, + "p99": 186.5600049495697 + }, + "roundtrip": { + "p50": 299.1679906845093, + "p90": 307.5839877128601, + "p95": 311.1039996147156, + "p99": 355.52000999450684 + }, + "isolatedSum": { + "p50": 329.24799621105194, + "p90": 343.3919996023178, + "p95": 348.9920049905777, + "p99": 397.44000136852264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 230.43200373649597, + "p90": 238.24000358581543, + "p95": 240.76800048351288, + "p99": 251.80798768997192 + }, + "combine": { + "p50": 289.792001247406, + "p90": 296.00000381469727, + "p95": 300.4159927368164, + "p99": 326.78401470184326 + }, + "roundtrip": { + "p50": 468.60799193382263, + "p90": 479.93600368499756, + "p95": 482.81601071357727, + "p99": 512.1279954910278 + }, + "isolatedSum": { + "p50": 520.224004983902, + "p90": 534.2400074005127, + "p95": 541.1839932203293, + "p99": 578.5920023918152 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 
5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 360.83200573921204, + "p90": 369.6320056915283, + "p95": 374.08000230789185, + "p99": 420.19200325012207 + }, + "combine": { + "p50": 499.61599707603455, + "p90": 504.863977432251, + "p95": 508.1599950790405, + "p99": 513.8880014419556 + }, + "roundtrip": { + "p50": 833.184003829956, + "p90": 842.3359990119934, + "p95": 845.1200127601624, + "p99": 854.3360233306885 + }, + "isolatedSum": { + "p50": 860.4480028152466, + "p90": 874.4959831237793, + "p95": 882.2399973869324, + "p99": 934.0800046920776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 622.3679780960083, + "p90": 634.5599889755249, + "p95": 639.8400068283081, + "p99": 1066.4000511169434 + }, + "combine": { + "p50": 898.4959721565247, + "p90": 939.2960071563721, + "p95": 947.2960233688354, + "p99": 13847.488403320312 + }, + "roundtrip": { + "p50": 1487.6480102539062, + "p90": 1496.575951576233, + "p95": 1499.5839595794678, + "p99": 1522.8159427642822 + }, + "isolatedSum": { + "p50": 1520.863950252533, + "p90": 1573.855996131897, + "p95": 1587.1360301971436, + "p99": 14913.888454437256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1156.3520431518555, + "p90": 1162.9120111465454, + "p95": 1164.639949798584, + "p99": 1179.103970527649 + }, + "combine": { + "p50": 1690.176010131836, + "p90": 1699.1039514541626, + 
"p95": 1702.5279998779297, + "p99": 1709.0879678726196 + }, + "roundtrip": { + "p50": 2818.0160522460938, + "p90": 2827.455997467041, + "p95": 2830.24001121521, + "p99": 2838.1760120391846 + }, + "isolatedSum": { + "p50": 2846.5280532836914, + "p90": 2862.015962600708, + "p95": 2867.1679496765137, + "p99": 2888.1919384002686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2c8de23f", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7", + "colorKey": "gb300_15a35db4", + "comparisonKey": "ce656c1689809360", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:41.389556+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", 
+ "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a39eeb7c2dc6ca7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.03999876976013, + "p90": 136.63999736309052, + "p95": 140.79999923706055, + "p99": 154.9759954214096 + }, + "combine": { + "p50": 134.2719942331314, + "p90": 141.95199310779572, + "p95": 143.96800100803375, + "p99": 148.6400067806244 + }, + "roundtrip": { + "p50": 237.34399676322937, + "p90": 245.82399427890778, + "p95": 248.9600032567978, + "p99": 258.62398743629456 + }, + "isolatedSum": { + "p50": 261.31199300289154, + "p90": 278.59199047088623, + "p95": 284.7680002450943, + "p99": 303.616002202034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 166.72000288963318, + "p90": 188.9919936656952, + "p95": 203.5519927740097, + "p99": 229.24800217151642 + }, + "combine": { + "p50": 182.68799781799316, + "p90": 206.59199357032776, + "p95": 231.87200725078583, + "p99": 242.49599874019623 + }, + "roundtrip": { + "p50": 324.99200105667114, + "p90": 351.39200091362, + "p95": 362.0480000972748, + "p99": 388.0639970302582 + }, + "isolatedSum": { + "p50": 349.40800070762634, + "p90": 
395.58398723602295, + "p95": 435.42400002479553, + "p99": 471.74400091171265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 241.43999814987183, + "p90": 254.04798984527588, + "p95": 261.28000020980835, + "p99": 301.60000920295715 + }, + "combine": { + "p50": 353.1840145587921, + "p90": 373.53599071502686, + "p95": 393.6319947242737, + "p99": 415.583997964859 + }, + "roundtrip": { + "p50": 560.0320100784302, + "p90": 582.751989364624, + "p95": 593.280017375946, + "p99": 611.8080019950867 + }, + "isolatedSum": { + "p50": 594.6240127086639, + "p90": 627.5839805603027, + "p95": 654.911994934082, + "p99": 717.1840071678162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 389.44000005722046, + "p90": 399.29598569869995, + "p95": 404.7999978065491, + "p99": 427.93598771095276 + }, + "combine": { + "p50": 610.1440191268921, + "p90": 616.2239909172058, + "p95": 618.4319853782654, + "p99": 627.3279786109924 + }, + "roundtrip": { + "p50": 976.9920110702515, + "p90": 984.6400022506714, + "p95": 988.0959987640381, + "p99": 994.8480129241943 + }, + "isolatedSum": { + "p50": 999.5840191841125, + "p90": 1015.5199766159058, + "p95": 1023.2319831848145, + "p99": 1055.2639663219452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 697.6640224456787, + "p90": 707.3280215263367, + "p95": 710.3040218353271, + "p99": 715.2959704399109 + }, + "combine": { + "p50": 1115.7759428024292, + "p90": 1120.6079721450806, + "p95": 1123.0080127716064, + "p99": 1130.2399635314941 + }, + "roundtrip": { + "p50": 1790.3039455413818, + "p90": 1807.487964630127, + "p95": 1820.0960159301758, + "p99": 1840.0319814682007 + }, + "isolatedSum": { + "p50": 1813.439965248108, + "p90": 1827.9359936714172, + "p95": 1833.3120346069336, + "p99": 1845.535933971405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1353.0240058898926, + "p90": 1364.1279935836792, + "p95": 1367.3919439315796, + "p99": 1375.2000331878662 + }, + "combine": { + "p50": 2139.967918395996, + "p90": 2149.2159366607666, + "p95": 2152.031898498535, + "p99": 2160.288095474243 + }, + "roundtrip": { + "p50": 3473.24800491333, + "p90": 3647.615909576416, + "p95": 3683.0079555511475, + "p99": 73872.00164794922 + }, + "isolatedSum": { + "p50": 3492.9919242858887, + "p90": 3513.343930244446, + "p95": 3519.4238424301147, + "p99": 3535.4881286621094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6461e658", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe", + "colorKey": "gb300_46b172da", + "comparisonKey": "398178595fe92367", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:58.637043+00:00", 
+ "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3eb2f0d7bdba0fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.37599420547485, + "p90": 140.60799777507782, + "p95": 145.21600306034088, + "p99": 153.60000729560852 + }, + 
"combine": { + "p50": 127.74400413036346, + "p90": 134.71999764442444, + "p95": 136.7039978504181, + "p99": 143.77599954605103 + }, + "roundtrip": { + "p50": 229.18400168418884, + "p90": 239.04000222682953, + "p95": 242.33600497245789, + "p99": 251.64800882339478 + }, + "isolatedSum": { + "p50": 257.1199983358383, + "p90": 275.32799541950226, + "p95": 281.920000910759, + "p99": 297.37600684165955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 167.87199676036835, + "p90": 178.24000120162964, + "p95": 181.60000443458557, + "p99": 191.103994846344 + }, + "combine": { + "p50": 169.53599452972412, + "p90": 175.07199943065643, + "p95": 180.60800433158875, + "p99": 185.85599958896637 + }, + "roundtrip": { + "p50": 307.1039915084839, + "p90": 318.015992641449, + "p95": 320.8000063896179, + "p99": 327.61600613594055 + }, + "isolatedSum": { + "p50": 337.40799129009247, + "p90": 353.3120006322861, + "p95": 362.2080087661743, + "p99": 376.95999443531036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.27199637889862, + "p90": 246.75199389457703, + "p95": 250.36799907684326, + "p99": 263.9999985694885 + }, + "combine": { + "p50": 285.8879864215851, + "p90": 294.68798637390137, + "p95": 296.9279885292053, + "p99": 308.03200602531433 + }, + "roundtrip": { + "p50": 475.2959907054901, + "p90": 486.59199476242065, + "p95": 492.19200015068054, + "p99": 518.4959769248962 + }, + "isolatedSum": { + "p50": 524.1599828004837, + "p90": 
541.4399802684784, + "p95": 547.2959876060486, + "p99": 572.0320045948029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 366.40000343322754, + "p90": 375.2959966659546, + "p95": 377.6000142097473, + "p99": 382.81598687171936 + }, + "combine": { + "p50": 490.9439980983734, + "p90": 496.99199199676514, + "p95": 500.9920001029968, + "p99": 504.89598512649536 + }, + "roundtrip": { + "p50": 825.0880241394043, + "p90": 833.3759903907776, + "p95": 837.4080061912537, + "p99": 843.5519933700562 + }, + "isolatedSum": { + "p50": 857.344001531601, + "p90": 872.2879886627197, + "p95": 878.5920143127441, + "p99": 887.7119719982147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 622.4960088729858, + "p90": 631.4240097999573, + "p95": 634.0159773826599, + "p99": 651.1039733886719 + }, + "combine": { + "p50": 871.999979019165, + "p90": 877.951979637146, + "p95": 882.7199935913086, + "p99": 885.7600092887878 + }, + "roundtrip": { + "p50": 1461.0559940338135, + "p90": 1469.696044921875, + "p95": 1473.9199876785278, + "p99": 1483.1039905548096 + }, + "isolatedSum": { + "p50": 1494.4959878921509, + "p90": 1509.3759894371033, + "p95": 1516.7359709739685, + "p99": 1536.8639826774597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1136.191964149475, + "p90": 1144.1919803619385, + "p95": 1146.239995956421, + "p99": 1153.823971748352 + }, + "combine": { + "p50": 1630.3679943084717, + "p90": 1637.1840238571167, + "p95": 1639.7440433502197, + "p99": 1646.5280055999756 + }, + "roundtrip": { + "p50": 2736.3200187683105, + "p90": 2745.8879947662354, + "p95": 2748.6400604248047, + "p99": 2757.3440074920654 + }, + "isolatedSum": { + "p50": 2766.559958457947, + "p90": 2781.376004219055, + "p95": 2785.9840393066406, + "p99": 2800.3519773483276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a4c44aca", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_6e04dda3", + "comparisonKey": "1dcefebf80b3425d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:38.675858+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.16799974441528, + "p90": 139.5840048789978, + "p95": 151.45599842071533, + "p99": 196.96000218391418 + }, + "combine": { + "p50": 133.7919980287552, + "p90": 141.79199934005737, + "p95": 146.11199498176575, + "p99": 173.63199591636658 + }, + "roundtrip": { + "p50": 235.3920042514801, + "p90": 244.28799748420715, + "p95": 253.24800610542297, + "p99": 309.53601002693176 + }, + "isolatedSum": { + "p50": 260.95999777317047, + "p90": 281.3760042190552, + "p95": 297.5679934024811, + "p99": 370.59199810028076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 159.10400450229645, + "p90": 169.69600319862366, + "p95": 178.52799594402313, + "p99": 251.71199440956116 + }, + "combine": { + "p50": 
182.49599635601044, + "p90": 190.2720034122467, + "p95": 193.27999651432037, + "p99": 227.87199914455414 + }, + "roundtrip": { + "p50": 316.6719973087311, + "p90": 325.3760039806366, + "p95": 328.6080062389374, + "p99": 369.7600066661835 + }, + "isolatedSum": { + "p50": 341.6000008583069, + "p90": 359.96800661087036, + "p95": 371.8079924583435, + "p99": 479.5839935541153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.61600530147552, + "p90": 240.12799561023712, + "p95": 242.5599992275238, + "p99": 253.63200902938843 + }, + "combine": { + "p50": 345.8879888057709, + "p90": 355.26400804519653, + "p95": 356.9920063018799, + "p99": 363.45601081848145 + }, + "roundtrip": { + "p50": 545.087993144989, + "p90": 553.4399747848511, + "p95": 557.4399828910828, + "p99": 565.5999779701233 + }, + "isolatedSum": { + "p50": 577.5039941072464, + "p90": 595.3920036554337, + "p95": 599.5520055294037, + "p99": 617.0880198478699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 369.7600066661835, + "p90": 377.9520094394684, + "p95": 381.47199153900146, + "p99": 390.30399918556213 + }, + "combine": { + "p50": 623.520016670227, + "p90": 627.839982509613, + "p95": 630.2080154418945, + "p99": 640.8640146255493 + }, + "roundtrip": { + "p50": 958.7839841842651, + "p90": 968.9919948577881, + "p95": 973.1199741363525, + "p99": 981.5359711647034 + }, + "isolatedSum": { + "p50": 993.2800233364105, + "p90": 1005.7919919490814, + "p95": 
1011.680006980896, + "p99": 1031.1680138111115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 688.3839964866638, + "p90": 700.5119919776917, + "p95": 706.8799734115601, + "p99": 729.0880084037781 + }, + "combine": { + "p50": 1134.4000101089478, + "p90": 1143.9039707183838, + "p95": 1148.9919424057007, + "p99": 1179.8720359802246 + }, + "roundtrip": { + "p50": 1797.2160577774048, + "p90": 1807.647943496704, + "p95": 1812.4480247497559, + "p99": 1838.1439447402954 + }, + "isolatedSum": { + "p50": 1822.7840065956116, + "p90": 1844.4159626960754, + "p95": 1855.8719158172607, + "p99": 1908.9600443840027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1343.5839414596558, + "p90": 1355.5200099945068, + "p95": 1360.80002784729, + "p99": 1370.4639673233032 + }, + "combine": { + "p50": 2170.7839965820312, + "p90": 2175.935983657837, + "p95": 2177.6320934295654, + "p99": 2184.6399307250977 + }, + "roundtrip": { + "p50": 3490.015983581543, + "p90": 3502.079963684082, + "p95": 3506.688117980957, + "p99": 3515.6800746917725 + }, + "isolatedSum": { + "p50": 3514.367938041687, + "p90": 3531.4559936523438, + "p95": 3538.4321212768555, + "p99": 3555.103898048401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-3622f171", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_04de5a5b", + "comparisonKey": "173fe7343d391895", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:55.701042+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.98399817943573, + "p90": 135.13599336147308, + "p95": 138.5599970817566, + "p99": 159.61599349975586 + }, + "combine": { + "p50": 122.56000190973282, + "p90": 131.96800649166107, + "p95": 134.3040019273758, + "p99": 137.92000710964203 + }, + "roundtrip": { + "p50": 223.7440049648285, + "p90": 232.7360063791275, + "p95": 236.83199286460876, + "p99": 269.4079875946045 + }, + "isolatedSum": { + "p50": 248.54400008916855, + "p90": 267.10399985313416, + "p95": 272.8639990091324, + "p99": 297.5360006093979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.59199380874634, + "p90": 172.992005944252, + "p95": 177.12000012397766, + "p99": 196.31999731063843 + }, + "combine": { + "p50": 168.83200407028198, + "p90": 174.43199455738068, + "p95": 178.46399545669556, + "p99": 183.9359998703003 + }, + "roundtrip": { + "p50": 301.66399478912354, + "p90": 312.0959997177124, + "p95": 315.0720000267029, + "p99": 327.13600993156433 + }, + "isolatedSum": { + "p50": 331.4239978790283, + "p90": 347.4240005016327, + "p95": 355.5839955806732, + "p99": 380.2559971809387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 229.12000119686127, + "p90": 239.9359941482544, + "p95": 243.23199689388275, + 
"p99": 252.41601467132568 + }, + "combine": { + "p50": 282.46399760246277, + "p90": 291.0720109939575, + "p95": 293.503999710083, + "p99": 298.72000217437744 + }, + "roundtrip": { + "p50": 466.72001481056213, + "p90": 477.24801301956177, + "p95": 480.0960123538971, + "p99": 493.3440089225769 + }, + "isolatedSum": { + "p50": 511.58399879932404, + "p90": 531.0080051422119, + "p95": 536.7359966039658, + "p99": 551.1360168457031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 357.7280044555664, + "p90": 365.9200072288513, + "p95": 369.4399893283844, + "p99": 382.752001285553 + }, + "combine": { + "p50": 485.9519898891449, + "p90": 491.7440116405487, + "p95": 493.6639964580536, + "p99": 506.5600275993347 + }, + "roundtrip": { + "p50": 812.0959997177124, + "p90": 820.032000541687, + "p95": 824.3520259857178, + "p99": 856.544017791748 + }, + "isolatedSum": { + "p50": 843.6799943447113, + "p90": 857.6640188694, + "p95": 863.103985786438, + "p99": 889.3120288848877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 617.7279949188232, + "p90": 627.5839805603027, + "p95": 633.6960196495056, + "p99": 653.6960005760193 + }, + "combine": { + "p50": 882.6239705085754, + "p90": 888.480007648468, + "p95": 895.0719833374023, + "p99": 907.1999788284302 + }, + "roundtrip": { + "p50": 1470.3999757766724, + "p90": 1481.8240404129028, + "p95": 1489.6639585494995, + "p99": 1537.600040435791 + }, + "isolatedSum": { + "p50": 1500.3519654273987, + 
"p90": 1516.0639882087708, + "p95": 1528.768002986908, + "p99": 1560.8959794044495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1143.0079936981201, + "p90": 1151.5840291976929, + "p95": 1155.2000045776367, + "p99": 1165.6320095062256 + }, + "combine": { + "p50": 1623.4240531921387, + "p90": 1631.9680213928223, + "p95": 1634.4000101089478, + "p99": 1637.0559930801392 + }, + "roundtrip": { + "p50": 2739.487886428833, + "p90": 2748.5439777374268, + "p95": 2751.5199184417725, + "p99": 2760.0319385528564 + }, + "isolatedSum": { + "p50": 2766.432046890259, + "p90": 2783.552050590515, + "p95": 2789.6000146865845, + "p99": 2802.6880025863647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cf8cb8f1", + "identity": "gb300|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_8cda999b", + "comparisonKey": "f8887e85df9ef186", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:02.039748+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 
(EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.40000200271606, + "p90": 143.45599710941315, + "p95": 152.99199521541595, + "p99": 182.0479929447174 + }, + "combine": { + "p50": 124.92799758911133, + "p90": 135.04000008106232, + "p95": 144.06399428844452, + "p99": 174.23999309539795 + }, + "roundtrip": { + "p50": 226.97600722312927, + "p90": 238.49600553512573, + "p95": 244.63999271392822, + "p99": 289.34401273727417 + }, + "isolatedSum": { + "p50": 255.3279995918274, + "p90": 278.49599719047546, + "p95": 297.0559895038605, + "p99": 356.28798604011536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + 
"fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 168.96000504493713, + "p90": 185.37600338459015, + "p95": 201.37600600719452, + "p99": 220.99199891090393 + }, + "combine": { + "p50": 170.33599317073822, + "p90": 187.96800076961517, + "p95": 207.8080028295517, + "p99": 230.6559979915619 + }, + "roundtrip": { + "p50": 307.20001459121704, + "p90": 325.53601264953613, + "p95": 339.2319977283478, + "p99": 365.34398794174194 + }, + "isolatedSum": { + "p50": 339.29599821567535, + "p90": 373.3440041542053, + "p95": 409.1840088367462, + "p99": 451.6479969024658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 236.9920015335083, + "p90": 255.74401021003723, + "p95": 269.4399952888489, + "p99": 290.8479869365692 + }, + "combine": { + "p50": 286.17599606513977, + "p90": 314.91199135780334, + "p95": 322.11199402809143, + "p99": 347.4879860877991 + }, + "roundtrip": { + "p50": 475.8400022983551, + "p90": 495.712012052536, + "p95": 506.1759948730469, + "p99": 520.031988620758 + }, + "isolatedSum": { + "p50": 523.1679975986481, + "p90": 570.6560015678406, + "p95": 591.5519893169403, + "p99": 638.3359730243683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 363.3919954299927, + "p90": 372.3199963569641, + "p95": 378.1439960002899, + "p99": 398.4000086784363 + }, + "combine": { + 
"p50": 488.5439872741699, + "p90": 503.7440061569214, + "p95": 514.1440033912659, + "p99": 544.6079969406128 + }, + "roundtrip": { + "p50": 817.3440098762512, + "p90": 844.3520069122314, + "p95": 862.3679876327515, + "p99": 901.0879993438721 + }, + "isolatedSum": { + "p50": 851.9359827041626, + "p90": 876.0640025138855, + "p95": 892.2879993915558, + "p99": 943.0080056190491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 618.0800199508667, + "p90": 628.1599998474121, + "p95": 634.4959735870361, + "p99": 674.3999719619751 + }, + "combine": { + "p50": 880.9279799461365, + "p90": 888.6719942092896, + "p95": 894.1119909286499, + "p99": 919.0400242805481 + }, + "roundtrip": { + "p50": 1469.8879718780518, + "p90": 1481.5360307693481, + "p95": 1485.6640100479126, + "p99": 1501.4079809188843 + }, + "isolatedSum": { + "p50": 1499.0079998970032, + "p90": 1516.8319940567017, + "p95": 1528.607964515686, + "p99": 1593.4399962425232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1147.264003753662, + "p90": 1166.4960384368896, + "p95": 1179.967999458313, + "p99": 1223.1680154800415 + }, + "combine": { + "p50": 1625.8879899978638, + "p90": 1658.3679914474487, + "p95": 1673.7279891967773, + "p99": 1715.0720357894897 + }, + "roundtrip": { + "p50": 2747.904062271118, + "p90": 2766.335964202881, + "p95": 2777.0559787750244, + "p99": 2807.6798915863037 + }, + "isolatedSum": { + "p50": 2773.151993751526, + "p90": 2824.8640298843384, 
+ "p95": 2853.6959886550903, + "p99": 2938.2400512695312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb796145", + "identity": "gb300|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_20de545c", + "comparisonKey": "c081f7cbf8991063", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:05.995857+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.98400300741196, + "p90": 140.70400595664978, + "p95": 150.52799880504608, + "p99": 158.30400586128235 + }, + "combine": { + "p50": 128.38399410247803, + "p90": 151.58399939537048, + "p95": 158.11200439929962, + "p99": 186.5919977426529 + }, + "roundtrip": { + "p50": 208.44799280166626, + "p90": 224.38399493694305, + "p95": 234.78400707244873, + "p99": 256.9279968738556 + }, + "isolatedSum": { + "p50": 238.36799710988998, + "p90": 292.28800535202026, + "p95": 308.6400032043457, + "p99": 344.89600360393524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.08799529075623, + "p90": 181.7920058965683, + "p95": 190.20800292491913, + "p99": 203.5840004682541 + }, + "combine": { + "p50": 172.44799435138702, + "p90": 207.87200331687927, + "p95": 215.45599400997162, + "p99": 231.3919961452484 + }, + "roundtrip": { + "p50": 285.7919931411743, + "p90": 297.34399914741516, + "p95": 301.88798904418945, + "p99": 314.2400085926056 + }, + "isolatedSum": { + "p50": 321.53598964214325, + "p90": 389.66400921344757, + "p95": 405.66399693489075, + "p99": 434.9759966135025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 
2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 219.42399442195892, + "p90": 229.47199642658234, + "p95": 233.18399488925934, + "p99": 244.6720004081726 + }, + "combine": { + "p50": 291.3599908351898, + "p90": 313.1200075149536, + "p95": 321.3439881801605, + "p99": 338.4320139884949 + }, + "roundtrip": { + "p50": 457.3439955711365, + "p90": 473.28001260757446, + "p95": 484.03200507164, + "p99": 506.0480237007141 + }, + "isolatedSum": { + "p50": 510.78398525714874, + "p90": 542.592003941536, + "p95": 554.5279830694199, + "p99": 583.1040143966675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 348.7679958343506, + "p90": 387.7120018005371, + "p95": 398.52800965309143, + "p99": 410.3359878063202 + }, + "combine": { + "p50": 491.7120039463043, + "p90": 514.8800015449524, + "p95": 528.2559990882874, + "p99": 548.0319857597351 + }, + "roundtrip": { + "p50": 807.807981967926, + "p90": 841.3119912147522, + "p95": 851.6479730606079, + "p99": 872.3520040512085 + }, + "isolatedSum": { + "p50": 840.4799997806549, + "p90": 902.5920033454895, + "p95": 926.7840087413788, + "p99": 958.3679735660553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 606.1760187149048, + "p90": 622.3999857902527, + "p95": 634.335994720459, + "p99": 659.775972366333 + }, + "combine": { + "p50": 879.1679739952087, + "p90": 896.3199853897095, + "p95": 911.1040234565735, + 
"p99": 930.079996585846 + }, + "roundtrip": { + "p50": 1450.9119987487793, + "p90": 1460.1279497146606, + "p95": 1462.5600576400757, + "p99": 1471.295952796936 + }, + "isolatedSum": { + "p50": 1485.3439927101135, + "p90": 1518.7199711799622, + "p95": 1545.4400181770325, + "p99": 1589.855968952179 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1112.9920482635498, + "p90": 1144.544005393982, + "p95": 1155.4559469223022, + "p99": 1189.9839639663696 + }, + "combine": { + "p50": 1635.1360082626343, + "p90": 1682.8479766845703, + "p95": 1697.3439455032349, + "p99": 2095.3280925750732 + }, + "roundtrip": { + "p50": 2714.6239280700684, + "p90": 2737.8880977630615, + "p95": 2751.7120838165283, + "p99": 2765.2480602264404 + }, + "isolatedSum": { + "p50": 2748.128056526184, + "p90": 2827.3919820785522, + "p95": 2852.799892425537, + "p99": 3285.312056541443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ddfe8a4d", + "identity": "gb300|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "ed367f24667806d7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:02:23.843312+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03f98832f76b043", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 364.25599455833435, + "p90": 387.6799941062927, + "p95": 414.88000750541687, + "p99": 506.4319968223572 + }, + "combine": { + "p50": 101.1200025677681, + "p90": 107.13600367307663, + "p95": 112.03200370073318, + "p99": 137.1839940547943 + }, + "roundtrip": { + "p50": 436.7679953575134, + "p90": 456.7039906978607, + "p95": 470.2399969100952, + "p99": 547.4879741668701 + }, + "isolatedSum": { + "p50": 465.37599712610245, + "p90": 494.81599777936935, + "p95": 
526.91201120615, + "p99": 643.6159908771515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 358.65598917007446, + "p90": 376.5439987182617, + "p95": 382.07998871803284, + "p99": 406.7839980125427 + }, + "combine": { + "p50": 139.23199474811554, + "p90": 145.08800208568573, + "p95": 147.42399752140045, + "p99": 156.25600516796112 + }, + "roundtrip": { + "p50": 476.7040014266968, + "p90": 491.2639856338501, + "p95": 497.3120093345642, + "p99": 520.8320021629333 + }, + "isolatedSum": { + "p50": 497.88798391819, + "p90": 521.6320008039474, + "p95": 529.5039862394333, + "p99": 563.0400031805038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 429.9519956111908, + "p90": 446.52798771858215, + "p95": 450.75199007987976, + "p99": 460.89598536491394 + }, + "combine": { + "p50": 208.95999670028687, + "p90": 214.6880030632019, + "p95": 217.0879989862442, + "p99": 224.44799542427063 + }, + "roundtrip": { + "p50": 619.9359893798828, + "p90": 635.1360082626343, + "p95": 640.0960087776184, + "p99": 655.3599834442139 + }, + "isolatedSum": { + "p50": 638.9119923114777, + "p90": 661.2159907817841, + "p95": 667.839989066124, + "p99": 685.3439807891846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + 
"dispatch": { + "p50": 587.4879956245422, + "p90": 602.3679971694946, + "p95": 606.8480014801025, + "p99": 617.4399852752686 + }, + "combine": { + "p50": 379.5520067214966, + "p90": 389.75998759269714, + "p95": 393.75999569892883, + "p99": 404.57600355148315 + }, + "roundtrip": { + "p50": 965.0880098342896, + "p90": 978.9119958877563, + "p95": 984.000027179718, + "p99": 993.7599897384644 + }, + "isolatedSum": { + "p50": 967.0400023460388, + "p90": 992.1279847621918, + "p95": 1000.6079971790314, + "p99": 1022.0159888267517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 896.7360258102417, + "p90": 912.2560024261475, + "p95": 917.9199934005737, + "p99": 943.9679980278015 + }, + "combine": { + "p50": 801.5360236167908, + "p90": 809.6960186958313, + "p95": 813.0239844322205, + "p99": 820.7679986953735 + }, + "roundtrip": { + "p50": 1643.1360244750977, + "p90": 1662.943959236145, + "p95": 1671.0079908370972, + "p99": 1700.8320093154907 + }, + "isolatedSum": { + "p50": 1698.2720494270325, + "p90": 1721.9520211219788, + "p95": 1730.9439778327942, + "p99": 1764.735996723175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1514.8799419403076, + "p90": 1529.312014579773, + "p95": 1533.9200496673584, + "p99": 1554.8160076141357 + }, + "combine": { + "p50": 1505.9839487075806, + "p90": 1515.455961227417, + "p95": 1521.4719772338867, + "p99": 1559.0399503707886 + }, + "roundtrip": { + "p50": 2987.391948699951, + "p90": 
3007.551908493042, + "p95": 3022.0160484313965, + "p99": 3057.0240020751953 + }, + "isolatedSum": { + "p50": 3020.863890647888, + "p90": 3044.76797580719, + "p95": 3055.392026901245, + "p99": 3113.8559579849243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7de7dc87", + "identity": "gb300|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "6233cb31a6511067", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:03:24.409546+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a9df48e6438e77a", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 419.5840060710907, + "p90": 455.3599953651428, + "p95": 466.65599942207336, + "p99": 500.2560019493103 + }, + "combine": { + "p50": 109.02400314807892, + "p90": 116.19199812412262, + "p95": 121.79200351238251, + "p99": 157.82399475574493 + }, + "roundtrip": { + "p50": 488.0959987640381, + "p90": 516.5759921073914, + "p95": 532.6719880104065, + "p99": 566.1759972572327 + }, + "isolatedSum": { + "p50": 528.6080092191696, + "p90": 571.5519934892654, + "p95": 588.4480029344559, + "p99": 658.0799967050552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 407.584011554718, + "p90": 436.8320107460022, + "p95": 452.5440037250519, + "p99": 487.7760112285614 + }, + "combine": { + "p50": 144.83200013637543, + "p90": 153.3759981393814, + "p95": 156.2879979610443, + "p99": 185.5040043592453 + }, + "roundtrip": { + "p50": 525.7920026779175, + "p90": 546.5599894523621, + "p95": 556.0640096664429, + "p99": 583.5840106010437 + }, + "isolatedSum": { + "p50": 552.4160116910934, + "p90": 590.2080088853836, + "p95": 608.8320016860962, + "p99": 673.2800155878067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + 
"combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 479.0720045566559, + "p90": 498.3679950237274, + "p95": 506.9440007209778, + "p99": 537.8879904747009 + }, + "combine": { + "p50": 221.98399901390076, + "p90": 229.98400032520294, + "p95": 232.96000063419342, + "p99": 249.31199848651886 + }, + "roundtrip": { + "p50": 695.6800222396851, + "p90": 717.3759937286377, + "p95": 724.6080040931702, + "p99": 738.207995891571 + }, + "isolatedSum": { + "p50": 701.0560035705566, + "p90": 728.3519953489304, + "p95": 739.9040013551712, + "p99": 787.1999889612198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 665.8239960670471, + "p90": 684.8000288009644, + "p95": 691.5839910507202, + "p99": 733.2479953765869 + }, + "combine": { + "p50": 460.03198623657227, + "p90": 468.1920111179352, + "p95": 471.1039960384369, + "p99": 502.6559829711914 + }, + "roundtrip": { + "p50": 1079.7760486602783, + "p90": 1101.1199951171875, + "p95": 1109.7279787063599, + "p99": 1160.0639820098877 + }, + "isolatedSum": { + "p50": 1125.8559823036194, + "p90": 1152.9920399188995, + "p95": 1162.687987089157, + "p99": 1235.9039783477783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1032.0320129394531, + "p90": 1053.0879497528076, + "p95": 1060.0639581680298, + 
"p99": 1100.3199815750122 + }, + "combine": { + "p50": 832.5759768486023, + "p90": 841.8880105018616, + "p95": 845.7599878311157, + "p99": 873.2799887657166 + }, + "roundtrip": { + "p50": 1827.072024345398, + "p90": 1850.8800268173218, + "p95": 1859.5839738845825, + "p99": 1893.7920331954956 + }, + "isolatedSum": { + "p50": 1864.6079897880554, + "p90": 1894.9759602546692, + "p95": 1905.8239459991455, + "p99": 1973.5999703407288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1785.5039834976196, + "p90": 1804.57603931427, + "p95": 1814.0159845352173, + "p99": 1836.7999792099 + }, + "combine": { + "p50": 1569.2479610443115, + "p90": 1576.8320560455322, + "p95": 1579.8720121383667, + "p99": 1595.4240560531616 + }, + "roundtrip": { + "p50": 3308.703899383545, + "p90": 3325.0880241394043, + "p95": 3332.927942276001, + "p99": 3370.2399730682373 + }, + "isolatedSum": { + "p50": 3354.751944541931, + "p90": 3381.4080953598022, + "p95": 3393.887996673584, + "p99": 3432.2240352630615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d318914f", + "identity": "gb300|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "6304da2c595b352d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:24.136861+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 421.82400822639465, + "p90": 458.3680033683777, + "p95": 474.4639992713928, + "p99": 513.1840109825134 + }, + "combine": { + "p50": 115.42399972677231, + "p90": 123.1359988451004, + "p95": 129.7920048236847, + "p99": 178.43200266361237 + }, + "roundtrip": { + "p50": 496.3519871234894, + "p90": 528.223991394043, + "p95": 545.3439950942993, + "p99": 
595.8080291748047 + }, + "isolatedSum": { + "p50": 537.248007953167, + "p90": 581.5040022134781, + "p95": 604.2560040950775, + "p99": 691.6160136461258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 413.63200545310974, + "p90": 447.1360146999359, + "p95": 462.3680114746094, + "p99": 499.83999133110046 + }, + "combine": { + "p50": 156.51200711727142, + "p90": 161.98399662971497, + "p95": 164.09599781036377, + "p99": 168.03200542926788 + }, + "roundtrip": { + "p50": 545.9840297698975, + "p90": 564.0320181846619, + "p95": 569.8559880256653, + "p99": 608.4480285644531 + }, + "isolatedSum": { + "p50": 570.1440125703812, + "p90": 609.1200113296509, + "p95": 626.4640092849731, + "p99": 667.8719967603683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 518.7199711799622, + "p90": 539.1680002212524, + "p95": 545.3439950942993, + "p99": 579.9360275268555 + }, + "combine": { + "p50": 258.36798548698425, + "p90": 265.3760015964508, + "p95": 268.73600482940674, + "p99": 284.60800647735596 + }, + "roundtrip": { + "p50": 752.7679800987244, + "p90": 772.0320224761963, + "p95": 776.8319845199585, + "p99": 791.9999957084656 + }, + "isolatedSum": { + "p50": 777.0879566669464, + "p90": 804.5440018177032, + "p95": 814.079999923706, + "p99": 864.5440340042114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 2, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 726.0800004005432, + "p90": 743.8079714775085, + "p95": 749.2799758911133, + "p99": 763.6160254478455 + }, + "combine": { + "p50": 469.2479968070984, + "p90": 482.2719991207123, + "p95": 495.58401107788086, + "p99": 516.3519978523254 + }, + "roundtrip": { + "p50": 1159.872055053711, + "p90": 1178.9120435714722, + "p95": 1186.8480443954468, + "p99": 1250.8480548858643 + }, + "isolatedSum": { + "p50": 1195.3279972076416, + "p90": 1226.0799705982208, + "p95": 1244.8639869689941, + "p99": 1279.968023300171 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1150.6240367889404, + "p90": 1169.4719791412354, + "p95": 1176.192045211792, + "p99": 1184.8959922790527 + }, + "combine": { + "p50": 846.8480110168457, + "p90": 854.52800989151, + "p95": 856.3200235366821, + "p99": 863.3279800415039 + }, + "roundtrip": { + "p50": 1968.4159755706787, + "p90": 1991.3599491119385, + "p95": 2007.200002670288, + "p99": 2045.9840297698975 + }, + "isolatedSum": { + "p50": 1997.4720478057861, + "p90": 2023.9999890327454, + "p95": 2032.5120687484741, + "p99": 2048.2239723205566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2042.1760082244873, + "p90": 2056.288003921509, + "p95": 2061.824083328247, + "p99": 2072.4480152130127 + }, + "combine": { + "p50": 1591.3599729537964, + "p90": 1599.8719930648804, + "p95": 
1603.8399934768677, + "p99": 1614.2719984054565 + }, + "roundtrip": { + "p50": 3593.503952026367, + "p90": 3613.152027130127, + "p95": 3620.2878952026367, + "p99": 3650.9439945220947 + }, + "isolatedSum": { + "p50": 3633.5359811782837, + "p90": 3656.159996986389, + "p95": 3665.6640768051147, + "p99": 3686.7200136184692 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6fe76bb4", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_c4ac4643", + "comparisonKey": "9bdacb5bc6bbc14d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:02.493622+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.167997777462, + "p90": 124.76799637079239, + "p95": 129.2160004377365, + "p99": 144.67200636863708 + }, + "combine": { + "p50": 120.35199999809265, + "p90": 126.14400684833527, + "p95": 130.2720010280609, + "p99": 133.85599851608276 + }, + "roundtrip": { + "p50": 280.8000147342682, + "p90": 295.80798745155334, + "p95": 301.0239899158478, + "p99": 318.59201192855835 + }, + "isolatedSum": { + "p50": 235.51999777555466, + "p90": 250.91200321912766, + "p95": 259.4880014657974, + "p99": 278.52800488471985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.039994597435, + "p90": 155.87200224399567, + "p95": 158.91200304031372, + "p99": 169.11999881267548 + }, + "combine": { + "p50": 162.52799332141876, + "p90": 167.84000396728516, + "p95": 169.72799599170685, + "p99": 178.30400168895721 + }, + "roundtrip": { + "p50": 361.88799142837524, + "p90": 370.7199990749359, + "p95": 375.42399764060974, + "p99": 381.6959857940674 + }, + "isolatedSum": { + "p50": 309.56798791885376, + "p90": 323.7120062112808, + 
"p95": 328.63999903202057, + "p99": 347.4240005016327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.07999646663666, + "p90": 210.11200547218323, + "p95": 214.33599293231964, + "p99": 221.95200622081757 + }, + "combine": { + "p50": 283.58399868011475, + "p90": 290.71998596191406, + "p95": 292.928010225296, + "p99": 298.17599058151245 + }, + "roundtrip": { + "p50": 587.5840187072754, + "p90": 597.4720120429993, + "p95": 601.3439893722534, + "p99": 607.6160073280334 + }, + "isolatedSum": { + "p50": 485.6639951467514, + "p90": 500.8319914340973, + "p95": 507.26400315761566, + "p99": 520.12799680233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.59200382232666, + "p90": 324.22399520874023, + "p95": 327.5519907474518, + "p99": 341.8880105018616 + }, + "combine": { + "p50": 485.9519898891449, + "p90": 492.15999245643616, + "p95": 493.9520061016083, + "p99": 502.3040175437927 + }, + "roundtrip": { + "p50": 1015.5520439147949, + "p90": 1025.056004524231, + "p95": 1027.4879932403564, + "p99": 1035.904049873352 + }, + "isolatedSum": { + "p50": 800.5439937114716, + "p90": 816.3839876651764, + "p95": 821.5039968490601, + "p99": 844.1920280456543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 551.1999726295471, + "p90": 563.264012336731, + "p95": 568.5439705848694, + "p99": 578.5599946975708 + }, + "combine": { + "p50": 874.4000196456909, + "p90": 881.8879723548889, + "p95": 885.4079842567444, + "p99": 897.9520201683044 + }, + "roundtrip": { + "p50": 1861.8559837341309, + "p90": 1882.912039756775, + "p95": 1900.3839492797852, + "p99": 1915.4239892959595 + }, + "isolatedSum": { + "p50": 1425.599992275238, + "p90": 1445.1519846916199, + "p95": 1453.9519548416138, + "p99": 1476.5120148658752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1019.9359655380249, + "p90": 1033.3119630813599, + "p95": 1037.2480154037476, + "p99": 1045.9519624710083 + }, + "combine": { + "p50": 1622.2399473190308, + "p90": 1629.696011543274, + "p95": 1632.3519945144653, + "p99": 1639.0080451965332 + }, + "roundtrip": { + "p50": 3522.239923477173, + "p90": 3533.0240726470947, + "p95": 3537.2159481048584, + "p99": 3544.703960418701 + }, + "isolatedSum": { + "p50": 2642.1759128570557, + "p90": 2663.007974624634, + "p95": 2669.600009918213, + "p99": 2684.9600076675415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e404634", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "67d9b2df504c0ef6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:07.026763+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 435.4560077190399, + "p90": 475.3279983997345, + "p95": 485.0560128688812, + "p99": 501.6639828681946 + }, + "combine": { + "p50": 123.23199957609177, + "p90": 128.00000607967377, + "p95": 129.7920048236847, + 
"p99": 134.46399569511414 + }, + "roundtrip": { + "p50": 521.888017654419, + "p90": 553.7919998168945, + "p95": 559.6799850463867, + "p99": 572.4800229072571 + }, + "isolatedSum": { + "p50": 558.6880072951317, + "p90": 603.3280044794083, + "p95": 614.8480176925659, + "p99": 636.1279785633087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 433.4079921245575, + "p90": 471.5839922428131, + "p95": 478.4319996833801, + "p99": 502.0480155944824 + }, + "combine": { + "p50": 165.3759926557541, + "p90": 171.55200242996216, + "p95": 175.23199319839478, + "p99": 187.48800456523895 + }, + "roundtrip": { + "p50": 576.7040252685547, + "p90": 612.5440001487732, + "p95": 621.3120222091675, + "p99": 649.3120193481445 + }, + "isolatedSum": { + "p50": 598.7839847803116, + "p90": 643.1359946727753, + "p95": 653.6639928817749, + "p99": 689.5360201597214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 566.1439895629883, + "p90": 593.2160019874573, + "p95": 606.2080264091492, + "p99": 15038.2719039917 + }, + "combine": { + "p50": 281.5360128879547, + "p90": 290.0159955024719, + "p95": 293.88800263404846, + "p99": 634.2719793319702 + }, + "roundtrip": { + "p50": 808.4800243377686, + "p90": 833.2800269126892, + "p95": 840.8960103988647, + "p99": 42493.953704833984 + }, + "isolatedSum": { + "p50": 847.680002450943, + "p90": 883.2319974899292, + "p95": 900.0960290431976, + "p99": 15672.54388332367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 795.8080172538757, + "p90": 816.3520097732544, + "p95": 822.3999738693237, + "p99": 841.759979724884 + }, + "combine": { + "p50": 486.7520034313202, + "p90": 493.75998973846436, + "p95": 496.2559938430786, + "p99": 507.04002380371094 + }, + "roundtrip": { + "p50": 1243.9039945602417, + "p90": 1267.0719623565674, + "p95": 1273.4719514846802, + "p99": 1287.8400087356567 + }, + "isolatedSum": { + "p50": 1282.560020685196, + "p90": 1310.1119995117188, + "p95": 1318.6559677124023, + "p99": 1348.800003528595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1236.7680072784424, + "p90": 1264.0000581741333, + "p95": 1268.8640356063843, + "p99": 1278.2399654388428 + }, + "combine": { + "p50": 876.7039775848389, + "p90": 883.2640051841736, + "p95": 885.6319785118103, + "p99": 888.8639807701111 + }, + "roundtrip": { + "p50": 2060.2879524230957, + "p90": 2091.615915298462, + "p95": 2098.655939102173, + "p99": 2115.5519485473633 + }, + "isolatedSum": { + "p50": 2113.4719848632812, + "p90": 2147.264063358307, + "p95": 2154.4960141181946, + "p99": 2167.103946208954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2237.152099609375, + "p90": 2252.8960704803467, + "p95": 
2257.9519748687744, + "p99": 2266.495943069458 + }, + "combine": { + "p50": 1623.3919858932495, + "p90": 1632.4800252914429, + "p95": 1635.7439756393433, + "p99": 1649.664044380188 + }, + "roundtrip": { + "p50": 3829.8239707946777, + "p90": 3844.4159030914307, + "p95": 3849.855899810791, + "p99": 3864.4800186157227 + }, + "isolatedSum": { + "p50": 3860.5440855026245, + "p90": 3885.3760957717896, + "p95": 3893.6959505081177, + "p99": 3916.159987449646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16cb50ff", + "identity": "gb300|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb300_b1bd5887", + "comparisonKey": "ff56b33f9f8f54e2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:25.036765+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 354.91201281547546, + "p90": 382.4639916419983, + "p95": 390.8799886703491, + "p99": 470.0480103492737 + }, + "combine": { + "p50": 119.4240003824234, + "p90": 145.11999487876892, + "p95": 153.28000485897064, + "p99": 175.26400089263916 + }, + "roundtrip": { + "p50": 444.2560076713562, + "p90": 468.3839976787567, + "p95": 477.75998711586, + "p99": 530.0480127334595 + }, + "isolatedSum": { + "p50": 474.33601319789886, + "p90": 527.5839865207672, + "p95": 544.1599935293198, + "p99": 645.3120112419128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 382.9120099544525, + "p90": 406.72001242637634, + "p95": 417.4399971961975, + "p99": 446.8800127506256 + }, + "combine": { + "p50": 163.7440025806427, + "p90": 188.1600022315979, + "p95": 204.73599433898926, + "p99": 225.75999796390533 + }, + "roundtrip": { + "p50": 531.6799879074097, + "p90": 555.2319884300232, + "p95": 561.7920160293579, + "p99": 
580.2239775657654 + }, + "isolatedSum": { + "p50": 546.6560125350952, + "p90": 594.8800146579742, + "p95": 622.1759915351868, + "p99": 672.640010714531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 496.3519871234894, + "p90": 523.1360197067261, + "p95": 538.7200117111206, + "p99": 555.616021156311 + }, + "combine": { + "p50": 276.5119969844818, + "p90": 292.83198714256287, + "p95": 312.73600459098816, + "p99": 328.92799377441406 + }, + "roundtrip": { + "p50": 754.0799975395203, + "p90": 775.7760286331177, + "p95": 786.7199778556824, + "p99": 808.896005153656 + }, + "isolatedSum": { + "p50": 772.8639841079712, + "p90": 815.9680068492889, + "p95": 851.4560163021088, + "p99": 884.5440149307251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 730.4319739341736, + "p90": 747.871994972229, + "p95": 755.2000284194946, + "p99": 771.0080146789551 + }, + "combine": { + "p50": 486.4000082015991, + "p90": 492.12801456451416, + "p95": 494.30400133132935, + "p99": 502.3679733276367 + }, + "roundtrip": { + "p50": 1187.9040002822876, + "p90": 1203.6479711532593, + "p95": 1210.6560468673706, + "p99": 1251.0720491409302 + }, + "isolatedSum": { + "p50": 1216.8319821357727, + "p90": 1240.0000095367432, + "p95": 1249.504029750824, + "p99": 1273.3759880065918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 3, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1182.911992073059, + "p90": 1205.3760290145874, + "p95": 1213.055968284607, + "p99": 1229.024052619934 + }, + "combine": { + "p50": 863.4560108184814, + "p90": 879.7119855880737, + "p95": 893.6960101127625, + "p99": 917.3759818077087 + }, + "roundtrip": { + "p50": 2011.7440223693848, + "p90": 2034.208059310913, + "p95": 2043.071985244751, + "p99": 2103.9040088653564 + }, + "isolatedSum": { + "p50": 2046.3680028915405, + "p90": 2085.088014602661, + "p95": 2106.7519783973694, + "p99": 2146.400034427643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2232.640027999878, + "p90": 2257.5039863586426, + "p95": 2267.359972000122, + "p99": 2284.6078872680664 + }, + "combine": { + "p50": 1604.5759916305542, + "p90": 1617.184042930603, + "p95": 1625.1840591430664, + "p99": 1648.6400365829468 + }, + "roundtrip": { + "p50": 3802.464008331299, + "p90": 3894.1121101379395, + "p95": 3906.0161113739014, + "p99": 24122.400283813477 + }, + "isolatedSum": { + "p50": 3837.216019630432, + "p90": 3874.6880292892456, + "p95": 3892.5440311431885, + "p99": 3933.247923851013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b8ab0990", + "identity": "gb300|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_b1b733fb", + "comparisonKey": "5ba58d24d34449fd", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:31.141043+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.1.0+814e508", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.68799895048141, + "p90": 104.92800176143646, + "p95": 110.11199653148651, + "p99": 125.08800625801086 + }, + 
"combine": { + "p50": 121.0239976644516, + "p90": 128.7360042333603, + "p95": 134.5279961824417, + "p99": 161.6320013999939 + }, + "roundtrip": { + "p50": 275.07200837135315, + "p90": 295.5839931964874, + "p95": 304.9919903278351, + "p99": 345.8879888057709 + }, + "isolatedSum": { + "p50": 219.711996614933, + "p90": 233.66400599479675, + "p95": 244.63999271392822, + "p99": 286.72000765800476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 131.32800161838531, + "p90": 150.14399588108063, + "p95": 161.02400422096252, + "p99": 191.64800643920898 + }, + "combine": { + "p50": 162.78399527072906, + "p90": 177.59999632835388, + "p95": 189.40800428390503, + "p99": 216.15999937057495 + }, + "roundtrip": { + "p50": 346.1439907550812, + "p90": 385.18399000167847, + "p95": 400.0000059604645, + "p99": 410.68801283836365 + }, + "isolatedSum": { + "p50": 294.1119968891144, + "p90": 327.7439922094345, + "p95": 350.43200850486755, + "p99": 407.80800580978394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 185.5359971523285, + "p90": 200.25600492954254, + "p95": 210.01599729061127, + "p99": 234.047994017601 + }, + "combine": { + "p50": 282.368004322052, + "p90": 292.83198714256287, + "p95": 309.56798791885376, + "p99": 333.0880105495453 + }, + "roundtrip": { + "p50": 568.8959956169128, + "p90": 579.1360139846802, + "p95": 583.1040143966675, + "p99": 599.3279814720154 + }, + "isolatedSum": { + "p50": 467.9040014743805, + "p90": 493.0879920721054, + 
"p95": 519.583985209465, + "p99": 567.1360045671463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 296.1919903755188, + "p90": 310.8159899711609, + "p95": 324.41601157188416, + "p99": 350.43200850486755 + }, + "combine": { + "p50": 483.7439954280853, + "p90": 492.5439953804016, + "p95": 504.32002544403076, + "p99": 536.8319749832153 + }, + "roundtrip": { + "p50": 996.9599843025208, + "p90": 1032.4480533599854, + "p95": 1040.7999753952026, + "p99": 11966.68815612793 + }, + "isolatedSum": { + "p50": 779.9359858036041, + "p90": 803.3599853515625, + "p95": 828.7360370159149, + "p99": 887.2639834880829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 535.0080132484436, + "p90": 545.8880066871643, + "p95": 549.0559935569763, + "p99": 556.9919943809509 + }, + "combine": { + "p50": 876.2239813804626, + "p90": 883.6159706115723, + "p95": 886.9119882583618, + "p99": 892.6079869270325 + }, + "roundtrip": { + "p50": 1845.9199666976929, + "p90": 1858.3040237426758, + "p95": 1870.6879615783691, + "p99": 1893.183946609497 + }, + "isolatedSum": { + "p50": 1411.2319946289062, + "p90": 1429.5039772987366, + "p95": 1435.9679818153381, + "p99": 1449.5999813079834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + 
"globalTokens": 32768, + "dispatch": { + "p50": 991.8079972267151, + "p90": 1007.3599815368652, + "p95": 1013.5680437088013, + "p99": 10620.12767791748 + }, + "combine": { + "p50": 1621.8880414962769, + "p90": 1630.2720308303833, + "p95": 1632.6080560684204, + "p99": 1640.064001083374 + }, + "roundtrip": { + "p50": 3496.256113052368, + "p90": 3508.1920623779297, + "p95": 3511.2318992614746, + "p99": 3520.3518867492676 + }, + "isolatedSum": { + "p50": 2613.696038722992, + "p90": 2637.6320123672485, + "p95": 2646.1760997772217, + "p99": 12260.191679000854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bf14055b", + "identity": "gb300|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "c5d25cb460cd4a84", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:05:49.601611+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + 
"configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03f98832f76b043", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.92799627780914, + "p90": 132.38400220870972, + "p95": 145.7280069589615, + "p99": 2677.0880222320557 + }, + "combine": { + "p50": 110.3999987244606, + "p90": 116.89600348472595, + "p95": 121.76000326871872, + "p99": 159.36000645160675 + }, + "roundtrip": { + "p50": 193.37600469589233, + "p90": 202.59200036525726, + "p95": 206.33600652217865, + "p99": 214.1440063714981 + }, + "isolatedSum": { + "p50": 227.32799500226974, + "p90": 249.28000569343567, + "p95": 267.4880102276802, + "p99": 2836.4480286836624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 148.25600385665894, + "p90": 155.64799308776855, + "p95": 158.9760035276413, + "p99": 168.64000260829926 + }, + "combine": { + "p50": 146.2080031633377, + "p90": 150.91200172901154, + "p95": 154.23999726772308, + "p99": 
163.13600540161133 + }, + "roundtrip": { + "p50": 262.14399933815, + "p90": 270.27198672294617, + "p95": 273.6319899559021, + "p99": 298.7520098686218 + }, + "isolatedSum": { + "p50": 294.46400701999664, + "p90": 306.5599948167801, + "p95": 313.2160007953644, + "p99": 331.7760080099106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 197.1520036458969, + "p90": 205.56800067424774, + "p95": 208.22399854660034, + "p99": 216.8000042438507 + }, + "combine": { + "p50": 216.22399985790253, + "p90": 223.51999580860138, + "p95": 225.3119945526123, + "p99": 233.88800024986267 + }, + "roundtrip": { + "p50": 381.53600692749023, + "p90": 389.7919952869415, + "p95": 391.64799451828003, + "p99": 396.12799882888794 + }, + "isolatedSum": { + "p50": 413.37600350379944, + "p90": 429.0879964828491, + "p95": 433.53599309921265, + "p99": 450.6880044937134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 299.23200607299805, + "p90": 307.74399638175964, + "p95": 310.88000535964966, + "p99": 321.21598720550537 + }, + "combine": { + "p50": 369.4399893283844, + "p90": 376.44800543785095, + "p95": 380.22398948669434, + "p99": 384.6080005168915 + }, + "roundtrip": { + "p50": 612.8000020980835, + "p90": 622.0160126686096, + "p95": 624.5120167732239, + "p99": 628.7680268287659 + }, + "isolatedSum": { + "p50": 668.6719954013824, + "p90": 684.1920018196106, + "p95": 691.103994846344, + "p99": 705.8239877223969 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 510.43200492858887, + "p90": 521.6000080108643, + "p95": 525.0239968299866, + "p99": 537.056028842926 + }, + "combine": { + "p50": 792.6080226898193, + "p90": 802.5599718093872, + "p95": 804.4800162315369, + "p99": 811.5839958190918 + }, + "roundtrip": { + "p50": 1267.0079469680786, + "p90": 1278.5919904708862, + "p95": 1282.528042793274, + "p99": 1291.3600206375122 + }, + "isolatedSum": { + "p50": 1303.0400276184082, + "p90": 1324.1599798202515, + "p95": 1329.5040130615234, + "p99": 1348.6400246620178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 939.6160244941711, + "p90": 961.8239998817444, + "p95": 970.5280065536499, + "p99": 3707.904100418091 + }, + "combine": { + "p50": 1488.9600276947021, + "p90": 1507.2640180587769, + "p95": 1518.5279846191406, + "p99": 5451.456069946289 + }, + "roundtrip": { + "p50": 2391.1681175231934, + "p90": 2433.7921142578125, + "p95": 2823.1680393218994, + "p99": 8404.831886291504 + }, + "isolatedSum": { + "p50": 2428.5760521888733, + "p90": 2469.0880179405212, + "p95": 2489.0559911727905, + "p99": 9159.36017036438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b92f33ab", + "identity": 
"gb300|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "7b8ce6a67a0ecb6e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:58.105651+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a9df48e6438e77a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { 
+ "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.86399644613266, + "p90": 132.28799402713776, + "p95": 139.615997672081, + "p99": 180.35200238227844 + }, + "combine": { + "p50": 118.01599711179733, + "p90": 125.88800489902496, + "p95": 135.51999628543854, + "p99": 370.4319894313812 + }, + "roundtrip": { + "p50": 204.48000729084015, + "p90": 216.0000056028366, + "p95": 225.3440022468567, + "p99": 264.8639976978302 + }, + "isolatedSum": { + "p50": 238.87999355793, + "p90": 258.1759989261627, + "p95": 275.13599395751953, + "p99": 550.7839918136597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.85600113868713, + "p90": 158.11200439929962, + "p95": 161.24799847602844, + "p99": 167.39200055599213 + }, + "combine": { + "p50": 150.52799880504608, + "p90": 158.81599485874176, + "p95": 160.89600324630737, + "p99": 170.23999989032745 + }, + "roundtrip": { + "p50": 273.1199860572815, + "p90": 280.86400032043457, + "p95": 283.87200832366943, + "p99": 291.3280129432678 + }, + "isolatedSum": { + "p50": 300.3839999437332, + "p90": 316.9279992580414, + "p95": 322.1440017223358, + "p99": 337.6320004463196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.28000593185425, + "p90": 216.0319983959198, + "p95": 221.37600183486938, + "p99": 247.42400646209717 + }, + "combine": { + "p50": 225.98400712013245, + "p90": 234.43199694156647, + "p95": 237.2480034828186, + "p99": 271.7759907245636 + }, + "roundtrip": { + 
"p50": 399.1680145263672, + "p90": 408.25599431991577, + "p95": 411.3599956035614, + "p99": 435.7439875602722 + }, + "isolatedSum": { + "p50": 431.2640130519867, + "p90": 450.46399533748627, + "p95": 458.624005317688, + "p99": 519.1999971866608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 321.9519853591919, + "p90": 331.13598823547363, + "p95": 335.7439935207367, + "p99": 373.76001477241516 + }, + "combine": { + "p50": 466.91200137138367, + "p90": 472.7039933204651, + "p95": 476.00001096725464, + "p99": 484.44798588752747 + }, + "roundtrip": { + "p50": 714.7200107574463, + "p90": 737.0880246162415, + "p95": 749.8239874839783, + "p99": 2908.5440635681152 + }, + "isolatedSum": { + "p50": 788.8639867305756, + "p90": 803.8399815559387, + "p95": 811.7440044879913, + "p99": 858.2080006599426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 554.5920133590698, + "p90": 563.9680027961731, + "p95": 565.8239722251892, + "p99": 573.5039710998535 + }, + "combine": { + "p50": 828.0320167541504, + "p90": 836.2240195274353, + "p95": 837.984025478363, + "p99": 844.1280126571655 + }, + "roundtrip": { + "p50": 1350.6560325622559, + "p90": 1365.3440475463867, + "p95": 1373.6000061035156, + "p99": 1411.1360311508179 + }, + "isolatedSum": { + "p50": 1382.6240301132202, + "p90": 1400.1920223236084, + "p95": 1403.8079977035522, + "p99": 1417.631983757019 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + 
"combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1026.6239643096924, + "p90": 1038.432002067566, + "p95": 1042.4000024795532, + "p99": 1052.8000593185425 + }, + "combine": { + "p50": 1567.903995513916, + "p90": 1586.4640474319458, + "p95": 1594.6240425109863, + "p99": 6328.639984130859 + }, + "roundtrip": { + "p50": 2559.583902359009, + "p90": 2593.8560962677, + "p95": 3304.095983505249, + "p99": 17706.49528503418 + }, + "isolatedSum": { + "p50": 2594.5279598236084, + "p90": 2624.8960494995117, + "p95": 2637.0240449905396, + "p99": 7381.440043449402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0931d8b9", + "identity": "gb300|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "94694bf292906dad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:08:06.955566+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.5360015630722, + "p90": 130.3039938211441, + "p95": 133.69600474834442, + "p99": 141.59999787807465 + }, + "combine": { + "p50": 122.43200093507767, + "p90": 126.97599828243256, + "p95": 130.3039938211441, + "p99": 135.96799969673157 + }, + "roundtrip": { + "p50": 211.776003241539, + "p90": 218.46400201320648, + "p95": 221.8559980392456, + "p99": 231.10400140285492 + }, + "isolatedSum": { + "p50": 243.96800249814987, + "p90": 257.27999210357666, + "p95": 263.9999985694885, + "p99": 277.5679975748062 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.35999834537506, + "p90": 
166.87999665737152, + "p95": 171.64799571037292, + "p99": 1093.4720039367676 + }, + "combine": { + "p50": 162.30399906635284, + "p90": 171.74400389194489, + "p95": 175.58400332927704, + "p99": 194.07999515533447 + }, + "roundtrip": { + "p50": 290.78400135040283, + "p90": 304.76799607276917, + "p95": 313.34400177001953, + "p99": 732.479989528656 + }, + "isolatedSum": { + "p50": 317.6639974117279, + "p90": 338.6240005493164, + "p95": 347.23199903964996, + "p99": 1287.551999092102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 223.39199483394623, + "p90": 234.27200317382812, + "p95": 240.38399755954742, + "p99": 284.38401222229004 + }, + "combine": { + "p50": 255.295991897583, + "p90": 265.6640112400055, + "p95": 269.82399821281433, + "p99": 281.1200022697449 + }, + "roundtrip": { + "p50": 427.2319972515106, + "p90": 448.7040042877197, + "p95": 457.92001485824585, + "p99": 1563.2959604263306 + }, + "isolatedSum": { + "p50": 478.68798673152924, + "p90": 499.9360144138336, + "p95": 510.20799577236176, + "p99": 565.5040144920349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 343.9039885997772, + "p90": 353.43998670578003, + "p95": 356.83199763298035, + "p99": 403.77599000930786 + }, + "combine": { + "p50": 469.85599398612976, + "p90": 477.5039851665497, + "p95": 479.74398732185364, + "p99": 484.70398783683777 + }, + "roundtrip": { + "p50": 781.6640138626099, + "p90": 792.0640110969543, + "p95": 798.9439964294434, + "p99": 
825.5360126495361 + }, + "isolatedSum": { + "p50": 813.759982585907, + "p90": 830.9439718723297, + "p95": 836.575984954834, + "p99": 888.4799778461456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 595.8719849586487, + "p90": 604.9280166625977, + "p95": 607.9679727554321, + "p99": 616.4479851722717 + }, + "combine": { + "p50": 841.4720296859741, + "p90": 850.4959940910339, + "p95": 852.2560000419617, + "p99": 856.5760254859924 + }, + "roundtrip": { + "p50": 1418.1760549545288, + "p90": 1446.1439847946167, + "p95": 1499.2320537567139, + "p99": 9740.768432617188 + }, + "isolatedSum": { + "p50": 1437.3440146446228, + "p90": 1455.4240107536316, + "p95": 1460.2239727973938, + "p99": 1473.0240106582642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1095.5840349197388, + "p90": 1105.0879955291748, + "p95": 1109.2480421066284, + "p99": 1125.3119707107544 + }, + "combine": { + "p50": 1588.6080265045166, + "p90": 1597.375988960266, + "p95": 1600.8000373840332, + "p99": 1607.9360246658325 + }, + "roundtrip": { + "p50": 2641.279935836792, + "p90": 2651.6480445861816, + "p95": 2654.9758911132812, + "p99": 2665.5359268188477 + }, + "isolatedSum": { + "p50": 2684.1920614242554, + "p90": 2702.463984489441, + "p95": 2710.0480794906616, + "p99": 2733.247995376587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-85202560", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_882967eb", + "comparisonKey": "abeec56a9e0038e7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:51.745197+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.46399438381195, + "p90": 142.59199798107147, + "p95": 150.01599490642548, + "p99": 173.0560064315796 + }, + "combine": { + "p50": 126.8479973077774, + "p90": 147.5519984960556, + "p95": 157.6640009880066, + "p99": 185.15199422836304 + }, + "roundtrip": { + "p50": 225.18399357795715, + "p90": 245.1840043067932, + "p95": 257.27999210357666, + "p99": 282.27201104164124 + }, + "isolatedSum": { + "p50": 253.31199169158936, + "p90": 290.1439964771271, + "p95": 307.67999589443207, + "p99": 358.2080006599426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.35199975967407, + "p90": 173.2800006866455, + "p95": 175.9680062532425, + "p99": 182.559996843338 + }, + "combine": { + "p50": 170.04799842834473, + "p90": 175.1679927110672, + "p95": 176.7680048942566, + "p99": 182.3039948940277 + }, + "roundtrip": { + "p50": 299.23200607299805, + "p90": 307.5839877128601, + "p95": 311.16798520088196, + "p99": 343.7120020389557 + }, + "isolatedSum": { + "p50": 334.3999981880188, + "p90": 348.4479933977127, + "p95": 352.7360111474991, + "p99": 364.8639917373657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 230.01599311828613, + "p90": 242.14400351047516, + "p95": 253.88801097869873, + 
"p99": 274.97598528862 + }, + "combine": { + "p50": 286.3360047340393, + "p90": 293.88800263404846, + "p95": 295.55198550224304, + "p99": 300.8959889411926 + }, + "roundtrip": { + "p50": 468.4160053730011, + "p90": 477.3760139942169, + "p95": 481.1519980430603, + "p99": 501.47199630737305 + }, + "isolatedSum": { + "p50": 516.3519978523254, + "p90": 536.0320061445236, + "p95": 549.4399964809418, + "p99": 575.8719742298126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 361.9840145111084, + "p90": 376.44800543785095, + "p95": 395.9360122680664, + "p99": 412.83199191093445 + }, + "combine": { + "p50": 488.7680113315582, + "p90": 504.92799282073975, + "p95": 515.5519843101501, + "p99": 546.2080240249634 + }, + "roundtrip": { + "p50": 815.392017364502, + "p90": 826.0480165481567, + "p95": 838.4640216827393, + "p99": 857.695996761322 + }, + "isolatedSum": { + "p50": 850.7520258426666, + "p90": 881.3759982585907, + "p95": 911.4879965782166, + "p99": 959.0400159358978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 615.1360273361206, + "p90": 624.5759725570679, + "p95": 628.000020980835, + "p99": 651.1679887771606 + }, + "combine": { + "p50": 867.2000169754028, + "p90": 875.1680254936218, + "p95": 877.3120045661926, + "p99": 889.4400000572205 + }, + "roundtrip": { + "p50": 1452.8319835662842, + "p90": 1463.1999731063843, + "p95": 1470.3680276870728, + "p99": 1493.6319589614868 + }, + "isolatedSum": { + "p50": 1482.3360443115234, + 
"p90": 1499.7439980506897, + "p95": 1505.3120255470276, + "p99": 1540.607988834381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1132.3200464248657, + "p90": 1140.1920318603516, + "p95": 1142.4319744110107, + "p99": 1150.015950202942 + }, + "combine": { + "p50": 1620.0640201568604, + "p90": 1627.8719902038574, + "p95": 1631.168007850647, + "p99": 1637.727975845337 + }, + "roundtrip": { + "p50": 2723.328113555908, + "p90": 2733.567953109741, + "p95": 2736.959934234619, + "p99": 2750.4959106445312 + }, + "isolatedSum": { + "p50": 2752.384066581726, + "p90": 2768.064022064209, + "p95": 2773.5999822616577, + "p99": 2787.743926048279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f2938268", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "7c8605acaaca3dc0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:52.645376+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.66399574279785, + "p90": 133.40799510478973, + "p95": 136.99199259281158, + "p99": 148.92800152301788 + }, + "combine": { + "p50": 125.56800246238708, + "p90": 131.96800649166107, + "p95": 134.07999277114868, + "p99": 141.4719969034195 + }, + "roundtrip": { + "p50": 223.61600399017334, + "p90": 231.9359928369522, + "p95": 235.45600473880768, + "p99": 243.74400079250336 + }, + "isolatedSum": { + "p50": 251.23199820518494, + "p90": 265.3760015964508, + "p95": 271.07198536396027, + "p99": 290.3999984264374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.80800306797028, + "p90": 171.36000096797943, + "p95": 175.07199943065643, + "p99": 182.75199830532074 + }, + "combine": { + "p50": 170.6559956073761, + "p90": 175.32800137996674, + "p95": 178.14399302005768, + "p99": 185.44000387191772 + }, + "roundtrip": { + "p50": 300.03198981285095, + "p90": 308.6079955101013, + "p95": 311.67998909950256, + "p99": 317.50398874282837 + }, + "isolatedSum": { + "p50": 334.4639986753464, + "p90": 346.68800234794617, + "p95": 353.2159924507141, + "p99": 368.19200217723846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.10400140285492, + "p90": 239.23200368881226, + "p95": 241.82400107383728, + "p99": 251.13600492477417 + }, + "combine": { + "p50": 287.4560058116913, + "p90": 295.6480085849762, + "p95": 297.5359857082367, + "p99": 303.1040132045746 + }, + "roundtrip": { + "p50": 472.7360010147095, + "p90": 483.68000984191895, + "p95": 486.9439899921417, + "p99": 493.151992559433 + }, + "isolatedSum": { + "p50": 518.5600072145462, + "p90": 534.8800122737885, + "p95": 539.359986782074, + "p99": 554.2400181293488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 361.63198947906494, + "p90": 368.5759902000427, + "p95": 370.84800004959106, + "p99": 373.6959993839264 + }, + "combine": { + "p50": 486.2079918384552, + "p90": 
493.79199743270874, + "p95": 495.35998702049255, + "p99": 496.89599871635437 + }, + "roundtrip": { + "p50": 817.5039887428284, + "p90": 825.4079818725586, + "p95": 827.8399705886841, + "p99": 837.4080061912537 + }, + "isolatedSum": { + "p50": 847.8399813175201, + "p90": 862.3679876327515, + "p95": 866.2079870700836, + "p99": 870.5919981002808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 617.5040006637573, + "p90": 626.2720227241516, + "p95": 628.000020980835, + "p99": 638.1440162658691 + }, + "combine": { + "p50": 869.5999979972839, + "p90": 877.6320219039917, + "p95": 880.1599740982056, + "p99": 886.2400054931641 + }, + "roundtrip": { + "p50": 1457.0879936218262, + "p90": 1467.0720100402832, + "p95": 1470.3680276870728, + "p99": 1476.5119552612305 + }, + "isolatedSum": { + "p50": 1487.1039986610413, + "p90": 1503.9040446281433, + "p95": 1508.1599950790405, + "p99": 1524.3840217590332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1129.3120384216309, + "p90": 1136.1600160598755, + "p95": 1138.975977897644, + "p99": 1145.0560092926025 + }, + "combine": { + "p50": 1618.783950805664, + "p90": 1627.7439594268799, + "p95": 1629.9200057983398, + "p99": 1636.7360353469849 + }, + "roundtrip": { + "p50": 2724.2560386657715, + "p90": 2733.6320877075195, + "p95": 2736.35196685791, + "p99": 2742.719888687134 + }, + "isolatedSum": { + "p50": 2748.095989227295, + "p90": 2763.9039754867554, + "p95": 2768.895983695984, + "p99": 
2781.7920446395874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b8aa45ff", + "identity": "gb300|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb300_bd97b71f", + "comparisonKey": "6cbef6cdafdc201c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:45.243774+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.87999820709229, + "p90": 140.9599930047989, + "p95": 143.8080072402954, + "p99": 151.13599598407745 + }, + "combine": { + "p50": 129.18399274349213, + "p90": 135.903999209404, + "p95": 137.7599984407425, + "p99": 143.77599954605103 + }, + "roundtrip": { + "p50": 229.8240065574646, + "p90": 238.5600060224533, + "p95": 243.00800263881683, + "p99": 249.63200092315674 + }, + "isolatedSum": { + "p50": 260.0639909505844, + "p90": 276.8639922142029, + "p95": 281.5680056810379, + "p99": 294.9119955301285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 170.6240028142929, + "p90": 203.13599705696106, + "p95": 217.75999665260315, + "p99": 235.6799989938736 + }, + "combine": { + "p50": 174.27200078964233, + "p90": 195.13599574565887, + "p95": 209.72800254821777, + "p99": 230.17600178718567 + }, + "roundtrip": { + "p50": 308.28800797462463, + "p90": 318.2719945907593, + "p95": 325.6640136241913, + "p99": 356.4800024032593 + }, + "isolatedSum": { + "p50": 344.89600360393524, + "p90": 398.27199280261993, + "p95": 427.4879992008209, + "p99": 465.85600078105927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 235.83999276161194, + "p90": 248.19199740886688, + "p95": 256.00001215934753, + "p99": 424.80000853538513 + }, + "combine": { + "p50": 294.71999406814575, + "p90": 302.7839958667755, + "p95": 309.88800525665283, + "p99": 349.7920036315918 + }, + "roundtrip": { + "p50": 473.1520116329193, + "p90": 487.5519871711731, + "p95": 497.1199929714203, + "p99": 18750.14305114746 + }, + "isolatedSum": { + "p50": 530.5599868297577, + "p90": 550.9759932756424, + "p95": 565.8880174160004, + "p99": 774.5920121669769 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 362.62398958206177, + "p90": 376.22401118278503, + "p95": 387.03998923301697, + "p99": 413.85599970817566 + }, + "combine": { + "p50": 496.0640072822571, + "p90": 516.7359709739685, + "p95": 540.6720042228699, + "p99": 558.6559772491455 + }, + "roundtrip": { + "p50": 827.2960186004639, + "p90": 843.2639837265015, + "p95": 856.0960292816162, + "p99": 890.8159732818604 + }, + "isolatedSum": { + "p50": 858.6879968643188, + "p90": 892.9599821567535, + "p95": 927.7119934558868, + "p99": 972.5119769573212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 618.9119815826416, + "p90": 648.7680077552795, + "p95": 674.5280027389526, + "p99": 36891.998291015625 + }, + "combine": { + "p50": 867.2320246696472, + "p90": 895.1039910316467, + "p95": 909.6320271492004, + "p99": 15450.783729553223 + }, + "roundtrip": 
{ + "p50": 1448.5759735107422, + "p90": 1492.095947265625, + "p95": 1511.8080377578735, + "p99": 36891.61682128906 + }, + "isolatedSum": { + "p50": 1486.1440062522888, + "p90": 1543.8719987869263, + "p95": 1584.160029888153, + "p99": 52342.78202056885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1128.2880306243896, + "p90": 1146.9759941101074, + "p95": 1153.2479524612427, + "p99": 1176.3520240783691 + }, + "combine": { + "p50": 1603.9040088653564, + "p90": 1619.4560527801514, + "p95": 1626.8479824066162, + "p99": 1638.6879682540894 + }, + "roundtrip": { + "p50": 2699.968099594116, + "p90": 2717.952013015747, + "p95": 2724.0960597991943, + "p99": 2740.8320903778076 + }, + "isolatedSum": { + "p50": 2732.192039489746, + "p90": 2766.432046890259, + "p95": 2780.095934867859, + "p99": 2815.0399923324585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9695fce8", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||03799dfc4e73d7f", + "colorKey": "gb300_6379de25", + "comparisonKey": "5d226cec9dfaf87c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:09:41.490659+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03799dfc4e73d7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 140.60799777507782, + "p90": 150.91200172901154, + "p95": 154.2080044746399, + "p99": 172.70399630069733 + }, + "combine": { + "p50": 148.3519971370697, + "p90": 155.45600652694702, + "p95": 158.49600732326508, + "p99": 166.04800522327423 + }, + "roundtrip": { + "p50": 257.7280104160309, + "p90": 266.62400364875793, + "p95": 271.10400795936584, + "p99": 296.9920039176941 + }, + "isolatedSum": { + "p50": 288.9599949121475, + "p90": 306.36800825595856, + "p95": 312.70401179790497, + 
"p99": 338.75200152397156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.94399297237396, + "p90": 194.04800236225128, + "p95": 197.82400131225586, + "p99": 216.2880003452301 + }, + "combine": { + "p50": 206.7199945449829, + "p90": 211.90400421619415, + "p95": 214.65599536895752, + "p99": 222.30400145053864 + }, + "roundtrip": { + "p50": 353.37600111961365, + "p90": 362.08000779151917, + "p95": 366.04800820350647, + "p99": 393.50399374961853 + }, + "isolatedSum": { + "p50": 393.6639875173569, + "p90": 405.95200657844543, + "p95": 412.4799966812134, + "p99": 438.59200179576874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 276.2239873409271, + "p90": 284.0319871902466, + "p95": 287.1679961681366, + "p99": 294.2720055580139 + }, + "combine": { + "p50": 366.07998609542847, + "p90": 370.7840144634247, + "p95": 372.4159896373749, + "p99": 377.4079978466034 + }, + "roundtrip": { + "p50": 597.0240235328674, + "p90": 604.3519973754883, + "p95": 607.2319746017456, + "p99": 613.8240098953247 + }, + "isolatedSum": { + "p50": 642.3039734363556, + "p90": 654.8160016536713, + "p95": 659.5839858055115, + "p99": 671.6800034046173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 441.18401408195496, + 
"p90": 451.775997877121, + "p95": 455.3599953651428, + "p99": 466.17600321769714 + }, + "combine": { + "p50": 634.5919966697693, + "p90": 641.1839723587036, + "p95": 643.6160206794739, + "p99": 656.3839912414551 + }, + "roundtrip": { + "p50": 1045.024037361145, + "p90": 1055.359959602356, + "p95": 1058.9120388031006, + "p99": 1073.0559825897217 + }, + "isolatedSum": { + "p50": 1075.7760107517242, + "p90": 1092.9599702358246, + "p95": 1098.9760160446167, + "p99": 1122.5599944591522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 795.6799864768982, + "p90": 807.1680068969727, + "p95": 813.7919902801514, + "p99": 825.7279992103577 + }, + "combine": { + "p50": 1184.607982635498, + "p90": 1192.0640468597412, + "p95": 1194.1440105438232, + "p99": 1199.679970741272 + }, + "roundtrip": { + "p50": 1948.7359523773193, + "p90": 1960.5439901351929, + "p95": 1965.5040502548218, + "p99": 1975.6799936294556 + }, + "isolatedSum": { + "p50": 1980.2879691123962, + "p90": 1999.2320537567139, + "p95": 2007.9360008239746, + "p99": 2025.4079699516296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1519.4560289382935, + "p90": 1536.8640422821045, + "p95": 1550.719976425171, + "p99": 5325.28018951416 + }, + "combine": { + "p50": 2268.480062484741, + "p90": 2431.3600063323975, + "p95": 2671.5519428253174, + "p99": 26066.272735595703 + }, + "roundtrip": { + "p50": 3756.0958862304688, + "p90": 3870.3360557556152, + "p95": 3893.4080600738525, + "p99": 
11255.488395690918 + }, + "isolatedSum": { + "p50": 3787.9360914230347, + "p90": 3968.224048614502, + "p95": 4222.271919250488, + "p99": 31391.552925109863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f3aa616b", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c", + "colorKey": "gb300_58c6ccd4", + "comparisonKey": "5982b7d11a3fab41", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:01.470253+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7f1ea4cf569d12c", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.10400277376175, + "p90": 110.04800349473953, + "p95": 113.11999708414078, + "p99": 119.29599940776825 + }, + "combine": { + "p50": 85.40800213813782, + "p90": 89.31200206279755, + "p95": 90.87999910116196, + "p99": 123.16799908876419 + }, + "roundtrip": { + "p50": 157.95199573040009, + "p90": 167.23200678825378, + "p95": 169.8240041732788, + "p99": 174.6239960193634 + }, + "isolatedSum": { + "p50": 188.51200491189957, + "p90": 199.36000555753708, + "p95": 203.99999618530273, + "p99": 242.46399849653244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 125.95200538635254, + "p90": 133.18400084972382, + "p95": 136.89599931240082, + "p99": 141.50400459766388 + }, + "combine": { + "p50": 128.03199887275696, + "p90": 135.77599823474884, + "p95": 137.472003698349, + "p99": 142.2400027513504 + }, + "roundtrip": { + "p50": 223.26399385929108, + "p90": 231.23200237751007, + "p95": 233.43999683856964, + "p99": 240.447998046875 + }, + "isolatedSum": { + "p50": 253.9840042591095, + "p90": 268.95999908447266, + "p95": 274.3680030107498, + "p99": 283.7440073490143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + 
"combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 214.56000208854675, + "p90": 222.08000719547272, + "p95": 226.04799270629883, + "p99": 250.11199712753296 + }, + "combine": { + "p50": 263.7439966201782, + "p90": 271.32800221443176, + "p95": 273.6319899559021, + "p99": 278.56001257896423 + }, + "roundtrip": { + "p50": 459.7119987010956, + "p90": 466.3679897785187, + "p95": 469.2800045013428, + "p99": 478.14399003982544 + }, + "isolatedSum": { + "p50": 478.303998708725, + "p90": 493.4080094099045, + "p95": 499.6799826622009, + "p99": 528.6720097064972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2fc6131d", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405", + "colorKey": "gb300_0bc52499", + "comparisonKey": "01a84bfe3e788c51", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:27.691123+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7ac30b0a39b1405", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.56799983978271, + "p90": 119.87199634313583, + "p95": 126.11199915409088, + "p99": 186.91200017929077 + }, + "combine": { + "p50": 100.3199964761734, + "p90": 109.11999642848969, + "p95": 112.67200112342834, + "p99": 139.67999815940857 + }, + "roundtrip": { + "p50": 180.57599663734436, + "p90": 190.75199961662292, + "p95": 196.25599682331085, + "p99": 245.728000998497 + }, + "isolatedSum": { + "p50": 209.88799631595612, + "p90": 228.99199277162552, + "p95": 238.78400027751923, + "p99": 326.59199833869934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
126.75200402736664, + "p90": 135.83999872207642, + "p95": 139.55199718475342, + "p99": 149.53599870204926 + }, + "combine": { + "p50": 122.36800044775009, + "p90": 129.69599664211273, + "p95": 132.51200318336487, + "p99": 136.80000603199005 + }, + "roundtrip": { + "p50": 218.30399334430695, + "p90": 225.75999796390533, + "p95": 228.7680059671402, + "p99": 235.87200045585632 + }, + "isolatedSum": { + "p50": 249.12000447511673, + "p90": 265.53599536418915, + "p95": 272.0640003681183, + "p99": 286.3360047340393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 161.9199961423874, + "p90": 173.50399494171143, + "p95": 179.6479970216751, + "p99": 1437.0239973068237 + }, + "combine": { + "p50": 158.4639996290207, + "p90": 171.36000096797943, + "p95": 176.28799378871918, + "p99": 4013.9517784118652 + }, + "roundtrip": { + "p50": 291.9999957084656, + "p90": 313.31199407577515, + "p95": 321.9839930534363, + "p99": 10585.184097290039 + }, + "isolatedSum": { + "p50": 320.3839957714081, + "p90": 344.86399590969086, + "p95": 355.9359908103943, + "p99": 5450.975775718689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 230.24000227451324, + "p90": 238.49600553512573, + "p95": 241.15200340747833, + "p99": 249.34400618076324 + }, + "combine": { + "p50": 283.1679880619049, + "p90": 290.46401381492615, + "p95": 292.4480140209198, + "p99": 303.00799012184143 + }, + "roundtrip": { + "p50": 478.94400358200073, + "p90": 488.70399594306946, + "p95": 491.36000871658325, + 
"p99": 501.18398666381836 + }, + "isolatedSum": { + "p50": 513.4079903364182, + "p90": 528.9600193500519, + "p95": 533.6000174283981, + "p99": 552.3519963026047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 370.11200189590454, + "p90": 379.2319893836975, + "p95": 384.2880129814148, + "p99": 398.9439904689789 + }, + "combine": { + "p50": 493.0880069732666, + "p90": 500.19198656082153, + "p95": 502.27200984954834, + "p99": 505.7600140571594 + }, + "roundtrip": { + "p50": 831.3599824905396, + "p90": 838.047981262207, + "p95": 840.6080007553101, + "p99": 846.9439744949341 + }, + "isolatedSum": { + "p50": 863.2000088691711, + "p90": 879.423975944519, + "p95": 886.5600228309631, + "p99": 904.7040045261383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 669.7919964790344, + "p90": 699.2319822311401, + "p95": 707.1679830551147, + "p99": 3542.7839756011963 + }, + "combine": { + "p50": 890.720009803772, + "p90": 950.9119987487793, + "p95": 969.0240025520325, + "p99": 6326.176166534424 + }, + "roundtrip": { + "p50": 1520.095944404602, + "p90": 1531.0720205307007, + "p95": 1535.3280305862427, + "p99": 1558.4959983825684 + }, + "isolatedSum": { + "p50": 1560.5120062828064, + "p90": 1650.1439809799194, + "p95": 1676.1919856071472, + "p99": 9868.96014213562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a1e7ba8", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||e3707ddc343088b", + "colorKey": "gb300_b8354a13", + "comparisonKey": "0b6bb35b64841fd6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:11:29.875428+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e3707ddc343088b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 135.71199774742126, + "p90": 147.0080018043518, + "p95": 149.53599870204926, + "p99": 171.6800034046173 + }, + "combine": { + "p50": 143.8400000333786, + "p90": 149.1840034723282, + "p95": 151.0079950094223, + "p99": 158.55999290943146 + }, + "roundtrip": { + "p50": 249.66399371623993, + "p90": 258.8160037994385, + "p95": 264.5440101623535, + "p99": 299.1679906845093 + }, + "isolatedSum": { + "p50": 279.55199778079987, + "p90": 296.19200527668, + "p95": 300.54399371147156, + "p99": 330.23999631404877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 178.75200510025024, + "p90": 203.77600193023682, + "p95": 228.64000499248505, + "p99": 246.46399915218353 + }, + "combine": { + "p50": 196.4160054922104, + "p90": 221.95200622081757, + "p95": 236.83199286460876, + "p99": 867.8399920463562 + }, + "roundtrip": { + "p50": 345.0559973716736, + "p90": 374.04799461364746, + "p95": 399.29598569869995, + "p99": 2795.1040267944336 + }, + "isolatedSum": { + "p50": 375.16801059246063, + "p90": 425.7280081510544, + "p95": 465.4719978570938, + "p99": 1114.3039911985397 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 257.1200132369995, + "p90": 267.8399980068207, + "p95": 271.0080146789551, + "p99": 
294.3359911441803 + }, + "combine": { + "p50": 365.5039966106415, + "p90": 372.6080060005188, + "p95": 375.5199909210205, + "p99": 396.3199853897095 + }, + "roundtrip": { + "p50": 582.7839970588684, + "p90": 601.9520163536072, + "p95": 611.9040250778198, + "p99": 5109.0240478515625 + }, + "isolatedSum": { + "p50": 622.624009847641, + "p90": 640.4480040073395, + "p95": 646.5280055999756, + "p99": 690.6559765338898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 415.583997964859, + "p90": 440.89600443840027, + "p95": 462.72000670433044, + "p99": 2433.824062347412 + }, + "combine": { + "p50": 630.8479905128479, + "p90": 667.6160097122192, + "p95": 682.1439862251282, + "p99": 2330.6241035461426 + }, + "roundtrip": { + "p50": 1000.480055809021, + "p90": 1018.5279846191406, + "p95": 1037.4720096588135, + "p99": 1065.6640529632568 + }, + "isolatedSum": { + "p50": 1046.431988477707, + "p90": 1108.5120141506195, + "p95": 1144.8639929294586, + "p99": 4764.448165893555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 722.3039865493774, + "p90": 739.4239902496338, + "p95": 753.9520263671875, + "p99": 777.2160172462463 + }, + "combine": { + "p50": 1153.1200408935547, + "p90": 1178.5279512405396, + "p95": 1187.9040002822876, + "p99": 1208.2879543304443 + }, + "roundtrip": { + "p50": 1843.2639837265015, + "p90": 1857.375979423523, + "p95": 1868.5120344161987, + "p99": 1887.8719806671143 + }, + "isolatedSum": { + "p50": 
1875.4240274429321, + "p90": 1917.9519414901733, + "p95": 1941.856026649475, + "p99": 1985.5039715766907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1386.1119747161865, + "p90": 1396.191954612732, + "p95": 1399.4239568710327, + "p99": 1408.6400270462036 + }, + "combine": { + "p50": 2202.080011367798, + "p90": 2211.7760181427, + "p95": 2216.320037841797, + "p99": 2232.0001125335693 + }, + "roundtrip": { + "p50": 3558.687925338745, + "p90": 3581.88796043396, + "p95": 3597.1839427948, + "p99": 3618.1440353393555 + }, + "isolatedSum": { + "p50": 3588.1919860839844, + "p90": 3607.967972755432, + "p95": 3615.7439947128296, + "p99": 3640.640139579773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d2792367", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add", + "colorKey": "gb300_8d40934b", + "comparisonKey": "fd1812b9648c66fc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:16:14.197458+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + 
"model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6248b19ef786add", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.87999820709229, + "p90": 141.59999787807465, + "p95": 147.07200229167938, + "p99": 200.19200444221497 + }, + "combine": { + "p50": 130.17599284648895, + "p90": 139.8400068283081, + "p95": 147.67999947071075, + "p99": 2135.3600025177 + }, + "roundtrip": { + "p50": 232.60800540447235, + "p90": 246.75199389457703, + "p95": 252.41601467132568, + "p99": 486.1760139465332 + }, + "isolatedSum": { + "p50": 261.05599105358124, + "p90": 281.44000470638275, + "p95": 294.75200176239014, + "p99": 2335.552006959915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + 
"combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 165.47200083732605, + "p90": 173.18400740623474, + "p95": 175.48799514770508, + "p99": 181.88799917697906 + }, + "combine": { + "p50": 172.38399386405945, + "p90": 178.20799350738525, + "p95": 183.20000171661377, + "p99": 195.93599438667297 + }, + "roundtrip": { + "p50": 305.59998750686646, + "p90": 313.56799602508545, + "p95": 317.4720108509064, + "p99": 324.8960077762604 + }, + "isolatedSum": { + "p50": 337.8559947013855, + "p90": 351.39200091362, + "p95": 358.68799686431885, + "p99": 377.82399356365204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.9359928369522, + "p90": 239.9359941482544, + "p95": 243.26400458812714, + "p99": 248.28800559043884 + }, + "combine": { + "p50": 288.8000011444092, + "p90": 296.8960106372833, + "p95": 299.0719974040985, + "p99": 310.94399094581604 + }, + "roundtrip": { + "p50": 462.14398741722107, + "p90": 470.14400362968445, + "p95": 473.08799624443054, + "p99": 480.1599979400635 + }, + "isolatedSum": { + "p50": 520.7359939813614, + "p90": 536.8320047855377, + "p95": 542.3360019922256, + "p99": 559.2319965362549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 359.96800661087036, + "p90": 367.74399876594543, + "p95": 370.8159923553467, + "p99": 
376.67199969291687 + }, + "combine": { + "p50": 493.8240051269531, + "p90": 502.3679733276367, + "p95": 504.7680139541626, + "p99": 508.3199739456177 + }, + "roundtrip": { + "p50": 823.2960104942322, + "p90": 832.2240114212036, + "p95": 835.2320194244385, + "p99": 842.8159952163696 + }, + "isolatedSum": { + "p50": 853.7920117378235, + "p90": 870.1119720935822, + "p95": 875.5840063095093, + "p99": 884.9919736385345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 615.9039735794067, + "p90": 629.5679807662964, + "p95": 635.2959871292114, + "p99": 3432.0321083068848 + }, + "combine": { + "p50": 867.3920035362244, + "p90": 914.1119718551636, + "p95": 926.6560077667236, + "p99": 3466.815948486328 + }, + "roundtrip": { + "p50": 1448.8639831542969, + "p90": 1489.151954650879, + "p95": 1501.4079809188843, + "p99": 14311.00845336914 + }, + "isolatedSum": { + "p50": 1483.295977115631, + "p90": 1543.67995262146, + "p95": 1561.951994895935, + "p99": 6898.848056793213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1123.4240531921387, + "p90": 1130.5279731750488, + "p95": 1132.5759887695312, + "p99": 1135.9679698944092 + }, + "combine": { + "p50": 1603.2639741897583, + "p90": 1611.7759943008423, + "p95": 1614.3039464950562, + "p99": 1617.9519891738892 + }, + "roundtrip": { + "p50": 2704.6079635620117, + "p90": 2783.1358909606934, + "p95": 2822.335958480835, + "p99": 47827.35824584961 + }, + "isolatedSum": { + "p50": 
2726.688027381897, + "p90": 2742.303967475891, + "p95": 2746.8799352645874, + "p99": 2753.9199590682983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0195c86b", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286", + "colorKey": "gb300_70e3fa53", + "comparisonKey": "a0868e694414b600", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:00.791984+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "291e5ce62735286", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.68000364303589, + "p90": 136.1279934644699, + "p95": 139.20000195503235, + "p99": 143.71199905872345 + }, + "combine": { + "p50": 127.07200646400452, + "p90": 134.8160058259964, + "p95": 136.48000359535217, + "p99": 140.83200693130493 + }, + "roundtrip": { + "p50": 225.92000663280487, + "p90": 234.9119931459427, + "p95": 238.71999979019165, + "p99": 249.56800043582916 + }, + "isolatedSum": { + "p50": 254.7520101070404, + "p90": 270.9439992904663, + "p95": 275.6800055503845, + "p99": 284.5440059900284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 166.87999665737152, + "p90": 176.32000148296356, + "p95": 180.67200481891632, + "p99": 3888.7999057769775 + }, + "combine": { + "p50": 173.2800006866455, + "p90": 184.03199315071106, + "p95": 185.98400056362152, + "p99": 197.91999459266663 + }, + "roundtrip": { + "p50": 306.36799335479736, + "p90": 324.6400058269501, + "p95": 337.40800619125366, + "p99": 3208.1921100616455 + }, + "isolatedSum": { + "p50": 340.15999734401703, + "p90": 360.3519946336746, + "p95": 366.65600538253784, + "p99": 4086.719900369644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 
154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 233.40800404548645, + "p90": 241.82400107383728, + "p95": 243.8720017671585, + "p99": 250.71999430656433 + }, + "combine": { + "p50": 287.55199909210205, + "p90": 295.48799991607666, + "p95": 296.9279885292053, + "p99": 303.3280074596405 + }, + "roundtrip": { + "p50": 472.9920029640198, + "p90": 489.50400948524475, + "p95": 495.712012052536, + "p99": 4981.44006729126 + }, + "isolatedSum": { + "p50": 520.9600031375885, + "p90": 537.3120009899139, + "p95": 540.7999902963638, + "p99": 554.0480017662048 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 360.9600067138672, + "p90": 372.0000088214874, + "p95": 377.75999307632446, + "p99": 909.600019454956 + }, + "combine": { + "p50": 494.2399859428406, + "p90": 516.0639882087708, + "p95": 537.5999808311462, + "p99": 2434.1440200805664 + }, + "roundtrip": { + "p50": 817.6959753036499, + "p90": 828.4479975700378, + "p95": 839.0399813652039, + "p99": 890.8159732818604 + }, + "isolatedSum": { + "p50": 855.1999926567078, + "p90": 888.0639970302582, + "p95": 915.3599739074707, + "p99": 3343.7440395355225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 610.0159883499146, + "p90": 618.1120276451111, + "p95": 621.2480068206787, + "p99": 649.9199867248535 + }, + 
"combine": { + "p50": 855.8080196380615, + "p90": 864.512026309967, + "p95": 866.4000034332275, + "p99": 872.0960021018982 + }, + "roundtrip": { + "p50": 1435.8079433441162, + "p90": 1443.6160326004028, + "p95": 1446.112036705017, + "p99": 1453.5679817199707 + }, + "isolatedSum": { + "p50": 1465.824007987976, + "p90": 1482.6240539550781, + "p95": 1487.6480102539062, + "p99": 1522.0159888267517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1126.1759996414185, + "p90": 1132.7359676361084, + "p95": 1136.191964149475, + "p99": 1140.9920454025269 + }, + "combine": { + "p50": 1611.840009689331, + "p90": 1617.0560121536255, + "p95": 1618.8160181045532, + "p99": 1626.911997795105 + }, + "roundtrip": { + "p50": 2710.848093032837, + "p90": 2719.327926635742, + "p95": 2722.4318981170654, + "p99": 2730.5281162261963 + }, + "isolatedSum": { + "p50": 2738.0160093307495, + "p90": 2749.791979789734, + "p95": 2755.0079822540283, + "p99": 2767.904043197632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-01dcf3ff", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_ed023b5e", + "comparisonKey": "f25693fc30623445", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:33.407837+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.4160017967224, + "p90": 137.31199502944946, + "p95": 140.79999923706055, + "p99": 151.90400183200836 + }, + "combine": { + "p50": 136.06399297714233, + "p90": 143.8719928264618, + "p95": 146.11199498176575, + "p99": 150.39999783039093 + }, + "roundtrip": { + "p50": 236.32000386714935, + "p90": 244.3840056657791, + "p95": 246.65600061416626, + 
"p99": 253.85600328445435 + }, + "isolatedSum": { + "p50": 264.47999477386475, + "p90": 281.18398785591125, + "p95": 286.9119942188263, + "p99": 302.3039996623993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.15200519561768, + "p90": 169.27999258041382, + "p95": 174.3679940700531, + "p99": 191.3599967956543 + }, + "combine": { + "p50": 183.87199938297272, + "p90": 189.88800048828125, + "p95": 193.63200664520264, + "p99": 198.91199469566345 + }, + "roundtrip": { + "p50": 315.775990486145, + "p90": 323.5200047492981, + "p95": 326.2079954147339, + "p99": 338.3040130138397 + }, + "isolatedSum": { + "p50": 345.0240045785904, + "p90": 359.16799306869507, + "p95": 368.00000071525574, + "p99": 390.27199149131775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.80800676345825, + "p90": 245.31200528144836, + "p95": 254.20799851417542, + "p99": 291.9360101222992 + }, + "combine": { + "p50": 344.1919982433319, + "p90": 353.11999917030334, + "p95": 357.7919900417328, + "p99": 381.1520040035248 + }, + "roundtrip": { + "p50": 544.7360277175903, + "p90": 558.7520003318787, + "p95": 568.0639743804932, + "p99": 603.3599972724915 + }, + "isolatedSum": { + "p50": 576.0000050067902, + "p90": 598.4320044517517, + "p95": 611.9999885559082, + "p99": 673.088014125824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 
1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 373.1200098991394, + "p90": 382.56001472473145, + "p95": 386.3680064678192, + "p99": 395.7439959049225 + }, + "combine": { + "p50": 621.0240125656128, + "p90": 627.8079748153687, + "p95": 630.2400231361389, + "p99": 639.2959952354431 + }, + "roundtrip": { + "p50": 955.2320241928101, + "p90": 964.2239809036255, + "p95": 968.2239890098572, + "p99": 985.4400157928467 + }, + "isolatedSum": { + "p50": 994.1440224647522, + "p90": 1010.3679895401001, + "p95": 1016.6080296039581, + "p99": 1035.0399911403656 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 692.575991153717, + "p90": 704.1280269622803, + "p95": 706.7840099334717, + "p99": 712.9279971122742 + }, + "combine": { + "p50": 1131.5200328826904, + "p90": 1136.3840103149414, + "p95": 1140.3199434280396, + "p99": 1145.0239419937134 + }, + "roundtrip": { + "p50": 1790.7520532608032, + "p90": 1808.2239627838135, + "p95": 1816.7999982833862, + "p99": 1843.4239625930786 + }, + "isolatedSum": { + "p50": 1824.0960240364075, + "p90": 1840.5120372772217, + "p95": 1847.1039533615112, + "p99": 1857.9519391059875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1334.7840309143066, + "p90": 1348.031997680664, + "p95": 1351.9999980926514, + "p99": 1358.5599660873413 + }, + "combine": { + "p50": 2164.383888244629, + "p90": 2177.1841049194336, 
+ "p95": 2181.119918823242, + "p99": 2197.6959705352783 + }, + "roundtrip": { + "p50": 3470.655918121338, + "p90": 3486.1440658569336, + "p95": 3490.6880855560303, + "p99": 3503.648042678833 + }, + "isolatedSum": { + "p50": 3499.1679191589355, + "p90": 3525.2161026000977, + "p95": 3533.1199169158936, + "p99": 3556.2559366226196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c56853cf", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||8183e404f63b100", + "colorKey": "gb300_92ddb4ac", + "comparisonKey": "6b1fa99a9d45798e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:10:55.526109+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": 
{ + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8183e404f63b100", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.64800274372101, + "p90": 132.03200697898865, + "p95": 134.88000631332397, + "p99": 145.79200744628906 + }, + "combine": { + "p50": 134.20799374580383, + "p90": 139.16799426078796, + "p95": 141.79199934005737, + "p99": 147.5840061903 + }, + "roundtrip": { + "p50": 230.3999960422516, + "p90": 238.68800699710846, + "p95": 240.89600145816803, + "p99": 248.25599789619446 + }, + "isolatedSum": { + "p50": 257.85599648952484, + "p90": 271.2000012397766, + "p95": 276.67200565338135, + "p99": 293.37601363658905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 155.7759940624237, + "p90": 168.2880073785782, + "p95": 185.08799374103546, + "p99": 209.24800634384155 + }, + "combine": { + "p50": 181.72800540924072, + "p90": 208.76799523830414, + "p95": 223.77599775791168, + "p99": 240.63999950885773 + }, + "roundtrip": { + "p50": 308.9280128479004, + "p90": 323.71199131011963, + "p95": 333.6000144481659, + "p99": 354.559987783432 + }, + "isolatedSum": { + "p50": 
337.50399947166443, + "p90": 377.0560026168823, + "p95": 408.86399149894714, + "p99": 449.8880058526993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 213.6639952659607, + "p90": 222.49600291252136, + "p95": 226.01599991321564, + "p99": 237.88799345493317 + }, + "combine": { + "p50": 291.20001196861267, + "p90": 318.30400228500366, + "p95": 328.96000146865845, + "p99": 346.5920090675354 + }, + "roundtrip": { + "p50": 496.99199199676514, + "p90": 511.48802042007446, + "p95": 518.7839865684509, + "p99": 543.936014175415 + }, + "isolatedSum": { + "p50": 504.86400723457336, + "p90": 540.800005197525, + "p95": 554.9760013818741, + "p99": 584.4800025224686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 342.75200963020325, + "p90": 361.5039885044098, + "p95": 373.8560080528259, + "p99": 3621.151924133301 + }, + "combine": { + "p50": 627.0719766616821, + "p90": 687.7760291099548, + "p95": 780.5119752883911, + "p99": 7254.39977645874 + }, + "roundtrip": { + "p50": 939.1040205955505, + "p90": 1010.2399587631226, + "p95": 1104.9599647521973, + "p99": 7489.727973937988 + }, + "isolatedSum": { + "p50": 969.8239862918854, + "p90": 1049.2800176143646, + "p95": 1154.367983341217, + "p99": 10875.551700592041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 610.5599999427795, + "p90": 623.2320070266724, + "p95": 627.0080208778381, + "p99": 634.8479986190796 + }, + "combine": { + "p50": 1162.176012992859, + "p90": 1169.75998878479, + "p95": 1171.7439889907837, + "p99": 1176.31995677948 + }, + "roundtrip": { + "p50": 1710.1119756698608, + "p90": 1719.0719842910767, + "p95": 1722.9759693145752, + "p99": 1729.632019996643 + }, + "isolatedSum": { + "p50": 1772.7360129356384, + "p90": 1792.9919958114624, + "p95": 1798.7520098686218, + "p99": 1811.1679553985596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1192.4480199813843, + "p90": 1204.9599885940552, + "p95": 1208.7680101394653, + "p99": 1217.2800302505493 + }, + "combine": { + "p50": 2190.6559467315674, + "p90": 2196.9919204711914, + "p95": 2201.119899749756, + "p99": 2215.167999267578 + }, + "roundtrip": { + "p50": 3313.3440017700195, + "p90": 3325.8559703826904, + "p95": 3329.632043838501, + "p99": 3336.1918926239014 + }, + "isolatedSum": { + "p50": 3383.1039667129517, + "p90": 3401.9519090652466, + "p95": 3409.887909889221, + "p99": 3432.4480295181274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d17f90d2", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72", + "colorKey": "gb300_3e2f6cc2", + "comparisonKey": "a35df4341c0b79d0", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:15:40.589801+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0e6b07a25691d72", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.56800246238708, + "p90": 134.14399325847626, + "p95": 136.6720050573349, + "p99": 
144.06399428844452 + }, + "combine": { + "p50": 125.34399330615997, + "p90": 132.4480026960373, + "p95": 134.94400680065155, + "p99": 140.54399728775024 + }, + "roundtrip": { + "p50": 224.0000069141388, + "p90": 232.80000686645508, + "p95": 236.80000007152557, + "p99": 248.35200607776642 + }, + "isolatedSum": { + "p50": 250.91199576854706, + "p90": 266.59199595451355, + "p95": 271.61601185798645, + "p99": 284.60799157619476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.93600404262543, + "p90": 171.58399522304535, + "p95": 175.29599368572235, + "p99": 181.15200102329254 + }, + "combine": { + "p50": 169.66399550437927, + "p90": 174.43199455738068, + "p95": 176.12800002098083, + "p99": 185.98400056362152 + }, + "roundtrip": { + "p50": 302.5600016117096, + "p90": 309.63200330734253, + "p95": 312.8319978713989, + "p99": 319.10398602485657 + }, + "isolatedSum": { + "p50": 333.5999995470047, + "p90": 346.015989780426, + "p95": 351.4239937067032, + "p99": 367.13600158691406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.1919947862625, + "p90": 242.33600497245789, + "p95": 245.08799612522125, + "p99": 852.3520231246948 + }, + "combine": { + "p50": 292.4480140209198, + "p90": 299.8400032520294, + "p95": 305.2479922771454, + "p99": 1562.3680353164673 + }, + "roundtrip": { + "p50": 472.9599952697754, + "p90": 494.4640100002289, + "p95": 504.2240023612976, + "p99": 29774.97673034668 + }, + "isolatedSum": { + "p50": 
524.6400088071823, + "p90": 542.1760082244873, + "p95": 550.3359884023666, + "p99": 2414.720058441162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 361.60001158714294, + "p90": 368.3519959449768, + "p95": 371.16798758506775, + "p99": 378.33601236343384 + }, + "combine": { + "p50": 497.69601225852966, + "p90": 505.7920217514038, + "p95": 507.26401805877686, + "p99": 512.9280090332031 + }, + "roundtrip": { + "p50": 831.6479921340942, + "p90": 859.6159815788269, + "p95": 865.3119802474976, + "p99": 877.2799968719482 + }, + "isolatedSum": { + "p50": 859.2960238456726, + "p90": 874.1440176963806, + "p95": 878.4320056438446, + "p99": 891.264021396637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 624.1919994354248, + "p90": 637.6320123672485, + "p95": 646.943986415863, + "p99": 10679.488182067871 + }, + "combine": { + "p50": 892.0320272445679, + "p90": 934.7839951515198, + "p95": 949.1519927978516, + "p99": 5418.816089630127 + }, + "roundtrip": { + "p50": 1484.928011894226, + "p90": 1538.5279655456543, + "p95": 1664.8000478744507, + "p99": 11221.823692321777 + }, + "isolatedSum": { + "p50": 1516.2240266799927, + "p90": 1572.4160075187683, + "p95": 1596.0959792137146, + "p99": 16098.304271697998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1155.2319526672363, + "p90": 1169.0560579299927, + "p95": 1177.3439645767212, + "p99": 1204.9280405044556 + }, + "combine": { + "p50": 1685.1520538330078, + "p90": 1758.1440210342407, + "p95": 1765.3119564056396, + "p99": 1782.688021659851 + }, + "roundtrip": { + "p50": 2799.5519638061523, + "p90": 2879.5199394226074, + "p95": 2892.224073410034, + "p99": 11104.512214660645 + }, + "isolatedSum": { + "p50": 2840.384006500244, + "p90": 2927.2000789642334, + "p95": 2942.655920982361, + "p99": 2987.6160621643066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-678200da", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7", + "colorKey": "gb300_edb03f57", + "comparisonKey": "949cd9e440bb6b51", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:13:44.255038+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": 
"none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a39eeb7c2dc6ca7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.3360015153885, + "p90": 139.67999815940857, + "p95": 143.19999516010284, + "p99": 155.83999454975128 + }, + "combine": { + "p50": 141.9840008020401, + "p90": 150.36800503730774, + "p95": 154.4319987297058, + "p99": 162.33600676059723 + }, + "roundtrip": { + "p50": 245.2480047941208, + "p90": 255.45600056648254, + "p95": 259.96801257133484, + "p99": 271.93599939346313 + }, + "isolatedSum": { + "p50": 272.3200023174286, + "p90": 290.0480031967163, + "p95": 297.63199388980865, + "p99": 318.1760013103485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 167.4560010433197, + "p90": 176.1920005083084, + "p95": 179.45599555969238, + "p99": 191.39200448989868 + }, + "combine": { + "p50": 
186.43200397491455, + "p90": 192.9280012845993, + "p95": 194.7840005159378, + "p99": 200.3519982099533 + }, + "roundtrip": { + "p50": 327.93599367141724, + "p90": 335.10398864746094, + "p95": 338.1440043449402, + "p99": 350.8799970149994 + }, + "isolatedSum": { + "p50": 353.88800501823425, + "p90": 369.1200017929077, + "p95": 374.2399960756302, + "p99": 391.744002699852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 244.57600712776184, + "p90": 254.4960081577301, + "p95": 259.5840096473694, + "p99": 278.52800488471985 + }, + "combine": { + "p50": 359.77599024772644, + "p90": 388.5760009288788, + "p95": 401.7600119113922, + "p99": 414.7840142250061 + }, + "roundtrip": { + "p50": 565.9199953079224, + "p90": 581.6320180892944, + "p95": 593.6959981918335, + "p99": 611.2319827079773 + }, + "isolatedSum": { + "p50": 604.3519973754883, + "p90": 643.0720090866089, + "p95": 661.3440215587616, + "p99": 693.312019109726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 393.0880129337311, + "p90": 409.15200114250183, + "p95": 426.33599042892456, + "p99": 444.8640048503876 + }, + "combine": { + "p50": 613.0239963531494, + "p90": 631.6159963607788, + "p95": 646.7519998550415, + "p99": 668.9599752426147 + }, + "roundtrip": { + "p50": 979.6159863471985, + "p90": 1008.4160566329956, + "p95": 1021.0880041122437, + "p99": 1042.1439409255981 + }, + "isolatedSum": { + "p50": 1006.1120092868805, + "p90": 1040.7679975032806, + "p95": 
1073.087990283966, + "p99": 1113.8239800930023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 696.9599723815918, + "p90": 712.7040028572083, + "p95": 771.8719840049744, + "p99": 3680.511951446533 + }, + "combine": { + "p50": 1111.9359731674194, + "p90": 1121.9199895858765, + "p95": 1173.8879680633545, + "p99": 1333.024024963379 + }, + "roundtrip": { + "p50": 1778.8159847259521, + "p90": 1790.079951286316, + "p95": 1793.4720516204834, + "p99": 1801.535964012146 + }, + "isolatedSum": { + "p50": 1808.8959455490112, + "p90": 1834.6239924430847, + "p95": 1945.7599520683289, + "p99": 5013.535976409912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1340.7360315322876, + "p90": 1357.5359582901, + "p95": 1368.1919574737549, + "p99": 1389.5679712295532 + }, + "combine": { + "p50": 2132.1918964385986, + "p90": 2145.2159881591797, + "p95": 2159.071922302246, + "p99": 2176.448106765747 + }, + "roundtrip": { + "p50": 3451.2319564819336, + "p90": 3471.712112426758, + "p95": 3492.703914642334, + "p99": 3513.1518840789795 + }, + "isolatedSum": { + "p50": 3472.9279279708862, + "p90": 3502.75194644928, + "p95": 3527.263879776001, + "p99": 3566.0160779953003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-3022204b", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe", + "colorKey": "gb300_2194b8a7", + "comparisonKey": "b41ef9107c941e10", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:03.705187+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3eb2f0d7bdba0fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.31999361515045, + "p90": 137.472003698349, + "p95": 140.06400108337402, + "p99": 158.65600109100342 + }, + "combine": { + "p50": 128.1919926404953, + "p90": 136.31999492645264, + "p95": 137.7599984407425, + "p99": 142.33599603176117 + }, + "roundtrip": { + "p50": 227.7120053768158, + "p90": 237.12000250816345, + "p95": 240.38399755954742, + "p99": 249.24799799919128 + }, + "isolatedSum": { + "p50": 256.51198625564575, + "p90": 273.79199862480164, + "p95": 277.8239995241165, + "p99": 300.9919971227646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 166.1120057106018, + "p90": 174.3679940700531, + "p95": 176.54399573802948, + "p99": 189.31199610233307 + }, + "combine": { + "p50": 172.28800058364868, + "p90": 176.7680048942566, + "p95": 179.51999604701996, + "p99": 186.20799481868744 + }, + "roundtrip": { + "p50": 304.6720027923584, + "p90": 313.6320114135742, + "p95": 316.895991563797, + "p99": 325.1839876174927 + }, + "isolatedSum": { + "p50": 338.4000062942505, + "p90": 351.1359989643097, + "p95": 356.06399178504944, + "p99": 375.5199909210205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.99999332427979, + "p90": 240.4160052537918, + "p95": 243.3920055627823, + 
"p99": 252.22399830818176 + }, + "combine": { + "p50": 287.77599334716797, + "p90": 295.74400186538696, + "p95": 297.12000489234924, + "p99": 301.40799283981323 + }, + "roundtrip": { + "p50": 467.48799085617065, + "p90": 476.6719937324524, + "p95": 479.64799404144287, + "p99": 492.000013589859 + }, + "isolatedSum": { + "p50": 519.7759866714478, + "p90": 536.1600071191788, + "p95": 540.5120104551315, + "p99": 553.631991147995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 360.0960075855255, + "p90": 369.2159950733185, + "p95": 373.05599451065063, + "p99": 395.4559862613678 + }, + "combine": { + "p50": 491.42399430274963, + "p90": 497.1199929714203, + "p95": 500.38397312164307, + "p99": 508.4800124168396 + }, + "roundtrip": { + "p50": 817.6959753036499, + "p90": 825.8559703826904, + "p95": 829.9199938774109, + "p99": 843.5840010643005 + }, + "isolatedSum": { + "p50": 851.5200018882751, + "p90": 866.3359880447388, + "p95": 873.4399676322937, + "p99": 903.9359986782074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 614.4319772720337, + "p90": 621.8240261077881, + "p95": 626.2720227241516, + "p99": 633.8880062103271 + }, + "combine": { + "p50": 866.1760091781616, + "p90": 874.6880292892456, + "p95": 876.2879967689514, + "p99": 879.7760009765625 + }, + "roundtrip": { + "p50": 1449.6959447860718, + "p90": 1458.016037940979, + "p95": 1460.6399536132812, + "p99": 1470.4960584640503 + }, + "isolatedSum": { + "p50": 
1480.6079864501953, + "p90": 1496.5120553970337, + "p95": 1502.560019493103, + "p99": 1513.6640071868896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1128.383994102478, + "p90": 1135.6159448623657, + "p95": 1137.4080181121826, + "p99": 1143.9039707183838 + }, + "combine": { + "p50": 1617.1200275421143, + "p90": 1626.7199516296387, + "p95": 1628.4799575805664, + "p99": 1633.6959600448608 + }, + "roundtrip": { + "p50": 2718.0800437927246, + "p90": 2730.1440238952637, + "p95": 2762.4640464782715, + "p99": 3182.3360919952393 + }, + "isolatedSum": { + "p50": 2745.5040216445923, + "p90": 2762.3358964920044, + "p95": 2765.887975692749, + "p99": 2777.5999307632446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b7210b5f", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_359d9fe4", + "comparisonKey": "58317a6fc38d0c98", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:14:48.681102+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · 
zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.71199643611908, + "p90": 136.4160031080246, + "p95": 141.12000167369843, + "p99": 148.25600385665894 + }, + "combine": { + "p50": 137.2160017490387, + "p90": 145.50399780273438, + "p95": 147.90399372577667, + "p99": 151.90400183200836 + }, + "roundtrip": { + "p50": 239.6160066127777, + "p90": 248.416006565094, + "p95": 251.80798768997192, + "p99": 260.9280049800873 + }, + "isolatedSum": { + "p50": 264.9279981851578, + "p90": 281.920000910759, + "p95": 289.0239953994751, + "p99": 300.1600056886673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + 
"fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.24799847602844, + "p90": 175.64800381660461, + "p95": 180.63999712467194, + "p99": 191.77600741386414 + }, + "combine": { + "p50": 186.20799481868744, + "p90": 204.22400534152985, + "p95": 209.56799387931824, + "p99": 246.46399915218353 + }, + "roundtrip": { + "p50": 321.21598720550537, + "p90": 344.2560136318207, + "p95": 349.2799997329712, + "p99": 803.4560084342957 + }, + "isolatedSum": { + "p50": 347.4559932947159, + "p90": 379.87200915813446, + "p95": 390.2079910039902, + "p99": 438.24000656604767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 233.63199830055237, + "p90": 246.62399291992188, + "p95": 255.13601303100586, + "p99": 274.6559977531433 + }, + "combine": { + "p50": 344.35200691223145, + "p90": 357.40798711776733, + "p95": 361.7280125617981, + "p99": 388.92799615859985 + }, + "roundtrip": { + "p50": 547.2319722175598, + "p90": 565.2160048484802, + "p95": 574.2400288581848, + "p99": 591.7440056800842 + }, + "isolatedSum": { + "p50": 577.9840052127838, + "p90": 604.0319800376892, + "p95": 616.864025592804, + "p99": 663.5839939117432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 378.81600856781006, + "p90": 397.3439931869507, + "p95": 405.85601329803467, + "p99": 4513.279914855957 + }, + "combine": 
{ + "p50": 630.8799982070923, + "p90": 674.2079854011536, + "p95": 686.3679885864258, + "p99": 4674.71981048584 + }, + "roundtrip": { + "p50": 967.8720235824585, + "p90": 1032.8320264816284, + "p95": 1373.6319541931152, + "p99": 8504.83226776123 + }, + "isolatedSum": { + "p50": 1009.6960067749023, + "p90": 1071.5519785881042, + "p95": 1092.2240018844604, + "p99": 9187.999725341797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 693.2479739189148, + "p90": 708.0320119857788, + "p95": 734.4319820404053, + "p99": 1682.2400093078613 + }, + "combine": { + "p50": 1132.9280138015747, + "p90": 1137.727975845337, + "p95": 1141.4079666137695, + "p99": 1182.9440593719482 + }, + "roundtrip": { + "p50": 1791.808009147644, + "p90": 1803.9679527282715, + "p95": 1807.3920011520386, + "p99": 1816.3520097732544 + }, + "isolatedSum": { + "p50": 1826.1759877204895, + "p90": 1845.7599878311157, + "p95": 1875.8399486541748, + "p99": 2865.1840686798096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1334.015965461731, + "p90": 1346.8480110168457, + "p95": 1352.2239923477173, + "p99": 1365.8560514450073 + }, + "combine": { + "p50": 2166.3360595703125, + "p90": 2193.4399604797363, + "p95": 2205.5039405822754, + "p99": 2221.3120460510254 + }, + "roundtrip": { + "p50": 3473.952054977417, + "p90": 3497.8880882263184, + "p95": 3507.4241161346436, + "p99": 3538.2080078125 + }, + "isolatedSum": { + "p50": 3500.3520250320435, + "p90": 
3540.287971496582, + "p95": 3557.7279329299927, + "p99": 3587.1680974960327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26e15b9a", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_e82c0e0a", + "comparisonKey": "554f011da8aefcbc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:15:07.845443+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.54400277137756, + "p90": 138.047993183136, + "p95": 143.96800100803375, + "p99": 177.08800733089447 + }, + "combine": { + "p50": 125.791996717453, + "p90": 134.17600095272064, + "p95": 136.76799833774567, + "p99": 169.98399794101715 + }, + "roundtrip": { + "p50": 226.27200186252594, + "p90": 234.72000658512115, + "p95": 240.447998046875, + "p99": 277.0879864692688 + }, + "isolatedSum": { + "p50": 254.33599948883057, + "p90": 272.2239941358566, + "p95": 280.7359993457794, + "p99": 347.0720052719116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 166.9120043516159, + "p90": 191.48799777030945, + "p95": 206.62400126457214, + "p99": 222.88000583648682 + }, + "combine": { + "p50": 171.23199999332428, + "p90": 200.32000541687012, + "p95": 209.72800254821777, + "p99": 228.06400060653687 + }, + "roundtrip": { + "p50": 303.5840094089508, + "p90": 315.20000100135803, + "p95": 322.7519989013672, + "p99": 353.1840145587921 + }, + "isolatedSum": { + "p50": 338.1440043449402, + "p90": 391.80800318717957, + "p95": 416.3520038127899, + "p99": 450.9440064430237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 
155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.80800676345825, + "p90": 241.05599522590637, + "p95": 244.35199797153473, + "p99": 268.8640058040619 + }, + "combine": { + "p50": 286.8480086326599, + "p90": 295.1039969921112, + "p95": 298.6559867858887, + "p99": 341.2800133228302 + }, + "roundtrip": { + "p50": 466.2080109119415, + "p90": 476.8959879875183, + "p95": 482.56000876426697, + "p99": 522.2079753875732 + }, + "isolatedSum": { + "p50": 518.6560153961182, + "p90": 536.1599922180176, + "p95": 543.0079847574234, + "p99": 610.1440191268921 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 356.224000453949, + "p90": 367.35999584198, + "p95": 371.45599722862244, + "p99": 385.72800159454346 + }, + "combine": { + "p50": 486.6560101509094, + "p90": 494.4959878921509, + "p95": 498.6880123615265, + "p99": 524.8000025749207 + }, + "roundtrip": { + "p50": 811.9360208511353, + "p90": 822.3040103912354, + "p95": 835.6800079345703, + "p99": 856.544017791748 + }, + "isolatedSum": { + "p50": 842.8800106048584, + "p90": 861.8559837341309, + "p95": 870.1440095901489, + "p99": 910.5280041694641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 617.5680160522461, + "p90": 627.9360055923462, + "p95": 636.6720199584961, + "p99": 652.4800062179565 + }, + 
"combine": { + "p50": 879.9039721488953, + "p90": 890.0160193443298, + "p95": 894.2720293998718, + "p99": 911.4879965782166 + }, + "roundtrip": { + "p50": 1468.384027481079, + "p90": 1479.2640209197998, + "p95": 1483.3279848098755, + "p99": 1508.1919431686401 + }, + "isolatedSum": { + "p50": 1497.4719882011414, + "p90": 1517.952024936676, + "p95": 1530.944049358368, + "p99": 1563.968002796173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1134.1439485549927, + "p90": 1142.8799629211426, + "p95": 1146.239995956421, + "p99": 1166.7200326919556 + }, + "combine": { + "p50": 1622.5279569625854, + "p90": 1631.8080425262451, + "p95": 1637.3440027236938, + "p99": 1647.1680402755737 + }, + "roundtrip": { + "p50": 2727.776050567627, + "p90": 2746.4640140533447, + "p95": 2764.7359371185303, + "p99": 3326.2720108032227 + }, + "isolatedSum": { + "p50": 2756.671905517578, + "p90": 2774.6880054473877, + "p95": 2783.5839986801147, + "p99": 2813.8880729675293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a9cab508", + "identity": "gb300|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_e52484d0", + "comparisonKey": "3149207a97f4df7e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:12:59.891574+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.77599692344666, + "p90": 137.53600418567657, + "p95": 144.31999623775482, + "p99": 229.5999974012375 + }, + "combine": { + "p50": 127.36000120639801, + "p90": 138.0160003900528, + "p95": 145.79200744628906, + "p99": 445.3440010547638 + }, + "roundtrip": { + "p50": 224.95999932289124, + "p90": 
235.00800132751465, + "p95": 239.45599794387817, + "p99": 290.0480031967163 + }, + "isolatedSum": { + "p50": 255.13599812984467, + "p90": 275.55200457572937, + "p95": 290.1120036840439, + "p99": 674.9439984560013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.000004529953, + "p90": 171.2000072002411, + "p95": 174.75199699401855, + "p99": 180.51199615001678 + }, + "combine": { + "p50": 169.37600076198578, + "p90": 173.95199835300446, + "p95": 175.77600479125977, + "p99": 182.97599256038666 + }, + "roundtrip": { + "p50": 300.7679879665375, + "p90": 307.776004076004, + "p95": 310.7199966907501, + "p99": 316.73601269721985 + }, + "isolatedSum": { + "p50": 333.3760052919388, + "p90": 345.15200555324554, + "p95": 350.5280017852783, + "p99": 363.48798871040344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 228.92799973487854, + "p90": 237.88799345493317, + "p95": 240.6720072031021, + "p99": 248.9279955625534 + }, + "combine": { + "p50": 284.5439910888672, + "p90": 291.9360101222992, + "p95": 294.0160036087036, + "p99": 296.9920039176941 + }, + "roundtrip": { + "p50": 461.95200085639954, + "p90": 470.0799882411957, + "p95": 471.77600860595703, + "p99": 478.91199588775635 + }, + "isolatedSum": { + "p50": 513.4719908237457, + "p90": 529.8240035772324, + "p95": 534.6880108118057, + "p99": 545.9199994802475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 
5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 354.3359935283661, + "p90": 362.8480136394501, + "p95": 365.56801199913025, + "p99": 373.6959993839264 + }, + "combine": { + "p50": 484.5759868621826, + "p90": 492.48000979423523, + "p95": 494.2080080509186, + "p99": 499.2319941520691 + }, + "roundtrip": { + "p50": 810.0799918174744, + "p90": 818.8160061836243, + "p95": 822.6240277290344, + "p99": 838.0159735679626 + }, + "isolatedSum": { + "p50": 838.9119803905487, + "p90": 855.3280234336853, + "p95": 859.7760200500488, + "p99": 872.9279935359955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 617.3120141029358, + "p90": 632.3519945144653, + "p95": 651.8399715423584, + "p99": 6839.4880294799805 + }, + "combine": { + "p50": 880.8640241622925, + "p90": 901.5359878540039, + "p95": 911.4559888839722, + "p99": 5946.49600982666 + }, + "roundtrip": { + "p50": 1464.4479751586914, + "p90": 1490.1119470596313, + "p95": 1496.6399669647217, + "p99": 3896.6081142425537 + }, + "isolatedSum": { + "p50": 1498.1760382652283, + "p90": 1533.8879823684692, + "p95": 1563.2959604263306, + "p99": 12785.98403930664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1133.1839561462402, + "p90": 1140.28799533844, + "p95": 1141.9520378112793, + "p99": 1146.1759805679321 + }, + "combine": { + 
"p50": 1616.7999505996704, + "p90": 1625.6959438323975, + "p95": 1627.5520324707031, + "p99": 1634.559988975525 + }, + "roundtrip": { + "p50": 2723.167896270752, + "p90": 2734.0478897094727, + "p95": 2739.7119998931885, + "p99": 2777.8239250183105 + }, + "isolatedSum": { + "p50": 2749.9839067459106, + "p90": 2765.9839391708374, + "p95": 2769.5040702819824, + "p99": 2780.735969543457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-87877f44", + "identity": "gb300|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_2d2139e3", + "comparisonKey": "507957a6d00b5693", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:59:22.073804+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": 
false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.96000331640244, + "p90": 119.55200135707855, + "p95": 125.791996717453, + "p99": 132.06399977207184 + }, + "combine": { + "p50": 126.30400061607361, + "p90": 133.88800621032715, + "p95": 135.5839967727661, + "p99": 139.80799913406372 + }, + "roundtrip": { + "p50": 210.24000644683838, + "p90": 216.5440022945404, + "p95": 218.52800250053406, + "p99": 225.75999796390533 + }, + "isolatedSum": { + "p50": 239.26400393247604, + "p90": 253.4400075674057, + "p95": 261.3759934902191, + "p99": 271.87199890613556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.4400054216385, + "p90": 155.93600273132324, + "p95": 158.9760035276413, + "p99": 167.67999529838562 + }, + "combine": { + "p50": 171.424001455307, + "p90": 175.55199563503265, + "p95": 178.56000363826752, + "p99": 185.05600094795227 + }, + "roundtrip": { + "p50": 286.24001145362854, + "p90": 292.959988117218, + "p95": 295.3599989414215, + "p99": 303.26399207115173 
+ }, + "isolatedSum": { + "p50": 320.8640068769455, + "p90": 331.4879983663559, + "p95": 337.5360071659088, + "p99": 352.7359962463379 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 217.75999665260315, + "p90": 226.17599368095398, + "p95": 229.98400032520294, + "p99": 234.20800268650055 + }, + "combine": { + "p50": 289.792001247406, + "p90": 296.25600576400757, + "p95": 297.5679934024811, + "p99": 301.56800150871277 + }, + "roundtrip": { + "p50": 454.68801259994507, + "p90": 464.60801362991333, + "p95": 467.51999855041504, + "p99": 472.6719856262207 + }, + "isolatedSum": { + "p50": 507.55199790000916, + "p90": 522.4319994449615, + "p95": 527.551993727684, + "p99": 535.7760041952133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 347.680002450943, + "p90": 355.3279936313629, + "p95": 358.43199491500854, + "p99": 369.6320056915283 + }, + "combine": { + "p50": 489.3440008163452, + "p90": 494.9119985103607, + "p95": 496.70401215553284, + "p99": 503.52001190185547 + }, + "roundtrip": { + "p50": 803.48801612854, + "p90": 811.680018901825, + "p95": 814.4959807395935, + "p99": 820.3200101852417 + }, + "isolatedSum": { + "p50": 837.0240032672882, + "p90": 850.2399921417236, + "p95": 855.1360070705414, + "p99": 873.1520175933838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 601.311981678009, + "p90": 609.503984451294, + "p95": 612.8000020980835, + "p99": 623.3599781990051 + }, + "combine": { + "p50": 869.6960210800171, + "p90": 876.8960237503052, + "p95": 878.9759874343872, + "p99": 886.3360285758972 + }, + "roundtrip": { + "p50": 1438.655972480774, + "p90": 1447.4560022354126, + "p95": 1450.3040313720703, + "p99": 1455.0080299377441 + }, + "isolatedSum": { + "p50": 1471.0080027580261, + "p90": 1486.4000082015991, + "p95": 1491.7759895324707, + "p99": 1509.6960067749023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1106.8480014801025, + "p90": 1114.848017692566, + "p95": 1117.2159910202026, + "p99": 1125.823974609375 + }, + "combine": { + "p50": 1616.1279678344727, + "p90": 1624.384045600891, + "p95": 1626.7839670181274, + "p99": 1629.1199922561646 + }, + "roundtrip": { + "p50": 2696.063995361328, + "p90": 2705.9199810028076, + "p95": 2708.479881286621, + "p99": 2714.9760723114014 + }, + "isolatedSum": { + "p50": 2722.975969314575, + "p90": 2739.232063293457, + "p95": 2743.99995803833, + "p99": 2754.9439668655396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-01eb4fea", + "identity": "gb300|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||03f98832f76b043", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "538b9bb72f6fae09", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:05:21.294959+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03f98832f76b043", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 369.6640133857727, + "p90": 388.2879912853241, + "p95": 393.312007188797, + "p99": 412.3840034008026 + }, + "combine": { + "p50": 104.51199859380722, + 
"p90": 109.53599959611893, + "p95": 111.80800199508667, + "p99": 118.04799735546112 + }, + "roundtrip": { + "p50": 444.09599900245667, + "p90": 461.4720046520233, + "p95": 466.39999747276306, + "p99": 485.2159917354584 + }, + "isolatedSum": { + "p50": 474.1760119795799, + "p90": 497.823990881443, + "p95": 505.12000918388367, + "p99": 530.4320007562637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 374.62401390075684, + "p90": 420.3839898109436, + "p95": 480.19200563430786, + "p99": 11754.816055297852 + }, + "combine": { + "p50": 139.71200585365295, + "p90": 145.56799829006195, + "p95": 148.03199470043182, + "p99": 154.11199629306793 + }, + "roundtrip": { + "p50": 486.1760139465332, + "p90": 533.6319804191589, + "p95": 542.0799851417542, + "p99": 553.1520247459412 + }, + "isolatedSum": { + "p50": 514.3360197544098, + "p90": 565.9519881010056, + "p95": 628.2240003347397, + "p99": 11908.92805159092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 442.4319863319397, + "p90": 484.8960041999817, + "p95": 491.0399913787842, + "p99": 558.239996433258 + }, + "combine": { + "p50": 211.13599836826324, + "p90": 218.27200055122375, + "p95": 220.7999974489212, + "p99": 240.7039999961853 + }, + "roundtrip": { + "p50": 634.9760293960571, + "p90": 677.2480010986328, + "p95": 688.1920099258423, + "p99": 735.647976398468 + }, + "isolatedSum": { + "p50": 653.5679847002029, + "p90": 703.1680047512054, + "p95": 711.8399888277054, + "p99": 798.9439964294434 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 599.3599891662598, + "p90": 636.896014213562, + "p95": 644.4479823112488, + "p99": 675.2319931983948 + }, + "combine": { + "p50": 376.3520121574402, + "p90": 387.03998923301697, + "p95": 391.64799451828003, + "p99": 405.9840142726898 + }, + "roundtrip": { + "p50": 983.1680059432983, + "p90": 1010.208010673523, + "p95": 1023.2000350952148, + "p99": 1683.1680536270142 + }, + "isolatedSum": { + "p50": 975.7120013237, + "p90": 1023.936003446579, + "p95": 1036.0959768295288, + "p99": 1081.2160074710846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 911.4559888839722, + "p90": 1080.9919834136963, + "p95": 3099.423885345459, + "p99": 8077.792167663574 + }, + "combine": { + "p50": 791.263997554779, + "p90": 803.0080199241638, + "p95": 811.2639784812927, + "p99": 4132.6398849487305 + }, + "roundtrip": { + "p50": 1644.0320014953613, + "p90": 1685.3439807891846, + "p95": 1709.4080448150635, + "p99": 5158.048152923584 + }, + "isolatedSum": { + "p50": 1702.7199864387512, + "p90": 1884.00000333786, + "p95": 3910.6878638267517, + "p99": 12210.432052612305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1527.6800394058228, + "p90": 1566.2720203399658, + "p95": 1573.4080076217651, + "p99": 1582.0800065994263 + }, + "combine": { + "p50": 1480.3199768066406, + "p90": 1489.3120527267456, + "p95": 1493.1520223617554, + "p99": 1503.2960176467896 + }, + "roundtrip": { + "p50": 2968.8000679016113, + "p90": 2989.567995071411, + "p95": 2998.3999729156494, + "p99": 3034.559965133667 + }, + "isolatedSum": { + "p50": 3008.0000162124634, + "p90": 3055.5840730667114, + "p95": 3066.5600299835205, + "p99": 3085.376024246216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5ff14054", + "identity": "gb300|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||a9df48e6438e77a", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "3f78b1423bebd92e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:06:27.296510+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + 
"deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a9df48e6438e77a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 406.17600083351135, + "p90": 430.1440119743347, + "p95": 435.808002948761, + "p99": 454.8160135746002 + }, + "combine": { + "p50": 113.56800049543381, + "p90": 118.367999792099, + "p95": 120.83200365304947, + "p99": 128.09599936008453 + }, + "roundtrip": { + "p50": 496.47998809814453, + "p90": 523.1360197067261, + "p95": 530.6559801101685, + "p99": 550.6560206413269 + }, + "isolatedSum": { + "p50": 519.7440013289452, + "p90": 548.5120117664337, + "p95": 556.6400066018105, + "p99": 582.9120129346848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 402.68799662590027, + "p90": 427.4879992008209, + "p95": 438.87999653816223, + "p99": 495.4240024089813 + }, + "combine": { + "p50": 147.39200472831726, + "p90": 152.99199521541595, + "p95": 156.38400614261627, + "p99": 193.15199553966522 + }, + "roundtrip": { + 
"p50": 532.4479937553406, + "p90": 558.1759810447693, + "p95": 568.7360167503357, + "p99": 588.6080265045166 + }, + "isolatedSum": { + "p50": 550.0800013542175, + "p90": 580.4799944162369, + "p95": 595.2640026807785, + "p99": 688.5759979486465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 486.2079918384552, + "p90": 502.8799772262573, + "p95": 510.68800687789917, + "p99": 544.0639853477478 + }, + "combine": { + "p50": 222.6240038871765, + "p90": 229.0239930152893, + "p95": 231.26399517059326, + "p99": 243.80800127983093 + }, + "roundtrip": { + "p50": 707.96799659729, + "p90": 727.616012096405, + "p95": 732.479989528656, + "p99": 749.0559816360474 + }, + "isolatedSum": { + "p50": 708.8319957256317, + "p90": 731.9039702415466, + "p95": 741.9520020484924, + "p99": 787.8719866275787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 671.0720062255859, + "p90": 689.7280216217041, + "p95": 695.1360106468201, + "p99": 706.1439752578735 + }, + "combine": { + "p50": 461.63201332092285, + "p90": 467.74399280548096, + "p95": 469.5360064506531, + "p99": 476.3840138912201 + }, + "roundtrip": { + "p50": 1071.2000131607056, + "p90": 1092.1599864959717, + "p95": 1097.6959466934204, + "p99": 1114.527940750122 + }, + "isolatedSum": { + "p50": 1132.7040195465088, + "p90": 1157.472014427185, + "p95": 1164.6720170974731, + "p99": 1182.5279891490936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 
446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1041.632056236267, + "p90": 1060.8320236206055, + "p95": 1066.7840242385864, + "p99": 1080.064058303833 + }, + "combine": { + "p50": 832.4159979820251, + "p90": 839.680016040802, + "p95": 841.4720296859741, + "p99": 845.1520204544067 + }, + "roundtrip": { + "p50": 1824.895977973938, + "p90": 1844.704031944275, + "p95": 1852.6719808578491, + "p99": 1870.3360557556152 + }, + "isolatedSum": { + "p50": 1874.0480542182922, + "p90": 1900.5120396614075, + "p95": 1908.2560539245605, + "p99": 1925.2160787582397 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1799.7759580612183, + "p90": 1848.7039804458618, + "p95": 2759.135961532593, + "p99": 21328.031539916992 + }, + "combine": { + "p50": 1563.7439489364624, + "p90": 1576.6079425811768, + "p95": 1587.6799821853638, + "p99": 3263.3919715881348 + }, + "roundtrip": { + "p50": 3316.8320655822754, + "p90": 3357.7280044555664, + "p95": 3370.6560134887695, + "p99": 6514.560222625732 + }, + "isolatedSum": { + "p50": 3363.5199069976807, + "p90": 3425.3119230270386, + "p95": 4346.8159437179565, + "p99": 24591.423511505127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f9116414", + "identity": 
"gb300|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "3005fa056d422201", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:07:37.157169+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 354.0160059928894, + "p90": 378.08001041412354, + "p95": 392.63999462127686, + "p99": 464.92800116539 + }, + "combine": { + "p50": 116.60800129175186, + "p90": 123.61600250005722, + "p95": 127.16799974441528, + "p99": 163.07200491428375 + }, + "roundtrip": { + "p50": 439.9360120296478, + "p90": 457.66401290893555, + "p95": 462.3680114746094, + "p99": 470.17601132392883 + }, + "isolatedSum": { + "p50": 470.62400728464127, + "p90": 501.69601291418076, + "p95": 519.8079943656921, + "p99": 628.0000060796738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 374.2719888687134, + "p90": 393.5999870300293, + "p95": 432.44799971580505, + "p99": 4312.255859375 + }, + "combine": { + "p50": 160.0320041179657, + "p90": 167.61599481105804, + "p95": 170.0800061225891, + "p99": 646.56001329422 + }, + "roundtrip": { + "p50": 501.6639828681946, + "p90": 514.303982257843, + "p95": 518.0479884147644, + "p99": 562.2079968452454 + }, + "isolatedSum": { + "p50": 534.3039929866791, + "p90": 561.2159818410873, + "p95": 602.5280058383942, + "p99": 4958.81587266922 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 478.14399003982544, + "p90": 495.58401107788086, + "p95": 567.903995513916, + "p99": 25684.608459472656 + }, + "combine": { + "p50": 249.31199848651886, + "p90": 263.16800713539124, + "p95": 267.61600375175476, + "p99": 714.4960165023804 + }, + "roundtrip": { + "p50": 
719.5199728012085, + "p90": 746.6239929199219, + "p95": 758.080005645752, + "p99": 3289.5359992980957 + }, + "isolatedSum": { + "p50": 727.4559885263443, + "p90": 758.7520182132721, + "p95": 835.5199992656708, + "p99": 26399.104475975037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 686.0479712486267, + "p90": 698.1440186500549, + "p95": 701.4080286026001, + "p99": 709.5680236816406 + }, + "combine": { + "p50": 466.43200516700745, + "p90": 472.6719856262207, + "p95": 474.7839868068695, + "p99": 482.4320077896118 + }, + "roundtrip": { + "p50": 1123.5840320587158, + "p90": 1136.4799737930298, + "p95": 1139.7440433502197, + "p99": 1148.2239961624146 + }, + "isolatedSum": { + "p50": 1152.4799764156342, + "p90": 1170.8160042762756, + "p95": 1176.1920154094696, + "p99": 1192.0000314712524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1111.456036567688, + "p90": 1124.1919994354248, + "p95": 1127.7439594268799, + "p99": 1141.6000127792358 + }, + "combine": { + "p50": 838.6880159378052, + "p90": 845.1200127601624, + "p95": 847.1680283546448, + "p99": 851.1679768562317 + }, + "roundtrip": { + "p50": 1917.631983757019, + "p90": 1949.5359659194946, + "p95": 1988.0319833755493, + "p99": 8563.96770477295 + }, + "isolatedSum": { + "p50": 1950.1440525054932, + "p90": 1969.3120121955872, + "p95": 1974.9119877815247, + "p99": 1992.7679896354675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + 
"combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2043.7119007110596, + "p90": 2058.3999156951904, + "p95": 2062.079906463623, + "p99": 2071.743965148926 + }, + "combine": { + "p50": 1586.2720012664795, + "p90": 1594.3039655685425, + "p95": 1597.1200466156006, + "p99": 1601.4080047607422 + }, + "roundtrip": { + "p50": 3590.816020965576, + "p90": 3602.5919914245605, + "p95": 3607.9039573669434, + "p99": 3628.0319690704346 + }, + "isolatedSum": { + "p50": 3629.983901977539, + "p90": 3652.703881263733, + "p95": 3659.1999530792236, + "p99": 3673.151969909668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a5bff7a3", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_33bbf042", + "comparisonKey": "1a0fcaf48d6ac10c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:27.591667+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.57600104808807, + "p90": 123.6800029873848, + "p95": 126.94400548934937, + "p99": 130.72000443935394 + }, + "combine": { + "p50": 123.55200201272964, + "p90": 128.1919926404953, + "p95": 129.7920048236847, + "p99": 137.2479945421219 + }, + "roundtrip": { + "p50": 284.09600257873535, + "p90": 296.03201150894165, + "p95": 299.74400997161865, + "p99": 306.36799335479736 + }, + "isolatedSum": { + "p50": 240.12800306081772, + "p90": 251.8719956278801, + "p95": 256.73601031303406, + "p99": 267.96799898147583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
146.40000462532043, + "p90": 152.0320028066635, + "p95": 154.11199629306793, + "p99": 161.31199896335602 + }, + "combine": { + "p50": 165.98400473594666, + "p90": 172.2559928894043, + "p95": 175.55199563503265, + "p99": 181.69599771499634 + }, + "roundtrip": { + "p50": 358.43199491500854, + "p90": 366.07998609542847, + "p95": 370.40001153945923, + "p99": 378.7840008735657 + }, + "isolatedSum": { + "p50": 312.3840093612671, + "p90": 324.2879956960678, + "p95": 329.6639919281006, + "p99": 343.00799667835236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 201.92000269889832, + "p90": 209.75999534130096, + "p95": 213.15200626850128, + "p99": 218.1439995765686 + }, + "combine": { + "p50": 287.51999139785767, + "p90": 295.2960133552551, + "p95": 297.5679934024811, + "p99": 303.99999022483826 + }, + "roundtrip": { + "p50": 589.024007320404, + "p90": 599.0399718284607, + "p95": 601.5679836273193, + "p99": 608.7679862976074 + }, + "isolatedSum": { + "p50": 489.439994096756, + "p90": 505.0560086965561, + "p95": 510.71999967098236, + "p99": 522.1439898014069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 315.71200489997864, + "p90": 323.35999608039856, + "p95": 325.9199857711792, + "p99": 331.4880132675171 + }, + "combine": { + "p50": 485.1840138435364, + "p90": 490.62401056289673, + "p95": 492.3200011253357, + "p99": 495.58401107788086 + }, + "roundtrip": { + "p50": 1014.6239995956421, + "p90": 1023.3919620513916, + "p95": 
1025.696039199829, + "p99": 1032.480001449585 + }, + "isolatedSum": { + "p50": 800.896018743515, + "p90": 813.9840066432953, + "p95": 818.2399868965149, + "p99": 827.072024345398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 555.0079941749573, + "p90": 570.3999996185303, + "p95": 581.0880064964294, + "p99": 45689.3424987793 + }, + "combine": { + "p50": 870.0799942016602, + "p90": 910.3040099143982, + "p95": 919.7760224342346, + "p99": 18490.144729614258 + }, + "roundtrip": { + "p50": 1847.4559783935547, + "p90": 1857.759952545166, + "p95": 1861.7600202560425, + "p99": 1873.2160329818726 + }, + "isolatedSum": { + "p50": 1425.0879883766174, + "p90": 1480.7040095329285, + "p95": 1500.864028930664, + "p99": 64179.487228393555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1024.1279602050781, + "p90": 1037.5679731369019, + "p95": 1041.1200523376465, + "p99": 1051.2959957122803 + }, + "combine": { + "p50": 1616.5440082550049, + "p90": 1624.384045600891, + "p95": 1629.6319961547852, + "p99": 1657.439947128296 + }, + "roundtrip": { + "p50": 3510.6239318847656, + "p90": 3521.4080810546875, + "p95": 3524.1599082946777, + "p99": 3531.071901321411 + }, + "isolatedSum": { + "p50": 2640.671968460083, + "p90": 2661.952018737793, + "p95": 2670.7520484924316, + "p99": 2708.735942840576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + 
"recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d935c34f", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "3ff5b3e4c6759573", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:40.720679+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 354.7520041465759, + "p90": 375.5840063095093, + "p95": 382.6879858970642, + "p99": 404.7360122203827 + }, + "combine": { + "p50": 120.35199999809265, + "p90": 125.21600723266602, + "p95": 129.15199995040894, + "p99": 137.15200126171112 + }, + "roundtrip": { + "p50": 448.7040042877197, + "p90": 465.56800603866577, + "p95": 469.760000705719, + "p99": 480.6399941444397 + }, + "isolatedSum": { + "p50": 475.1040041446686, + "p90": 500.8000135421753, + "p95": 511.83998584747314, + "p99": 541.8880134820938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 382.78400897979736, + "p90": 404.9600064754486, + "p95": 423.68000745773315, + "p99": 480.73598742485046 + }, + "combine": { + "p50": 164.86400365829468, + "p90": 201.12000405788422, + "p95": 207.93600380420685, + "p99": 229.0560007095337 + }, + "roundtrip": { + "p50": 532.2239995002747, + "p90": 553.9839863777161, + "p95": 567.7760243415833, + "p99": 590.9119844436646 + }, + "isolatedSum": { + "p50": 547.648012638092, + "p90": 606.0800105333328, + "p95": 631.61601126194, + "p99": 709.7919881343842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 496.92800641059875, + "p90": 512.8639936447144, + "p95": 517.6640152931213, + "p99": 
560.5760216712952 + }, + "combine": { + "p50": 280.89600801467896, + "p90": 286.72000765800476, + "p95": 288.672000169754, + "p99": 298.0160117149353 + }, + "roundtrip": { + "p50": 761.8560194969177, + "p90": 776.960015296936, + "p95": 781.5999984741211, + "p99": 816.4479732513428 + }, + "isolatedSum": { + "p50": 777.8240144252777, + "p90": 799.5840013027191, + "p95": 806.3360154628754, + "p99": 858.5920333862305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 728.9280295372009, + "p90": 743.4560060501099, + "p95": 751.7759799957275, + "p99": 781.823992729187 + }, + "combine": { + "p50": 485.4080080986023, + "p90": 508.3199739456177, + "p95": 513.8239860534668, + "p99": 533.0560207366943 + }, + "roundtrip": { + "p50": 1185.9519481658936, + "p90": 1224.8640060424805, + "p95": 1237.9200458526611, + "p99": 2058.6559772491455 + }, + "isolatedSum": { + "p50": 1214.3360376358032, + "p90": 1251.7759799957275, + "p95": 1265.5999660491943, + "p99": 1314.8800134658813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1197.0560550689697, + "p90": 1229.8239469528198, + "p95": 1249.824047088623, + "p99": 33397.151947021484 + }, + "combine": { + "p50": 870.5919981002808, + "p90": 884.3520283699036, + "p95": 892.799973487854, + "p99": 911.4559888839722 + }, + "roundtrip": { + "p50": 2028.0001163482666, + "p90": 2049.5359897613525, + "p95": 2058.271884918213, + "p99": 2109.407901763916 + }, + "isolatedSum": { + "p50": 2067.6480531692505, + 
"p90": 2114.1759753227234, + "p95": 2142.624020576477, + "p99": 34308.60793590546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2231.775999069214, + "p90": 2244.0640926361084, + "p95": 2248.800039291382, + "p99": 2256.8318843841553 + }, + "combine": { + "p50": 1616.0000562667847, + "p90": 1624.6720552444458, + "p95": 1626.3999938964844, + "p99": 1634.4959735870361 + }, + "roundtrip": { + "p50": 3819.3600177764893, + "p90": 3834.912061691284, + "p95": 3840.7680988311768, + "p99": 3870.2080249786377 + }, + "isolatedSum": { + "p50": 3847.7760553359985, + "p90": 3868.736147880554, + "p95": 3875.200033187866, + "p99": 3891.3278579711914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2c3ec2e0", + "identity": "gb300|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||fc08bf2f8d42ed8", + "colorKey": "gb300_8db1bd7e", + "comparisonKey": "ed8c93d292580037", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:04:11.660854+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "fc08bf2f8d42ed8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 470.2399969100952, + "p90": 499.55201148986816, + "p95": 508.67199897766113, + "p99": 522.1440196037292 + }, + "combine": { + "p50": 126.20800733566284, + "p90": 133.82400572299957, + "p95": 137.56799697875977, + "p99": 145.11999487876892 + }, + "roundtrip": { + "p50": 561.0560178756714, + "p90": 589.3120169639587, + "p95": 595.8080291748047, + "p99": 609.7919940948486 + }, + "isolatedSum": { + "p50": 596.448004245758, + "p90": 633.3760172128677, + "p95": 646.2399959564209, + "p99": 667.2640144824982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 453.792005777359, + "p90": 482.87999629974365, + "p95": 494.7519898414612, + "p99": 596.2240099906921 + }, + "combine": { + "p50": 168.86399686336517, + "p90": 176.4799952507019, + "p95": 180.00000715255737, + "p99": 184.7359985113144 + }, + "roundtrip": { + "p50": 603.551983833313, + "p90": 633.5999965667725, + "p95": 648.7680077552795, + "p99": 764.9919986724854 + }, + "isolatedSum": { + "p50": 622.6560026407242, + "p90": 659.3599915504456, + "p95": 674.7519969940186, + "p99": 780.9600085020065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 568.3519840240479, + "p90": 595.52001953125, + "p95": 688.1279945373535, + "p99": 20593.759536743164 + }, + "combine": { + "p50": 282.943993806839, + "p90": 293.15200448036194, + "p95": 297.0240116119385, + "p99": 4348.288059234619 + }, + "roundtrip": { + "p50": 829.15198802948, + "p90": 859.1359853744507, + "p95": 864.7040128707886, + "p99": 932.2559833526611 + }, + "isolatedSum": { + "p50": 851.2959778308868, + "p90": 888.6720240116119, + "p95": 985.152006149292, + "p99": 24942.047595977783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 802.2080063819885, + "p90": 826.6879916191101, + "p95": 837.5359773635864, + "p99": 944.5760250091553 + }, + "combine": { + "p50": 495.93600630760193, + "p90": 504.35197353363037, + "p95": 508.54402780532837, + "p99": 
538.4320020675659 + }, + "roundtrip": { + "p50": 1268.8319683074951, + "p90": 1295.7760095596313, + "p95": 1302.847981452942, + "p99": 1385.0560188293457 + }, + "isolatedSum": { + "p50": 1298.1440126895905, + "p90": 1331.0399651527405, + "p95": 1346.0800051689148, + "p99": 1483.0080270767212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1264.0000581741333, + "p90": 1334.9119424819946, + "p95": 3505.631923675537, + "p99": 58169.34585571289 + }, + "combine": { + "p50": 866.8479919433594, + "p90": 909.8560214042664, + "p95": 925.5359768867493, + "p99": 19435.199737548828 + }, + "roundtrip": { + "p50": 2096.479892730713, + "p90": 2131.55198097229, + "p95": 6006.976127624512, + "p99": 59836.544036865234 + }, + "isolatedSum": { + "p50": 2130.8480501174927, + "p90": 2244.767963886261, + "p95": 4431.167900562286, + "p99": 77604.54559326172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2235.2640628814697, + "p90": 2249.1519451141357, + "p95": 2254.175901412964, + "p99": 2266.047954559326 + }, + "combine": { + "p50": 1598.7199544906616, + "p90": 1610.0159883499146, + "p95": 1619.00794506073, + "p99": 1633.952021598816 + }, + "roundtrip": { + "p50": 3792.2239303588867, + "p90": 3814.3999576568604, + "p95": 3824.415922164917, + "p99": 3882.2720050811768 + }, + "isolatedSum": { + "p50": 3833.9840173721313, + "p90": 3859.1679334640503, + "p95": 3873.183846473694, + "p99": 3899.999976158142 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e967862b", + "identity": "gb300|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_4e6a59ba", + "comparisonKey": "fdc598d60e698f44", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:01:00.171348+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1316, + "configuredUnits": 20, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.52799773216248, + "p90": 105.85600137710571, + "p95": 111.77600175142288, + "p99": 133.27999413013458 + }, + "combine": { + "p50": 120.83200365304947, + "p90": 127.10399925708771, + "p95": 132.32000172138214, + "p99": 159.45599973201752 + }, + "roundtrip": { + "p50": 261.1519992351532, + "p90": 277.6319980621338, + "p95": 282.6240062713623, + "p99": 332.67199993133545 + }, + "isolatedSum": { + "p50": 219.36000138521194, + "p90": 232.96000063419342, + "p95": 244.09600347280502, + "p99": 292.7359938621521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 129.85600531101227, + "p90": 135.71199774742126, + "p95": 140.32000303268433, + "p99": 149.85600113868713 + }, + "combine": { + "p50": 163.10399770736694, + "p90": 167.4560010433197, + "p95": 169.88800466060638, + "p99": 177.5359958410263 + }, + "roundtrip": { + "p50": 340.92798829078674, + "p90": 347.3919928073883, + "p95": 352.2239923477173, + "p99": 361.5039885044098 + }, + "isolatedSum": { + "p50": 292.9600030183792, + "p90": 303.16799879074097, + "p95": 310.2080076932907, + "p99": 327.39199697971344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 184.28799510002136, + "p90": 190.40000438690186, + "p95": 194.14399564266205, + "p99": 202.78400182724 + }, + "combine": { + "p50": 285.5679988861084, + "p90": 294.1119968891144, + "p95": 297.5359857082367, + "p99": 305.08801341056824 + }, + "roundtrip": { + "p50": 568.7999725341797, + "p90": 577.888011932373, + "p95": 582.7199816703796, + "p99": 588.5760188102722 + }, + "isolatedSum": { + "p50": 469.85599398612976, + "p90": 484.51200127601624, + "p95": 491.67998135089874, + "p99": 507.8720152378082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 300.5119860172272, + "p90": 310.11199951171875, + "p95": 313.85600566864014, + "p99": 338.55998516082764 + }, + "combine": { + "p50": 483.5839867591858, + "p90": 490.1120066642761, + "p95": 492.15999245643616, + "p99": 502.3999810218811 + }, + "roundtrip": { + "p50": 995.9679841995239, + "p90": 1003.3919811248779, + "p95": 1006.9119930267334, + "p99": 1013.2800340652466 + }, + "isolatedSum": { + "p50": 784.095972776413, + "p90": 800.2240061759949, + "p95": 806.0159981250763, + "p99": 840.9599661827087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 537.7920269966125, + "p90": 548.4480261802673, + "p95": 552.7359843254089, + "p99": 579.8720121383667 + }, + "combine": { + "p50": 866.6880130767822, + "p90": 874.9439716339111, + "p95": 878.0159950256348, + "p99": 892.4480080604553 + }, + "roundtrip": { 
+ "p50": 1832.095980644226, + "p90": 1841.599941253662, + "p95": 1844.223976135254, + "p99": 1852.128028869629 + }, + "isolatedSum": { + "p50": 1404.4800400733948, + "p90": 1423.3919978141785, + "p95": 1430.7519793510437, + "p99": 1472.320020198822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 998.7840056419373, + "p90": 1010.7200145721436, + "p95": 1014.7199630737305, + "p99": 1021.28005027771 + }, + "combine": { + "p50": 1615.839958190918, + "p90": 1623.5840320587158, + "p95": 1625.3119707107544, + "p99": 1629.3760538101196 + }, + "roundtrip": { + "p50": 3491.136074066162, + "p90": 3503.2639503479004, + "p95": 3507.296085357666, + "p99": 3524.2879390716553 + }, + "isolatedSum": { + "p50": 2614.623963832855, + "p90": 2634.3040466308594, + "p95": 2640.031933784485, + "p99": 2650.6561040878296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d9e83965", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_9e1e517c", + "comparisonKey": "28d197f67cdd9ca0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:26.944034+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 253.4399926662445, + "p90": 266.7520046234131, + "p95": 270.687997341156, + "p99": 284.960001707077 + }, + "combine": { + "p50": 47.16800153255463, + "p90": 50.52800104022026, + "p95": 52.70399898290634, + "p99": 59.167999774217606 + }, + "roundtrip": { + "p50": 287.3919904232025, + "p90": 301.2799918651581, + "p95": 304.6720027923584, + "p99": 318.30400228500366 + }, + "isolatedSum": { + "p50": 300.60799419879913, + "p90": 317.28000566363335, + "p95": 323.39199632406235, + "p99": 344.12800148129463 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 254.97600436210632, + "p90": 269.9199914932251, + "p95": 273.1199860572815, + "p99": 280.60799837112427 + }, + "combine": { + "p50": 47.58400097489357, + "p90": 51.00800096988678, + "p95": 53.53600159287453, + "p99": 59.20000001788139 + }, + "roundtrip": { + "p50": 288.1920039653778, + "p90": 300.57600140571594, + "p95": 304.064005613327, + "p99": 310.8479976654053 + }, + "isolatedSum": { + "p50": 302.5600053369999, + "p90": 320.9279924631119, + "p95": 326.655987650156, + "p99": 339.80799838900566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 256.3199996948242, + "p90": 270.4960107803345, + "p95": 273.9520072937012, + "p99": 281.21599555015564 + }, + "combine": { + "p50": 49.50400069355965, + "p90": 52.928000688552856, + "p95": 56.2559999525547, + "p99": 61.055999249219894 + }, + "roundtrip": { + "p50": 288.35201263427734, + "p90": 301.2480139732361, + "p95": 305.11999130249023, + "p99": 314.94399905204773 + }, + "isolatedSum": { + "p50": 305.82400038838387, + "p90": 323.42401146888733, + "p95": 330.2080072462559, + "p99": 342.27199479937553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 255.67999482154846, + "p90": 270.04799246788025, + "p95": 272.92799949645996, + 
"p99": 281.3439965248108 + }, + "combine": { + "p50": 49.79199916124344, + "p90": 53.02400141954422, + "p95": 54.55999821424484, + "p99": 61.28000095486641 + }, + "roundtrip": { + "p50": 287.9039943218231, + "p90": 302.2080063819885, + "p95": 305.7920038700104, + "p99": 318.4320032596588 + }, + "isolatedSum": { + "p50": 305.4719939827919, + "p90": 323.07199388742447, + "p95": 327.4879977107048, + "p99": 342.6239974796772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 256.48000836372375, + "p90": 269.27998661994934, + "p95": 272.92799949645996, + "p99": 285.0559949874878 + }, + "combine": { + "p50": 49.79199916124344, + "p90": 53.18399891257286, + "p95": 56.12799897789955, + "p99": 64.7680014371872 + }, + "roundtrip": { + "p50": 290.43200612068176, + "p90": 302.5279939174652, + "p95": 305.82401156425476, + "p99": 313.4720027446747 + }, + "isolatedSum": { + "p50": 306.2720075249672, + "p90": 322.4639855325222, + "p95": 329.0559984743595, + "p99": 349.823996424675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 58, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 262.4320089817047, + "p90": 273.6000120639801, + "p95": 277.24799513816833, + "p99": 284.06399488449097 + }, + "combine": { + "p50": 51.4880008995533, + "p90": 54.71999943256378, + "p95": 58.240000158548355, + "p99": 64.96000289916992 + }, + "roundtrip": { + "p50": 296.7680096626282, + "p90": 309.4080090522766, + "p95": 311.2959861755371, + "p99": 316.5439963340759 + }, + "isolatedSum": { + "p50": 313.920009881258, + "p90": 328.3200114965439, + 
"p95": 335.4879952967167, + "p99": 349.0239977836609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 119, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 293.4719920158386, + "p90": 306.7840039730072, + "p95": 309.9200129508972, + "p99": 318.4320032596588 + }, + "combine": { + "p50": 53.15199866890907, + "p90": 56.384000927209854, + "p95": 58.720000088214874, + "p99": 64.03200328350067 + }, + "roundtrip": { + "p50": 325.439989566803, + "p90": 336.4799916744232, + "p95": 339.83999490737915, + "p99": 345.66399455070496 + }, + "isolatedSum": { + "p50": 346.6239906847477, + "p90": 363.16800490021706, + "p95": 368.6400130391121, + "p99": 382.4640065431595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 241, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 291.7439937591553, + "p90": 304.60798740386963, + "p95": 307.9040050506592, + "p99": 312.0959997177124 + }, + "combine": { + "p50": 57.18399956822395, + "p90": 59.808000922203064, + "p95": 60.99199876189232, + "p99": 65.85600227117538 + }, + "roundtrip": { + "p50": 331.13598823547363, + "p90": 342.4000144004822, + "p95": 346.6559946537018, + "p99": 353.69598865509033 + }, + "isolatedSum": { + "p50": 348.9279933273792, + "p90": 364.4159883260727, + "p95": 368.8960038125515, + "p99": 377.9520019888878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1764c13e", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|decode|normal|none|none|0|tuned||bb358a3c2e68578", + "colorKey": "gb300_f078f264", + "comparisonKey": "87aebe90bbac74b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:00.363131+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "bb358a3c2e68578", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 253.08799743652344, + "p90": 266.400009393692, + "p95": 270.33600211143494, + "p99": 277.15200185775757 + }, + "combine": { + "p50": 46.9760000705719, + "p90": 50.016000866889954, + "p95": 52.73599922657013, + "p99": 58.559998869895935 + }, + "roundtrip": { + "p50": 284.2240035533905, + "p90": 297.08799719810486, + "p95": 301.472008228302, + "p99": 308.51200222969055 + }, + "isolatedSum": { + "p50": 300.06399750709534, + "p90": 316.41601026058197, + "p95": 323.07200133800507, + "p99": 335.7120007276535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 254.7839879989624, + "p90": 268.48000288009644, + "p95": 274.01599287986755, + "p99": 283.9359939098358 + }, + "combine": { + "p50": 47.07200080156326, + "p90": 50.27199909090996, + "p95": 52.480001002550125, + "p99": 58.720000088214874 + }, + "roundtrip": { + "p50": 284.8320007324219, + "p90": 298.43199253082275, + "p95": 301.37598514556885, + "p99": 307.0079982280731 + }, + "isolatedSum": { + "p50": 301.85598880052567, + "p90": 318.7520019710064, + "p95": 326.4959938824177, + "p99": 342.6559939980507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 4, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 255.87201118469238, + "p90": 269.21600103378296, + "p95": 273.75999093055725, + "p99": 281.823992729187 + }, + "combine": { + "p50": 48.767998814582825, + "p90": 52.09600180387497, + "p95": 54.016001522541046, + "p99": 60.67200005054474 + }, + 
"roundtrip": { + "p50": 286.27198934555054, + "p90": 299.1360127925873, + "p95": 302.3360073566437, + "p99": 306.65600299835205 + }, + "isolatedSum": { + "p50": 304.6400099992752, + "p90": 321.31200283765793, + "p95": 327.7759924530983, + "p99": 342.49599277973175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 4, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 254.59200143814087, + "p90": 268.5439884662628, + "p95": 272.12798595428467, + "p99": 280.95999360084534 + }, + "combine": { + "p50": 49.056001007556915, + "p90": 52.25599929690361, + "p95": 54.17599901556969, + "p99": 59.967998415231705 + }, + "roundtrip": { + "p50": 285.47200560569763, + "p90": 298.880010843277, + "p95": 302.623987197876, + "p99": 307.93601274490356 + }, + "isolatedSum": { + "p50": 303.6480024456978, + "p90": 320.7999877631664, + "p95": 326.30398496985435, + "p99": 340.92799201607704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 4, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 253.85600328445435, + "p90": 266.7199969291687, + "p95": 269.8880136013031, + "p99": 278.4000039100647 + }, + "combine": { + "p50": 49.215998500585556, + "p90": 51.96800082921982, + "p95": 54.207999259233475, + "p99": 59.55199897289276 + }, + "roundtrip": { + "p50": 287.58400678634644, + "p90": 300.5119860172272, + "p95": 304.7359883785248, + "p99": 310.2720081806183 + }, + "isolatedSum": { + "p50": 303.0720017850399, + "p90": 318.6879977583885, + "p95": 324.0960128605366, + "p99": 337.95200288295746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + 
"fanoutMean": 4, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 259.45600867271423, + "p90": 271.4880108833313, + "p95": 274.6239900588989, + "p99": 280.7359993457794 + }, + "combine": { + "p50": 49.34399947524071, + "p90": 52.83199995756149, + "p95": 55.16799911856651, + "p99": 61.824001371860504 + }, + "roundtrip": { + "p50": 291.6480004787445, + "p90": 303.3919930458069, + "p95": 306.5280020236969, + "p99": 313.60000371932983 + }, + "isolatedSum": { + "p50": 308.80000814795494, + "p90": 324.3200108408928, + "p95": 329.79198917746544, + "p99": 342.5600007176399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 4, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 292.86399483680725, + "p90": 305.08801341056824, + "p95": 309.28000807762146, + "p99": 316.864013671875 + }, + "combine": { + "p50": 50.87999999523163, + "p90": 53.79199981689453, + "p95": 55.52000179886818, + "p99": 62.68800050020218 + }, + "roundtrip": { + "p50": 325.3119885921478, + "p90": 336.2880051136017, + "p95": 340.06398916244507, + "p99": 348.06400537490845 + }, + "isolatedSum": { + "p50": 343.7439948320389, + "p90": 358.88001322746277, + "p95": 364.80000987648964, + "p99": 379.5520141720772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 4, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 292.9919958114624, + "p90": 305.27999997138977, + "p95": 309.1520071029663, + "p99": 314.7839903831482 + }, + "combine": { + "p50": 56.8000003695488, + "p90": 
59.84000116586685, + "p95": 61.08799949288368, + "p99": 66.11199676990509 + }, + "roundtrip": { + "p50": 331.5199911594391, + "p90": 342.848002910614, + "p95": 347.0720052719116, + "p99": 382.4000060558319 + }, + "isolatedSum": { + "p50": 349.7919961810112, + "p90": 365.1200011372566, + "p95": 370.24000659585, + "p99": 380.8959871530533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0501259e", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|decode|normal|none|none|0|tuned||c9bbf5a132d7fdf", + "colorKey": "gb300_07cee71f", + "comparisonKey": "9bdf4b47952d15dc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:31.942812+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9bbf5a132d7fdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 238.97600173950195, + "p90": 252.6400089263916, + "p95": 256.22400641441345, + "p99": 262.65600323677063 + }, + "combine": { + "p50": 41.6640006005764, + "p90": 44.64000090956688, + "p95": 46.78399860858917, + "p99": 52.671998739242554 + }, + "roundtrip": { + "p50": 268.8319981098175, + "p90": 282.81599283218384, + "p95": 286.0479950904846, + "p99": 293.0240035057068 + }, + "isolatedSum": { + "p50": 280.64000234007835, + "p90": 297.2800098359585, + "p95": 303.0080050230026, + "p99": 315.3280019760132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 57344, + "combineLogicalBytes": 57344, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 249.7279942035675, + "p90": 263.9999985694885, + "p95": 267.42398738861084, + "p99": 276.95998549461365 + }, + "combine": { + "p50": 46.112000942230225, + "p90": 50.36799982190132, + "p95": 53.63199859857559, + "p99": 58.75200033187866 + }, + "roundtrip": { + "p50": 282.6240062713623, + "p90": 295.1680123806, + "p95": 298.14401268959045, + "p99": 304.7040104866028 + }, + "isolatedSum": { + "p50": 
295.83999514579773, + "p90": 314.36799839138985, + "p95": 321.05598598718643, + "p99": 335.7119858264923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 250.4960000514984, + "p90": 264.6079957485199, + "p95": 267.93599128723145, + "p99": 275.39199590682983 + }, + "combine": { + "p50": 48.31999912858009, + "p90": 51.83999985456467, + "p95": 54.496001452207565, + "p99": 59.58399921655655 + }, + "roundtrip": { + "p50": 283.90398621559143, + "p90": 296.31999135017395, + "p95": 299.45600032806396, + "p99": 306.08001351356506 + }, + "isolatedSum": { + "p50": 298.8159991800785, + "p90": 316.44799560308456, + "p95": 322.431992739439, + "p99": 334.9759951233864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 253.4080147743225, + "p90": 266.81599020957947, + "p95": 270.2080011367798, + "p99": 284.41599011421204 + }, + "combine": { + "p50": 50.23999884724617, + "p90": 53.279999643564224, + "p95": 55.296000093221664, + "p99": 61.47199869155884 + }, + "roundtrip": { + "p50": 285.69599986076355, + "p90": 297.5040078163147, + "p95": 300.8959889411926, + "p99": 307.74399638175964 + }, + "isolatedSum": { + "p50": 303.6480136215687, + "p90": 320.0959898531437, + "p95": 325.50400123000145, + "p99": 345.8879888057709 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4952c9f5", + 
"identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|decode|normal|none|none|0|tuned||4dc6cbd03327f4e", + "colorKey": "gb300_e29d658a", + "comparisonKey": "3a93cc18076f99a1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:15.769092+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "4dc6cbd03327f4e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 249.85599517822266, + "p90": 265.1520073413849, + "p95": 270.112007856369, + "p99": 277.9519855976105 + }, + "combine": { + "p50": 47.040000557899475, + "p90": 51.552001386880875, + "p95": 55.03999814391136, + "p99": 61.503998935222626 + }, + "roundtrip": { + "p50": 281.0240089893341, + "p90": 294.97599601745605, + "p95": 298.6559867858887, + "p99": 307.0720136165619 + }, + "isolatedSum": { + "p50": 296.89599573612213, + "p90": 316.70400872826576, + "p95": 325.1520060002804, + "p99": 339.4559845328331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 249.9839961528778, + "p90": 263.39200139045715, + "p95": 268.38400959968567, + "p99": 278.0480086803436 + }, + "combine": { + "p50": 47.16800153255463, + "p90": 51.32799968123436, + "p95": 54.976001381874084, + "p99": 62.24000081419945 + }, + "roundtrip": { + "p50": 281.15200996398926, + "p90": 294.624000787735, + "p95": 298.7520098686218, + "p99": 307.8719973564148 + }, + "isolatedSum": { + "p50": 297.15199768543243, + "p90": 314.7200010716915, + "p95": 323.36001098155975, + "p99": 340.2880094945431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 250.17601251602173, + "p90": 264.6400034427643, + "p95": 269.27998661994934, + "p99": 277.5680124759674 + }, + "combine": { + "p50": 48.51200059056282, + "p90": 51.7439991235733, + "p95": 55.55199831724167, + "p99": 
63.26399743556976 + }, + "roundtrip": { + "p50": 280.2239954471588, + "p90": 294.40000653266907, + "p95": 297.5040078163147, + "p99": 305.11999130249023 + }, + "isolatedSum": { + "p50": 298.68801310658455, + "p90": 316.3840025663376, + "p95": 324.831984937191, + "p99": 340.83200991153717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 251.67998671531677, + "p90": 267.5839960575104, + "p95": 272.92799949645996, + "p99": 286.3360047340393 + }, + "combine": { + "p50": 48.895999789237976, + "p90": 52.51200124621391, + "p95": 56.89600110054016, + "p99": 364.22398686408997 + }, + "roundtrip": { + "p50": 281.15200996398926, + "p90": 295.6160008907318, + "p95": 300.35200715065, + "p99": 308.47999453544617 + }, + "isolatedSum": { + "p50": 300.57598650455475, + "p90": 320.0959973037243, + "p95": 329.8240005970001, + "p99": 650.5599915981293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 250.84799528121948, + "p90": 265.8880054950714, + "p95": 270.112007856369, + "p99": 277.3120105266571 + }, + "combine": { + "p50": 48.99200052022934, + "p90": 53.69599908590317, + "p95": 56.73599988222122, + "p99": 63.80800157785416 + }, + "roundtrip": { + "p50": 282.9119861125946, + "p90": 297.40801453590393, + "p95": 300.6399869918823, + "p99": 308.25600028038025 + }, + "isolatedSum": { + "p50": 299.8399958014488, + "p90": 319.5840045809746, + "p95": 326.84800773859024, + "p99": 341.12001210451126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 
1376256, + "fanoutMean": 1.5, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 251.2960135936737, + "p90": 266.2400007247925, + "p95": 270.4640030860901, + "p99": 284.4800055027008 + }, + "combine": { + "p50": 48.48000034689903, + "p90": 52.06400156021118, + "p95": 54.78399991989136, + "p99": 63.45599889755249 + }, + "roundtrip": { + "p50": 283.3600044250488, + "p90": 298.880010843277, + "p95": 302.592009305954, + "p99": 310.1760149002075 + }, + "isolatedSum": { + "p50": 299.77601394057274, + "p90": 318.30400228500366, + "p95": 325.24800300598145, + "p99": 347.9360044002533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2752512, + "combineLogicalBytes": 2752512, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 254.07999753952026, + "p90": 268.6080038547516, + "p95": 272.7999985218048, + "p99": 284.12801027297974 + }, + "combine": { + "p50": 50.912000238895416, + "p90": 54.816000163555145, + "p95": 57.24800005555153, + "p99": 64.00000303983688 + }, + "roundtrip": { + "p50": 286.01598739624023, + "p90": 299.45600032806396, + "p95": 303.42400074005127, + "p99": 312.0959997177124 + }, + "isolatedSum": { + "p50": 304.9919977784157, + "p90": 323.42400401830673, + "p95": 330.04799857735634, + "p99": 348.1280133128166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5505024, + "combineLogicalBytes": 5505024, + "fanoutMean": 1.5, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 253.9519965648651, + "p90": 267.90401339530945, + "p95": 272.7679908275604, + "p99": 278.78400683403015 + }, + "combine": { + "p50": 52.86400020122528, + "p90": 
56.703999638557434, + "p95": 59.328000992536545, + "p99": 66.72000139951706 + }, + "roundtrip": { + "p50": 287.48801350593567, + "p90": 299.51998591423035, + "p95": 303.1040132045746, + "p99": 307.5200021266937 + }, + "isolatedSum": { + "p50": 306.8159967660904, + "p90": 324.6080130338669, + "p95": 332.09599182009697, + "p99": 345.5040082335472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e544f1c0", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|decode|normal|none|none|0|tuned||0d921f8a9d2cb27", + "colorKey": "gb300_48a86946", + "comparisonKey": "77c3b83ad86fb6ab", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:08.084865+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "0d921f8a9d2cb27", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 256.44800066947937, + "p90": 270.112007856369, + "p95": 274.1439938545227, + "p99": 289.6000146865845 + }, + "combine": { + "p50": 47.74399846792221, + "p90": 50.81599950790405, + "p95": 54.07999828457832, + "p99": 61.47199869155884 + }, + "roundtrip": { + "p50": 289.0560030937195, + "p90": 302.14399099349976, + "p95": 305.4080009460449, + "p99": 313.6959969997406 + }, + "isolatedSum": { + "p50": 304.1919991374016, + "p90": 320.92800736427307, + "p95": 328.22399213910103, + "p99": 351.0720133781433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 200704, + "combineLogicalBytes": 200704, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 257.27999210357666, + "p90": 272.6080119609833, + "p95": 276.2239873409271, + "p99": 288.4480059146881 + }, + "combine": { + "p50": 47.775998711586, + "p90": 51.231998950242996, + "p95": 53.3440001308918, + "p99": 60.54399907588959 + }, + "roundtrip": { + "p50": 287.87198662757874, + "p90": 301.34400725364685, + "p95": 303.74398827552795, + "p99": 310.33599376678467 + }, + "isolatedSum": { + "p50": 
305.05599081516266, + "p90": 323.8400109112263, + "p95": 329.5679874718189, + "p99": 348.9920049905777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 257.53599405288696, + "p90": 272.09600806236267, + "p95": 277.3439884185791, + "p99": 283.6799919605255 + }, + "combine": { + "p50": 49.375999718904495, + "p90": 52.639998495578766, + "p95": 55.39200082421303, + "p99": 63.00800293684006 + }, + "roundtrip": { + "p50": 288.1599962711334, + "p90": 302.3039996623993, + "p95": 304.54400181770325, + "p99": 311.48800253868103 + }, + "isolatedSum": { + "p50": 306.91199377179146, + "p90": 324.73600655794144, + "p95": 332.73598924279213, + "p99": 346.68799489736557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 257.88798928260803, + "p90": 272.19200134277344, + "p95": 275.8080065250397, + "p99": 288.4800136089325 + }, + "combine": { + "p50": 49.6320016682148, + "p90": 52.86400020122528, + "p95": 55.456001311540604, + "p99": 61.63199990987778 + }, + "roundtrip": { + "p50": 289.98398780822754, + "p90": 303.45600843429565, + "p95": 306.43200874328613, + "p99": 315.775990486145 + }, + "isolatedSum": { + "p50": 307.51999095082283, + "p90": 325.0560015439987, + "p95": 331.2640078365803, + "p99": 350.1120135188103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 64, + "dispatch": { + "p50": 257.88798928260803, + "p90": 271.0399925708771, + "p95": 275.39199590682983, + "p99": 285.5679988861084 + }, + "combine": { + "p50": 49.695998430252075, + "p90": 52.86400020122528, + "p95": 55.67999929189682, + "p99": 63.26399743556976 + }, + "roundtrip": { + "p50": 291.00799560546875, + "p90": 304.1920065879822, + "p95": 307.5839877128601, + "p99": 314.7200047969818 + }, + "isolatedSum": { + "p50": 307.5839877128601, + "p90": 323.90399277210236, + "p95": 331.07199519872665, + "p99": 348.83199632167816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3282944, + "combineLogicalBytes": 3282944, + "fanoutMean": 3.578125, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 261.6640031337738, + "p90": 274.6880054473877, + "p95": 278.01600098609924, + "p99": 289.98398780822754 + }, + "combine": { + "p50": 51.29599943757057, + "p90": 55.07199838757515, + "p95": 58.17599967122078, + "p99": 65.34399837255478 + }, + "roundtrip": { + "p50": 296.83199524879456, + "p90": 308.6720108985901, + "p95": 311.71199679374695, + "p99": 319.5840120315552 + }, + "isolatedSum": { + "p50": 312.9600025713444, + "p90": 329.76000383496284, + "p95": 336.19200065732, + "p99": 355.3279861807823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6694912, + "combineLogicalBytes": 6694912, + "fanoutMean": 3.6484375, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 291.3280129432678, + "p90": 305.4400086402893, + "p95": 310.65601110458374, + "p99": 341.47199988365173 + }, + "combine": { + "p50": 53.599998354911804, + "p90": 57.8560009598732, + "p95": 64.31999802589417, + "p99": 86.65599673986435 + }, + "roundtrip": { + "p50": 325.76000690460205, + "p90": 336.5119993686676, + 
"p95": 340.5120074748993, + "p99": 348.03199768066406 + }, + "isolatedSum": { + "p50": 344.9280112981796, + "p90": 363.2960096001625, + "p95": 374.9760091304779, + "p99": 428.1279966235161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13318144, + "combineLogicalBytes": 13318144, + "fanoutMean": 3.62890625, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 289.40799832344055, + "p90": 301.82400345802307, + "p95": 305.4080009460449, + "p99": 313.9519989490509 + }, + "combine": { + "p50": 56.992001831531525, + "p90": 60.22400036454201, + "p95": 61.91999837756157, + "p99": 67.391999065876 + }, + "roundtrip": { + "p50": 330.3999900817871, + "p90": 341.7919874191284, + "p95": 344.31999921798706, + "p99": 350.94401240348816 + }, + "isolatedSum": { + "p50": 346.4000001549721, + "p90": 362.0480038225651, + "p95": 367.3279993236065, + "p99": 381.3439980149269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8686ff71", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|decode|normal|none|none|0|tuned||cc5ad1cb2e95ef6", + "colorKey": "gb300_419c8808", + "comparisonKey": "0bcc1db4c6b7aea9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:10.297396+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, 
+ "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cc5ad1cb2e95ef6", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.3408203125, + "eplbImbalanceAfter": 1.000390625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 257.7280104160309, + "p90": 272.3200023174286, + "p95": 276.095986366272, + "p99": 282.3359966278076 + }, + "combine": { + "p50": 47.839999198913574, + "p90": 51.29599943757057, + "p95": 54.46400120854378, + "p99": 62.111999839544296 + }, + "roundtrip": { + "p50": 289.37599062919617, + "p90": 303.00799012184143, + "p95": 307.2960078716278, + "p99": 314.94399905204773 + }, + "isolatedSum": { + "p50": 305.56800961494446, + "p90": 323.61600175499916, + "p95": 330.55998757481575, + "p99": 344.4479964673519 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 259.16799902915955, + "p90": 273.6000120639801, + "p95": 276.4799892902374, + "p99": 285.6000065803528 + }, + "combine": { + "p50": 47.90399968624115, + "p90": 51.552001386880875, + "p95": 54.17599901556969, + "p99": 61.5679994225502 + }, + "roundtrip": { + "p50": 289.247989654541, + "p90": 303.8400113582611, + "p95": 306.36799335479736, + "p99": 313.24800848960876 + }, + "isolatedSum": { + "p50": 307.0719987154007, + "p90": 325.152013450861, + "p95": 330.6559883058071, + "p99": 347.168006002903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 258.30399990081787, + "p90": 273.0560004711151, + "p95": 276.95998549461365, + "p99": 285.504013299942 + }, + "combine": { + "p50": 49.6320016682148, + "p90": 53.05600166320801, + "p95": 55.10399863123894, + "p99": 60.70400029420853 + }, + "roundtrip": { + "p50": 291.1680042743683, + "p90": 303.99999022483826, + "p95": 307.71198868751526, + "p99": 314.2400085926056 + }, + "isolatedSum": { + "p50": 307.93600156903267, + "p90": 326.1120021343231, + "p95": 332.0639841258526, + "p99": 346.20801359415054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 860160, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 259.3280076980591, + "p90": 273.3440101146698, + "p95": 277.24799513816833, + "p99": 
284.960001707077 + }, + "combine": { + "p50": 49.66399818658829, + "p90": 53.18399891257286, + "p95": 56.57599866390228, + "p99": 65.05600363016129 + }, + "roundtrip": { + "p50": 290.94401001930237, + "p90": 304.4480085372925, + "p95": 307.20001459121704, + "p99": 315.10400772094727 + }, + "isolatedSum": { + "p50": 308.99200588464737, + "p90": 326.52800902724266, + "p95": 333.8239938020706, + "p99": 350.0160053372383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1705984, + "combineLogicalBytes": 1705984, + "fanoutMean": 3.71875, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 259.99999046325684, + "p90": 273.4079957008362, + "p95": 277.50399708747864, + "p99": 292.89600253105164 + }, + "combine": { + "p50": 49.44000020623207, + "p90": 52.57600173354149, + "p95": 54.59199845790863, + "p99": 61.47199869155884 + }, + "roundtrip": { + "p50": 290.0159955024719, + "p90": 304.22401428222656, + "p95": 307.16800689697266, + "p99": 325.3760039806366 + }, + "isolatedSum": { + "p50": 309.4399906694889, + "p90": 325.98399743437767, + "p95": 332.09599554538727, + "p99": 354.3680012226105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3411968, + "combineLogicalBytes": 3411968, + "fanoutMean": 3.71875, + "recvTokensMax": 62, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 261.6960108280182, + "p90": 275.29600262641907, + "p95": 278.8800001144409, + "p99": 287.03999519348145 + }, + "combine": { + "p50": 49.82399940490723, + "p90": 53.119998425245285, + "p95": 55.58399856090546, + "p99": 66.27199798822403 + }, + "roundtrip": { + "p50": 297.4720001220703, + "p90": 310.11199951171875, + "p95": 312.8640055656433, + "p99": 321.1199939250946 + }, + "isolatedSum": { + "p50": 311.5200102329254, + "p90": 328.41600105166435, 
+ "p95": 334.4639986753464, + "p99": 353.3119931817055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6680576, + "combineLogicalBytes": 6680576, + "fanoutMean": 3.640625, + "recvTokensMax": 119, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 291.7439937591553, + "p90": 304.1279911994934, + "p95": 308.80001187324524, + "p99": 316.70400500297546 + }, + "combine": { + "p50": 51.711998879909515, + "p90": 55.00800162553787, + "p95": 56.92800134420395, + "p99": 65.47199934720993 + }, + "roundtrip": { + "p50": 327.0080089569092, + "p90": 337.66400814056396, + "p95": 342.3359990119934, + "p99": 349.5999872684479 + }, + "isolatedSum": { + "p50": 343.4559926390648, + "p90": 359.1359928250313, + "p95": 365.7280132174492, + "p99": 382.1760043501854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13432832, + "combineLogicalBytes": 13432832, + "fanoutMean": 3.66015625, + "recvTokensMax": 241, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 291.23198986053467, + "p90": 303.6159873008728, + "p95": 306.7840039730072, + "p99": 312.99200654029846 + }, + "combine": { + "p50": 57.24800005555153, + "p90": 60.575999319553375, + "p95": 63.19999694824219, + "p99": 71.32799923419952 + }, + "roundtrip": { + "p50": 332.44800567626953, + "p90": 346.3039994239807, + "p95": 350.3679931163788, + "p99": 357.02401399612427 + }, + "isolatedSum": { + "p50": 348.4799899160862, + "p90": 364.1919866204262, + "p95": 369.9840009212494, + "p99": 384.320005774498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26464256, + "combineLogicalBytes": 26464256, + "fanoutMean": 3.60546875, + "recvTokensMax": 471, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-301a822f", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|decode|normal|none|none|0|tuned||c186e8c8d66ece3", + "colorKey": "gb300_ea2ca9d2", + "comparisonKey": "67c4e7347cc37f00", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:42.257868+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c186e8c8d66ece3", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.091796875, + "eplbImbalanceAfter": 1.00146484375, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 250.7840096950531, + "p90": 263.5839879512787, + "p95": 266.975998878479, + "p99": 276.16000175476074 + }, + "combine": { + "p50": 47.00800031423569, + "p90": 50.303999334573746, + "p95": 52.799999713897705, + "p99": 59.487998485565186 + }, + "roundtrip": { + "p50": 282.1440100669861, + "p90": 295.3599989414215, + "p95": 300.57600140571594, + "p99": 309.28000807762146 + }, + "isolatedSum": { + "p50": 297.7920100092888, + "p90": 313.88798728585243, + "p95": 319.7759985923767, + "p99": 335.6480002403259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 251.48800015449524, + "p90": 265.4080092906952, + "p95": 270.04799246788025, + "p99": 277.1199941635132 + }, + "combine": { + "p50": 47.648001462221146, + "p90": 51.16799846291542, + "p95": 53.75999957323074, + "p99": 61.69600039720535 + }, + "roundtrip": { + "p50": 282.24000334739685, + "p90": 296.1280047893524, + "p95": 299.6160089969635, + "p99": 305.5039942264557 + }, + "isolatedSum": { + "p50": 299.1360016167164, + "p90": 316.5760077536106, + "p95": 323.807992041111, + "p99": 338.81599456071854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 253.85600328445435, + "p90": 268.2560086250305, + "p95": 272.19200134277344, + "p99": 283.52001309394836 + }, + "combine": { + "p50": 49.695998430252075, + "p90": 52.799999713897705, + "p95": 55.296000093221664, + "p99": 
61.5679994225502 + }, + "roundtrip": { + "p50": 284.15998816490173, + "p90": 297.4399924278259, + "p95": 301.66399478912354, + "p99": 308.7039887905121 + }, + "isolatedSum": { + "p50": 303.5520017147064, + "p90": 321.0560083389282, + "p95": 327.4880014359951, + "p99": 345.08801251649857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 745472, + "combineLogicalBytes": 745472, + "fanoutMean": 3.25, + "recvTokensMax": 15, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 252.8960108757019, + "p90": 266.36800169944763, + "p95": 270.27198672294617, + "p99": 278.23999524116516 + }, + "combine": { + "p50": 49.75999891757965, + "p90": 53.02400141954422, + "p95": 55.296000093221664, + "p99": 63.64800035953522 + }, + "roundtrip": { + "p50": 285.12001037597656, + "p90": 299.1040050983429, + "p95": 303.96801233291626, + "p99": 328.96000146865845 + }, + "isolatedSum": { + "p50": 302.65600979328156, + "p90": 319.39200311899185, + "p95": 325.56798681616783, + "p99": 341.8879956007004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 30, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 251.90401077270508, + "p90": 266.1440074443817, + "p95": 270.1759934425354, + "p99": 278.3359885215759 + }, + "combine": { + "p50": 49.6320016682148, + "p90": 53.21599915623665, + "p95": 55.07199838757515, + "p99": 63.968002796173096 + }, + "roundtrip": { + "p50": 283.7759852409363, + "p90": 298.0160117149353, + "p95": 301.05599761009216, + "p99": 307.3599934577942 + }, + "isolatedSum": { + "p50": 301.5360124409199, + "p90": 319.36000660061836, + "p95": 325.24799183011055, + "p99": 342.303991317749 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3225600, + 
"combineLogicalBytes": 3225600, + "fanoutMean": 3.515625, + "recvTokensMax": 60, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 255.51998615264893, + "p90": 267.90401339530945, + "p95": 272.0000147819519, + "p99": 278.656005859375 + }, + "combine": { + "p50": 49.984000623226166, + "p90": 53.02400141954422, + "p95": 54.91200089454651, + "p99": 61.43999844789505 + }, + "roundtrip": { + "p50": 289.98398780822754, + "p90": 301.7919957637787, + "p95": 305.08801341056824, + "p99": 312.6719892024994 + }, + "isolatedSum": { + "p50": 305.5039867758751, + "p90": 320.92801481485367, + "p95": 326.9120156764984, + "p99": 340.09600430727005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6522880, + "combineLogicalBytes": 6522880, + "fanoutMean": 3.5546875, + "recvTokensMax": 118, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 285.8560085296631, + "p90": 298.8480031490326, + "p95": 303.51999402046204, + "p99": 310.2400004863739 + }, + "combine": { + "p50": 52.000001072883606, + "p90": 56.09599873423576, + "p95": 58.17599967122078, + "p99": 64.70400094985962 + }, + "roundtrip": { + "p50": 317.8879916667938, + "p90": 328.8640081882477, + "p95": 332.96000957489014, + "p99": 339.7440016269684 + }, + "isolatedSum": { + "p50": 337.8560096025467, + "p90": 354.94400188326836, + "p95": 361.6959936916828, + "p99": 374.9440014362335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13160448, + "combineLogicalBytes": 13160448, + "fanoutMean": 3.5859375, + "recvTokensMax": 238, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 284.5759987831116, + "p90": 296.79998755455017, + "p95": 300.4480004310608, + "p99": 304.9919903278351 + }, + 
"combine": { + "p50": 56.063998490571976, + "p90": 59.90400165319443, + "p95": 61.91999837756157, + "p99": 68.76800209283829 + }, + "roundtrip": { + "p50": 322.2079873085022, + "p90": 335.55200695991516, + "p95": 340.5439853668213, + "p99": 347.26399183273315 + }, + "isolatedSum": { + "p50": 340.63999727368355, + "p90": 356.7039892077446, + "p95": 362.36799880862236, + "p99": 373.75999242067337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26406912, + "combineLogicalBytes": 26406912, + "fanoutMean": 3.59765625, + "recvTokensMax": 474, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-99513a52", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_5ad05e77", + "comparisonKey": "0f6c769bdb3f8c39", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:01.798867+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": 
true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 258.04799795150757, + "p90": 272.96000719070435, + "p95": 276.95998549461365, + "p99": 284.4800055027008 + }, + "combine": { + "p50": 44.544000178575516, + "p90": 48.576001077890396, + "p95": 51.7439991235733, + "p99": 58.687999844551086 + }, + "roundtrip": { + "p50": 290.1439964771271, + "p90": 305.184006690979, + "p95": 308.9599907398224, + "p99": 315.5199885368347 + }, + "isolatedSum": { + "p50": 302.5919981300831, + "p90": 321.53600826859474, + "p95": 328.70398461818695, + "p99": 343.1680053472519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 258.39999318122864, + "p90": 272.12798595428467, + "p95": 276.06400847435, + "p99": 284.7039997577667 + }, + "combine": { + "p50": 46.560000628232956, + "p90": 49.95200037956238, + "p95": 52.57600173354149, + "p99": 59.99999865889549 + }, + "roundtrip": { + "p50": 289.8559868335724, + "p90": 303.1359910964966, + "p95": 307.0400059223175, + "p99": 313.6320114135742 + }, + "isolatedSum": { + 
"p50": 304.9599938094616, + "p90": 322.07998633384705, + "p95": 328.64001020789146, + "p99": 344.7039984166622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 258.39999318122864, + "p90": 272.7999985218048, + "p95": 277.18400955200195, + "p99": 290.52799940109253 + }, + "combine": { + "p50": 47.040000557899475, + "p90": 50.04800111055374, + "p95": 53.21599915623665, + "p99": 60.54399907588959 + }, + "roundtrip": { + "p50": 290.367990732193, + "p90": 304.8959970474243, + "p95": 308.1600069999695, + "p99": 314.303994178772 + }, + "isolatedSum": { + "p50": 305.4399937391281, + "p90": 322.84799963235855, + "p95": 330.4000087082386, + "p99": 351.0719984769821 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 257.60000944137573, + "p90": 273.47201108932495, + "p95": 277.0879864692688, + "p99": 288.5439991950989 + }, + "combine": { + "p50": 48.928000032901764, + "p90": 52.15999856591225, + "p95": 55.424001067876816, + "p99": 72.80000299215317 + }, + "roundtrip": { + "p50": 289.8240089416504, + "p90": 305.27999997138977, + "p95": 308.28800797462463, + "p99": 316.79999828338623 + }, + "isolatedSum": { + "p50": 306.5280094742775, + "p90": 325.6320096552372, + "p95": 332.5119875371456, + "p99": 361.34400218725204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 64, + "dispatch": { + "p50": 258.30399990081787, + "p90": 272.2879946231842, + "p95": 276.0320007801056, + "p99": 284.86400842666626 + }, + "combine": { + "p50": 49.215998500585556, + "p90": 52.25599929690361, + "p95": 55.23199960589409, + "p99": 67.23199784755707 + }, + "roundtrip": { + "p50": 292.7039861679077, + "p90": 305.85598945617676, + "p95": 309.9200129508972, + "p99": 316.3839876651764 + }, + "isolatedSum": { + "p50": 307.5199984014034, + "p90": 324.5439939200878, + "p95": 331.2640003859997, + "p99": 352.0960062742233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 260.76799631118774, + "p90": 275.2000093460083, + "p95": 278.59199047088623, + "p99": 287.26398944854736 + }, + "combine": { + "p50": 48.96000027656555, + "p90": 52.44800075888634, + "p95": 56.57599866390228, + "p99": 64.2239972949028 + }, + "roundtrip": { + "p50": 293.37599873542786, + "p90": 306.40000104904175, + "p95": 311.64801120758057, + "p99": 321.3120102882385 + }, + "isolatedSum": { + "p50": 309.7279965877533, + "p90": 327.64801010489464, + "p95": 335.1679891347885, + "p99": 351.48798674345016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 275.4560112953186, + "p90": 288.9600098133087, + "p95": 292.54400730133057, + "p99": 297.9840040206909 + }, + "combine": { + "p50": 51.00800096988678, + "p90": 54.368000477552414, + "p95": 56.28800019621849, + "p99": 64.51199948787689 + }, + "roundtrip": { + "p50": 308.76800417900085, + "p90": 316.0000145435333, + 
"p95": 320.92800736427307, + "p99": 329.18399572372437 + }, + "isolatedSum": { + "p50": 326.4640122652054, + "p90": 343.32801029086113, + "p95": 348.83200749754906, + "p99": 362.4960035085678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 275.4560112953186, + "p90": 288.03199529647827, + "p95": 292.38399863243103, + "p99": 297.760009765625 + }, + "combine": { + "p50": 55.26399984955788, + "p90": 58.97599831223488, + "p95": 62.111999839544296, + "p99": 69.50400024652481 + }, + "roundtrip": { + "p50": 313.6639893054962, + "p90": 324.9279856681824, + "p95": 329.47200536727905, + "p99": 336.41600608825684 + }, + "isolatedSum": { + "p50": 330.7200111448765, + "p90": 347.00799360871315, + "p95": 354.4959984719753, + "p99": 367.2640100121498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17e3af4b", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|decode|normal|none|none|0|tuned||3f8ffeba9f65629", + "colorKey": "gb300_c8260f45", + "comparisonKey": "2c0ff1333ecc142b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:35.029206+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + 
"epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3f8ffeba9f65629", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 239.1040027141571, + "p90": 253.50400805473328, + "p95": 256.76798820495605, + "p99": 263.2960081100464 + }, + "combine": { + "p50": 43.58400031924248, + "p90": 47.58400097489357, + "p95": 50.912000238895416, + "p99": 61.08799949288368 + }, + "roundtrip": { + "p50": 270.7520127296448, + "p90": 285.0880026817322, + "p95": 288.4480059146881, + "p99": 296.00000381469727 + }, + "isolatedSum": { + "p50": 282.6880030333996, + "p90": 301.08800902962685, + "p95": 307.67998844385147, + "p99": 324.38400760293007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
71680, + "combineLogicalBytes": 71680, + "fanoutMean": 1.25, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 244.25600469112396, + "p90": 258.2080066204071, + "p95": 261.9200050830841, + "p99": 272.5439965724945 + }, + "combine": { + "p50": 43.616000562906265, + "p90": 47.648001462221146, + "p95": 51.90400034189224, + "p99": 58.17599967122078 + }, + "roundtrip": { + "p50": 274.399995803833, + "p90": 289.44000601768494, + "p95": 292.7680015563965, + "p99": 297.95199632644653 + }, + "isolatedSum": { + "p50": 287.8720052540302, + "p90": 305.85600808262825, + "p95": 313.82400542497635, + "p99": 330.7199962437153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 1.375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 250.65600872039795, + "p90": 265.0560140609741, + "p95": 269.50401067733765, + "p99": 278.75199913978577 + }, + "combine": { + "p50": 46.879999339580536, + "p90": 50.592001527547836, + "p95": 53.79199981689453, + "p99": 60.83200126886368 + }, + "roundtrip": { + "p50": 281.5040051937103, + "p90": 295.1360046863556, + "p95": 299.9039888381958, + "p99": 311.0080063343048 + }, + "isolatedSum": { + "p50": 297.5360080599785, + "p90": 315.64801558852196, + "p95": 323.2960104942322, + "p99": 339.58400040864944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 286720, + "combineLogicalBytes": 286720, + "fanoutMean": 1.25, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 251.13600492477417, + "p90": 264.8000121116638, + "p95": 269.0880000591278, + "p99": 278.11199426651 + }, + "combine": { + "p50": 
46.62400111556053, + "p90": 50.27199909090996, + "p95": 54.016001522541046, + "p99": 59.328000992536545 + }, + "roundtrip": { + "p50": 283.55199098587036, + "p90": 296.3840067386627, + "p95": 299.9359965324402, + "p99": 309.3760013580322 + }, + "isolatedSum": { + "p50": 297.7600060403347, + "p90": 315.0720112025738, + "p95": 323.10400158166885, + "p99": 337.43999525904655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 1.21875, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 250.84799528121948, + "p90": 265.6320035457611, + "p95": 272.0640003681183, + "p99": 281.8880081176758 + }, + "combine": { + "p50": 46.46399989724159, + "p90": 49.79199916124344, + "p95": 52.86400020122528, + "p99": 59.487998485565186 + }, + "roundtrip": { + "p50": 283.55199098587036, + "p90": 298.3039915561676, + "p95": 300.1280128955841, + "p99": 312.73600459098816 + }, + "isolatedSum": { + "p50": 297.3119951784611, + "p90": 315.42400270700455, + "p95": 324.92800056934357, + "p99": 341.37600660324097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1161216, + "combineLogicalBytes": 1161216, + "fanoutMean": 1.265625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 252.41601467132568, + "p90": 266.1440074443817, + "p95": 271.2000012397766, + "p99": 276.95998549461365 + }, + "combine": { + "p50": 47.040000557899475, + "p90": 51.04000121355057, + "p95": 54.048001766204834, + "p99": 62.111999839544296 + }, + "roundtrip": { + "p50": 286.27198934555054, + "p90": 299.77598786354065, + "p95": 303.1040132045746, + "p99": 311.74400448799133 + }, + "isolatedSum": { + "p50": 299.45601522922516, + "p90": 317.1840086579323, + "p95": 325.24800300598145, + "p99": 
339.07198533415794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2279424, + "combineLogicalBytes": 2279424, + "fanoutMean": 1.2421875, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 255.0080120563507, + "p90": 267.16798543930054, + "p95": 270.6240117549896, + "p99": 277.2800028324127 + }, + "combine": { + "p50": 48.79999905824661, + "p90": 52.06400156021118, + "p95": 55.58399856090546, + "p99": 62.272001057863235 + }, + "roundtrip": { + "p50": 284.5120131969452, + "p90": 297.0240116119385, + "p95": 300.4800081253052, + "p99": 308.76800417900085 + }, + "isolatedSum": { + "p50": 303.8080111145973, + "p90": 319.2319869995117, + "p95": 326.2080103158951, + "p99": 339.55200389027596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4587520, + "combineLogicalBytes": 4587520, + "fanoutMean": 1.25, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 255.0719976425171, + "p90": 267.90401339530945, + "p95": 271.1679935455322, + "p99": 277.40800380706787 + }, + "combine": { + "p50": 52.960000932216644, + "p90": 56.2559999525547, + "p95": 58.88000130653381, + "p99": 64.80000168085098 + }, + "roundtrip": { + "p50": 289.44000601768494, + "p90": 301.63198709487915, + "p95": 305.34398555755615, + "p99": 313.1200075149536 + }, + "isolatedSum": { + "p50": 308.03199857473373, + "p90": 324.16001334786415, + "p95": 330.04799485206604, + "p99": 342.20800548791885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7586bda6", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|decode|normal|none|none|0|tuned||e9a6e5febe08793", + "colorKey": "gb300_6ce4cab9", + "comparisonKey": "f646643eb7149f15", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:36.475965+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e9a6e5febe08793", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86328125, + "eplbImbalanceAfter": 1.0003348214285714, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + 
"createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 244.32000517845154, + "p90": 258.4640085697174, + "p95": 263.68001103401184, + "p99": 273.0880081653595 + }, + "combine": { + "p50": 47.29599878191948, + "p90": 51.61599814891815, + "p95": 54.207999259233475, + "p99": 63.391998410224915 + }, + "roundtrip": { + "p50": 275.519996881485, + "p90": 289.92000222206116, + "p95": 293.5999929904938, + "p99": 302.2719919681549 + }, + "isolatedSum": { + "p50": 291.616003960371, + "p90": 310.08000671863556, + "p95": 317.8880102932453, + "p99": 336.4800065755844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 245.05600333213806, + "p90": 258.91199707984924, + "p95": 262.04800605773926, + "p99": 269.6000039577484 + }, + "combine": { + "p50": 46.9760000705719, + "p90": 50.84799975156784, + "p95": 54.11199852824211, + "p99": 61.664000153541565 + }, + "roundtrip": { + "p50": 276.06400847435, + "p90": 289.34401273727417, + "p95": 292.4799919128418, + "p99": 300.00001192092896 + }, + "isolatedSum": { + "p50": 292.03200340270996, + "p90": 309.7599968314171, + "p95": 316.16000458598137, + "p99": 331.26400411129 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 246.14399671554565, + "p90": 260.6079876422882, + "p95": 266.11199975013733, + "p99": 302.39999294281006 + }, + "combine": { + "p50": 49.40799996256828, + "p90": 52.57600173354149, + "p95": 55.743999779224396, 
+ "p99": 63.93600255250931 + }, + "roundtrip": { + "p50": 277.0879864692688, + "p90": 290.8479869365692, + "p95": 294.49599981307983, + "p99": 307.96799063682556 + }, + "isolatedSum": { + "p50": 295.55199667811394, + "p90": 313.1839893758297, + "p95": 321.8559995293617, + "p99": 366.33599549531937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 788480, + "combineLogicalBytes": 788480, + "fanoutMean": 3.4375, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 244.9280023574829, + "p90": 259.6159875392914, + "p95": 263.8719975948334, + "p99": 272.44800329208374 + }, + "combine": { + "p50": 49.60000142455101, + "p90": 53.18399891257286, + "p95": 56.223999708890915, + "p99": 62.49599903821945 + }, + "roundtrip": { + "p50": 277.3759961128235, + "p90": 289.98398780822754, + "p95": 294.0160036087036, + "p99": 304.1279911994934 + }, + "isolatedSum": { + "p50": 294.5280037820339, + "p90": 312.79998645186424, + "p95": 320.0959973037243, + "p99": 334.9440023303032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 246.07999622821808, + "p90": 259.93600487709045, + "p95": 264.8639976978302, + "p99": 274.4640111923218 + }, + "combine": { + "p50": 49.50400069355965, + "p90": 53.31199988722801, + "p95": 55.64799904823303, + "p99": 63.71200084686279 + }, + "roundtrip": { + "p50": 277.44001150131226, + "p90": 291.8719947338104, + "p95": 296.3840067386627, + "p99": 303.48798632621765 + }, + "isolatedSum": { + "p50": 295.5839969217777, + "p90": 313.24800476431847, + "p95": 320.51199674606323, + "p99": 338.17601203918457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3196928, + 
"combineLogicalBytes": 3196928, + "fanoutMean": 3.484375, + "recvTokensMax": 59, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 252.9599964618683, + "p90": 266.2079930305481, + "p95": 270.112007856369, + "p99": 280.4799973964691 + }, + "combine": { + "p50": 51.552001386880875, + "p90": 55.00800162553787, + "p95": 57.631999254226685, + "p99": 64.54399973154068 + }, + "roundtrip": { + "p50": 287.48801350593567, + "p90": 300.6399869918823, + "p95": 304.57600951194763, + "p99": 309.9519908428192 + }, + "isolatedSum": { + "p50": 304.51199784874916, + "p90": 321.21599465608597, + "p95": 327.7440071105957, + "p99": 345.0239971280098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6408192, + "combineLogicalBytes": 6408192, + "fanoutMean": 3.4921875, + "recvTokensMax": 114, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 279.55201268196106, + "p90": 292.32001304626465, + "p95": 295.48799991607666, + "p99": 303.8400113582611 + }, + "combine": { + "p50": 51.64799839258194, + "p90": 55.456001311540604, + "p95": 57.760000228881836, + "p99": 66.84800237417221 + }, + "roundtrip": { + "p50": 314.04799222946167, + "p90": 324.41601157188416, + "p95": 329.8240005970001, + "p99": 340.256005525589 + }, + "isolatedSum": { + "p50": 331.200011074543, + "p90": 347.77601435780525, + "p95": 353.2480001449585, + "p99": 370.6880137324333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12773376, + "combineLogicalBytes": 12773376, + "fanoutMean": 3.48046875, + "recvTokensMax": 226, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 278.23999524116516, + "p90": 291.1680042743683, + "p95": 294.5599853992462, + "p99": 301.6960024833679 + }, + 
"combine": { + "p50": 55.87200075387955, + "p90": 59.39200147986412, + "p95": 62.30400130152702, + "p99": 69.72800195217133 + }, + "roundtrip": { + "p50": 316.6719973087311, + "p90": 328.44799757003784, + "p95": 333.18400382995605, + "p99": 339.9359881877899 + }, + "isolatedSum": { + "p50": 334.1119959950447, + "p90": 350.5600057542324, + "p95": 356.86398670077324, + "p99": 371.42400443553925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25661440, + "combineLogicalBytes": 25661440, + "fanoutMean": 3.49609375, + "recvTokensMax": 454, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a4f543da", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|decode|normal|none|none|0|tuned||e596902aaaeb56c", + "colorKey": "gb300_4b074890", + "comparisonKey": "5b2941d3dc85df49", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:21.968007+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + 
"fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e596902aaaeb56c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 246.0480034351349, + "p90": 260.5440020561218, + "p95": 263.8719975948334, + "p99": 271.5519964694977 + }, + "combine": { + "p50": 45.31199857592583, + "p90": 48.64000156521797, + "p95": 51.83999985456467, + "p99": 60.256000608205795 + }, + "roundtrip": { + "p50": 277.7920067310333, + "p90": 291.9360101222992, + "p95": 294.8479950428009, + "p99": 302.14399099349976 + }, + "isolatedSum": { + "p50": 291.3600020110607, + "p90": 309.1840036213398, + "p95": 315.71199744939804, + "p99": 331.8079970777035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 247.1040040254593, + "p90": 261.1840069293976, + "p95": 265.50400257110596, + "p99": 274.52799677848816 + }, + "combine": { + "p50": 46.1760014295578, + "p90": 49.12000149488449, + "p95": 51.19999870657921, + "p99": 58.27200040221214 + }, + "roundtrip": { + "p50": 278.3359885215759, + "p90": 292.38399863243103, + "p95": 295.0719892978668, + "p99": 301.7599880695343 + }, + 
"isolatedSum": { + "p50": 293.2800054550171, + "p90": 310.3040084242821, + "p95": 316.70400127768517, + "p99": 332.7999971807003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 387072, + "combineLogicalBytes": 387072, + "fanoutMean": 3.375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 245.88799476623535, + "p90": 260.8320116996765, + "p95": 265.1520073413849, + "p99": 275.4240036010742 + }, + "combine": { + "p50": 47.648001462221146, + "p90": 50.65599828958511, + "p95": 53.727999329566956, + "p99": 61.63199990987778 + }, + "roundtrip": { + "p50": 277.9200077056885, + "p90": 293.08798909187317, + "p95": 296.2239980697632, + "p99": 303.9360046386719 + }, + "isolatedSum": { + "p50": 293.5359962284565, + "p90": 311.4880099892616, + "p95": 318.88000667095184, + "p99": 337.056003510952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 731136, + "combineLogicalBytes": 731136, + "fanoutMean": 3.1875, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 246.24000489711761, + "p90": 261.9839906692505, + "p95": 265.4719948768616, + "p99": 274.84801411628723 + }, + "combine": { + "p50": 47.93599992990494, + "p90": 51.32799968123436, + "p95": 55.48800155520439, + "p99": 62.81600147485733 + }, + "roundtrip": { + "p50": 278.1760096549988, + "p90": 292.928010225296, + "p95": 296.79998755455017, + "p99": 306.68801069259644 + }, + "isolatedSum": { + "p50": 294.17600482702255, + "p90": 313.31199035048485, + "p95": 320.95999643206596, + "p99": 337.66401559114456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1519616, + "combineLogicalBytes": 1519616, + "fanoutMean": 3.3125, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 246.5279996395111, + "p90": 259.5199942588806, + "p95": 262.81601190567017, + "p99": 270.112007856369 + }, + "combine": { + "p50": 47.90399968624115, + "p90": 50.783999264240265, + "p95": 53.408000618219376, + "p99": 63.1679967045784 + }, + "roundtrip": { + "p50": 279.58399057388306, + "p90": 292.9919958114624, + "p95": 297.5359857082367, + "p99": 303.0399978160858 + }, + "isolatedSum": { + "p50": 294.43199932575226, + "p90": 310.3039935231209, + "p95": 316.22401252388954, + "p99": 333.2800045609474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3082240, + "combineLogicalBytes": 3082240, + "fanoutMean": 3.359375, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 251.55198574066162, + "p90": 265.53601026535034, + "p95": 268.44799518585205, + "p99": 284.09600257873535 + }, + "combine": { + "p50": 50.144001841545105, + "p90": 53.53600159287453, + "p95": 55.84000051021576, + "p99": 63.35999816656113 + }, + "roundtrip": { + "p50": 285.8879864215851, + "p90": 299.23200607299805, + "p95": 302.14399099349976, + "p99": 330.2080035209656 + }, + "isolatedSum": { + "p50": 301.6959875822067, + "p90": 319.07201185822487, + "p95": 324.2879956960678, + "p99": 347.4560007452965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6121472, + "combineLogicalBytes": 6121472, + "fanoutMean": 3.3359375, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 279.4240117073059, + "p90": 292.38399863243103, + "p95": 296.4160144329071, + "p99": 304.064005613327 + }, + "combine": { + "p50": 51.4880008995533, + "p90": 55.16799911856651, + "p95": 58.17599967122078, + "p99": 66.880002617836 + }, + "roundtrip": { + "p50": 314.9760067462921, + "p90": 
325.8880078792572, + "p95": 330.9119939804077, + "p99": 335.5199992656708 + }, + "isolatedSum": { + "p50": 330.9120126068592, + "p90": 347.55199775099754, + "p95": 354.5920141041279, + "p99": 370.944008231163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12271616, + "combineLogicalBytes": 12271616, + "fanoutMean": 3.34375, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 280.3199887275696, + "p90": 293.0240035057068, + "p95": 297.88801074028015, + "p99": 305.2160143852234 + }, + "combine": { + "p50": 56.32000043988228, + "p90": 59.167999774217606, + "p95": 60.70400029420853, + "p99": 67.55200028419495 + }, + "roundtrip": { + "p50": 320.19200921058655, + "p90": 332.7679932117462, + "p95": 336.64000034332275, + "p99": 346.1120128631592 + }, + "isolatedSum": { + "p50": 336.63998916745186, + "p90": 352.1920032799244, + "p95": 358.5920110344887, + "p99": 372.76801466941833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 507, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f229f981", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|decode|normal|none|none|0|tuned||194008255dcd869", + "colorKey": "gb300_ea52f89e", + "comparisonKey": "1e50870c226601e8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:38.213256+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + 
"worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "194008255dcd869", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.865234375, + "eplbImbalanceAfter": 1.0003580729166668, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 252.3840069770813, + "p90": 265.855997800827, + "p95": 269.72800493240356, + "p99": 276.67200565338135 + }, + "combine": { + "p50": 48.25599864125252, + "p90": 51.711998879909515, + "p95": 54.687999188899994, + "p99": 61.15199998021126 + }, + "roundtrip": { + "p50": 284.5120131969452, + "p90": 297.760009765625, + "p95": 300.83200335502625, + "p99": 306.5280020236969 + }, + "isolatedSum": { + "p50": 300.6400056183338, + "p90": 317.56799668073654, + "p95": 324.41600412130356, + "p99": 337.8240056335926 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 253.53598594665527, + "p90": 268.5439884662628, + "p95": 272.92799949645996, + "p99": 283.1360101699829 + }, + "combine": { + "p50": 47.775998711586, + "p90": 51.872000098228455, + "p95": 54.496001452207565, + "p99": 60.47999858856201 + }, + "roundtrip": { + "p50": 285.98400950431824, + "p90": 298.97600412368774, + "p95": 302.43200063705444, + "p99": 313.8880133628845 + }, + "isolatedSum": { + "p50": 301.3119846582413, + "p90": 320.4159885644913, + "p95": 327.4240009486675, + "p99": 343.6160087585449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 3, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 253.56799364089966, + "p90": 267.39200949668884, + "p95": 271.1679935455322, + "p99": 280.7359993457794 + }, + "combine": { + "p50": 49.40799996256828, + "p90": 52.41600051522255, + "p95": 54.9440011382103, + "p99": 61.535999178886414 + }, + "roundtrip": { + "p50": 286.17599606513977, + "p90": 299.1040050983429, + "p95": 303.3280074596405, + "p99": 309.08799171447754 + }, + "isolatedSum": { + "p50": 302.97599360346794, + "p90": 319.8080100119114, + "p95": 326.1119946837425, + "p99": 342.27199852466583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 774144, + "combineLogicalBytes": 774144, + "fanoutMean": 3.375, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 252.70399451255798, + "p90": 266.52801036834717, + "p95": 269.27998661994934, + "p99": 
279.4559895992279 + }, + "combine": { + "p50": 50.27199909090996, + "p90": 53.37600037455559, + "p95": 54.75199967622757, + "p99": 62.111999839544296 + }, + "roundtrip": { + "p50": 286.655992269516, + "p90": 299.77598786354065, + "p95": 303.1359910964966, + "p99": 308.6079955101013 + }, + "isolatedSum": { + "p50": 302.97599360346794, + "p90": 319.90401074290276, + "p95": 324.0319862961769, + "p99": 341.5679894387722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 252.99200415611267, + "p90": 265.82399010658264, + "p95": 269.53598856925964, + "p99": 275.7120132446289 + }, + "combine": { + "p50": 50.65599828958511, + "p90": 53.727999329566956, + "p95": 55.904000997543335, + "p99": 61.184000223875046 + }, + "roundtrip": { + "p50": 286.0479950904846, + "p90": 299.45600032806396, + "p95": 303.96801233291626, + "p99": 317.3440098762512 + }, + "isolatedSum": { + "p50": 303.6480024456978, + "p90": 319.5519894361496, + "p95": 325.439989566803, + "p99": 336.89601346850395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3268608, + "combineLogicalBytes": 3268608, + "fanoutMean": 3.5625, + "recvTokensMax": 60, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 259.3280076980591, + "p90": 271.87201380729675, + "p95": 275.90399980545044, + "p99": 281.0240089893341 + }, + "combine": { + "p50": 52.57600173354149, + "p90": 56.86400085687637, + "p95": 59.4559982419014, + "p99": 68.57600063085556 + }, + "roundtrip": { + "p50": 294.8479950428009, + "p90": 306.5919876098633, + "p95": 309.05601382255554, + "p99": 315.20000100135803 + }, + "isolatedSum": { + "p50": 311.90400943160057, + "p90": 328.7360146641731, + 
"p95": 335.35999804735184, + "p99": 349.60000962018967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6393856, + "combineLogicalBytes": 6393856, + "fanoutMean": 3.484375, + "recvTokensMax": 115, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 287.77599334716797, + "p90": 301.2480139732361, + "p95": 304.9600124359131, + "p99": 331.07200264930725 + }, + "combine": { + "p50": 52.83199995756149, + "p90": 55.904000997543335, + "p95": 58.04799869656563, + "p99": 65.8240020275116 + }, + "roundtrip": { + "p50": 324.73599910736084, + "p90": 336.64000034332275, + "p95": 339.9679958820343, + "p99": 345.6000089645386 + }, + "isolatedSum": { + "p50": 340.60799330472946, + "p90": 357.1520149707794, + "p95": 363.0080111324787, + "p99": 396.89600467681885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13045760, + "combineLogicalBytes": 13045760, + "fanoutMean": 3.5546875, + "recvTokensMax": 234, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 289.6000146865845, + "p90": 302.14399099349976, + "p95": 304.76799607276917, + "p99": 309.08799171447754 + }, + "combine": { + "p50": 58.88000130653381, + "p90": 62.144000083208084, + "p95": 64.00000303983688, + "p99": 69.82400268316269 + }, + "roundtrip": { + "p50": 331.0079872608185, + "p90": 342.0799970626831, + "p95": 345.5359935760498, + "p99": 351.83998942375183 + }, + "isolatedSum": { + "p50": 348.4800159931183, + "p90": 364.28799107670784, + "p95": 368.76799911260605, + "p99": 378.9119943976402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26263552, + "combineLogicalBytes": 26263552, + "fanoutMean": 3.578125, + "recvTokensMax": 469, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ffbb3a4", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_0ce440d7", + "comparisonKey": "4c6653b97e765747", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:29.327892+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + 
"sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 246.62399291992188, + "p90": 259.64799523353577, + "p95": 263.8719975948334, + "p99": 277.47198939323425 + }, + "combine": { + "p50": 44.67200115323067, + "p90": 47.90399968624115, + "p95": 51.32799968123436, + "p99": 59.42400172352791 + }, + "roundtrip": { + "p50": 277.0879864692688, + "p90": 290.94401001930237, + "p95": 293.98399591445923, + "p99": 300.06399750709534 + }, + "isolatedSum": { + "p50": 291.29599407315254, + "p90": 307.5519949197769, + "p95": 315.19999727606773, + "p99": 336.89599111676216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 247.48800694942474, + "p90": 261.50399446487427, + "p95": 265.6959891319275, + "p99": 276.0320007801056 + }, + "combine": { + "p50": 46.65600135922432, + "p90": 49.44000020623207, + "p95": 52.2879995405674, + "p99": 57.40800127387047 + }, + "roundtrip": { + "p50": 278.56001257896423, + "p90": 292.09598898887634, + "p95": 295.5839931964874, + "p99": 303.51999402046204 + }, + "isolatedSum": { + "p50": 294.14400830864906, + "p90": 310.94399467110634, + "p95": 317.9839886724949, + "p99": 333.44000205397606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 246.87999486923218, + "p90": 261.0880136489868, + "p95": 264.0959918498993, + "p99": 272.2240090370178 + }, + "combine": { + "p50": 46.36799916625023, + "p90": 49.056001007556915, + "p95": 50.40000006556511, + "p99": 55.67999929189682 + }, 
+ "roundtrip": { + "p50": 278.8800001144409, + "p90": 291.1039888858795, + "p95": 295.23199796676636, + "p99": 302.0800054073334 + }, + "isolatedSum": { + "p50": 293.2479940354824, + "p90": 310.14401465654373, + "p95": 314.4959919154644, + "p99": 327.90400832891464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 246.97600305080414, + "p90": 261.34398579597473, + "p95": 265.4080092906952, + "p99": 270.27198672294617 + }, + "combine": { + "p50": 48.287998884916306, + "p90": 50.944000482559204, + "p95": 53.37600037455559, + "p99": 59.776000678539276 + }, + "roundtrip": { + "p50": 278.49599719047546, + "p90": 290.52799940109253, + "p95": 294.9120104312897, + "p99": 305.82401156425476 + }, + "isolatedSum": { + "p50": 295.26400193572044, + "p90": 312.28798627853394, + "p95": 318.7840096652508, + "p99": 330.04798740148544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 248.22400510311127, + "p90": 262.1760070323944, + "p95": 265.4399871826172, + "p99": 271.67999744415283 + }, + "combine": { + "p50": 48.06400090456009, + "p90": 50.87999999523163, + "p95": 53.31199988722801, + "p99": 63.231997191905975 + }, + "roundtrip": { + "p50": 278.59199047088623, + "p90": 291.23198986053467, + "p95": 295.00800371170044, + "p99": 305.6960105895996 + }, + "isolatedSum": { + "p50": 296.28800600767136, + "p90": 313.05600702762604, + "p95": 318.7519870698452, + "p99": 334.9119946360588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 
2408448, + "fanoutMean": 2.625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 250.75200200080872, + "p90": 261.9520127773285, + "p95": 264.67201113700867, + "p99": 274.27199482917786 + }, + "combine": { + "p50": 48.608001321554184, + "p90": 51.67999863624573, + "p95": 54.91200089454651, + "p99": 61.5679994225502 + }, + "roundtrip": { + "p50": 282.8480005264282, + "p90": 295.74400186538696, + "p95": 299.48800802230835, + "p99": 307.48799443244934 + }, + "isolatedSum": { + "p50": 299.3600033223629, + "p90": 313.6320114135742, + "p95": 319.5840120315552, + "p99": 335.83999425172806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 266.9120132923126, + "p90": 277.18400955200195, + "p95": 282.1440100669861, + "p99": 288.1599962711334 + }, + "combine": { + "p50": 50.49600079655647, + "p90": 53.599998354911804, + "p95": 55.10399863123894, + "p99": 63.74400109052658 + }, + "roundtrip": { + "p50": 301.2160062789917, + "p90": 314.4319951534271, + "p95": 318.36798787117004, + "p99": 323.96799325942993 + }, + "isolatedSum": { + "p50": 317.4080140888691, + "p90": 330.78400790691376, + "p95": 337.248008698225, + "p99": 351.90399736166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 267.2640085220337, + "p90": 278.49599719047546, + "p95": 281.72799944877625, + "p99": 289.95200991630554 + }, + "combine": { + "p50": 
54.655998945236206, + "p90": 58.111999183893204, + "p95": 60.35200133919716, + "p99": 65.05600363016129 + }, + "roundtrip": { + "p50": 305.85598945617676, + "p90": 319.4560110569, + "p95": 322.9120075702667, + "p99": 326.81599259376526 + }, + "isolatedSum": { + "p50": 321.9200074672699, + "p90": 336.60799637436867, + "p95": 342.0800007879734, + "p99": 355.0080135464668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d1665ba6", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_4d1c5d27", + "comparisonKey": "180154363af5c463", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:45.351186+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": 
"not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 254.14401292800903, + "p90": 268.0639922618866, + "p95": 271.7120051383972, + "p99": 277.3439884185791 + }, + "combine": { + "p50": 47.00800031423569, + "p90": 50.87999999523163, + "p95": 54.4000007212162, + "p99": 60.80000102519989 + }, + "roundtrip": { + "p50": 285.5679988861084, + "p90": 298.6240088939667, + "p95": 302.8799891471863, + "p99": 316.5439963340759 + }, + "isolatedSum": { + "p50": 301.1520132422447, + "p90": 318.9439922571182, + "p95": 326.1120058596134, + "p99": 338.143989443779 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 253.9840042591095, + "p90": 268.92799139022827, + "p95": 273.5680043697357, + "p99": 287.6160144805908 + }, + "combine": { + "p50": 47.359999269247055, + "p90": 51.61599814891815, + "p95": 54.71999943256378, + "p99": 62.49599903821945 + }, + "roundtrip": { + "p50": 284.92799401283264, + "p90": 299.51998591423035, + "p95": 302.4959862232208, + 
"p99": 310.11199951171875 + }, + "isolatedSum": { + "p50": 301.34400352835655, + "p90": 320.5439895391464, + "p95": 328.2880038022995, + "p99": 350.1120135188103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 254.62400913238525, + "p90": 269.1519856452942, + "p95": 272.70400524139404, + "p99": 278.0799865722656 + }, + "combine": { + "p50": 48.767998814582825, + "p90": 51.80799961090088, + "p95": 55.03999814391136, + "p99": 62.431998550891876 + }, + "roundtrip": { + "p50": 285.5679988861084, + "p90": 300.927996635437, + "p95": 303.6159873008728, + "p99": 311.3279938697815 + }, + "isolatedSum": { + "p50": 303.3920079469681, + "p90": 320.95998525619507, + "p95": 327.7440033853054, + "p99": 340.5119851231575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 15, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 253.6959946155548, + "p90": 268.8640058040619, + "p95": 272.12798595428467, + "p99": 280.89600801467896 + }, + "combine": { + "p50": 49.56800118088722, + "p90": 53.15199866890907, + "p95": 56.8000003695488, + "p99": 61.85600161552429 + }, + "roundtrip": { + "p50": 286.624014377594, + "p90": 300.9920120239258, + "p95": 303.9360046386719, + "p99": 313.1519854068756 + }, + "isolatedSum": { + "p50": 303.26399579644203, + "p90": 322.01600447297096, + "p95": 328.92798632383347, + "p99": 342.75200963020325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 30, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 255.5840015411377, + "p90": 269.0880000591278, + "p95": 274.6880054473877, + "p99": 284.89598631858826 + }, + "combine": { + "p50": 49.27999898791313, + "p90": 53.0879981815815, + "p95": 56.15999922156334, + "p99": 62.78400123119354 + }, + "roundtrip": { + "p50": 287.23201155662537, + "p90": 300.86401104927063, + "p95": 305.4400086402893, + "p99": 318.6880052089691 + }, + "isolatedSum": { + "p50": 304.8640005290508, + "p90": 322.1759982407093, + "p95": 330.84800466895103, + "p99": 347.6799875497818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 57, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 258.4959864616394, + "p90": 271.58400416374207, + "p95": 275.10398626327515, + "p99": 283.58399868011475 + }, + "combine": { + "p50": 49.375999718904495, + "p90": 53.119998425245285, + "p95": 56.063998490571976, + "p99": 63.1679967045784 + }, + "roundtrip": { + "p50": 293.0240035057068, + "p90": 305.08801341056824, + "p95": 308.4160089492798, + "p99": 316.6080117225647 + }, + "isolatedSum": { + "p50": 307.8719861805439, + "p90": 324.70400258898735, + "p95": 331.1679847538471, + "p99": 346.75199538469315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 115, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 289.4720137119293, + "p90": 301.37598514556885, + "p95": 305.4080009460449, + "p99": 313.3760094642639 + }, + "combine": { + "p50": 53.37600037455559, + "p90": 56.992001831531525, + "p95": 59.51999872922897, + "p99": 67.32799857854843 + }, + "roundtrip": { + "p50": 
324.41601157188416, + "p90": 335.6480002403259, + "p95": 339.87200260162354, + "p99": 349.4719862937927 + }, + "isolatedSum": { + "p50": 342.8480140864849, + "p90": 358.3679869771004, + "p95": 364.9279996752739, + "p99": 380.70400804281235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 232, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 288.2879972457886, + "p90": 301.2160062789917, + "p95": 304.639995098114, + "p99": 313.56799602508545 + }, + "combine": { + "p50": 56.57599866390228, + "p90": 59.487998485565186, + "p95": 61.15199998021126, + "p99": 75.68000257015228 + }, + "roundtrip": { + "p50": 327.2959887981415, + "p90": 339.77600932121277, + "p95": 343.07199716567993, + "p99": 350.0800132751465 + }, + "isolatedSum": { + "p50": 344.86399590969086, + "p90": 360.7040047645569, + "p95": 365.7919950783253, + "p99": 389.24799859523773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 462, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-92825186", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_08625a47", + "comparisonKey": "b242b98ba930cfc4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:48.475036+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 250.36799907684326, + "p90": 263.71198892593384, + "p95": 268.38400959968567, + "p99": 317.02399253845215 + }, + "combine": { + "p50": 48.22399839758873, + "p90": 51.64799839258194, + "p95": 54.336000233888626, + "p99": 60.35200133919716 + }, + "roundtrip": { + "p50": 281.2480032444, + "p90": 294.46399211883545, + "p95": 298.0160117149353, + "p99": 308.1600069999695 + }, + "isolatedSum": { + "p50": 298.591997474432, + "p90": 315.3599873185158, + "p95": 322.7200098335743, + 
"p99": 377.3759938776493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 249.15200471878052, + "p90": 262.33598589897156, + "p95": 268.92799139022827, + "p99": 280.12800216674805 + }, + "combine": { + "p50": 48.73599857091904, + "p90": 51.96800082921982, + "p95": 54.91200089454651, + "p99": 59.58399921655655 + }, + "roundtrip": { + "p50": 280.60799837112427, + "p90": 293.88800263404846, + "p95": 297.37600684165955, + "p99": 307.0400059223175 + }, + "isolatedSum": { + "p50": 297.88800328969955, + "p90": 314.3039867281914, + "p95": 323.8399922847748, + "p99": 339.7120013833046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 250.0160038471222, + "p90": 263.68001103401184, + "p95": 267.551988363266, + "p99": 277.2800028324127 + }, + "combine": { + "p50": 50.20799860358238, + "p90": 53.21599915623665, + "p95": 54.43200096487999, + "p99": 60.63999980688095 + }, + "roundtrip": { + "p50": 281.5999984741211, + "p90": 295.52000761032104, + "p95": 299.1040050983429, + "p99": 307.68001079559326 + }, + "isolatedSum": { + "p50": 300.2240024507046, + "p90": 316.8960101902485, + "p95": 321.983989328146, + "p99": 337.92000263929367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 15, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 250.97599625587463, + "p90": 265.1199996471405, + 
"p95": 268.5120105743408, + "p99": 281.2800109386444 + }, + "combine": { + "p50": 50.464000552892685, + "p90": 53.247999399900436, + "p95": 54.816000163555145, + "p99": 59.26400050520897 + }, + "roundtrip": { + "p50": 281.9199860095978, + "p90": 296.7680096626282, + "p95": 299.6479868888855, + "p99": 305.184006690979 + }, + "isolatedSum": { + "p50": 301.4399968087673, + "p90": 318.36799904704094, + "p95": 323.32801073789597, + "p99": 340.5440114438534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 30, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 251.23199820518494, + "p90": 264.5440101623535, + "p95": 267.7760124206543, + "p99": 274.01599287986755 + }, + "combine": { + "p50": 50.944000482559204, + "p90": 54.46400120854378, + "p95": 55.96800148487091, + "p99": 63.26399743556976 + }, + "roundtrip": { + "p50": 282.4319899082184, + "p90": 295.77600955963135, + "p95": 300.06399750709534, + "p99": 305.82401156425476 + }, + "isolatedSum": { + "p50": 302.17599868774414, + "p90": 319.0080113708973, + "p95": 323.7440139055252, + "p99": 337.2799903154373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 57, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 255.19999861717224, + "p90": 267.13600754737854, + "p95": 272.0319926738739, + "p99": 276.7679989337921 + }, + "combine": { + "p50": 51.13599821925163, + "p90": 55.07199838757515, + "p95": 56.703999638557434, + "p99": 65.5359998345375 + }, + "roundtrip": { + "p50": 289.792001247406, + "p90": 302.2719919681549, + "p95": 305.27999997138977, + "p99": 313.31199407577515 + }, + "isolatedSum": { + "p50": 
306.3359968364239, + "p90": 322.2080059349537, + "p95": 328.73599231243134, + "p99": 342.3039987683296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 115, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 284.2560112476349, + "p90": 297.0240116119385, + "p95": 302.11201310157776, + "p99": 309.28000807762146 + }, + "combine": { + "p50": 53.47200110554695, + "p90": 56.48000165820122, + "p95": 58.848001062870026, + "p99": 66.68800115585327 + }, + "roundtrip": { + "p50": 321.02400064468384, + "p90": 331.7759931087494, + "p95": 334.6239924430847, + "p99": 344.7679877281189 + }, + "isolatedSum": { + "p50": 337.72801235318184, + "p90": 353.5040132701397, + "p95": 360.9600141644478, + "p99": 375.96800923347473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 232, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 283.87200832366943, + "p90": 296.7360019683838, + "p95": 300.00001192092896, + "p99": 308.76800417900085 + }, + "combine": { + "p50": 58.97599831223488, + "p90": 62.65600025653839, + "p95": 64.54399973154068, + "p99": 72.51200079917908 + }, + "roundtrip": { + "p50": 323.8399922847748, + "p90": 336.0320031642914, + "p95": 339.9359881877899, + "p99": 344.63998675346375 + }, + "isolatedSum": { + "p50": 342.8480066359043, + "p90": 359.3920022249222, + "p95": 364.54401165246964, + "p99": 381.28000497817993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 462, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + 
"id": "cx-4e17fedb", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_74aea6d0", + "comparisonKey": "83fd58abb1384c03", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:57.262888+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + 
"sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 355.77601194381714, + "p90": 375.8080005645752, + "p95": 380.67200779914856, + "p99": 393.5360014438629 + }, + "combine": { + "p50": 59.61599946022034, + "p90": 67.74400174617767, + "p95": 72.35199958086014, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 399.1039991378784, + "p90": 420.48001289367676, + "p95": 427.0719885826111, + "p99": 449.3440091609955 + }, + "isolatedSum": { + "p50": 415.3920114040375, + "p90": 443.55200231075287, + "p95": 453.0240073800087, + "p99": 477.1840050816536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 362.4959886074066, + "p90": 382.27200508117676, + "p95": 386.78398728370667, + "p99": 400.35200119018555 + }, + "combine": { + "p50": 59.51999872922897, + "p90": 70.68800181150436, + "p95": 74.11199808120728, + "p99": 82.17599987983704 + }, + "roundtrip": { + "p50": 404.7040045261383, + "p90": 424.4160056114197, + "p95": 431.2959909439087, + "p99": 443.4559941291809 + }, + "isolatedSum": { + "p50": 422.0159873366356, + "p90": 452.9600068926811, + "p95": 460.89598536491394, + "p99": 482.5280010700226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 358.8480055332184, + "p90": 380.5760145187378, + "p95": 385.76000928878784, + "p99": 396.95999026298523 + }, + "combine": { + "p50": 60.83200126886368, + "p90": 67.77600198984146, + "p95": 73.7600028514862, + "p99": 86.14400029182434 + }, + 
"roundtrip": { + "p50": 398.1119990348816, + "p90": 423.74399304389954, + "p95": 429.3760061264038, + "p99": 441.2800073623657 + }, + "isolatedSum": { + "p50": 419.68000680208206, + "p90": 448.35201650857925, + "p95": 459.52001214027405, + "p99": 483.10399055480957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 356.76801204681396, + "p90": 377.85598635673523, + "p95": 384.2880129814148, + "p99": 395.87199687957764 + }, + "combine": { + "p50": 62.3680017888546, + "p90": 69.40799951553345, + "p95": 73.2479989528656, + "p99": 87.90399879217148 + }, + "roundtrip": { + "p50": 397.599995136261, + "p90": 418.17599534988403, + "p95": 426.71999335289, + "p99": 441.18401408195496 + }, + "isolatedSum": { + "p50": 419.13601383566856, + "p90": 447.2639858722687, + "p95": 457.5360119342804, + "p99": 483.7759956717491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 350.9120047092438, + "p90": 380.0320029258728, + "p95": 387.4559998512268, + "p99": 405.11998534202576 + }, + "combine": { + "p50": 66.11199676990509, + "p90": 73.05599749088287, + "p95": 78.015998005867, + "p99": 84.3840017914772 + }, + "roundtrip": { + "p50": 399.7440040111542, + "p90": 429.28001284599304, + "p95": 437.47198581695557, + "p99": 456.28800988197327 + }, + "isolatedSum": { + "p50": 417.02400147914886, + "p90": 453.0880004167557, + "p95": 465.4719978570938, + "p99": 489.50398713350296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + 
"fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 359.1040074825287, + "p90": 382.3679983615875, + "p95": 387.5519931316376, + "p99": 419.20000314712524 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 82.36800134181976, + "p95": 101.34399682283401, + "p99": 117.08799749612808 + }, + "roundtrip": { + "p50": 410.4959964752197, + "p90": 431.13601207733154, + "p95": 437.0560050010681, + "p99": 447.519987821579 + }, + "isolatedSum": { + "p50": 427.16800421476364, + "p90": 464.7359997034073, + "p95": 488.8959899544716, + "p99": 536.2880006432533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 385.2800130844116, + "p90": 405.7280123233795, + "p95": 411.00800037384033, + "p99": 424.8960018157959 + }, + "combine": { + "p50": 67.87200272083282, + "p90": 74.91199672222137, + "p95": 77.504001557827, + "p99": 83.55200290679932 + }, + "roundtrip": { + "p50": 429.76000905036926, + "p90": 451.10398530960083, + "p95": 458.75200629234314, + "p99": 469.215989112854 + }, + "isolatedSum": { + "p50": 453.15201580524445, + "p90": 480.6400090456009, + "p95": 488.5120019316673, + "p99": 508.4480047225952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 388.38401436805725, + "p90": 408.735990524292, + "p95": 414.3359959125519, + "p99": 436.2559914588928 + }, + "combine": { + "p50": 71.9359964132309, + "p90": 
79.74400371313095, + "p95": 84.03199911117554, + "p99": 93.59999746084213 + }, + "roundtrip": { + "p50": 441.24799966812134, + "p90": 461.7280066013336, + "p95": 466.623991727829, + "p99": 482.4320077896118 + }, + "isolatedSum": { + "p50": 460.32001078128815, + "p90": 488.47999423742294, + "p95": 498.3679950237274, + "p99": 529.855988919735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-76b484ca", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||f1c99f5cf8ca9ed", + "colorKey": "gb300_753a1ca8", + "comparisonKey": "98c070e23ad43add", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:13.132393+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f1c99f5cf8ca9ed", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 345.5039858818054, + "p90": 369.8880076408386, + "p95": 376.51199102401733, + "p99": 462.3999893665314 + }, + "combine": { + "p50": 57.5999990105629, + "p90": 66.3679987192154, + "p95": 70.49600034952164, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 381.1199963092804, + "p90": 406.1119854450226, + "p95": 416.9600009918213, + "p99": 459.6480131149292 + }, + "isolatedSum": { + "p50": 403.1039848923683, + "p90": 436.256006360054, + "p95": 447.00799137353897, + "p99": 567.7759870886803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 348.54400157928467, + "p90": 378.6559998989105, + "p95": 456.4799964427948, + "p99": 504.863977432251 + }, + "combine": { + "p50": 57.34400078654289, + "p90": 66.78400188684464, + "p95": 75.80800354480743, + "p99": 119.23199892044067 + }, + "roundtrip": { + "p50": 383.90401005744934, + "p90": 410.2720022201538, + "p95": 455.487996339798, + "p99": 566.6559934616089 + }, + "isolatedSum": { + "p50": 405.88800236582756, + "p90": 445.44000178575516, + 
"p95": 532.2879999876022, + "p99": 624.0959763526917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 344.1280126571655, + "p90": 366.11199378967285, + "p95": 376.44800543785095, + "p99": 468.1600034236908 + }, + "combine": { + "p50": 61.69600039720535, + "p90": 69.66400146484375, + "p95": 73.72800260782242, + "p99": 118.68800222873688 + }, + "roundtrip": { + "p50": 383.9679956436157, + "p90": 411.45598888397217, + "p95": 421.31200432777405, + "p99": 537.11998462677 + }, + "isolatedSum": { + "p50": 405.8240130543709, + "p90": 435.7759952545166, + "p95": 450.17600804567337, + "p99": 586.8480056524277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 345.7919955253601, + "p90": 367.68001317977905, + "p95": 376.25598907470703, + "p99": 491.7759895324707 + }, + "combine": { + "p50": 61.37600168585777, + "p90": 68.89600306749344, + "p95": 72.57600128650665, + "p99": 80.48000186681747 + }, + "roundtrip": { + "p50": 387.29599118232727, + "p90": 409.88799929618835, + "p95": 417.6639914512634, + "p99": 522.0800042152405 + }, + "isolatedSum": { + "p50": 407.1679972112179, + "p90": 436.5760162472725, + "p95": 448.8319903612137, + "p99": 572.2559913992882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 345.18399834632874, + "p90": 
374.0159869194031, + "p95": 389.44000005722046, + "p99": 500.7680058479309 + }, + "combine": { + "p50": 62.111999839544296, + "p90": 74.46400076150894, + "p95": 81.79199695587158, + "p99": 134.46399569511414 + }, + "roundtrip": { + "p50": 385.8239948749542, + "p90": 412.8960072994232, + "p95": 420.8320081233978, + "p99": 533.7920188903809 + }, + "isolatedSum": { + "p50": 407.29599818587303, + "p90": 448.479987680912, + "p95": 471.23199701309204, + "p99": 635.232001543045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 359.5840036869049, + "p90": 378.7519931793213, + "p95": 411.42401099205017, + "p99": 519.4879770278931 + }, + "combine": { + "p50": 63.77600133419037, + "p90": 72.12799787521362, + "p95": 78.14399898052216, + "p99": 137.53600418567657 + }, + "roundtrip": { + "p50": 399.4239866733551, + "p90": 425.1520037651062, + "p95": 495.4879879951477, + "p99": 561.8240237236023 + }, + "isolatedSum": { + "p50": 423.3600050210953, + "p90": 450.8799910545349, + "p95": 489.5680099725723, + "p99": 657.0239812135696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 420.22401094436646, + "p90": 439.04000520706177, + "p95": 446.46400213241577, + "p99": 555.1360249519348 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 74.78400319814682, + "p95": 79.00799810886383, + "p99": 94.81599926948547 + }, + "roundtrip": { + "p50": 464.4480049610138, + "p90": 485.82398891448975, + "p95": 492.0960068702698, + "p99": 597.4400043487549 + }, + "isolatedSum": { + "p50": 
487.840011715889, + "p90": 513.8240084052086, + "p95": 525.4720002412796, + "p99": 649.9520242214203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 424.1600036621094, + "p90": 448.5760033130646, + "p95": 491.32800102233887, + "p99": 563.1359815597534 + }, + "combine": { + "p50": 79.93599772453308, + "p90": 90.01599997282028, + "p95": 96.6079980134964, + "p99": 146.04799449443817 + }, + "roundtrip": { + "p50": 480.8320105075836, + "p90": 503.64798307418823, + "p95": 559.7440004348755, + "p99": 627.7120113372803 + }, + "isolatedSum": { + "p50": 504.09600138664246, + "p90": 538.5920032858849, + "p95": 587.9359990358353, + "p99": 709.1839760541916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56e64c14", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70", + "colorKey": "gb300_793c150b", + "comparisonKey": "a6c79b4091d5449e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:52.410969+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0bc700e9998f70", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 327.13600993156433, + "p90": 342.9119884967804, + "p95": 348.63999485969543, + "p99": 355.3920090198517 + }, + "combine": { + "p50": 53.599998354911804, + "p90": 63.61600011587143, + "p95": 67.32799857854843, + "p99": 75.32799988985062 + }, + "roundtrip": { + "p50": 363.0400002002716, + "p90": 379.07201051712036, + "p95": 384.8319947719574, + "p99": 401.7280042171478 + }, + "isolatedSum": { + "p50": 380.73600828647614, + "p90": 406.5279886126518, + "p95": 415.96799343824387, + "p99": 430.7200089097023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + 
"recvTokensMax": 4, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 347.2000062465668, + "p90": 363.8400137424469, + "p95": 367.3279881477356, + "p99": 381.3759982585907 + }, + "combine": { + "p50": 58.49599838256836, + "p90": 67.71200150251389, + "p95": 71.00799679756165, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 387.4239921569824, + "p90": 404.83200550079346, + "p95": 409.5039963722229, + "p99": 421.5039908885956 + }, + "isolatedSum": { + "p50": 405.69600462913513, + "p90": 431.5520152449608, + "p95": 438.33598494529724, + "p99": 459.3279957771301 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 349.2160141468048, + "p90": 367.45598912239075, + "p95": 372.8640079498291, + "p99": 387.4239921569824 + }, + "combine": { + "p50": 58.46399813890457, + "p90": 69.60000097751617, + "p95": 73.95199686288834, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 391.9999897480011, + "p90": 410.3679955005646, + "p95": 415.45599699020386, + "p99": 427.13600397109985 + }, + "isolatedSum": { + "p50": 407.6800122857094, + "p90": 437.0559900999069, + "p95": 446.81600481271744, + "p99": 495.13599276542664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 345.8879888057709, + "p90": 364.3519878387451, + "p95": 368.80001425743103, + "p99": 382.2399973869324 + }, + "combine": { + "p50": 60.32000109553337, + "p90": 69.40799951553345, + "p95": 
73.15199822187424, + "p99": 78.91199737787247 + }, + "roundtrip": { + "p50": 390.78399538993835, + "p90": 410.5919897556305, + "p95": 415.77601432800293, + "p99": 424.9599874019623 + }, + "isolatedSum": { + "p50": 406.20798990130424, + "p90": 433.75998735427856, + "p95": 441.95201247930527, + "p99": 461.15199476480484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fb83c0f4", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f", + "colorKey": "gb300_d99d6f06", + "comparisonKey": "e4d862f1e23b5d32", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:54.635206+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0456df9778e5c0f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 344.2560136318207, + "p90": 367.0719861984253, + "p95": 374.04799461364746, + "p99": 387.1679902076721 + }, + "combine": { + "p50": 53.02400141954422, + "p90": 64.80000168085098, + "p95": 68.44799965620041, + "p99": 75.00799745321274 + }, + "roundtrip": { + "p50": 377.6960074901581, + "p90": 400.2560079097748, + "p95": 405.8240056037903, + "p99": 428.41601371765137 + }, + "isolatedSum": { + "p50": 397.2800150513649, + "p90": 431.8719878792763, + "p95": 442.49599426984787, + "p99": 462.17598766088486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 337.47199177742004, + "p90": 361.4720106124878, + "p95": 367.5200045108795, + "p99": 381.9519877433777 + }, + "combine": { + "p50": 53.408000618219376, + "p90": 65.05600363016129, + "p95": 68.70400160551071, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 378.7840008735657, + "p90": 400.0000059604645, + "p95": 407.1359932422638, + "p99": 419.99998688697815 + }, + "isolatedSum": { + "p50": 390.8799923956394, + "p90": 426.5280142426491, + 
"p95": 436.2240061163902, + "p99": 456.06398582458496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 337.21598982810974, + "p90": 359.391987323761, + "p95": 367.0080006122589, + "p99": 382.78400897979736 + }, + "combine": { + "p50": 53.53600159287453, + "p90": 65.05600363016129, + "p95": 69.24799829721451, + "p99": 75.29599964618683 + }, + "roundtrip": { + "p50": 377.0880103111267, + "p90": 397.2800076007843, + "p95": 403.55199575424194, + "p99": 415.1040017604828 + }, + "isolatedSum": { + "p50": 390.75199142098427, + "p90": 424.4479909539223, + "p95": 436.2559989094734, + "p99": 458.0800086259842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 340.5759930610657, + "p90": 362.7519905567169, + "p95": 368.22399497032166, + "p99": 381.9519877433777 + }, + "combine": { + "p50": 54.048001766204834, + "p90": 64.80000168085098, + "p95": 68.44799965620041, + "p99": 80.19199967384338 + }, + "roundtrip": { + "p50": 381.44001364707947, + "p90": 402.94399857521057, + "p95": 409.1840088367462, + "p99": 429.82399463653564 + }, + "isolatedSum": { + "p50": 394.6239948272705, + "p90": 427.5519922375679, + "p95": 436.67199462652206, + "p99": 462.14398741722107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 339.9679958820343, + "p90": 
361.60001158714294, + "p95": 368.2880103588104, + "p99": 386.4319920539856 + }, + "combine": { + "p50": 53.75999957323074, + "p90": 64.15999680757523, + "p95": 67.26399809122086, + "p99": 77.27999985218048 + }, + "roundtrip": { + "p50": 381.1520040035248, + "p90": 401.66398882865906, + "p95": 408.4160029888153, + "p99": 422.04800248146057 + }, + "isolatedSum": { + "p50": 393.72799545526505, + "p90": 425.76000839471817, + "p95": 435.5520084500313, + "p99": 463.7119919061661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 339.04001116752625, + "p90": 359.2959940433502, + "p95": 365.2159869670868, + "p99": 379.10398840904236 + }, + "combine": { + "p50": 54.52800169587135, + "p90": 65.8240020275116, + "p95": 69.47200000286102, + "p99": 76.9599974155426 + }, + "roundtrip": { + "p50": 379.64800000190735, + "p90": 399.07199144363403, + "p95": 402.94399857521057, + "p99": 415.77601432800293 + }, + "isolatedSum": { + "p50": 393.5680128633976, + "p90": 425.1199960708618, + "p95": 434.6879869699478, + "p99": 456.06398582458496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 335.32801270484924, + "p90": 354.46399450302124, + "p95": 361.31200194358826, + "p99": 390.56000113487244 + }, + "combine": { + "p50": 55.64799904823303, + "p90": 66.3359984755516, + "p95": 69.72800195217133, + "p99": 75.74400305747986 + }, + "roundtrip": { + "p50": 379.93600964546204, + "p90": 400.92799067497253, + "p95": 406.3679873943329, + "p99": 422.7840006351471 + }, + "isolatedSum": { + "p50": 
390.9760117530823, + "p90": 420.79999297857285, + "p95": 431.0400038957596, + "p99": 466.3040041923523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 337.5680148601532, + "p90": 356.9920063018799, + "p95": 362.62398958206177, + "p99": 374.4319975376129 + }, + "combine": { + "p50": 59.61599946022034, + "p90": 69.05599683523178, + "p95": 72.38399982452393, + "p99": 81.02399855852127 + }, + "roundtrip": { + "p50": 381.4080059528351, + "p90": 401.2799859046936, + "p95": 407.00799226760864, + "p99": 416.4159893989563 + }, + "isolatedSum": { + "p50": 397.18401432037354, + "p90": 426.04800313711166, + "p95": 435.0079894065857, + "p99": 455.4559960961342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8882f9b", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||15404c7c0ec01b5", + "colorKey": "gb300_59d99632", + "comparisonKey": "92713f9beb6130d8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:31.631310+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "15404c7c0ec01b5", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 321.4080035686493, + "p90": 346.8480110168457, + "p95": 352.03200578689575, + "p99": 362.39999532699585 + }, + "combine": { + "p50": 53.37600037455559, + "p90": 60.896001756191254, + "p95": 63.93600255250931, + "p99": 72.73600250482559 + }, + "roundtrip": { + "p50": 352.06401348114014, + "p90": 378.30400466918945, + "p95": 387.7759873867035, + "p99": 399.9040126800537 + }, + "isolatedSum": { + "p50": 374.7840039432049, + "p90": 407.74401277303696, + "p95": 415.96800833940506, + "p99": 435.13599783182144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + 
"recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 309.28000807762146, + "p90": 335.35999059677124, + "p95": 343.26401352882385, + "p99": 357.08799958229065 + }, + "combine": { + "p50": 53.888000547885895, + "p90": 62.20800057053566, + "p95": 66.880002617836, + "p99": 96.47999703884125 + }, + "roundtrip": { + "p50": 344.2560136318207, + "p90": 377.85598635673523, + "p95": 385.53598523139954, + "p99": 397.21599221229553 + }, + "isolatedSum": { + "p50": 363.16800862550735, + "p90": 397.5679911673069, + "p95": 410.14401614665985, + "p99": 453.5679966211319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 311.64801120758057, + "p90": 349.8559892177582, + "p95": 357.15198516845703, + "p99": 369.59999799728394 + }, + "combine": { + "p50": 56.89600110054016, + "p90": 63.48799914121628, + "p95": 67.03999638557434, + "p99": 71.10399752855301 + }, + "roundtrip": { + "p50": 343.77598762512207, + "p90": 383.29601287841797, + "p95": 390.6880021095276, + "p99": 407.039999961853 + }, + "isolatedSum": { + "p50": 368.5440123081207, + "p90": 413.34398835897446, + "p95": 424.1919815540314, + "p99": 440.70399552583694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 315.42399525642395, + "p90": 341.5679931640625, + "p95": 350.271999835968, + "p99": 363.8719916343689 + }, + "combine": { + "p50": 56.8000003695488, + "p90": 63.680000603199005, + "p95": 
67.9360032081604, + "p99": 73.66400212049484 + }, + "roundtrip": { + "p50": 347.6479947566986, + "p90": 374.9760091304779, + "p95": 382.9439878463745, + "p99": 394.9759900569916 + }, + "isolatedSum": { + "p50": 372.22399562597275, + "p90": 405.2479937672615, + "p95": 418.2080030441284, + "p99": 437.53599375486374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 327.5519907474518, + "p90": 353.2480001449585, + "p95": 358.94399881362915, + "p99": 370.5599904060364 + }, + "combine": { + "p50": 59.39200147986412, + "p90": 67.16799736022949, + "p95": 70.56000083684921, + "p99": 76.06399804353714 + }, + "roundtrip": { + "p50": 354.71999645233154, + "p90": 391.5199935436249, + "p95": 399.52000975608826, + "p99": 416.57599806785583 + }, + "isolatedSum": { + "p50": 386.9439922273159, + "p90": 420.415997505188, + "p95": 429.50399965047836, + "p99": 446.6239884495735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 314.14398550987244, + "p90": 352.7680039405823, + "p95": 360.51198840141296, + "p99": 376.15999579429626 + }, + "combine": { + "p50": 59.51999872922897, + "p90": 67.55200028419495, + "p95": 71.03999704122543, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 352.54400968551636, + "p90": 382.56001472473145, + "p95": 391.03999733924866, + "p99": 405.5359959602356 + }, + "isolatedSum": { + "p50": 373.6639842391014, + "p90": 420.3200042247772, + "p95": 431.5519854426384, + "p99": 454.815998673439 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 353.0240058898926, + "p90": 385.3119909763336, + "p95": 393.72798800468445, + "p99": 406.14399313926697 + }, + "combine": { + "p50": 63.93600255250931, + "p90": 69.76000219583511, + "p95": 74.49600100517273, + "p99": 81.08799904584885 + }, + "roundtrip": { + "p50": 398.49600195884705, + "p90": 429.1520118713379, + "p95": 435.87198853492737, + "p99": 445.8880126476288 + }, + "isolatedSum": { + "p50": 416.9600084424019, + "p90": 455.07199317216873, + "p95": 468.2239890098572, + "p99": 487.2319921851158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 359.20000076293945, + "p90": 390.27199149131775, + "p95": 396.5759873390198, + "p99": 408.9919924736023 + }, + "combine": { + "p50": 74.5920017361641, + "p90": 79.9039974808693, + "p95": 82.56000280380249, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 411.23199462890625, + "p90": 441.2800073623657, + "p95": 447.2639858722687, + "p99": 459.1040015220642 + }, + "isolatedSum": { + "p50": 433.79200249910355, + "p90": 470.17598897218704, + "p95": 479.13599014282227, + "p99": 500.8639916777611 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9810c5ed", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c", + "colorKey": "gb300_b2554bbc", + "comparisonKey": "d41363d38b08d895", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:45.501228+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5793a02d08aaa9c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 326.33599638938904, + "p90": 346.75198793411255, + "p95": 352.9280126094818, + "p99": 377.6960074901581 + }, + "combine": { + "p50": 53.63199859857559, + "p90": 62.752000987529755, + "p95": 66.30399823188782, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 365.08798599243164, + "p90": 383.2319974899292, + "p95": 388.51198554039, + "p99": 405.15199303627014 + }, + "isolatedSum": { + "p50": 379.96799498796463, + "p90": 409.5039889216423, + "p95": 419.23201084136963, + "p99": 455.6480050086975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 343.423992395401, + "p90": 365.31201004981995, + "p95": 370.4639971256256, + "p99": 391.6800022125244 + }, + "combine": { + "p50": 54.336000233888626, + "p90": 62.591999769210815, + "p95": 66.43199920654297, + "p99": 74.87999647855759 + }, + "roundtrip": { + "p50": 382.81598687171936, + "p90": 404.1599929332733, + "p95": 409.56801176071167, + "p99": 428.76800894737244 + }, + "isolatedSum": { + "p50": 397.7599926292896, + "p90": 427.90400981903076, + "p95": 436.8959963321686, + "p99": 466.559998691082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 333.18400382995605, + "p90": 375.42399764060974, + "p95": 393.50399374961853, + "p99": 427.45599150657654 + }, + "combine": { + 
"p50": 57.88800120353699, + "p90": 75.07199794054031, + "p95": 94.87999975681305, + "p99": 133.85599851608276 + }, + "roundtrip": { + "p50": 372.48000502586365, + "p90": 424.9599874019623, + "p95": 446.1440145969391, + "p99": 477.9199957847595 + }, + "isolatedSum": { + "p50": 391.07200503349304, + "p90": 450.49599558115005, + "p95": 488.3839935064316, + "p99": 561.3119900226593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 341.7600095272064, + "p90": 362.43200302124023, + "p95": 369.56799030303955, + "p99": 381.24799728393555 + }, + "combine": { + "p50": 56.96000158786774, + "p90": 63.87200206518173, + "p95": 68.38399916887283, + "p99": 76.12799853086472 + }, + "roundtrip": { + "p50": 381.02400302886963, + "p90": 403.1040072441101, + "p95": 408.6399972438812, + "p99": 426.9759953022003 + }, + "isolatedSum": { + "p50": 398.72001111507416, + "p90": 426.30400508642197, + "p95": 437.9519894719124, + "p99": 457.37599581480026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 331.6799998283386, + "p90": 351.7119884490967, + "p95": 357.91999101638794, + "p99": 368.5440123081207 + }, + "combine": { + "p50": 58.46399813890457, + "p90": 78.84799689054489, + "p95": 89.72799777984619, + "p99": 98.84800016880035 + }, + "roundtrip": { + "p50": 369.08799409866333, + "p90": 390.75198769569397, + "p95": 397.2800076007843, + "p99": 409.5360040664673 + }, + "isolatedSum": { + "p50": 390.1439979672432, + "p90": 430.55998533964157, + "p95": 447.64798879623413, + "p99": 
467.3920124769211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 331.9999873638153, + "p90": 350.49599409103394, + "p95": 356.86400532722473, + "p99": 383.9679956436157 + }, + "combine": { + "p50": 59.487998485565186, + "p90": 78.68800312280655, + "p95": 90.87999910116196, + "p99": 99.39199686050415 + }, + "roundtrip": { + "p50": 376.25598907470703, + "p90": 391.61598682403564, + "p95": 398.1119990348816, + "p99": 412.6720130443573 + }, + "isolatedSum": { + "p50": 391.4879858493805, + "p90": 429.1839972138405, + "p95": 447.7440044283867, + "p99": 483.3599925041199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 378.9440095424652, + "p90": 416.128009557724, + "p95": 430.11200428009033, + "p99": 460.1280093193054 + }, + "combine": { + "p50": 64.12799656391144, + "p90": 90.36800265312195, + "p95": 113.11999708414078, + "p99": 5739.64786529541 + }, + "roundtrip": { + "p50": 421.53599858283997, + "p90": 473.53601455688477, + "p95": 503.9359927177429, + "p99": 8282.208442687988 + }, + "isolatedSum": { + "p50": 443.07200610637665, + "p90": 506.49601221084595, + "p95": 543.2320013642311, + "p99": 6199.775874614716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 377.50399112701416, + 
"p90": 413.08799386024475, + "p95": 437.9200041294098, + "p99": 470.14400362968445 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 108.47999900579453, + "p95": 119.93599683046341, + "p99": 141.27999544143677 + }, + "roundtrip": { + "p50": 424.3200123310089, + "p90": 439.2000138759613, + "p95": 443.9679980278015, + "p99": 456.7359983921051 + }, + "isolatedSum": { + "p50": 450.1759931445122, + "p90": 521.5679928660393, + "p95": 557.8560009598732, + "p99": 611.4239990711212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-806ab830", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0", + "colorKey": "gb300_166aaf5e", + "comparisonKey": "1512a7c40dc5304a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:14.360319+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + 
"configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a572344820478f0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 358.91199111938477, + "p90": 382.07998871803284, + "p95": 388.51198554039, + "p99": 400.31999349594116 + }, + "combine": { + "p50": 57.312000542879105, + "p90": 67.1359971165657, + "p95": 71.26399874687195, + "p99": 79.29600030183792 + }, + "roundtrip": { + "p50": 402.14401483535767, + "p90": 427.67998576164246, + "p95": 434.04799699783325, + "p99": 449.47201013565063 + }, + "isolatedSum": { + "p50": 416.22399166226387, + "p90": 449.21598583459854, + "p95": 459.77598428726196, + "p99": 479.6159937977791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 359.2959940433502, + "p90": 378.6559998989105, + "p95": 385.15201210975647, + "p99": 402.8159976005554 + }, + "combine": { + "p50": 58.17599967122078, + "p90": 67.77600198984146, + "p95": 72.06399738788605, + "p99": 78.91199737787247 + }, + 
"roundtrip": { + "p50": 399.29598569869995, + "p90": 421.9200015068054, + "p95": 429.1200041770935, + "p99": 439.07201290130615 + }, + "isolatedSum": { + "p50": 417.471993714571, + "p90": 446.432001888752, + "p95": 457.2160094976425, + "p99": 481.7279949784279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 354.3359935283661, + "p90": 380.8639943599701, + "p95": 386.52798533439636, + "p99": 400.31999349594116 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 72.57600128650665, + "p95": 84.79999750852585, + "p99": 115.77600240707397 + }, + "roundtrip": { + "p50": 394.1760063171387, + "p90": 417.91999340057373, + "p95": 424.9599874019623, + "p99": 447.7759897708893 + }, + "isolatedSum": { + "p50": 415.0079935789108, + "p90": 453.43999564647675, + "p95": 471.3279828429222, + "p99": 516.0959959030151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 355.9040129184723, + "p90": 377.6319921016693, + "p95": 381.8880021572113, + "p99": 393.5360014438629 + }, + "combine": { + "p50": 60.19200012087822, + "p90": 69.023996591568, + "p95": 73.7600028514862, + "p99": 83.80799740552902 + }, + "roundtrip": { + "p50": 399.4880020618439, + "p90": 423.8080084323883, + "p95": 430.81599473953247, + "p99": 442.1440064907074 + }, + "isolatedSum": { + "p50": 416.0960130393505, + "p90": 446.6559886932373, + "p95": 455.6480050086975, + "p99": 477.34399884939194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 
4.96875, + "recvTokensMax": 46, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 354.3039858341217, + "p90": 376.6399919986725, + "p95": 381.53600692749023, + "p99": 394.6560025215149 + }, + "combine": { + "p50": 61.85600161552429, + "p90": 81.53600245714188, + "p95": 92.54399687051773, + "p99": 107.35999792814255 + }, + "roundtrip": { + "p50": 397.8559970855713, + "p90": 422.65599966049194, + "p95": 430.01601099967957, + "p99": 437.6640021800995 + }, + "isolatedSum": { + "p50": 416.159987449646, + "p90": 458.17599445581436, + "p95": 474.08000379800797, + "p99": 502.01600044965744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 357.15198516845703, + "p90": 381.98399543762207, + "p95": 388.35200667381287, + "p99": 402.3999869823456 + }, + "combine": { + "p50": 65.79200178384781, + "p90": 74.78400319814682, + "p95": 79.42400127649307, + "p99": 85.95199882984161 + }, + "roundtrip": { + "p50": 410.5919897556305, + "p90": 434.9440038204193, + "p95": 441.72799587249756, + "p99": 458.46399664878845 + }, + "isolatedSum": { + "p50": 422.94398695230484, + "p90": 456.7679986357689, + "p95": 467.77600795030594, + "p99": 488.3519858121872 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 389.055997133255, + "p90": 406.0159921646118, + "p95": 410.8799993991852, + "p99": 421.4400053024292 + }, + "combine": { + "p50": 64.28799778223038, + "p90": 
78.43200117349625, + "p95": 87.0399996638298, + "p99": 132.192000746727 + }, + "roundtrip": { + "p50": 432.99201130867004, + "p90": 451.9360065460205, + "p95": 456.4799964427948, + "p99": 465.60001373291016 + }, + "isolatedSum": { + "p50": 453.3439949154854, + "p90": 484.44799333810806, + "p95": 497.919999063015, + "p99": 553.6320060491562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 391.7759954929352, + "p90": 410.68801283836365, + "p95": 414.46399688720703, + "p99": 428.8640022277832 + }, + "combine": { + "p50": 75.83999633789062, + "p90": 93.47199648618698, + "p95": 111.7440015077591, + "p99": 139.23199474811554 + }, + "roundtrip": { + "p50": 445.50400972366333, + "p90": 462.3680114746094, + "p95": 469.9839949607849, + "p99": 483.3599925041199 + }, + "isolatedSum": { + "p50": 467.6159918308258, + "p90": 504.16000932455063, + "p95": 526.2079983949661, + "p99": 568.0959969758987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7f55d25d", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_fb8b2593", + "comparisonKey": "1ab399384110af91", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:53.733183+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 343.1679904460907, + "p90": 359.0719997882843, + "p95": 365.79200625419617, + "p99": 382.81598687171936 + }, + "combine": { + "p50": 56.92800134420395, + "p90": 67.77600198984146, + "p95": 70.62400132417679, + "p99": 77.98399776220322 + }, + "roundtrip": { + "p50": 383.7119936943054, + "p90": 403.2000005245209, + "p95": 407.9680144786835, + "p99": 453.0239999294281 + }, + "isolatedSum": { + "p50": 400.09599179029465, + "p90": 
426.84800177812576, + "p95": 436.41600757837296, + "p99": 460.7999846339226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 345.3119993209839, + "p90": 361.9840145111084, + "p95": 366.7199909687042, + "p99": 387.2320055961609 + }, + "combine": { + "p50": 59.61599946022034, + "p90": 81.34400099515915, + "p95": 98.52799773216248, + "p99": 111.39199882745743 + }, + "roundtrip": { + "p50": 386.6559863090515, + "p90": 405.63198924064636, + "p95": 410.94401478767395, + "p99": 428.73600125312805 + }, + "isolatedSum": { + "p50": 404.9279987812042, + "p90": 443.32801550626755, + "p95": 465.2479887008667, + "p99": 498.6240044236183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 345.2480137348175, + "p90": 361.7919981479645, + "p95": 367.8719997406006, + "p99": 382.9759955406189 + }, + "combine": { + "p50": 59.039998799562454, + "p90": 78.27199995517731, + "p95": 95.29600292444229, + "p99": 108.99200290441513 + }, + "roundtrip": { + "p50": 384.95999574661255, + "p90": 401.856005191803, + "p95": 407.9360067844391, + "p99": 418.4960126876831 + }, + "isolatedSum": { + "p50": 404.28801253437996, + "p90": 440.0639981031418, + "p95": 463.1680026650429, + "p99": 491.967998445034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
346.75198793411255, + "p90": 361.1519932746887, + "p95": 367.68001317977905, + "p99": 380.7680010795593 + }, + "combine": { + "p50": 58.559998869895935, + "p90": 76.92799717187881, + "p95": 91.87199920415878, + "p99": 103.07200253009796 + }, + "roundtrip": { + "p50": 387.4559998512268, + "p90": 405.44000267982483, + "p95": 411.19998693466187, + "p99": 421.02399468421936 + }, + "isolatedSum": { + "p50": 405.3119868040085, + "p90": 438.07999044656754, + "p95": 459.55201238393784, + "p99": 483.8400036096573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 344.1280126571655, + "p90": 361.2799942493439, + "p95": 367.19998717308044, + "p99": 385.6320083141327 + }, + "combine": { + "p50": 59.74400043487549, + "p90": 73.79200309515, + "p95": 84.19200032949448, + "p99": 105.0880029797554 + }, + "roundtrip": { + "p50": 387.4880075454712, + "p90": 406.17600083351135, + "p95": 411.6800129413605, + "p99": 422.7519929409027 + }, + "isolatedSum": { + "p50": 403.872013092041, + "p90": 435.07199734449387, + "p95": 451.3919875025749, + "p99": 490.7200112938881 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 344.543993473053, + "p90": 362.2399866580963, + "p95": 368.8960075378418, + "p99": 393.75999569892883 + }, + "combine": { + "p50": 62.72000074386597, + "p90": 91.00800007581711, + "p95": 99.16800260543823, + "p99": 120.38400024175644 + }, + "roundtrip": { + "p50": 388.8320028781891, + "p90": 406.3360095024109, + "p95": 412.2239947319031, + "p99": 425.79200863838196 + 
}, + "isolatedSum": { + "p50": 407.26399421691895, + "p90": 453.2479867339134, + "p95": 468.06401014328003, + "p99": 514.1439959406853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 354.7520041465759, + "p90": 371.9039857387543, + "p95": 379.87199425697327, + "p99": 394.8479890823364 + }, + "combine": { + "p50": 66.20799750089645, + "p90": 82.0159986615181, + "p95": 94.46399658918381, + "p99": 132.76800513267517 + }, + "roundtrip": { + "p50": 399.80798959732056, + "p90": 417.34400391578674, + "p95": 423.5199987888336, + "p99": 445.95199823379517 + }, + "isolatedSum": { + "p50": 420.9600016474724, + "p90": 453.91998440027237, + "p95": 474.3359908461571, + "p99": 527.6159942150116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 357.66398906707764, + "p90": 377.0560026168823, + "p95": 384.8319947719574, + "p99": 400.736004114151 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 109.15199667215347, + "p95": 120.2239990234375, + "p99": 143.13599467277527 + }, + "roundtrip": { + "p50": 411.0400080680847, + "p90": 431.4880073070526, + "p95": 441.8559968471527, + "p99": 465.9520089626312 + }, + "isolatedSum": { + "p50": 434.04798954725266, + "p90": 486.2079992890358, + "p95": 505.0559937953949, + "p99": 543.8719987869263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + } + ] + }, + { + "id": "cx-2f619642", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fc79fe5fdca4c", + "colorKey": "gb300_dca7bfa9", + "comparisonKey": "e7cb3708915bc34c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:50.442063+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fc79fe5fdca4c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 338.3359909057617, + "p90": 354.46399450302124, + "p95": 358.65598917007446, + "p99": 368.76800656318665 + }, + "combine": { + "p50": 52.99200117588043, + "p90": 62.3680017888546, + "p95": 64.99200314283371, + "p99": 70.8480030298233 + }, + "roundtrip": { + "p50": 375.7759928703308, + "p90": 392.63999462127686, + "p95": 397.8559970855713, + "p99": 409.63199734687805 + }, + "isolatedSum": { + "p50": 391.32799208164215, + "p90": 416.83199629187584, + "p95": 423.6479923129082, + "p99": 439.61600959300995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 358.7520122528076, + "p90": 375.93600153923035, + "p95": 379.96798753738403, + "p99": 388.41599225997925 + }, + "combine": { + "p50": 58.78400057554245, + "p90": 67.00800359249115, + "p95": 70.94399631023407, + "p99": 78.07999849319458 + }, + "roundtrip": { + "p50": 404.12798523902893, + "p90": 419.5840060710907, + "p95": 423.5199987888336, + "p99": 435.93600392341614 + }, + "isolatedSum": { + "p50": 417.53601282835007, + "p90": 442.9440051317215, + "p95": 450.9119838476181, + "p99": 466.4959907531738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 358.5279881954193, + "p90": 372.79999256134033, + "p95": 376.800000667572, + "p99": 388.0319893360138 + }, + "combine": { + 
"p50": 59.10399928689003, + "p90": 66.94400310516357, + "p95": 70.72000205516815, + "p99": 78.20799946784973 + }, + "roundtrip": { + "p50": 405.2160084247589, + "p90": 421.4079976081848, + "p95": 425.02400279045105, + "p99": 436.47998571395874 + }, + "isolatedSum": { + "p50": 417.63198748230934, + "p90": 439.7439956665039, + "p95": 447.5200027227402, + "p99": 466.2399888038635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 359.6799969673157, + "p90": 380.8319866657257, + "p95": 391.61598682403564, + "p99": 516.1280035972595 + }, + "combine": { + "p50": 59.23200026154518, + "p90": 69.31199878454208, + "p95": 75.6160020828247, + "p99": 126.0479986667633 + }, + "roundtrip": { + "p50": 401.91999077796936, + "p90": 418.65599155426025, + "p95": 421.9200015068054, + "p99": 432.0000112056732 + }, + "isolatedSum": { + "p50": 418.91199722886086, + "p90": 450.1439854502678, + "p95": 467.23198890686035, + "p99": 642.1760022640228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 357.63201117515564, + "p90": 376.76799297332764, + "p95": 386.84800267219543, + "p99": 501.69599056243896 + }, + "combine": { + "p50": 59.90400165319443, + "p90": 69.7920024394989, + "p95": 75.45600086450577, + "p99": 115.07199704647064 + }, + "roundtrip": { + "p50": 403.9359986782074, + "p90": 423.23198914527893, + "p95": 444.2560076713562, + "p99": 548.4480261802673 + }, + "isolatedSum": { + "p50": 417.53601282835007, + "p90": 446.55999541282654, + "p95": 462.3040035367012, + "p99": 
616.7679876089096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 358.46400260925293, + "p90": 379.13599610328674, + "p95": 392.5440013408661, + "p99": 509.8239779472351 + }, + "combine": { + "p50": 61.43999844789505, + "p90": 71.19999825954437, + "p95": 75.55200159549713, + "p99": 112.35199868679047 + }, + "roundtrip": { + "p50": 404.7999978065491, + "p90": 427.64800786972046, + "p95": 452.32000946998596, + "p99": 567.3279762268066 + }, + "isolatedSum": { + "p50": 419.904001057148, + "p90": 450.3359943628311, + "p95": 468.0960029363632, + "p99": 622.1759766340256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 354.0480136871338, + "p90": 371.616005897522, + "p95": 376.800000667572, + "p99": 385.53598523139954 + }, + "combine": { + "p50": 64.35199826955795, + "p90": 71.74400240182877, + "p95": 75.32799988985062, + "p99": 82.56000280380249 + }, + "roundtrip": { + "p50": 402.3360013961792, + "p90": 417.248010635376, + "p95": 422.39999771118164, + "p99": 432.3199987411499 + }, + "isolatedSum": { + "p50": 418.40001195669174, + "p90": 443.36000829935074, + "p95": 452.12800055742264, + "p99": 468.095988035202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 357.63201117515564, + "p90": 
374.9760091304779, + "p95": 379.64800000190735, + "p99": 385.6000006198883 + }, + "combine": { + "p50": 74.11199808120728, + "p90": 79.64800298213959, + "p95": 83.29600095748901, + "p99": 89.6959975361824 + }, + "roundtrip": { + "p50": 402.97600626945496, + "p90": 419.74401473999023, + "p95": 424.83198642730713, + "p99": 436.47998571395874 + }, + "isolatedSum": { + "p50": 431.7440092563629, + "p90": 454.6240121126175, + "p95": 462.94400095939636, + "p99": 475.2959981560707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2110f6fd", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da", + "colorKey": "gb300_ae0bd665", + "comparisonKey": "c21fe96424f02105", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:02.589763+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + 
"configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "39778bd75f046da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 366.2720024585724, + "p90": 384.99200344085693, + "p95": 392.4799859523773, + "p99": 444.8319971561432 + }, + "combine": { + "p50": 56.543998420238495, + "p90": 68.44799965620041, + "p95": 73.56800138950348, + "p99": 99.64799880981445 + }, + "roundtrip": { + "p50": 408.28800201416016, + "p90": 428.44799160957336, + "p95": 436.0319972038269, + "p99": 493.24798583984375 + }, + "isolatedSum": { + "p50": 422.8160008788109, + "p90": 453.44000309705734, + "p95": 466.0479873418808, + "p99": 544.4799959659576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 368.25600266456604, + "p90": 386.7200016975403, + "p95": 392.12799072265625, + "p99": 399.7119963169098 + }, + "combine": { + "p50": 56.15999922156334, + "p90": 65.8240020275116, + "p95": 72.06399738788605, + "p99": 79.77599650621414 + }, + 
"roundtrip": { + "p50": 408.4480106830597, + "p90": 430.2400052547455, + "p95": 438.944011926651, + "p99": 500.9920001029968 + }, + "isolatedSum": { + "p50": 424.4160018861294, + "p90": 452.5440037250519, + "p95": 464.1919881105423, + "p99": 479.48799282312393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 355.679988861084, + "p90": 382.1119964122772, + "p95": 390.04799723625183, + "p99": 440.38400053977966 + }, + "combine": { + "p50": 59.039998799562454, + "p90": 68.60800087451935, + "p95": 74.33599978685379, + "p99": 96.89600020647049 + }, + "roundtrip": { + "p50": 396.3199853897095, + "p90": 424.51199889183044, + "p95": 431.87201023101807, + "p99": 490.3680086135864 + }, + "isolatedSum": { + "p50": 414.71998766064644, + "p90": 450.71999728679657, + "p95": 464.3839970231056, + "p99": 537.2800007462502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 363.6159896850586, + "p90": 385.888010263443, + "p95": 400.2879858016968, + "p99": 452.60798931121826 + }, + "combine": { + "p50": 60.32000109553337, + "p90": 69.7920024394989, + "p95": 73.95199686288834, + "p99": 81.95199817419052 + }, + "roundtrip": { + "p50": 404.89599108695984, + "p90": 427.45599150657654, + "p95": 434.688001871109, + "p99": 490.2079999446869 + }, + "isolatedSum": { + "p50": 423.93599078059196, + "p90": 455.6800127029419, + "p95": 474.2399826645851, + "p99": 534.5599874854088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + 
"fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 366.4959967136383, + "p90": 390.46400785446167, + "p95": 402.52798795700073, + "p99": 447.4560022354126 + }, + "combine": { + "p50": 64.2239972949028, + "p90": 75.6480023264885, + "p95": 81.95199817419052, + "p99": 106.39999806880951 + }, + "roundtrip": { + "p50": 416.83200001716614, + "p90": 437.9520118236542, + "p95": 447.64798879623413, + "p99": 499.80801343917847 + }, + "isolatedSum": { + "p50": 430.7199940085411, + "p90": 466.11201018095016, + "p95": 484.47998613119125, + "p99": 553.8560003042221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 361.82400584220886, + "p90": 385.18399000167847, + "p95": 396.95999026298523, + "p99": 456.83199167251587 + }, + "combine": { + "p50": 64.57599997520447, + "p90": 73.82400333881378, + "p95": 79.58400249481201, + "p99": 94.78399902582169 + }, + "roundtrip": { + "p50": 408.735990524292, + "p90": 432.44799971580505, + "p95": 439.7760033607483, + "p99": 498.04800748825073 + }, + "isolatedSum": { + "p50": 426.40000581741333, + "p90": 459.00799334049225, + "p95": 476.54399275779724, + "p99": 551.6159906983376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 388.480007648468, + "p90": 410.8479917049408, + "p95": 420.76799273490906, + "p99": 487.5519871711731 + }, + "combine": { + "p50": 65.11999666690826, + 
"p90": 73.47200065851212, + "p95": 78.015998005867, + "p99": 90.55999666452408 + }, + "roundtrip": { + "p50": 429.7280013561249, + "p90": 455.9360146522522, + "p95": 463.00798654556274, + "p99": 535.647988319397 + }, + "isolatedSum": { + "p50": 453.6000043153763, + "p90": 484.3199923634529, + "p95": 498.78399074077606, + "p99": 578.1119838356972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 393.15199851989746, + "p90": 421.34401202201843, + "p95": 453.5039961338043, + "p99": 488.5439872741699 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 80.4160013794899, + "p95": 84.28800106048584, + "p99": 100.44799745082855 + }, + "roundtrip": { + "p50": 446.78398966789246, + "p90": 464.7679924964905, + "p95": 473.1839895248413, + "p99": 533.9840054512024 + }, + "isolatedSum": { + "p50": 465.2159959077835, + "p90": 501.76001340150833, + "p95": 537.7919971942902, + "p99": 588.9919847249985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0ac13687", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717", + "colorKey": "gb300_fc0eaec4", + "comparisonKey": "720598885319a9b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:15.601246+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a3b13bb200bb717", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 354.7840118408203, + "p90": 374.08000230789185, + "p95": 379.13599610328674, + "p99": 391.1040127277374 + }, + "combine": { + "p50": 59.61599946022034, + "p90": 68.38399916887283, + "p95": 74.46400076150894, + "p99": 81.69600367546082 + }, + "roundtrip": { + "p50": 397.21599221229553, + "p90": 416.28798842430115, + "p95": 423.20001125335693, + "p99": 437.27999925613403 + }, + "isolatedSum": { + "p50": 
414.40001130104065, + "p90": 442.4640014767647, + "p95": 453.5999968647957, + "p99": 472.80001640319824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 357.08799958229065, + "p90": 378.2080113887787, + "p95": 384.799987077713, + "p99": 397.66401052474976 + }, + "combine": { + "p50": 58.9120015501976, + "p90": 68.9919963479042, + "p95": 74.43200051784515, + "p99": 81.44000172615051 + }, + "roundtrip": { + "p50": 404.00001406669617, + "p90": 422.7840006351471, + "p95": 427.5200068950653, + "p99": 441.15200638771057 + }, + "isolatedSum": { + "p50": 416.00000113248825, + "p90": 447.2000077366829, + "p95": 459.23198759555817, + "p99": 479.10401225090027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 355.16801476478577, + "p90": 377.56800651550293, + "p95": 382.207989692688, + "p99": 391.7439877986908 + }, + "combine": { + "p50": 61.184000223875046, + "p90": 69.72800195217133, + "p95": 75.26399940252304, + "p99": 89.05600011348724 + }, + "roundtrip": { + "p50": 398.49600195884705, + "p90": 422.7840006351471, + "p95": 430.30399084091187, + "p99": 446.1440145969391 + }, + "isolatedSum": { + "p50": 416.3520149886608, + "p90": 447.29600846767426, + "p95": 457.47198909521103, + "p99": 480.79998791217804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 354.65601086616516, + "p90": 376.800000667572, + "p95": 385.15201210975647, + "p99": 400.191992521286 + }, + "combine": { + "p50": 61.76000088453293, + "p90": 69.56800073385239, + "p95": 75.45600086450577, + "p99": 88.41600269079208 + }, + "roundtrip": { + "p50": 400.89601278305054, + "p90": 424.0320026874542, + "p95": 431.10400438308716, + "p99": 441.1199986934662 + }, + "isolatedSum": { + "p50": 416.4160117506981, + "p90": 446.3680014014244, + "p95": 460.60801297426224, + "p99": 488.6079952120781 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 351.3279855251312, + "p90": 373.76001477241516, + "p95": 380.19201159477234, + "p99": 390.175998210907 + }, + "combine": { + "p50": 62.30400130152702, + "p90": 70.94399631023407, + "p95": 75.42400062084198, + "p99": 82.49600231647491 + }, + "roundtrip": { + "p50": 398.20799231529236, + "p90": 422.04800248146057, + "p95": 427.5839924812317, + "p99": 439.7119879722595 + }, + "isolatedSum": { + "p50": 413.63198682665825, + "p90": 444.70401108264923, + "p95": 455.6160122156143, + "p99": 472.6720005273819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 353.7920117378235, + "p90": 376.3520121574402, + "p95": 384.5759928226471, + "p99": 392.2559916973114 + }, + "combine": { + "p50": 64.54399973154068, + "p90": 72.4480003118515, + "p95": 76.51200145483017, + "p99": 85.11999994516373 + }, + "roundtrip": { + "p50": 398.97599816322327, + "p90": 419.5840060710907, + "p95": 
425.59999227523804, + "p99": 442.9120123386383 + }, + "isolatedSum": { + "p50": 418.33601146936417, + "p90": 448.8000124692917, + "p95": 461.08799427747726, + "p99": 477.3759916424751 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 381.5680146217346, + "p90": 399.07199144363403, + "p95": 403.9359986782074, + "p99": 420.6080138683319 + }, + "combine": { + "p50": 68.60800087451935, + "p90": 76.28799974918365, + "p95": 79.74400371313095, + "p99": 90.62399715185165 + }, + "roundtrip": { + "p50": 423.42400550842285, + "p90": 442.30398535728455, + "p95": 449.24798607826233, + "p99": 485.4080080986023 + }, + "isolatedSum": { + "p50": 450.17601549625397, + "p90": 475.3599911928177, + "p95": 483.68000239133835, + "p99": 511.23201102018356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 380.16000390052795, + "p90": 397.5679874420166, + "p95": 404.12798523902893, + "p99": 420.76799273490906 + }, + "combine": { + "p50": 76.7040029168129, + "p90": 84.6719965338707, + "p95": 89.02399986982346, + "p99": 101.24800354242325 + }, + "roundtrip": { + "p50": 436.2240135669708, + "p90": 456.89600706100464, + "p95": 461.760014295578, + "p99": 473.5040068626404 + }, + "isolatedSum": { + "p50": 456.86400681734085, + "p90": 482.2399839758873, + "p95": 493.1519851088524, + "p99": 522.0159962773323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + 
"stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-adb22dd9", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b", + "colorKey": "gb300_8c3da06a", + "comparisonKey": "2df0075a38a1c872", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:35.457488+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ab982093c4eac2b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + 
"run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 349.4719862937927, + "p90": 383.13600420951843, + "p95": 391.7759954929352, + "p99": 409.4400107860565 + }, + "combine": { + "p50": 52.86400020122528, + "p90": 65.24799764156342, + "p95": 70.39999961853027, + "p99": 82.59200304746628 + }, + "roundtrip": { + "p50": 390.6559944152832, + "p90": 421.2160110473633, + "p95": 431.87201023101807, + "p99": 462.2400104999542 + }, + "isolatedSum": { + "p50": 402.335986495018, + "p90": 448.38400185108185, + "p95": 462.17599511146545, + "p99": 492.0320138335228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 342.3680067062378, + "p90": 366.04800820350647, + "p95": 375.328004360199, + "p99": 393.6319947242737 + }, + "combine": { + "p50": 53.53600159287453, + "p90": 63.391998410224915, + "p95": 70.49600034952164, + "p99": 83.23200047016144 + }, + "roundtrip": { + "p50": 389.21600580215454, + "p90": 414.88000750541687, + "p95": 424.8960018157959, + "p99": 441.6959881782532 + }, + "isolatedSum": { + "p50": 395.9040082991123, + "p90": 429.4400066137314, + "p95": 445.8240047097206, + "p99": 476.8639951944351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 336.60799264907837, + "p90": 375.61601400375366, + "p95": 388.8320028781891, + "p99": 
415.0719940662384 + }, + "combine": { + "p50": 55.296000093221664, + "p90": 63.551999628543854, + "p95": 69.85600292682648, + "p99": 80.76799660921097 + }, + "roundtrip": { + "p50": 375.8080005645752, + "p90": 402.1120071411133, + "p95": 410.97599267959595, + "p99": 422.7840006351471 + }, + "isolatedSum": { + "p50": 391.90399274230003, + "p90": 439.1680136322975, + "p95": 458.68800580501556, + "p99": 495.83999067544937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 358.240008354187, + "p90": 385.43999195098877, + "p95": 394.9759900569916, + "p99": 407.3599874973297 + }, + "combine": { + "p50": 56.8000003695488, + "p90": 71.26399874687195, + "p95": 85.21600067615509, + "p99": 99.74399954080582 + }, + "roundtrip": { + "p50": 389.4079923629761, + "p90": 414.7840142250061, + "p95": 422.0159947872162, + "p99": 437.3440146446228 + }, + "isolatedSum": { + "p50": 415.0400087237358, + "p90": 456.7039906978607, + "p95": 480.19199073314667, + "p99": 507.10398703813553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 352.3840010166168, + "p90": 382.7199935913086, + "p95": 391.5199935436249, + "p99": 418.08000206947327 + }, + "combine": { + "p50": 57.82400071620941, + "p90": 68.83200258016586, + "p95": 78.20799946784973, + "p99": 98.65599870681763 + }, + "roundtrip": { + "p50": 399.00800585746765, + "p90": 432.671993970871, + "p95": 444.35200095176697, + "p99": 466.1119878292084 + }, + "isolatedSum": { + "p50": 410.20800173282623, + "p90": 451.55199617147446, + "p95": 
469.7279930114746, + "p99": 516.7360007762909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 352.09599137306213, + "p90": 379.42400574684143, + "p95": 389.15199041366577, + "p99": 404.7040045261383 + }, + "combine": { + "p50": 57.34400078654289, + "p90": 68.67200136184692, + "p95": 76.51200145483017, + "p99": 102.39999741315842 + }, + "roundtrip": { + "p50": 397.5360095500946, + "p90": 421.79200053215027, + "p95": 435.68000197410583, + "p99": 466.1119878292084 + }, + "isolatedSum": { + "p50": 409.439992159605, + "p90": 448.09600710868835, + "p95": 465.66399186849594, + "p99": 507.1040019392967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 376.67199969291687, + "p90": 403.872013092041, + "p95": 426.07998847961426, + "p99": 442.81598925590515 + }, + "combine": { + "p50": 62.33600154519081, + "p90": 78.36800068616867, + "p95": 87.80799806118011, + "p99": 109.47199910879135 + }, + "roundtrip": { + "p50": 419.71200704574585, + "p90": 455.1680088043213, + "p95": 471.2640047073364, + "p99": 496.92800641059875 + }, + "isolatedSum": { + "p50": 439.0080012381077, + "p90": 482.2400137782097, + "p95": 513.8879865407944, + "p99": 552.2879883646965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
384.5120072364807, + "p90": 411.1360013484955, + "p95": 424.3839979171753, + "p99": 447.7120041847229 + }, + "combine": { + "p50": 69.15199756622314, + "p90": 76.80000364780426, + "p95": 80.76799660921097, + "p99": 92.00000017881393 + }, + "roundtrip": { + "p50": 442.01600551605225, + "p90": 469.59999203681946, + "p95": 480.8320105075836, + "p99": 494.87999081611633 + }, + "isolatedSum": { + "p50": 453.66400480270386, + "p90": 487.93600499629974, + "p95": 505.15199452638626, + "p99": 539.7120043635368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2dbaa073", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_6e070cf3", + "comparisonKey": "e4492f55f9440a65", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:38.601776+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, 
+ "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 340.5439853668213, + "p90": 364.22398686408997, + "p95": 371.0399866104126, + "p99": 427.7440011501312 + }, + "combine": { + "p50": 54.46400120854378, + "p90": 63.231997191905975, + "p95": 67.00800359249115, + "p99": 72.25599884986877 + }, + "roundtrip": { + "p50": 378.6559998989105, + "p90": 397.63200283050537, + "p95": 404.6719968318939, + "p99": 416.8959856033325 + }, + "isolatedSum": { + "p50": 395.00798657536507, + "p90": 427.45598405599594, + "p95": 438.04799020290375, + "p99": 500 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 339.7440016269684, + "p90": 364.3200099468231, + "p95": 374.4640052318573, + "p99": 446.7200040817261 + }, + "combine": { + "p50": 55.10399863123894, + "p90": 64.31999802589417, + "p95": 67.26399809122086, + "p99": 77.7600035071373 + }, + "roundtrip": { + "p50": 
381.44001364707947, + "p90": 401.2160003185272, + "p95": 409.85599160194397, + "p99": 466.46401286125183 + }, + "isolatedSum": { + "p50": 394.8480002582073, + "p90": 428.6400079727173, + "p95": 441.72800332307816, + "p99": 524.4800075888634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 337.92001008987427, + "p90": 355.8720052242279, + "p95": 362.62398958206177, + "p99": 380.7680010795593 + }, + "combine": { + "p50": 56.063998490571976, + "p90": 64.86400216817856, + "p95": 69.95200365781784, + "p99": 76.89599692821503 + }, + "roundtrip": { + "p50": 376.96000933647156, + "p90": 395.52000164985657, + "p95": 402.0479917526245, + "p99": 413.9519929885864 + }, + "isolatedSum": { + "p50": 393.98400858044624, + "p90": 420.73600739240646, + "p95": 432.5759932398796, + "p99": 457.66399800777435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 340.86400270462036, + "p90": 360.73601245880127, + "p95": 368.00000071525574, + "p99": 382.30401277542114 + }, + "combine": { + "p50": 56.41600117087364, + "p90": 63.58399987220764, + "p95": 69.15199756622314, + "p99": 77.37600058317184 + }, + "roundtrip": { + "p50": 379.87199425697327, + "p90": 398.75200390815735, + "p95": 403.84000539779663, + "p99": 414.5919978618622 + }, + "isolatedSum": { + "p50": 397.280003875494, + "p90": 424.3200123310089, + "p95": 437.1519982814789, + "p99": 459.680013358593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + 
"recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 339.07198905944824, + "p90": 359.16799306869507, + "p95": 365.1520013809204, + "p99": 384.223997592926 + }, + "combine": { + "p50": 57.50399827957153, + "p90": 65.47199934720993, + "p95": 70.01599669456482, + "p99": 74.75200295448303 + }, + "roundtrip": { + "p50": 385.0240111351013, + "p90": 406.0479998588562, + "p95": 410.1119935512543, + "p99": 457.18398690223694 + }, + "isolatedSum": { + "p50": 396.5759873390198, + "p90": 424.639992415905, + "p95": 435.16799807548523, + "p99": 458.97600054740906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 338.20798993110657, + "p90": 355.679988861084, + "p95": 364.8959994316101, + "p99": 386.6240084171295 + }, + "combine": { + "p50": 60.5119988322258, + "p90": 67.90400296449661, + "p95": 71.10399752855301, + "p99": 76.35200023651123 + }, + "roundtrip": { + "p50": 382.9439878463745, + "p90": 403.48801016807556, + "p95": 411.52000427246094, + "p99": 435.64799427986145 + }, + "isolatedSum": { + "p50": 398.71998876333237, + "p90": 423.5839918255806, + "p95": 435.9999969601631, + "p99": 462.97600865364075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 346.94400429725647, + "p90": 362.87999153137207, + "p95": 368.0320084095001, + "p99": 385.43999195098877 + }, + "combine": { + "p50": 63.93600255250931, + "p90": 69.95200365781784, + 
"p95": 74.30399954319, + "p99": 91.48799628019333 + }, + "roundtrip": { + "p50": 394.3679928779602, + "p90": 410.2399945259094, + "p95": 414.62400555610657, + "p99": 435.9999895095825 + }, + "isolatedSum": { + "p50": 410.8800068497658, + "p90": 432.8319951891899, + "p95": 442.3360079526901, + "p99": 476.9279882311821 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 348.86398911476135, + "p90": 365.53600430488586, + "p95": 370.07999420166016, + "p99": 394.463986158371 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 80.06399869918823, + "p95": 83.20000022649765, + "p99": 95.87199985980988 + }, + "roundtrip": { + "p50": 403.7120044231415, + "p90": 417.63201355934143, + "p95": 422.94400930404663, + "p99": 444.5120096206665 + }, + "isolatedSum": { + "p50": 422.11198806762695, + "p90": 445.6000030040741, + "p95": 453.2799944281578, + "p99": 490.33598601818085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a8a3f134", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_74bc362b", + "comparisonKey": "e73bfadc8a4deb15", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:58.180490+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 350.5280017852783, + "p90": 376.73598527908325, + "p95": 380.8319866657257, + "p99": 394.52800154685974 + }, + "combine": { + "p50": 56.15999922156334, + "p90": 67.03999638557434, + "p95": 70.49600034952164, + "p99": 83.61600339412689 + }, + "roundtrip": { + "p50": 386.9760036468506, + "p90": 413.91998529434204, + "p95": 417.7280068397522, + "p99": 425.31201243400574 + 
}, + "isolatedSum": { + "p50": 406.68800100684166, + "p90": 443.7759816646576, + "p95": 451.32798701524734, + "p99": 478.14400494098663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 348.7040102481842, + "p90": 374.1439878940582, + "p95": 381.1199963092804, + "p99": 392.35201478004456 + }, + "combine": { + "p50": 57.72799998521805, + "p90": 67.90400296449661, + "p95": 71.32799923419952, + "p99": 80.35200089216232 + }, + "roundtrip": { + "p50": 389.95200395584106, + "p90": 410.8799993991852, + "p95": 414.40001130104065, + "p99": 423.7760007381439 + }, + "isolatedSum": { + "p50": 406.43201023340225, + "p90": 442.04799085855484, + "p95": 452.4479955434799, + "p99": 472.7040156722069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 343.80799531936646, + "p90": 366.87999963760376, + "p95": 371.8079924583435, + "p99": 378.27199697494507 + }, + "combine": { + "p50": 59.7120001912117, + "p90": 69.11999732255936, + "p95": 72.95999675989151, + "p99": 81.37600123882294 + }, + "roundtrip": { + "p50": 387.6799941062927, + "p90": 409.824013710022, + "p95": 415.8079922199249, + "p99": 432.48000741004944 + }, + "isolatedSum": { + "p50": 403.51999551057816, + "p90": 435.9999969601631, + "p95": 444.767989218235, + "p99": 459.647998213768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 347.104012966156, + "p90": 370.7520067691803, + "p95": 375.67999958992004, + "p99": 387.4880075454712 + }, + "combine": { + "p50": 59.776000678539276, + "p90": 68.83200258016586, + "p95": 72.4480003118515, + "p99": 77.34400033950806 + }, + "roundtrip": { + "p50": 390.175998210907, + "p90": 413.6959910392761, + "p95": 419.3280041217804, + "p99": 428.1280040740967 + }, + "isolatedSum": { + "p50": 406.8800136446953, + "p90": 439.58400934934616, + "p95": 448.12799990177155, + "p99": 464.83200788497925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 343.4560000896454, + "p90": 366.8160140514374, + "p95": 373.1200098991394, + "p99": 386.49600744247437 + }, + "combine": { + "p50": 62.17600032687187, + "p90": 72.83200323581696, + "p95": 75.52000135183334, + "p99": 81.85599744319916 + }, + "roundtrip": { + "p50": 395.1680064201355, + "p90": 419.8080003261566, + "p95": 425.4080057144165, + "p99": 436.8320107460022 + }, + "isolatedSum": { + "p50": 405.63200041651726, + "p90": 439.64801728725433, + "p95": 448.64001125097275, + "p99": 468.3520048856735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 349.5039939880371, + "p90": 370.1440095901489, + "p95": 375.5519986152649, + "p99": 385.53598523139954 + }, + "combine": { + "p50": 62.01599910855293, + "p90": 71.32799923419952, + "p95": 74.8480036854744, + "p99": 82.0159986615181 + }, + "roundtrip": { + "p50": 397.66401052474976, + "p90": 
424.4160056114197, + "p95": 432.3840141296387, + "p99": 475.42399168014526 + }, + "isolatedSum": { + "p50": 411.51999309659004, + "p90": 441.47200882434845, + "p95": 450.4000023007393, + "p99": 467.55198389291763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 377.9520094394684, + "p90": 393.5360014438629, + "p95": 398.97599816322327, + "p99": 405.56800365448 + }, + "combine": { + "p50": 64.03200328350067, + "p90": 70.65600156784058, + "p95": 75.00799745321274, + "p99": 82.71999657154083 + }, + "roundtrip": { + "p50": 424.9599874019623, + "p90": 442.04801321029663, + "p95": 447.07199931144714, + "p99": 466.048002243042 + }, + "isolatedSum": { + "p50": 441.98401272296906, + "p90": 464.1920030117035, + "p95": 473.983995616436, + "p99": 488.2880002260208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 385.9519958496094, + "p90": 400.2560079097748, + "p95": 404.1920006275177, + "p99": 417.9520010948181 + }, + "combine": { + "p50": 72.12799787521362, + "p90": 80.03199845552444, + "p95": 83.42400193214417, + "p99": 90.4960036277771 + }, + "roundtrip": { + "p50": 439.10399079322815, + "p90": 456.5120041370392, + "p95": 462.4960124492645, + "p99": 468.28800439834595 + }, + "isolatedSum": { + "p50": 458.079993724823, + "p90": 480.2880063652992, + "p95": 487.61600255966187, + "p99": 508.4480047225952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + 
"recvTokensMax": 687, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-30395e14", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_d80745cb", + "comparisonKey": "8702e95bc4cf360d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:33.314401+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", 
+ "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 352.1279990673065, + "p90": 371.4880049228668, + "p95": 376.6399919986725, + "p99": 388.5760009288788 + }, + "combine": { + "p50": 54.17599901556969, + "p90": 65.05600363016129, + "p95": 71.45600020885468, + "p99": 87.13600039482117 + }, + "roundtrip": { + "p50": 392.767995595932, + "p90": 416.3840115070343, + "p95": 421.4720129966736, + "p99": 433.1519901752472 + }, + "isolatedSum": { + "p50": 406.3039980828762, + "p90": 436.5440085530281, + "p95": 448.09599220752716, + "p99": 475.71200132369995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 356.57599568367004, + "p90": 378.4320056438446, + "p95": 384.4799995422363, + "p99": 408.28800201416016 + }, + "combine": { + "p50": 55.87200075387955, + "p90": 64.7360011935234, + "p95": 71.42399996519089, + "p99": 87.74399757385254 + }, + "roundtrip": { + "p50": 399.52000975608826, + "p90": 421.4400053024292, + "p95": 426.07998847961426, + "p99": 445.21600008010864 + }, + "isolatedSum": { + "p50": 412.4479964375496, + "p90": 443.168006837368, + "p95": 455.9039995074272, + "p99": 496.0319995880127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 345.8560109138489, + "p90": 373.6000061035156, + "p95": 380.0640106201172, + "p99": 
398.0799913406372 + }, + "combine": { + "p50": 56.8000003695488, + "p90": 64.83200192451477, + "p95": 70.30399888753891, + "p99": 82.59200304746628 + }, + "roundtrip": { + "p50": 396.9919979572296, + "p90": 423.3599901199341, + "p95": 428.79998683929443, + "p99": 441.50400161743164 + }, + "isolatedSum": { + "p50": 402.6560112833977, + "p90": 438.4320080280304, + "p95": 450.3680095076561, + "p99": 480.6719943881035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 353.2800078392029, + "p90": 374.5279908180237, + "p95": 383.13600420951843, + "p99": 394.23999190330505 + }, + "combine": { + "p50": 57.8560009598732, + "p90": 67.6800012588501, + "p95": 72.80000299215317, + "p99": 100.0640019774437 + }, + "roundtrip": { + "p50": 399.04001355171204, + "p90": 422.8479862213135, + "p95": 430.976003408432, + "p99": 449.44000244140625 + }, + "isolatedSum": { + "p50": 411.1360087990761, + "p90": 442.2079920768738, + "p95": 455.9360072016716, + "p99": 494.30399388074875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 349.69601035118103, + "p90": 374.9440014362335, + "p95": 382.4000060558319, + "p99": 395.58398723602295 + }, + "combine": { + "p50": 58.94400179386139, + "p90": 67.52000004053116, + "p95": 72.92799651622772, + "p99": 78.43200117349625 + }, + "roundtrip": { + "p50": 394.9440121650696, + "p90": 420.415997505188, + "p95": 427.45599150657654, + "p99": 437.44000792503357 + }, + "isolatedSum": { + "p50": 408.6400121450424, + "p90": 442.4640014767647, + "p95": 
455.32800257205963, + "p99": 474.0159884095192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 355.9040129184723, + "p90": 375.2320110797882, + "p95": 381.21598958969116, + "p99": 398.17601442337036 + }, + "combine": { + "p50": 59.776000678539276, + "p90": 70.43199986219406, + "p95": 78.20799946784973, + "p99": 99.58399832248688 + }, + "roundtrip": { + "p50": 399.58399534225464, + "p90": 422.14399576187134, + "p95": 428.76800894737244, + "p99": 448.63998889923096 + }, + "isolatedSum": { + "p50": 415.68001359701157, + "p90": 445.66401094198227, + "p95": 459.4239890575409, + "p99": 497.76001274585724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 383.07198882102966, + "p90": 405.60001134872437, + "p95": 412.54401206970215, + "p99": 444.7680115699768 + }, + "combine": { + "p50": 61.28000095486641, + "p90": 69.11999732255936, + "p95": 75.07199794054031, + "p99": 90.27200192213058 + }, + "roundtrip": { + "p50": 434.9760115146637, + "p90": 458.8800072669983, + "p95": 467.6479995250702, + "p99": 480.51199316978455 + }, + "isolatedSum": { + "p50": 444.3519897758961, + "p90": 474.7200086712837, + "p95": 487.61601001024246, + "p99": 535.0400134921074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": 
{ + "p50": 391.29599928855896, + "p90": 415.1360094547272, + "p95": 422.7199852466583, + "p99": 460.671991109848 + }, + "combine": { + "p50": 69.023996591568, + "p90": 76.92799717187881, + "p95": 82.2720006108284, + "p99": 110.88000237941742 + }, + "roundtrip": { + "p50": 445.3440010547638, + "p90": 471.23199701309204, + "p95": 478.5279929637909, + "p99": 498.6239969730377 + }, + "isolatedSum": { + "p50": 460.31999588012695, + "p90": 492.064006626606, + "p95": 504.9919858574867, + "p99": 571.5519934892654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fd67d19c", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_9e1e517c", + "comparisonKey": "45c5ea7415be8199", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:17:44.418457+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + 
"deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 288.89599442481995, + "p90": 300.5119860172272, + "p95": 303.8719892501831, + "p99": 313.24800848960876 + }, + "combine": { + "p50": 57.472001761198044, + "p90": 60.896001756191254, + "p95": 62.39999830722809, + "p99": 67.9360032081604 + }, + "roundtrip": { + "p50": 329.0559947490692, + "p90": 340.831995010376, + "p95": 344.89598870277405, + "p99": 351.7119884490967 + }, + "isolatedSum": { + "p50": 346.367996186018, + "p90": 361.4079877734184, + "p95": 366.2719875574112, + "p99": 381.18401169776917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 478, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 292.2559976577759, + "p90": 303.99999022483826, + "p95": 307.3279857635498, + "p99": 311.90401315689087 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 70.36799937486649, + "p95": 71.68000191450119, + "p99": 77.95199751853943 + }, + "roundtrip": { + "p50": 
343.80799531936646, + "p90": 354.71999645233154, + "p95": 357.7919900417328, + "p99": 365.1520013809204 + }, + "isolatedSum": { + "p50": 359.5519959926605, + "p90": 374.36798959970474, + "p95": 379.007987678051, + "p99": 389.8560106754303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 943, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 297.0559895038605, + "p90": 310.04801392555237, + "p95": 313.6639893054962, + "p99": 319.2639946937561 + }, + "combine": { + "p50": 87.87199854850769, + "p90": 90.62399715185165, + "p95": 92.06400066614151, + "p99": 99.67999905347824 + }, + "roundtrip": { + "p50": 366.2720024585724, + "p90": 379.2000114917755, + "p95": 382.3679983615875, + "p99": 391.36001467704773 + }, + "isolatedSum": { + "p50": 384.92798805236816, + "p90": 400.672011077404, + "p95": 405.7279899716377, + "p99": 418.94399374723434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 1862, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 309.471994638443, + "p90": 318.1760013103485, + "p95": 322.36799597740173, + "p99": 329.15198802948 + }, + "combine": { + "p50": 131.55199587345123, + "p90": 134.8479986190796, + "p95": 136.1279934644699, + "p99": 142.62400567531586 + }, + "roundtrip": { + "p50": 434.33600664138794, + "p90": 444.89601254463196, + "p95": 448.92799854278564, + "p99": 455.07198572158813 + }, + "isolatedSum": { + "p50": 441.0239905118942, + "p90": 453.0239999294281, + "p95": 458.49598944187164, + "p99": 471.77599370479584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 
212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 3716, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 431.1999976634979, + "p90": 441.6640102863312, + "p95": 444.92799043655396, + "p99": 450.3040015697479 + }, + "combine": { + "p50": 224.31999444961548, + "p90": 227.84000635147095, + "p95": 229.37600314617157, + "p99": 236.41599714756012 + }, + "roundtrip": { + "p50": 646.2079882621765, + "p90": 656.1920046806335, + "p95": 660.1600050926208, + "p99": 666.7519807815552 + }, + "isolatedSum": { + "p50": 655.5199921131134, + "p90": 669.5040166378021, + "p95": 674.3039935827255, + "p99": 686.719998717308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 7418, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 581.824004650116, + "p90": 592.8639769554138, + "p95": 595.4239964485168, + "p99": 603.007972240448 + }, + "combine": { + "p50": 404.06399965286255, + "p90": 407.51999616622925, + "p95": 408.9600145816803, + "p99": 414.7520065307617 + }, + "roundtrip": { + "p50": 977.183997631073, + "p90": 982.9760193824768, + "p95": 985.1199984550476, + "p99": 991.3920164108276 + }, + "isolatedSum": { + "p50": 985.8880043029785, + "p90": 1000.3839731216431, + "p95": 1004.3840110301971, + "p99": 1017.7599787712097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 14848, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-41d03fd3", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|prefill|normal|none|none|0|tuned||1104ab83732593b", + 
"colorKey": "gb300_f078f264", + "comparisonKey": "f4b201aec6bb126c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:18.085774+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1104ab83732593b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 
294.07998919487, + "p90": 307.3599934577942, + "p95": 310.94399094581604, + "p99": 318.9760148525238 + }, + "combine": { + "p50": 57.28000029921532, + "p90": 60.32000109553337, + "p95": 61.69600039720535, + "p99": 70.27199864387512 + }, + "roundtrip": { + "p50": 331.64799213409424, + "p90": 343.23200583457947, + "p95": 348.25599193573, + "p99": 356.83199763298035 + }, + "isolatedSum": { + "p50": 351.3599894940853, + "p90": 367.67999455332756, + "p95": 372.6399913430214, + "p99": 389.2480134963989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 295.8720028400421, + "p90": 308.9280128479004, + "p95": 311.2320005893707, + "p99": 318.1439936161041 + }, + "combine": { + "p50": 66.6240006685257, + "p90": 70.23999840021133, + "p95": 72.54400104284286, + "p99": 79.64800298213959 + }, + "roundtrip": { + "p50": 344.4480001926422, + "p90": 357.2160005569458, + "p95": 360.03199219703674, + "p99": 366.33598804473877 + }, + "isolatedSum": { + "p50": 362.4960035085678, + "p90": 379.1680112481117, + "p95": 383.7760016322136, + "p99": 397.7919965982437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 4, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 301.15199089050293, + "p90": 313.50401043891907, + "p95": 317.31200218200684, + "p99": 331.9999873638153 + }, + "combine": { + "p50": 86.62399649620056, + "p90": 89.75999802350998, + "p95": 91.2960022687912, + "p99": 97.59999811649323 + }, + "roundtrip": { + "p50": 372.8320002555847, + "p90": 381.24799728393555, + "p95": 384.0639889240265, + "p99": 391.6800022125244 + }, + 
"isolatedSum": { + "p50": 387.7759873867035, + "p90": 403.26400846242905, + "p95": 408.60800445079803, + "p99": 429.59998548030853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 4, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 320.1279938220978, + "p90": 332.0640027523041, + "p95": 335.7119858264923, + "p99": 343.55199337005615 + }, + "combine": { + "p50": 141.88799262046814, + "p90": 145.4399973154068, + "p95": 147.10399508476257, + "p99": 154.55999970436096 + }, + "roundtrip": { + "p50": 446.6879963874817, + "p90": 458.8479995727539, + "p95": 461.88798546791077, + "p99": 467.74399280548096 + }, + "isolatedSum": { + "p50": 462.0159864425659, + "p90": 477.5040000677109, + "p95": 482.8159809112549, + "p99": 498.1119930744171 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 4, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 447.58400321006775, + "p90": 459.77601408958435, + "p95": 464.1920030117035, + "p99": 474.2079973220825 + }, + "combine": { + "p50": 243.3599978685379, + "p90": 247.26399779319763, + "p95": 248.9279955625534, + "p99": 254.84800338745117 + }, + "roundtrip": { + "p50": 677.0240068435669, + "p90": 687.2640252113342, + "p95": 690.8800005912781, + "p99": 694.6880221366882 + }, + "isolatedSum": { + "p50": 690.9440010786057, + "p90": 707.040011882782, + "p95": 713.1199985742569, + "p99": 729.0560007095337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 4, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 613.2479906082153, + "p90": 624.895989894867, + "p95": 628.1920075416565, + "p99": 636.5439891815186 + }, + "combine": { + "p50": 443.04001331329346, + "p90": 447.32800126075745, + "p95": 448.89599084854126, + "p99": 453.0879855155945 + }, + "roundtrip": { + "p50": 1045.1840162277222, + "p90": 1055.0400018692017, + "p95": 1058.9439868927002, + "p99": 1064.5760297775269 + }, + "isolatedSum": { + "p50": 1056.2880039215088, + "p90": 1072.2239911556244, + "p95": 1077.0879983901978, + "p99": 1089.631974697113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 4, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-683aa35b", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|prefill|normal|none|none|0|tuned||e15d35cfeaea91f", + "colorKey": "gb300_07cee71f", + "comparisonKey": "4df0859434e982bf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:18:45.885472+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e15d35cfeaea91f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 255.42399287223816, + "p90": 268.0639922618866, + "p95": 271.58400416374207, + "p99": 281.6320061683655 + }, + "combine": { + "p50": 49.18399825692177, + "p90": 52.22399905323982, + "p95": 54.655998945236206, + "p99": 61.535999178886414 + }, + "roundtrip": { + "p50": 287.9999876022339, + "p90": 301.2160062789917, + "p95": 304.4160008430481, + "p99": 310.40000915527344 + }, + "isolatedSum": { + "p50": 304.6079911291599, + "p90": 320.2879913151264, + "p95": 326.24000310897827, + "p99": 343.1680053472519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 256.9600045681, + "p90": 269.50401067733765, + "p95": 274.4959890842438, + "p99": 283.4559977054596 + }, + "combine": { + "p50": 
58.94400179386139, + "p90": 62.84800171852112, + "p95": 64.64000046253204, + "p99": 72.09599763154984 + }, + "roundtrip": { + "p50": 292.57598519325256, + "p90": 305.4400086402893, + "p95": 309.1840147972107, + "p99": 316.864013671875 + }, + "isolatedSum": { + "p50": 315.90400636196136, + "p90": 332.35201239585876, + "p95": 339.1359895467758, + "p99": 355.55199533700943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 299.4239926338196, + "p90": 312.1599853038788, + "p95": 314.9760067462921, + "p99": 320.3519880771637 + }, + "combine": { + "p50": 101.15200281143188, + "p90": 103.87200117111206, + "p95": 105.69600015878677, + "p99": 111.87200248241425 + }, + "roundtrip": { + "p50": 385.24800539016724, + "p90": 396.5120017528534, + "p95": 399.9040126800537, + "p99": 409.6960127353668 + }, + "isolatedSum": { + "p50": 400.57599544525146, + "p90": 416.03198647499084, + "p95": 420.6720069050789, + "p99": 432.22399055957794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3c9d1339", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|prefill|normal|none|none|0|tuned||33484f7e5b87248", + "colorKey": "gb300_e29d658a", + "comparisonKey": "92f8b07dc0f411c1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:32.410108+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "33484f7e5b87248", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 255.96800446510315, + "p90": 269.3760097026825, + "p95": 272.7679908275604, + "p99": 279.7439992427826 + }, + "combine": { + "p50": 51.93600058555603, + "p90": 55.55199831724167, + "p95": 57.98399820923805, + "p99": 64.96000289916992 + }, + "roundtrip": { + "p50": 289.792001247406, + "p90": 302.17599868774414, + "p95": 305.34398555755615, + "p99": 
311.42398715019226 + }, + "isolatedSum": { + "p50": 307.9040050506592, + "p90": 324.92800801992416, + "p95": 330.7519890367985, + "p99": 344.7040021419525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 255.45600056648254, + "p90": 268.22400093078613, + "p95": 273.3440101146698, + "p99": 282.55999088287354 + }, + "combine": { + "p50": 56.32000043988228, + "p90": 59.487998485565186, + "p95": 60.864001512527466, + "p99": 68.4799998998642 + }, + "roundtrip": { + "p50": 292.7359938621521, + "p90": 304.9919903278351, + "p95": 307.48799443244934, + "p99": 313.08799982070923 + }, + "isolatedSum": { + "p50": 311.7760010063648, + "p90": 327.7119994163513, + "p95": 334.20801162719727, + "p99": 351.03999078273773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22020096, + "combineLogicalBytes": 22020096, + "fanoutMean": 1.5, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 259.68000292778015, + "p90": 290.0480031967163, + "p95": 306.2399923801422, + "p99": 323.96799325942993 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 90.7519981265068, + "p95": 107.00800269842148, + "p99": 122.04799801111221 + }, + "roundtrip": { + "p50": 306.14399909973145, + "p90": 318.9440071582794, + "p95": 323.199987411499, + "p99": 332.70400762557983 + }, + "isolatedSum": { + "p50": 326.9760012626648, + "p90": 380.8000013232231, + "p95": 413.2479950785637, + "p99": 446.01599127054214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44040192, + "combineLogicalBytes": 44040192, + "fanoutMean": 1.5, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 265.50400257110596, + "p90": 279.04000878334045, + "p95": 282.30398893356323, + "p99": 288.38399052619934 + }, + "combine": { + "p50": 87.36000210046768, + "p90": 90.81599861383438, + "p95": 92.32000261545181, + "p99": 99.93600100278854 + }, + "roundtrip": { + "p50": 340.2239978313446, + "p90": 351.1359989643097, + "p95": 354.8159897327423, + "p99": 366.2079870700836 + }, + "isolatedSum": { + "p50": 352.86400467157364, + "p90": 369.85600739717484, + "p95": 374.62399154901505, + "p99": 388.3199915289879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 88080384, + "combineLogicalBytes": 88080384, + "fanoutMean": 1.5, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 336.0320031642914, + "p90": 376.25598907470703, + "p95": 388.7679874897003, + "p99": 406.39999508857727 + }, + "combine": { + "p50": 142.87999272346497, + "p90": 177.5680035352707, + "p95": 184.7040057182312, + "p99": 195.13599574565887 + }, + "roundtrip": { + "p50": 470.14400362968445, + "p90": 501.08802318573, + "p95": 512.5759840011597, + "p99": 538.6880040168762 + }, + "isolatedSum": { + "p50": 478.91199588775635, + "p90": 553.8239926099777, + "p95": 573.4719932079315, + "p99": 601.5359908342361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 176160768, + "combineLogicalBytes": 176160768, + "fanoutMean": 1.5, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 423.7760007381439, + "p90": 436.47998571395874, + "p95": 440.19201397895813, + "p99": 445.6000030040741 + }, + "combine": { + "p50": 243.6479926109314, + "p90": 247.19999730587006, + "p95": 248.1279969215393, + "p99": 257.85601139068604 + 
}, + "roundtrip": { + "p50": 657.3439836502075, + "p90": 667.7119731903076, + "p95": 671.5199947357178, + "p99": 679.7760128974915 + }, + "isolatedSum": { + "p50": 667.4239933490753, + "p90": 683.6799830198288, + "p95": 688.3200109004974, + "p99": 703.4560143947601 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352321536, + "combineLogicalBytes": 352321536, + "fanoutMean": 1.5, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bcd88e01", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|prefill|normal|none|none|0|tuned||b8e52e92c6d3379", + "colorKey": "gb300_48a86946", + "comparisonKey": "a18e35c4ea27f2a2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:26.036585+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "b8e52e92c6d3379", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 297.12000489234924, + "p90": 340.7039940357208, + "p95": 349.5360016822815, + "p99": 365.664005279541 + }, + "combine": { + "p50": 59.328000992536545, + "p90": 81.66400343179703, + "p95": 95.93600034713745, + "p99": 103.29599678516388 + }, + "roundtrip": { + "p50": 336.8639945983887, + "p90": 392.4480080604553, + "p95": 400.31999349594116, + "p99": 427.45599150657654 + }, + "isolatedSum": { + "p50": 356.4480058848858, + "p90": 422.36799746751785, + "p95": 445.47200202941895, + "p99": 468.9600020647049 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 295.74400186538696, + "p90": 324.16000962257385, + "p95": 336.5760147571564, + "p99": 366.5280044078827 + }, + "combine": { + "p50": 68.7360018491745, + "p90": 88.639996945858, + "p95": 107.13600367307663, + "p99": 135.80800592899323 + }, + "roundtrip": { + "p50": 348.4799861907959, + "p90": 372.96000123023987, + "p95": 391.2000060081482, + "p99": 418.36801171302795 + }, + "isolatedSum": { + "p50": 364.48000371456146, + "p90": 412.80000656843185, + "p95": 
443.712018430233, + "p99": 502.3360103368759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53100544, + "combineLogicalBytes": 53100544, + "fanoutMean": 3.6171875, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 316.1599934101105, + "p90": 349.7599959373474, + "p95": 364.7040128707886, + "p99": 376.0319948196411 + }, + "combine": { + "p50": 90.52799642086029, + "p90": 146.55999839305878, + "p95": 150.43200552463531, + "p99": 156.22399747371674 + }, + "roundtrip": { + "p50": 377.1840035915375, + "p90": 416.4159893989563, + "p95": 427.3279905319214, + "p99": 451.4879882335663 + }, + "isolatedSum": { + "p50": 406.68798983097076, + "p90": 496.3199943304062, + "p95": 515.1360183954239, + "p99": 532.2559922933578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106373120, + "combineLogicalBytes": 106373120, + "fanoutMean": 3.623046875, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 328.5439908504486, + "p90": 375.5840063095093, + "p95": 386.2079977989197, + "p99": 401.856005191803 + }, + "combine": { + "p50": 135.48800349235535, + "p90": 178.39999496936798, + "p95": 188.31999599933624, + "p99": 193.9840018749237 + }, + "roundtrip": { + "p50": 441.98399782180786, + "p90": 481.6960096359253, + "p95": 500.06401538848877, + "p99": 509.2480182647705 + }, + "isolatedSum": { + "p50": 464.03199434280396, + "p90": 553.9840012788773, + "p95": 574.5279937982559, + "p99": 595.8400070667267 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + 
"dispatch": { + "p50": 432.96000361442566, + "p90": 464.2559885978699, + "p95": 477.08800435066223, + "p99": 498.9440143108368 + }, + "combine": { + "p50": 225.0239998102188, + "p90": 235.35999655723572, + "p95": 244.63999271392822, + "p99": 267.8399980068207 + }, + "roundtrip": { + "p50": 653.2480120658875, + "p90": 678.4639954566956, + "p95": 704.2880058288574, + "p99": 729.4080257415771 + }, + "isolatedSum": { + "p50": 657.9840034246445, + "p90": 699.6159851551056, + "p95": 721.7279970645905, + "p99": 766.7840123176575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423958528, + "combineLogicalBytes": 423958528, + "fanoutMean": 3.6099853515625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 597.2800254821777, + "p90": 634.6240043640137, + "p95": 647.4239826202393, + "p99": 663.2639765739441 + }, + "combine": { + "p50": 406.1119854450226, + "p90": 428.41601371765137, + "p95": 441.15200638771057, + "p99": 467.45601296424866 + }, + "roundtrip": { + "p50": 991.0399913787842, + "p90": 1024.448037147522, + "p95": 1039.8080348968506, + "p99": 1062.9119873046875 + }, + "isolatedSum": { + "p50": 1003.3920109272003, + "p90": 1063.040018081665, + "p95": 1088.5759890079498, + "p99": 1130.7199895381927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847988736, + "combineLogicalBytes": 847988736, + "fanoutMean": 3.61029052734375, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dcb675a2", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|prefill|normal|none|none|0|tuned||5f9878f45872329", + "colorKey": "gb300_419c8808", + "comparisonKey": "473af2ea4807ea75", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:28.610673+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "5f9878f45872329", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.358123779296875, + "eplbImbalanceAfter": 1.000026818477746, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 288.4800136089325, + "p90": 300.7360100746155, + "p95": 303.9360046386719, + "p99": 307.776004076004 + }, + "combine": { + "p50": 
56.832000613212585, + "p90": 60.095999389886856, + "p95": 62.97600269317627, + "p99": 70.04799693822861 + }, + "roundtrip": { + "p50": 329.0880024433136, + "p90": 339.6480083465576, + "p95": 344.0000116825104, + "p99": 350.9120047092438 + }, + "isolatedSum": { + "p50": 345.3120142221451, + "p90": 360.83200946450233, + "p95": 366.91200733184814, + "p99": 377.82400101423264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26664960, + "combineLogicalBytes": 26664960, + "fanoutMean": 3.6328125, + "recvTokensMax": 472, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 292.03200340270996, + "p90": 304.28799986839294, + "p95": 307.6159954071045, + "p99": 313.9199912548065 + }, + "combine": { + "p50": 64.60800021886826, + "p90": 67.84000247716904, + "p95": 69.24799829721451, + "p99": 74.23999905586243 + }, + "roundtrip": { + "p50": 344.57600116729736, + "p90": 357.5040102005005, + "p95": 360.9279990196228, + "p99": 371.5200126171112 + }, + "isolatedSum": { + "p50": 356.6400036215782, + "p90": 372.128002345562, + "p95": 376.863993704319, + "p99": 388.15999031066895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53143552, + "combineLogicalBytes": 53143552, + "fanoutMean": 3.6201171875, + "recvTokensMax": 946, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 296.9599962234497, + "p90": 309.59999561309814, + "p95": 312.3840093612671, + "p99": 319.16800141334534 + }, + "combine": { + "p50": 87.3280018568039, + "p90": 90.30400216579437, + "p95": 91.42400324344635, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 369.4399893283844, + "p90": 380.5440068244934, + "p95": 383.90401005744934, + "p99": 389.21600580215454 + }, + "isolatedSum": { + "p50": 384.2879980802536, + "p90": 399.9039977788925, + "p95": 403.80801260471344, + 
"p99": 414.94400054216385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106258432, + "combineLogicalBytes": 106258432, + "fanoutMean": 3.619140625, + "recvTokensMax": 1861, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 314.65598940849304, + "p90": 323.90400767326355, + "p95": 328.96000146865845, + "p99": 335.90400218963623 + }, + "combine": { + "p50": 133.82400572299957, + "p90": 136.4160031080246, + "p95": 137.472003698349, + "p99": 142.68800616264343 + }, + "roundtrip": { + "p50": 434.59200859069824, + "p90": 446.20800018310547, + "p95": 450.080007314682, + "p99": 459.74400639533997 + }, + "isolatedSum": { + "p50": 448.4799951314926, + "p90": 460.32001078128815, + "p95": 466.43200516700745, + "p99": 478.59200835227966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212645888, + "combineLogicalBytes": 212645888, + "fanoutMean": 3.621337890625, + "recvTokensMax": 3730, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 436.8959963321686, + "p90": 448.15999269485474, + "p95": 452.2239863872528, + "p99": 456.54401183128357 + }, + "combine": { + "p50": 227.07200050354004, + "p90": 230.68800568580627, + "p95": 232.86400735378265, + "p99": 239.58399891853333 + }, + "roundtrip": { + "p50": 648.8639712333679, + "p90": 657.5040221214294, + "p95": 661.4400148391724, + "p99": 667.136013507843 + }, + "isolatedSum": { + "p50": 663.9679968357086, + "p90": 678.847998380661, + "p95": 685.0879937410355, + "p99": 696.1280107498169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424775680, + "combineLogicalBytes": 424775680, + "fanoutMean": 3.616943359375, + "recvTokensMax": 7429, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + 
"dispatch": { + "p50": 590.5280113220215, + "p90": 599.9360084533691, + "p95": 604.4800281524658, + "p99": 609.8880171775818 + }, + "combine": { + "p50": 404.12798523902893, + "p90": 407.48798847198486, + "p95": 408.8959991931915, + "p99": 412.80001401901245 + }, + "roundtrip": { + "p50": 983.456015586853, + "p90": 992.0960068702698, + "p95": 994.2399859428406, + "p99": 1001.471996307373 + }, + "isolatedSum": { + "p50": 994.6559965610504, + "p90": 1007.423996925354, + "p95": 1013.3760273456573, + "p99": 1022.6880311965942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 848547840, + "combineLogicalBytes": 848547840, + "fanoutMean": 3.6126708984375, + "recvTokensMax": 14823, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3a1f51fd", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|prefill|normal|none|none|0|tuned||ed21345b2de53e0", + "colorKey": "gb300_ea2ca9d2", + "comparisonKey": "da4b40f8a13c6f05", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:00.125170+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ed21345b2de53e0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.003448486328125, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 288.5439991950989, + "p90": 300.8959889411926, + "p95": 304.3839931488037, + "p99": 309.4080090522766 + }, + "combine": { + "p50": 57.37600103020668, + "p90": 60.47999858856201, + "p95": 62.33600154519081, + "p99": 71.3919997215271 + }, + "roundtrip": { + "p50": 327.87200808525085, + "p90": 339.2319977283478, + "p95": 343.6799943447113, + "p99": 348.1599986553192 + }, + "isolatedSum": { + "p50": 345.92000022530556, + "p90": 361.37598752975464, + "p95": 366.7199946939945, + "p99": 380.8000087738037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 464, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 293.98399591445923, + "p90": 306.36799335479736, + "p95": 309.471994638443, + "p99": 313.85600566864014 + }, + "combine": { + "p50": 66.68800115585327, + "p90": 70.11199742555618, + "p95": 
72.03199714422226, + "p99": 78.5600021481514 + }, + "roundtrip": { + "p50": 343.392014503479, + "p90": 355.3600013256073, + "p95": 359.96800661087036, + "p99": 365.664005279541 + }, + "isolatedSum": { + "p50": 360.6719970703125, + "p90": 376.47999078035355, + "p95": 381.50399178266525, + "p99": 392.41600781679153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52570112, + "combineLogicalBytes": 52570112, + "fanoutMean": 3.5810546875, + "recvTokensMax": 924, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 297.7280020713806, + "p90": 311.8079900741577, + "p95": 316.44800305366516, + "p99": 320.8000063896179 + }, + "combine": { + "p50": 86.68799698352814, + "p90": 89.75999802350998, + "p95": 90.7519981265068, + "p99": 98.59199821949005 + }, + "roundtrip": { + "p50": 371.67999148368835, + "p90": 383.29601287841797, + "p95": 387.2640132904053, + "p99": 394.0480053424835 + }, + "isolatedSum": { + "p50": 384.41599905490875, + "p90": 401.5679880976677, + "p95": 407.20000118017197, + "p99": 419.39200460910797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105613312, + "combineLogicalBytes": 105613312, + "fanoutMean": 3.59716796875, + "recvTokensMax": 1860, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 316.0319924354553, + "p90": 327.0080089569092, + "p95": 331.87198638916016, + "p99": 336.2239897251129 + }, + "combine": { + "p50": 133.7279975414276, + "p90": 136.4160031080246, + "p95": 137.69599795341492, + "p99": 142.7839994430542 + }, + "roundtrip": { + "p50": 434.6559941768646, + "p90": 447.32800126075745, + "p95": 450.8480131626129, + "p99": 455.9679925441742 + }, + "isolatedSum": { + "p50": 449.75998997688293, + "p90": 463.4240120649338, + "p95": 469.5679843425751, + "p99": 479.0079891681671 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 211843072, + "combineLogicalBytes": 211843072, + "fanoutMean": 3.607666015625, + "recvTokensMax": 3714, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 435.2000057697296, + "p90": 447.6799964904785, + "p95": 450.4320025444031, + "p99": 458.8479995727539 + }, + "combine": { + "p50": 222.91199862957, + "p90": 226.30399465560913, + "p95": 228.12800109386444, + "p99": 234.81599986553192 + }, + "roundtrip": { + "p50": 648.5120058059692, + "p90": 658.4640145301819, + "p95": 661.7599725723267, + "p99": 668.8960194587708 + }, + "isolatedSum": { + "p50": 658.1120043992996, + "p90": 673.9839911460876, + "p95": 678.5600036382675, + "p99": 693.6639994382858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423198720, + "combineLogicalBytes": 423198720, + "fanoutMean": 3.603515625, + "recvTokensMax": 7400, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 594.8160290718079, + "p90": 605.8560013771057, + "p95": 609.4719767570496, + "p99": 616.8320178985596 + }, + "combine": { + "p50": 402.52798795700073, + "p90": 405.7280123233795, + "p95": 407.3919951915741, + "p99": 412.447988986969 + }, + "roundtrip": { + "p50": 984.8319888114929, + "p90": 991.5519952774048, + "p95": 995.2319860458374, + "p99": 1001.8240213394165 + }, + "isolatedSum": { + "p50": 997.3440170288086, + "p90": 1011.5840137004852, + "p95": 1016.8639719486237, + "p99": 1029.2800068855286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 846024704, + "combineLogicalBytes": 846024704, + "fanoutMean": 3.6019287109375, + "recvTokensMax": 14796, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b5e3bd61", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_5ad05e77", + "comparisonKey": "e99686765add1b4e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:19.135157+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 281.95199370384216, + "p90": 316.864013671875, + "p95": 333.98398756980896, + "p99": 345.2160060405731 + }, + "combine": { + "p50": 57.920001447200775, + "p90": 72.25599884986877, + "p95": 93.91999989748001, + "p99": 129.7599971294403 + }, + "roundtrip": { + "p50": 315.96800684928894, + "p90": 364.9280071258545, + "p95": 379.07201051712036, + "p99": 390.01598954200745 + }, + "isolatedSum": { + "p50": 339.87199515104294, + "p90": 389.1200125217438, + "p95": 427.90398746728897, + "p99": 474.9760031700134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 274.944007396698, + "p90": 310.68798899650574, + "p95": 319.16800141334534, + "p99": 346.68800234794617 + }, + "combine": { + "p50": 66.20799750089645, + "p90": 73.40800017118454, + "p95": 77.72800326347351, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 322.27200269699097, + "p90": 337.5360071659088, + "p95": 344.57600116729736, + "p99": 413.4080111980438 + }, + "isolatedSum": { + "p50": 341.15200489759445, + "p90": 384.0959891676903, + "p95": 396.89600467681885, + "p99": 436.8000030517578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 279.4240117073059, + "p90": 312.22400069236755, + "p95": 330.2080035209656, + "p99": 358.97600650787354 + }, + "combine": { + "p50": 83.74399691820145, + "p90": 97.85600006580353, + "p95": 127.10399925708771, + 
"p99": 150.04800260066986 + }, + "roundtrip": { + "p50": 344.38401460647583, + "p90": 363.0400002002716, + "p95": 386.33599877357483, + "p99": 408.57601165771484 + }, + "isolatedSum": { + "p50": 363.16800862550735, + "p90": 410.0800007581711, + "p95": 457.3120027780533, + "p99": 509.0240091085434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 305.9200048446655, + "p90": 328.73600721359253, + "p95": 336.92800998687744, + "p99": 371.2640106678009 + }, + "combine": { + "p50": 121.56800180673599, + "p90": 148.3840048313141, + "p95": 155.58399260044098, + "p99": 176.32000148296356 + }, + "roundtrip": { + "p50": 418.33600401878357, + "p90": 465.08800983428955, + "p95": 480.1279902458191, + "p99": 502.0800232887268 + }, + "isolatedSum": { + "p50": 427.4880066514015, + "p90": 477.1200120449066, + "p95": 492.5120025873184, + "p99": 547.5840121507645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 397.2800076007843, + "p90": 427.64800786972046, + "p95": 444.7360038757324, + "p99": 459.6160054206848 + }, + "combine": { + "p50": 199.96799528598785, + "p90": 217.72800385951996, + "p95": 231.10400140285492, + "p99": 251.52000784873962 + }, + "roundtrip": { + "p50": 602.4960279464722, + "p90": 630.6560039520264, + "p95": 640.8320069313049, + "p99": 661.6960167884827 + }, + "isolatedSum": { + "p50": 597.2480028867722, + "p90": 645.3760117292404, + "p95": 675.8400052785873, + "p99": 711.1360132694244 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 566.7200088500977, + "p90": 591.6159749031067, + "p95": 602.7839779853821, + "p99": 620.3200221061707 + }, + "combine": { + "p50": 356.79998993873596, + "p90": 371.5200126171112, + "p95": 388.7999951839447, + "p99": 402.3039937019348 + }, + "roundtrip": { + "p50": 934.5600008964539, + "p90": 966.048002243042, + "p95": 975.8719801902771, + "p99": 1006.5280199050903 + }, + "isolatedSum": { + "p50": 923.5199987888336, + "p90": 963.1359875202179, + "p95": 991.5839731693268, + "p99": 1022.6240158081055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-423a554f", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|prefill|normal|none|none|0|tuned||25840dd8241ba10", + "colorKey": "gb300_c8260f45", + "comparisonKey": "522fec255c43829d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:19:52.225747+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": 
"zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "25840dd8241ba10", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 266.6879892349243, + "p90": 280.09599447250366, + "p95": 283.4559977054596, + "p99": 292.09598898887634 + }, + "combine": { + "p50": 52.928000688552856, + "p90": 56.28800019621849, + "p95": 59.36000123620033, + "p99": 67.96800345182419 + }, + "roundtrip": { + "p50": 298.0799973011017, + "p90": 311.7760121822357, + "p95": 314.9760067462921, + "p99": 324.0959942340851 + }, + "isolatedSum": { + "p50": 319.6159899234772, + "p90": 336.38399466872215, + "p95": 342.8159989416599, + "p99": 360.06399244070053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, 
+ "globalTokens": 1024, + "dispatch": { + "p50": 263.839989900589, + "p90": 277.50399708747864, + "p95": 280.7680070400238, + "p99": 290.52799940109253 + }, + "combine": { + "p50": 61.503998935222626, + "p90": 64.86400216817856, + "p95": 67.9360032081604, + "p99": 76.83199644088745 + }, + "roundtrip": { + "p50": 300.927996635437, + "p90": 314.1759932041168, + "p95": 318.30400228500366, + "p99": 324.44798946380615 + }, + "isolatedSum": { + "p50": 325.3439888358116, + "p90": 342.3679992556572, + "p95": 348.7040102481842, + "p99": 367.35999584198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18335744, + "combineLogicalBytes": 18335744, + "fanoutMean": 1.2490234375, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 269.8560059070587, + "p90": 283.55199098587036, + "p95": 287.07200288772583, + "p99": 294.46399211883545 + }, + "combine": { + "p50": 77.88799703121185, + "p90": 81.53600245714188, + "p95": 84.06399935483932, + "p99": 89.66399729251862 + }, + "roundtrip": { + "p50": 329.53599095344543, + "p90": 340.86400270462036, + "p95": 344.63998675346375, + "p99": 350.7840037345886 + }, + "isolatedSum": { + "p50": 347.74400293827057, + "p90": 365.08799344301224, + "p95": 371.13600224256516, + "p99": 384.12798941135406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36900864, + "combineLogicalBytes": 36900864, + "fanoutMean": 1.2568359375, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 305.5360019207001, + "p90": 321.696013212204, + "p95": 324.5759904384613, + "p99": 338.6240005493164 + }, + "combine": { + "p50": 115.39199948310852, + "p90": 118.46400052309036, + "p95": 119.39200013875961, + "p99": 124.38400089740753 + }, + "roundtrip": { + "p50": 415.2640104293823, + "p90": 
426.91200971603394, + "p95": 430.1759898662567, + "p99": 438.27199935913086 + }, + "isolatedSum": { + "p50": 420.9280014038086, + "p90": 440.16001373529434, + "p95": 443.9679905772209, + "p99": 463.00800144672394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73730048, + "combineLogicalBytes": 73730048, + "fanoutMean": 1.255615234375, + "recvTokensMax": 4096, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 382.78400897979736, + "p90": 394.75199580192566, + "p95": 399.77601170539856, + "p99": 408.9600145816803 + }, + "combine": { + "p50": 188.960000872612, + "p90": 192.60799884796143, + "p95": 194.33599710464478, + "p99": 197.7279931306839 + }, + "roundtrip": { + "p50": 580.735981464386, + "p90": 591.3919806480408, + "p95": 594.8799848556519, + "p99": 600.4480123519897 + }, + "isolatedSum": { + "p50": 571.7440098524094, + "p90": 587.3599946498871, + "p95": 594.1120088100433, + "p99": 606.6880077123642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147746816, + "combineLogicalBytes": 147746816, + "fanoutMean": 1.258056640625, + "recvTokensMax": 8192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 558.6559772491455, + "p90": 570.7200169563293, + "p95": 574.4320154190063, + "p99": 821.3120102882385 + }, + "combine": { + "p50": 340.35199880599976, + "p90": 343.6160087585449, + "p95": 344.83200311660767, + "p99": 349.5360016822815 + }, + "roundtrip": { + "p50": 906.9120287895203, + "p90": 917.8879857063293, + "p95": 920.6079840660095, + "p99": 925.1840114593506 + }, + "isolatedSum": { + "p50": 899.0079760551453, + "p90": 914.3360257148743, + "p95": 919.264018535614, + "p99": 1170.84801197052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295278592, + "combineLogicalBytes": 295278592, + "fanoutMean": 
1.25714111328125, + "recvTokensMax": 16384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1e08d205", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|prefill|normal|none|none|0|tuned||cabb28c468fd7cf", + "colorKey": "gb300_6ce4cab9", + "comparisonKey": "18fa845a7fad33f3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:54.029725+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cabb28c468fd7cf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86505126953125, + "eplbImbalanceAfter": 1.0000149681454613, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 284.5439910888672, + "p90": 297.3119914531708, + "p95": 300.4480004310608, + "p99": 305.6640028953552 + }, + "combine": { + "p50": 58.62399935722351, + "p90": 61.72800064086914, + "p95": 63.519999384880066, + "p99": 68.44799965620041 + }, + "roundtrip": { + "p50": 326.24000310897827, + "p90": 337.0560109615326, + "p95": 341.21599793434143, + "p99": 346.560001373291 + }, + "isolatedSum": { + "p50": 343.1679904460907, + "p90": 359.0399920940399, + "p95": 363.96799981594086, + "p99": 374.11200255155563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25862144, + "combineLogicalBytes": 25862144, + "fanoutMean": 3.5234375, + "recvTokensMax": 457, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 287.9999876022339, + "p90": 300.7679879665375, + "p95": 304.25599217414856, + "p99": 309.59999561309814 + }, + "combine": { + "p50": 66.20799750089645, + "p90": 69.34399902820587, + "p95": 71.23199850320816, + "p99": 79.52000200748444 + }, + "roundtrip": { + "p50": 338.75200152397156, + "p90": 350.3040075302124, + "p95": 353.5040020942688, + "p99": 358.3039939403534 + }, + "isolatedSum": { + "p50": 354.20798510313034, + "p90": 370.11198699474335, + "p95": 375.4879906773567, + "p99": 389.1199976205826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 51509248, + "combineLogicalBytes": 51509248, + "fanoutMean": 3.5087890625, + "recvTokensMax": 914, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 293.08798909187317, + "p90": 305.31200766563416, + "p95": 308.76800417900085, + "p99": 315.42399525642395 + }, + "combine": { + "p50": 87.00799942016602, + "p90": 90.4960036277771, + "p95": 91.80799871683121, + "p99": 98.81599992513657 + }, + "roundtrip": { + "p50": 364.47998881340027, + "p90": 375.8719861507416, + "p95": 379.2960047721863, + "p99": 385.98400354385376 + }, + "isolatedSum": { + "p50": 380.0959885120392, + "p90": 395.80801129341125, + "p95": 400.57600289583206, + "p99": 414.2399951815605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 102688768, + "combineLogicalBytes": 102688768, + "fanoutMean": 3.49755859375, + "recvTokensMax": 1817, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 309.1199994087219, + "p90": 316.70400500297546, + "p95": 320.2880024909973, + "p99": 331.0079872608185 + }, + "combine": { + "p50": 131.96800649166107, + "p90": 135.13599336147308, + "p95": 136.4160031080246, + "p99": 143.48800480365753 + }, + "roundtrip": { + "p50": 427.8079867362976, + "p90": 439.9999976158142, + "p95": 443.90401244163513, + "p99": 450.23998618125916 + }, + "isolatedSum": { + "p50": 441.088005900383, + "p90": 451.83999836444855, + "p95": 456.7040055990219, + "p99": 474.495992064476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 205520896, + "combineLogicalBytes": 205520896, + "fanoutMean": 3.5, + "recvTokensMax": 3657, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 426.84799432754517, + "p90": 438.4320080280304, + "p95": 442.4000084400177, + "p99": 447.29599356651306 + }, + "combine": { + "p50": 223.13599288463593, + "p90": 226.52800381183624, + "p95": 228.0000001192093, + "p99": 233.66400599479675 + }, + "roundtrip": { + "p50": 
638.5599970817566, + "p90": 648.5120058059692, + "p95": 651.5200138092041, + "p99": 659.3279838562012 + }, + "isolatedSum": { + "p50": 649.9839872121811, + "p90": 664.9600118398666, + "p95": 670.400008559227, + "p99": 680.9599995613098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 412016640, + "combineLogicalBytes": 412016640, + "fanoutMean": 3.50830078125, + "recvTokensMax": 7329, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 578.9120197296143, + "p90": 589.0560150146484, + "p95": 592.736005783081, + "p99": 602.4320125579834 + }, + "combine": { + "p50": 394.463986158371, + "p90": 397.95199036598206, + "p95": 399.4239866733551, + "p99": 402.5920033454895 + }, + "roundtrip": { + "p50": 965.4719829559326, + "p90": 972.5120067596436, + "p95": 976.5440225601196, + "p99": 1002.303957939148 + }, + "isolatedSum": { + "p50": 973.3760058879852, + "p90": 987.0080053806305, + "p95": 992.1599924564362, + "p99": 1005.0240159034729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 824119296, + "combineLogicalBytes": 824119296, + "fanoutMean": 3.5086669921875, + "recvTokensMax": 14713, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-89916baa", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|prefill|normal|none|none|0|tuned||370c8dd16f08e2c", + "colorKey": "gb300_4b074890", + "comparisonKey": "55e2dc9a9be798c8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:55.436058+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "370c8dd16f08e2c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 278.84799242019653, + "p90": 291.20001196861267, + "p95": 293.92001032829285, + "p99": 301.2160062789917 + }, + "combine": { + "p50": 56.223999708890915, + "p90": 59.039998799562454, + "p95": 59.99999865889549, + "p99": 64.35199826955795 + }, + "roundtrip": { + "p50": 318.56000423431396, + "p90": 329.21600341796875, + "p95": 332.89599418640137, + "p99": 337.2479975223541 + }, + "isolatedSum": { + "p50": 335.07199212908745, + "p90": 350.2400107681751, + "p95": 353.92000898718834, + "p99": 
365.56800454854965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 507, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 280.44798970222473, + "p90": 291.83998703956604, + "p95": 296.671986579895, + "p99": 301.503986120224 + }, + "combine": { + "p50": 65.63200056552887, + "p90": 68.57600063085556, + "p95": 70.0799971818924, + "p99": 77.18399912118912 + }, + "roundtrip": { + "p50": 329.98400926589966, + "p90": 341.5679931640625, + "p95": 345.984011888504, + "p99": 352.9280126094818 + }, + "isolatedSum": { + "p50": 346.0799902677536, + "p90": 360.4159876704216, + "p95": 366.7519837617874, + "p99": 378.6879852414131 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49057792, + "combineLogicalBytes": 49057792, + "fanoutMean": 3.341796875, + "recvTokensMax": 1018, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 287.07200288772583, + "p90": 299.00801181793213, + "p95": 303.1359910964966, + "p99": 311.42398715019226 + }, + "combine": { + "p50": 84.16000008583069, + "p90": 87.16800063848495, + "p95": 88.83199840784073, + "p99": 94.78399902582169 + }, + "roundtrip": { + "p50": 355.3920090198517, + "p90": 367.39200353622437, + "p95": 370.62400579452515, + "p99": 375.328004360199 + }, + "isolatedSum": { + "p50": 371.2320029735565, + "p90": 386.1760124564171, + "p95": 391.9679895043373, + "p99": 406.20798617601395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 98344960, + "combineLogicalBytes": 98344960, + "fanoutMean": 3.349609375, + "recvTokensMax": 2039, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 
305.82401156425476, + "p90": 319.9999928474426, + "p95": 322.6880133152008, + "p99": 328.19199562072754 + }, + "combine": { + "p50": 124.51200187206268, + "p90": 127.42400169372559, + "p95": 128.28800082206726, + "p99": 133.85599851608276 + }, + "roundtrip": { + "p50": 417.05599427223206, + "p90": 428.73600125312805, + "p95": 431.90398812294006, + "p99": 435.7439875602722 + }, + "isolatedSum": { + "p50": 430.33601343631744, + "p90": 447.4239945411682, + "p95": 450.97601413726807, + "p99": 462.0479941368103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 196704256, + "combineLogicalBytes": 196704256, + "fanoutMean": 3.349853515625, + "recvTokensMax": 4074, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 415.48800468444824, + "p90": 426.11199617385864, + "p95": 430.62400817871094, + "p99": 435.263991355896 + }, + "combine": { + "p50": 213.3760005235672, + "p90": 216.25599265098572, + "p95": 217.24799275398254, + "p99": 220.8320051431656 + }, + "roundtrip": { + "p50": 625.2800226211548, + "p90": 635.5839967727661, + "p95": 639.0720009803772, + "p99": 643.6799764633179 + }, + "isolatedSum": { + "p50": 628.8640052080154, + "p90": 642.3679888248444, + "p95": 647.8720009326935, + "p99": 656.0959964990616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 393351168, + "combineLogicalBytes": 393351168, + "fanoutMean": 3.349365234375, + "recvTokensMax": 8147, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 578.4000158309937, + "p90": 589.0880227088928, + "p95": 592.0000076293945, + "p99": 599.8079776763916 + }, + "combine": { + "p50": 379.96798753738403, + "p90": 383.67998600006104, + "p95": 384.6080005168915, + "p99": 387.07199692726135 + }, + "roundtrip": { + "p50": 961.4080190658569, + "p90": 973.7280011177063, + "p95": 
975.6159782409668, + "p99": 980.1920056343079 + }, + "isolatedSum": { + "p50": 958.3680033683777, + "p90": 972.7680087089539, + "p95": 976.608008146286, + "p99": 986.879974603653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 785469440, + "combineLogicalBytes": 785469440, + "fanoutMean": 3.3441162109375, + "recvTokensMax": 16298, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-38349e6d", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|prefill|normal|none|none|0|tuned||624fdceae193d94", + "colorKey": "gb300_ea52f89e", + "comparisonKey": "699ff291d5ae1e3a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:13.265170+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + 
"traceSignature": "624fdceae193d94", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.888397216796875, + "eplbImbalanceAfter": 1.00013427734375, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 283.6480140686035, + "p90": 295.9679961204529, + "p95": 300.5119860172272, + "p99": 305.85598945617676 + }, + "combine": { + "p50": 58.27200040221214, + "p90": 61.40799820423126, + "p95": 63.231997191905975, + "p99": 69.40799951553345 + }, + "roundtrip": { + "p50": 324.5759904384613, + "p90": 335.999995470047, + "p95": 340.4479920864105, + "p99": 345.95200419425964 + }, + "isolatedSum": { + "p50": 341.92001447081566, + "p90": 357.37599432468414, + "p95": 363.74398320913315, + "p99": 375.2639889717102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 463, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 286.52799129486084, + "p90": 298.5279858112335, + "p95": 302.2080063819885, + "p99": 307.23199248313904 + }, + "combine": { + "p50": 63.74400109052658, + "p90": 67.391999065876, + "p95": 68.67200136184692, + "p99": 75.3600001335144 + }, + "roundtrip": { + "p50": 334.9440097808838, + "p90": 345.44000029563904, + "p95": 349.2479920387268, + "p99": 354.17601466178894 + }, + "isolatedSum": { + "p50": 350.2719923853874, + "p90": 365.9199848771095, + "p95": 370.88000774383545, + "p99": 382.59199261665344 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 52441088, + "combineLogicalBytes": 52441088, + "fanoutMean": 3.572265625, + "recvTokensMax": 924, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 293.63200068473816, + "p90": 305.63199520111084, + "p95": 309.4399869441986, + "p99": 314.5599961280823 + }, + "combine": { + "p50": 88.32000195980072, + "p90": 91.61599725484848, + "p95": 92.96000003814697, + "p99": 99.39199686050415 + }, + "roundtrip": { + "p50": 365.7599985599518, + "p90": 377.56800651550293, + "p95": 381.1199963092804, + "p99": 388.09600472450256 + }, + "isolatedSum": { + "p50": 381.9520026445389, + "p90": 397.2479924559593, + "p95": 402.3999869823456, + "p99": 413.9519929885864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105670656, + "combineLogicalBytes": 105670656, + "fanoutMean": 3.59912109375, + "recvTokensMax": 1859, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 310.36800146102905, + "p90": 318.33600997924805, + "p95": 323.96799325942993, + "p99": 329.3440043926239 + }, + "combine": { + "p50": 133.53599607944489, + "p90": 136.6720050573349, + "p95": 137.88799941539764, + "p99": 141.37600362300873 + }, + "roundtrip": { + "p50": 430.4639995098114, + "p90": 441.3119852542877, + "p95": 445.0879991054535, + "p99": 453.40800285339355 + }, + "isolatedSum": { + "p50": 443.90399754047394, + "p90": 455.00801503658295, + "p95": 461.8559926748276, + "p99": 470.72000801563263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211527680, + "combineLogicalBytes": 211527680, + "fanoutMean": 3.602294921875, + "recvTokensMax": 3708, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 428.76800894737244, 
+ "p90": 439.8399889469147, + "p95": 444.60800290107727, + "p99": 449.3120014667511 + }, + "combine": { + "p50": 225.79200565814972, + "p90": 229.24800217151642, + "p95": 231.07199370861053, + "p99": 236.64000630378723 + }, + "roundtrip": { + "p50": 641.0560011863708, + "p90": 646.943986415863, + "p95": 649.5680212974548, + "p99": 657.9840183258057 + }, + "isolatedSum": { + "p50": 654.5600146055222, + "p90": 669.0879911184311, + "p95": 675.6799966096878, + "p99": 685.9520077705383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423284736, + "combineLogicalBytes": 423284736, + "fanoutMean": 3.604248046875, + "recvTokensMax": 7436, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 587.1040225028992, + "p90": 597.055971622467, + "p95": 600.5759835243225, + "p99": 604.095995426178 + }, + "combine": { + "p50": 401.7280042171478, + "p90": 404.83200550079346, + "p95": 405.63198924064636, + "p99": 410.2399945259094 + }, + "roundtrip": { + "p50": 974.2079973220825, + "p90": 979.5519709587097, + "p95": 981.1840057373047, + "p99": 985.1199984550476 + }, + "isolatedSum": { + "p50": 988.832026720047, + "p90": 1001.8879771232605, + "p95": 1006.2079727649689, + "p99": 1014.3359899520874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847745024, + "combineLogicalBytes": 847745024, + "fanoutMean": 3.6092529296875, + "recvTokensMax": 14866, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7f2f6cbf", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_0ce440d7", + "comparisonKey": "4268b76f9575f83a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:02.541422+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + 
"sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 267.93599128723145, + "p90": 279.87200021743774, + "p95": 283.9359939098358, + "p99": 290.6239926815033 + }, + "combine": { + "p50": 55.39200082421303, + "p90": 58.75200033187866, + "p95": 61.344001442193985, + "p99": 67.45599955320358 + 
}, + "roundtrip": { + "p50": 306.14399909973145, + "p90": 314.94399905204773, + "p95": 319.93600726127625, + "p99": 326.81599259376526 + }, + "isolatedSum": { + "p50": 323.3279921114445, + "p90": 338.6240005493164, + "p95": 345.2799953520298, + "p99": 358.0799922347069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 269.8560059070587, + "p90": 282.24000334739685, + "p95": 285.8879864215851, + "p99": 288.7040078639984 + }, + "combine": { + "p50": 64.15999680757523, + "p90": 67.29599833488464, + "p95": 69.24799829721451, + "p99": 74.97599720954895 + }, + "roundtrip": { + "p50": 317.02399253845215, + "p90": 327.39201188087463, + "p95": 332.2559893131256, + "p99": 339.9040102958679 + }, + "isolatedSum": { + "p50": 334.01600271463394, + "p90": 349.5360016822815, + "p95": 355.1359847187996, + "p99": 363.68000507354736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 273.75999093055725, + "p90": 286.1120104789734, + "p95": 290.43200612068176, + "p99": 296.09599709510803 + }, + "combine": { + "p50": 82.14399963617325, + "p90": 85.4720026254654, + "p95": 86.7839977145195, + "p99": 93.12000125646591 + }, + "roundtrip": { + "p50": 340.2239978313446, + "p90": 351.52000188827515, + "p95": 356.4479947090149, + "p99": 366.14400148391724 + }, + "isolatedSum": { + "p50": 355.9039905667305, + "p90": 371.5840131044388, + "p95": 377.21600383520126, + "p99": 389.21599835157394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + 
"combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 298.3680069446564, + "p90": 312.063992023468, + "p95": 315.10400772094727, + "p99": 318.9440071582794 + }, + "combine": { + "p50": 116.28799885511398, + "p90": 119.32799965143204, + "p95": 120.28799951076508, + "p99": 125.08800625801086 + }, + "roundtrip": { + "p50": 413.567990064621, + "p90": 423.8719940185547, + "p95": 428.0000030994415, + "p99": 434.688001871109 + }, + "isolatedSum": { + "p50": 414.65600579977036, + "p90": 431.39199167490005, + "p95": 435.39200723171234, + "p99": 444.0320134162903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 389.631986618042, + "p90": 401.12000703811646, + "p95": 405.08800745010376, + "p99": 410.7840061187744 + }, + "combine": { + "p50": 196.8960016965866, + "p90": 199.68000054359436, + "p95": 200.8640021085739, + "p99": 205.82400262355804 + }, + "roundtrip": { + "p50": 591.3599729537964, + "p90": 601.9200086593628, + "p95": 607.0079803466797, + "p99": 613.6639714241028 + }, + "isolatedSum": { + "p50": 586.5279883146286, + "p90": 600.8000075817108, + "p95": 605.9520095586777, + "p99": 616.6080087423325 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 554.751992225647, + "p90": 565.5679702758789, + "p95": 569.2800283432007, + "p99": 
575.1680135726929 + }, + "combine": { + "p50": 353.2800078392029, + "p90": 356.57599568367004, + "p95": 357.8239977359772, + "p99": 360.22400856018066 + }, + "roundtrip": { + "p50": 923.520028591156, + "p90": 934.4959855079651, + "p95": 938.1120204925537, + "p99": 942.8160190582275 + }, + "isolatedSum": { + "p50": 908.0320000648499, + "p90": 922.143965959549, + "p95": 927.1040260791779, + "p99": 935.3920221328735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63d08df3", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_4d1c5d27", + "comparisonKey": "8e070c5743b7508f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:20.194651+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + 
"resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 289.92000222206116, + "p90": 303.3919930458069, + "p95": 306.65600299835205, + "p99": 312.25600838661194 + }, + "combine": { + "p50": 59.20000001788139, + "p90": 62.72000074386597, + "p95": 64.2239972949028, + "p99": 71.19999825954437 + }, + "roundtrip": { + "p50": 330.81600069999695, + "p90": 343.26401352882385, + "p95": 346.9119966030121, + "p99": 354.20799255371094 + }, + "isolatedSum": { + "p50": 349.12000223994255, + "p90": 366.11199378967285, + "p95": 370.88000029325485, + "p99": 383.4560066461563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 463, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 291.3280129432678, + "p90": 304.064005613327, + "p95": 307.23199248313904, + "p99": 315.5840039253235 + }, + "combine": { + "p50": 70.11199742555618, + "p90": 73.40800017118454, + "p95": 74.65600222349167, + "p99": 80.25600016117096 + }, + "roundtrip": { + 
"p50": 341.69599413871765, + "p90": 353.7279963493347, + "p95": 356.51201009750366, + "p99": 365.31201004981995 + }, + "isolatedSum": { + "p50": 361.440010368824, + "p90": 377.47200578451157, + "p95": 381.8879947066307, + "p99": 395.84000408649445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 918, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 294.9120104312897, + "p90": 307.8399896621704, + "p95": 311.42398715019226, + "p99": 316.2879943847656 + }, + "combine": { + "p50": 87.00799942016602, + "p90": 90.2400016784668, + "p95": 91.80799871683121, + "p99": 97.28000313043594 + }, + "roundtrip": { + "p50": 370.9760010242462, + "p90": 382.04801082611084, + "p95": 385.3760063648224, + "p99": 393.47198605537415 + }, + "isolatedSum": { + "p50": 381.9200098514557, + "p90": 398.0799913406372, + "p95": 403.23198586702347, + "p99": 413.56799751520157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 1841, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 308.9280128479004, + "p90": 315.93599915504456, + "p95": 318.1439936161041, + "p99": 326.04798674583435 + }, + "combine": { + "p50": 133.53599607944489, + "p90": 136.83199882507324, + "p95": 137.92000710964203, + "p99": 140.8960074186325 + }, + "roundtrip": { + "p50": 432.2879910469055, + "p90": 445.3119933605194, + "p95": 449.72801208496094, + "p99": 453.5999894142151 + }, + "isolatedSum": { + "p50": 442.4640089273453, + "p90": 452.7679979801178, + "p95": 456.06400072574615, + "p99": 466.94399416446686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + 
"combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 3712, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 433.6960017681122, + "p90": 445.6639885902405, + "p95": 450.0479996204376, + "p99": 457.12000131607056 + }, + "combine": { + "p50": 227.39200294017792, + "p90": 231.36000335216522, + "p95": 232.70399868488312, + "p99": 235.4239970445633 + }, + "roundtrip": { + "p50": 644.5760130882263, + "p90": 653.3759832382202, + "p95": 657.9200029373169, + "p99": 665.2160286903381 + }, + "isolatedSum": { + "p50": 661.0880047082901, + "p90": 677.0239919424057, + "p95": 682.7519983053207, + "p99": 692.5439983606339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 7407, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 584.4159722328186, + "p90": 595.9039926528931, + "p95": 599.839985370636, + "p99": 606.6240072250366 + }, + "combine": { + "p50": 405.63198924064636, + "p90": 409.7279906272888, + "p95": 411.42401099205017, + "p99": 416.703999042511 + }, + "roundtrip": { + "p50": 978.272020816803, + "p90": 985.0239753723145, + "p95": 988.2879853248596, + "p99": 994.0800070762634 + }, + "isolatedSum": { + "p50": 990.047961473465, + "p90": 1005.6319832801819, + "p95": 1011.2639963626862, + "p99": 1023.3280062675476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 14839, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-183ad9d3", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_08625a47", + "comparisonKey": "067134fb9ff6e7b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:05.991199+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 285.63201427459717, + "p90": 297.34399914741516, + "p95": 301.1839985847473, + "p99": 307.20001459121704 + }, + "combine": { + "p50": 57.34400078654289, + "p90": 60.80000102519989, + "p95": 62.72000074386597, + "p99": 71.3919997215271 + }, + "roundtrip": { + "p50": 327.87200808525085, + "p90": 339.4559919834137, + "p95": 343.423992395401, + "p99": 348.25599193573 + }, + "isolatedSum": { + "p50": 342.97601506114006, + "p90": 358.14400017261505, + "p95": 363.9039993286133, + "p99": 378.59201431274414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 463, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 289.5680069923401, + "p90": 301.82400345802307, + "p95": 304.6720027923584, + "p99": 313.4399950504303 + }, + "combine": { + "p50": 67.55200028419495, + "p90": 71.3919997215271, + "p95": 73.18399846553802, + "p99": 78.97599786520004 + }, + "roundtrip": { + "p50": 339.4879996776581, + "p90": 350.5280017852783, + "p95": 353.5360097885132, + "p99": 360.25598645210266 + }, + "isolatedSum": { + "p50": 357.12000727653503, + "p90": 373.21600317955017, + "p95": 377.8560012578964, + "p99": 392.41599291563034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 918, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 295.23199796676636, + "p90": 308.0640137195587, + "p95": 310.94399094581604, + "p99": 320.3839957714081 + }, + "combine": { + "p50": 88.03199976682663, + "p90": 91.0400003194809, + 
"p95": 92.19200164079666, + "p99": 98.84800016880035 + }, + "roundtrip": { + "p50": 368.2880103588104, + "p90": 380.5440068244934, + "p95": 383.87200236320496, + "p99": 388.5439932346344 + }, + "isolatedSum": { + "p50": 383.263997733593, + "p90": 399.1040140390396, + "p95": 403.1359925866127, + "p99": 419.23199594020844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 1841, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 309.5040023326874, + "p90": 317.6960051059723, + "p95": 320.3519880771637, + "p99": 329.6639919281006 + }, + "combine": { + "p50": 134.5279961824417, + "p90": 137.472003698349, + "p95": 138.2720023393631, + "p99": 144.03200149536133 + }, + "roundtrip": { + "p50": 430.4639995098114, + "p90": 442.49600172042847, + "p95": 446.5920031070709, + "p99": 454.17600870132446 + }, + "isolatedSum": { + "p50": 444.0319985151291, + "p90": 455.1680088043213, + "p95": 458.6239904165268, + "p99": 473.6959934234619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 3712, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 432.0639967918396, + "p90": 442.9120123386383, + "p95": 445.79198956489563, + "p99": 453.11999320983887 + }, + "combine": { + "p50": 225.69599747657776, + "p90": 229.2799949645996, + "p95": 230.84799945354462, + "p99": 237.40799725055695 + }, + "roundtrip": { + "p50": 643.455982208252, + "p90": 652.5120139122009, + "p95": 657.3119759559631, + "p99": 665.727972984314 + }, + "isolatedSum": { + "p50": 657.7599942684174, + "p90": 672.1920073032379, + "p95": 676.6399890184402, + "p99": 690.5279904603958 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 7407, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 582.0800065994263, + "p90": 592.5120115280151, + "p95": 596.6079831123352, + "p99": 609.0239882469177 + }, + "combine": { + "p50": 403.5840034484863, + "p90": 407.77599811553955, + "p95": 409.66400504112244, + "p99": 418.4960126876831 + }, + "roundtrip": { + "p50": 975.4239916801453, + "p90": 981.4720153808594, + "p95": 984.2240214347839, + "p99": 1030.0159454345703 + }, + "isolatedSum": { + "p50": 985.6640100479126, + "p90": 1000.2880096435547, + "p95": 1006.2719881534576, + "p99": 1027.5200009346008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 14839, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1eaf8051", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_74aea6d0", + "comparisonKey": "3b6badf2c3387fc6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:51.432601+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + 
"routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 372.6400136947632, + "p90": 402.6240110397339, + "p95": 416.159987449646, + "p99": 436.2240135669708 + }, + "combine": { + "p50": 69.56800073385239, + "p90": 89.91999924182892, + "p95": 102.65599936246872, + "p99": 118.27199906110764 + }, + "roundtrip": { + "p50": 421.02399468421936, + "p90": 453.40800285339355, + "p95": 466.3040041923523, + "p99": 486.7520034313202 + }, + "isolatedSum": { + "p50": 442.20801442861557, + "p90": 492.5440102815628, + "p95": 518.8159868121147, + "p99": 554.4960126280785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 370.6879913806915, + "p90": 393.3440148830414, + "p95": 404.32000160217285, + "p99": 428.0959963798523 + }, + "combine": { + "p50": 86.496002972126, + "p90": 124.32000041007996, + "p95": 141.4719969034195, + "p99": 149.08799529075623 + }, + "roundtrip": { + "p50": 441.9200122356415, + "p90": 473.1839895248413, + "p95": 484.95998978614807, + "p99": 507.07197189331055 + }, + "isolatedSum": { + "p50": 457.18399435281754, + "p90": 517.6640152931213, + "p95": 545.7919985055923, + "p99": 577.1839916706085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 379.8080086708069, + "p90": 400.4800021648407, + "p95": 408.03200006484985, + "p99": 420.8320081233978 + }, + "combine": { + "p50": 122.079998254776, + "p90": 137.15200126171112, + "p95": 144.19199526309967, + "p99": 172.57599532604218 + }, + "roundtrip": { + "p50": 486.6879880428314, + "p90": 509.15199518203735, + "p95": 519.6160078048706, + "p99": 551.9999861717224 + }, + "isolatedSum": { + "p50": 501.8880069255829, + "p90": 537.6320034265518, + "p95": 552.2239953279495, + "p99": 593.40800344944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 435.9680116176605, + "p90": 467.26399660110474, + "p95": 482.9759895801544, + "p99": 708.2239985466003 + }, + "combine": { + "p50": 198.59200716018677, + "p90": 236.28799617290497, + "p95": 258.62398743629456, + "p99": 6563.87186050415 + }, + "roundtrip": { + "p50": 
617.8240180015564, + "p90": 653.9520025253296, + "p95": 672.864019870758, + "p99": 707.0720195770264 + }, + "isolatedSum": { + "p50": 634.5600187778473, + "p90": 703.5519927740097, + "p95": 741.599977016449, + "p99": 7272.095859050751 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 603.3599972724915, + "p90": 620.4479932785034, + "p95": 624.5759725570679, + "p99": 633.5999965667725 + }, + "combine": { + "p50": 330.1439881324768, + "p90": 351.8719971179962, + "p95": 364.9600148200989, + "p99": 387.2320055961609 + }, + "roundtrip": { + "p50": 927.7439713478088, + "p90": 945.3120231628418, + "p95": 950.5599737167358, + "p99": 968.8959717750549 + }, + "isolatedSum": { + "p50": 933.5039854049683, + "p90": 972.3199903964996, + "p95": 989.5359873771667, + "p99": 1020.8320021629333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 875.935971736908, + "p90": 906.65602684021, + "p95": 922.9440093040466, + "p99": 948.5120177268982 + }, + "combine": { + "p50": 608.0639958381653, + "p90": 626.3039708137512, + "p95": 643.2639956474304, + "p99": 652.0959734916687 + }, + "roundtrip": { + "p50": 1470.8479642868042, + "p90": 1514.2719745635986, + "p95": 1534.3040227890015, + "p99": 1554.1119575500488 + }, + "isolatedSum": { + "p50": 1483.9999675750732, + "p90": 1532.9599976539612, + "p95": 1566.208004951477, + "p99": 1600.607991218567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 
2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e7318983", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||03799dfc4e73d7f", + "colorKey": "gb300_753a1ca8", + "comparisonKey": "494ae35c8a0335a3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:20:35.978165+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03799dfc4e73d7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 376.8959939479828, + "p90": 389.50398564338684, + "p95": 394.6239948272705, + "p99": 403.9359986782074 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 86.14400029182434, + "p95": 89.59999680519104, + "p99": 96.00000083446503 + }, + "roundtrip": { + "p50": 433.9199960231781, + "p90": 447.29599356651306, + "p95": 450.9119987487793, + "p99": 461.2480103969574 + }, + "isolatedSum": { + "p50": 457.5999975204468, + "p90": 475.6479859352112, + "p95": 484.22399163246155, + "p99": 499.9359995126724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 380.3519904613495, + "p90": 393.0239975452423, + "p95": 397.3119854927063, + "p99": 408.1279933452606 + }, + "combine": { + "p50": 102.52799838781357, + "p90": 108.86400192975998, + "p95": 111.7440015077591, + "p99": 118.81600320339203 + }, + "roundtrip": { + "p50": 461.8239998817444, + "p90": 474.68799352645874, + "p95": 478.4319996833801, + "p99": 492.8320050239563 + }, + "isolatedSum": { + "p50": 482.87998884916306, + "p90": 501.8879994750023, + "p95": 509.0559870004654, + "p99": 526.9439965486526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 389.8240029811859, + "p90": 
403.3919870853424, + "p95": 407.6800048351288, + "p99": 416.0960018634796 + }, + "combine": { + "p50": 155.68000078201294, + "p90": 161.15200519561768, + "p95": 163.42400014400482, + "p99": 173.63199591636658 + }, + "roundtrip": { + "p50": 526.8800258636475, + "p90": 539.903998374939, + "p95": 545.0559854507446, + "p99": 591.3280248641968 + }, + "isolatedSum": { + "p50": 545.5040037631989, + "p90": 564.5439922809601, + "p95": 571.1040049791336, + "p99": 589.7279977798462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 486.4319860935211, + "p90": 499.1360008716583, + "p95": 502.6879906654358, + "p99": 511.74402236938477 + }, + "combine": { + "p50": 263.10399174690247, + "p90": 267.520010471344, + "p95": 269.76001262664795, + "p99": 274.1439938545227 + }, + "roundtrip": { + "p50": 731.3600182533264, + "p90": 742.9440021514893, + "p95": 746.7520236968994, + "p99": 758.624017238617 + }, + "isolatedSum": { + "p50": 749.5359778404236, + "p90": 766.6560113430023, + "p95": 772.4480032920837, + "p99": 785.8880162239075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 733.6320281028748, + "p90": 746.0160255432129, + "p95": 749.9840259552002, + "p99": 760.7679963111877 + }, + "combine": { + "p50": 469.2800045013428, + "p90": 474.43199157714844, + "p95": 477.1200120449066, + "p99": 483.5200011730194 + }, + "roundtrip": { + "p50": 1186.1120462417603, + "p90": 1197.8559494018555, + "p95": 1201.3119459152222, + "p99": 1211.4239931106567 + }, + "isolatedSum": { 
+ "p50": 1202.9120326042175, + "p90": 1220.4480171203613, + "p95": 1227.1040380001068, + "p99": 1244.2879974842072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1169.4400310516357, + "p90": 1180.799961090088, + "p95": 1186.4639520645142, + "p99": 1191.872000694275 + }, + "combine": { + "p50": 882.2720050811768, + "p90": 886.9760036468506, + "p95": 888.2240056991577, + "p99": 894.3359851837158 + }, + "roundtrip": { + "p50": 2035.1040363311768, + "p90": 2048.6080646514893, + "p95": 2055.488109588623, + "p99": 2080.22403717041 + }, + "isolatedSum": { + "p50": 2051.7120361328125, + "p90": 2067.7759647369385, + "p95": 2074.687957763672, + "p99": 2086.2079858779907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-33bd25f2", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c", + "colorKey": "gb300_793c150b", + "comparisonKey": "ecdfa6b06eb68abe", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:08.725741+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · 
balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7f1ea4cf569d12c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 281.18398785591125, + "p90": 296.1280047893524, + "p95": 300.31999945640564, + "p99": 320.0640082359314 + }, + "combine": { + "p50": 54.75199967622757, + "p90": 61.503998935222626, + "p95": 65.79200178384781, + "p99": 70.39999961853027 + }, + "roundtrip": { + "p50": 317.53599643707275, + "p90": 331.61601424217224, + "p95": 336.38399839401245, + "p99": 348.35198521614075 + }, + "isolatedSum": { + "p50": 335.9359875321388, + "p90": 357.63200372457504, + "p95": 366.11200124025345, + "p99": 390.46400785446167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + 
"combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 282.5919985771179, + "p90": 301.0239899158478, + "p95": 313.9199912548065, + "p99": 7155.488014221191 + }, + "combine": { + "p50": 64.92800265550613, + "p90": 70.27199864387512, + "p95": 73.08799773454666, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 319.68000531196594, + "p90": 333.44000577926636, + "p95": 337.98399567604065, + "p99": 355.19999265670776 + }, + "isolatedSum": { + "p50": 347.52000123262405, + "p90": 371.2959885597229, + "p95": 387.0079889893532, + "p99": 7235.232017934322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 326.27201080322266, + "p90": 338.8479948043823, + "p95": 342.49600768089294, + "p99": 349.7599959373474 + }, + "combine": { + "p50": 107.4879989027977, + "p90": 112.19199746847153, + "p95": 114.14399743080139, + "p99": 123.07199835777283 + }, + "roundtrip": { + "p50": 417.91999340057373, + "p90": 430.04798889160156, + "p95": 434.4319999217987, + "p99": 450.27199387550354 + }, + "isolatedSum": { + "p50": 433.76000970602036, + "p90": 451.03999227285385, + "p95": 456.64000511169434, + "p99": 472.83199429512024 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56f59270", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405", + 
"colorKey": "gb300_d99d6f06", + "comparisonKey": "fbb9ef4238106fbf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:13.603943+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7ac30b0a39b1405", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
276.2559950351715, + "p90": 292.64000058174133, + "p95": 310.33599376678467, + "p99": 349.63199496269226 + }, + "combine": { + "p50": 58.49599838256836, + "p90": 67.74400174617767, + "p95": 74.68800246715546, + "p99": 101.15200281143188 + }, + "roundtrip": { + "p50": 312.063992023468, + "p90": 333.9200019836426, + "p95": 345.12001276016235, + "p99": 391.32800698280334 + }, + "isolatedSum": { + "p50": 334.75199341773987, + "p90": 360.384002327919, + "p95": 385.0239962339401, + "p99": 450.78399777412415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 278.1760096549988, + "p90": 304.1920065879822, + "p95": 317.6319897174835, + "p99": 340.256005525589 + }, + "combine": { + "p50": 64.80000168085098, + "p90": 81.85599744319916, + "p95": 96.76799923181534, + "p99": 108.67200046777725 + }, + "roundtrip": { + "p50": 320.0959861278534, + "p90": 340.7999873161316, + "p95": 357.85600543022156, + "p99": 401.69599652290344 + }, + "isolatedSum": { + "p50": 342.97601133584976, + "p90": 386.04800403118134, + "p95": 414.39998894929886, + "p99": 448.92800599336624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 277.47198939323425, + "p90": 290.3040051460266, + "p95": 294.5280075073242, + "p99": 324.12800192832947 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 85.9839990735054, + "p95": 90.40000289678574, + "p99": 94.55999732017517 + }, + "roundtrip": { + "p50": 338.75200152397156, + "p90": 350.6560027599335, + "p95": 353.983998298645, + "p99": 360.73601245880127 
+ }, + "isolatedSum": { + "p50": 355.8719903230667, + "p90": 376.288004219532, + "p95": 384.92801040410995, + "p99": 418.68799924850464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 308.351993560791, + "p90": 319.5840120315552, + "p95": 325.79201459884644, + "p99": 336.67200803756714 + }, + "combine": { + "p50": 112.22399771213531, + "p90": 117.08799749612808, + "p95": 120.95999717712402, + "p99": 127.29600071907043 + }, + "roundtrip": { + "p50": 404.7999978065491, + "p90": 418.36801171302795, + "p95": 421.27999663352966, + "p99": 430.33599853515625 + }, + "isolatedSum": { + "p50": 420.57599127292633, + "p90": 436.67200952768326, + "p95": 446.75201177597046, + "p99": 463.9680087566376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 400.92799067497253, + "p90": 422.1760034561157, + "p95": 433.4399998188019, + "p99": 465.9520089626312 + }, + "combine": { + "p50": 186.24000251293182, + "p90": 205.37599921226501, + "p95": 226.59200429916382, + "p99": 245.53599953651428 + }, + "roundtrip": { + "p50": 573.1840133666992, + "p90": 586.624026298523, + "p95": 597.760021686554, + "p99": 633.8559985160828 + }, + "isolatedSum": { + "p50": 587.1679931879044, + "p90": 627.5520026683807, + "p95": 660.0320041179657, + "p99": 711.4880084991455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 560.1279735565186, + "p90": 569.6319937705994, + "p95": 571.9360113143921, + "p99": 578.0159831047058 + }, + "combine": { + "p50": 321.1199939250946, + "p90": 326.880007982254, + "p95": 337.7920091152191, + "p99": 349.8559892177582 + }, + "roundtrip": { + "p50": 872.3199963569641, + "p90": 886.5600228309631, + "p95": 892.5439715385437, + "p99": 908.8960289955139 + }, + "isolatedSum": { + "p50": 881.2479674816132, + "p90": 896.5120017528534, + "p95": 909.7280204296112, + "p99": 927.871972322464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-121d5e23", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||e3707ddc343088b", + "colorKey": "gb300_59d99632", + "comparisonKey": "358cec5373be3026", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:54.619652+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": 
"none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e3707ddc343088b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 349.69601035118103, + "p90": 371.5519905090332, + "p95": 379.4879913330078, + "p99": 396.5759873390198 + }, + "combine": { + "p50": 76.4480009675026, + "p90": 81.85599744319916, + "p95": 85.40800213813782, + "p99": 95.61599791049957 + }, + "roundtrip": { + "p50": 401.2160003185272, + "p90": 424.22398924827576, + "p95": 430.1759898662567, + "p99": 442.9759979248047 + }, + "isolatedSum": { + "p50": 426.1440113186836, + "p90": 453.40798795223236, + "p95": 464.89599347114563, + "p99": 492.19198524951935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 361.7919981479645, + "p90": 382.3679983615875, + "p95": 389.18399810791016, + "p99": 399.616003036499 + }, + "combine": { + "p50": 94.94400024414062, 
+ "p90": 101.08800232410431, + "p95": 104.92800176143646, + "p99": 119.52000111341476 + }, + "roundtrip": { + "p50": 435.9680116176605, + "p90": 457.0879936218262, + "p95": 462.94400095939636, + "p99": 479.3280065059662 + }, + "isolatedSum": { + "p50": 456.7359983921051, + "p90": 483.45600068569183, + "p95": 494.1119998693466, + "p99": 519.1360041499138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 353.0240058898926, + "p90": 370.65601348876953, + "p95": 377.0560026168823, + "p99": 390.8799886703491 + }, + "combine": { + "p50": 133.7919980287552, + "p90": 139.23199474811554, + "p95": 141.95199310779572, + "p99": 159.7760021686554 + }, + "roundtrip": { + "p50": 481.5039932727814, + "p90": 500, + "p95": 505.66399097442627, + "p99": 536.0320210456848 + }, + "isolatedSum": { + "p50": 486.81600391864777, + "p90": 509.88800823688507, + "p95": 519.007995724678, + "p99": 550.6559908390045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 437.3440146446228, + "p90": 457.66401290893555, + "p95": 463.8719856739044, + "p99": 495.03999948501587 + }, + "combine": { + "p50": 228.92799973487854, + "p90": 234.30399596691132, + "p95": 237.0239943265915, + "p99": 242.49599874019623 + }, + "roundtrip": { + "p50": 670.8800196647644, + "p90": 692.7040219306946, + "p95": 697.8560090065002, + "p99": 710.752010345459 + }, + "isolatedSum": { + "p50": 666.2720143795013, + "p90": 691.9680088758469, + "p95": 700.8959800004959, + "p99": 737.5359982252121 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 623.2320070266724, + "p90": 650.3040194511414, + "p95": 657.8879952430725, + "p99": 690.3039813041687 + }, + "combine": { + "p50": 408.06400775909424, + "p90": 411.8399918079376, + "p95": 413.567990064621, + "p99": 423.64799976348877 + }, + "roundtrip": { + "p50": 1045.1840162277222, + "p90": 1075.551986694336, + "p95": 1080.9919834136963, + "p99": 1093.0240154266357 + }, + "isolatedSum": { + "p50": 1031.2960147857666, + "p90": 1062.144011259079, + "p95": 1071.4559853076935, + "p99": 1113.9519810676575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1009.8880529403687, + "p90": 1019.9040174484253, + "p95": 1024.5440006256104, + "p99": 1033.4399938583374 + }, + "combine": { + "p50": 769.5040106773376, + "p90": 774.0799784660339, + "p95": 775.5839824676514, + "p99": 781.3439965248108 + }, + "roundtrip": { + "p50": 1811.0719919204712, + "p90": 1829.4399976730347, + "p95": 1833.3760499954224, + "p99": 1848.479986190796 + }, + "isolatedSum": { + "p50": 1779.3920636177063, + "p90": 1793.9839959144592, + "p95": 1800.1279830932617, + "p99": 1814.7839903831482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-25768e76", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add", + "colorKey": "gb300_b2554bbc", + "comparisonKey": "538832b5b720f482", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:08.158294+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6248b19ef786add", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 371.0080087184906, + "p90": 399.4239866733551, + "p95": 405.91999888420105, + "p99": 417.1839952468872 + }, + "combine": { + "p50": 74.36800003051758, + "p90": 80.22399991750717, + "p95": 82.84799754619598, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 419.77599263191223, + "p90": 452.7359902858734, + "p95": 459.1679871082306, + "p99": 472.9920029640198 + }, + "isolatedSum": { + "p50": 445.3760087490082, + "p90": 479.6479865908623, + "p95": 488.76799643039703, + "p99": 507.29599595069885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 381.02400302886963, + "p90": 400.41598677635193, + "p95": 406.8480134010315, + "p99": 415.48800468444824 + }, + "combine": { + "p50": 87.10400015115738, + "p90": 93.21600198745728, + "p95": 96.3200032711029, + "p99": 104.5759990811348 + }, + "roundtrip": { + "p50": 454.17600870132446, + "p90": 475.39201378822327, + "p95": 479.64799404144287, + "p99": 491.07199907302856 + }, + "isolatedSum": { + "p50": 468.128003180027, + "p90": 493.6319887638092, + "p95": 503.1680166721344, + "p99": 520.064003765583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 383.2640051841736, + "p90": 398.6560106277466, + "p95": 403.872013092041, + "p99": 
416.3520038127899 + }, + "combine": { + "p50": 119.10399794578552, + "p90": 124.70400333404541, + "p95": 128.00000607967377, + "p99": 133.69600474834442 + }, + "roundtrip": { + "p50": 488.44799399375916, + "p90": 503.4559965133667, + "p95": 506.97600841522217, + "p99": 520.9919810295105 + }, + "isolatedSum": { + "p50": 502.3680031299591, + "p90": 523.360013961792, + "p95": 531.8720191717148, + "p99": 550.0480085611343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 442.9439902305603, + "p90": 459.7119987010956, + "p95": 464.57600593566895, + "p99": 477.75998711586 + }, + "combine": { + "p50": 196.1279958486557, + "p90": 201.1840045452118, + "p95": 202.81599462032318, + "p99": 208.064004778862 + }, + "roundtrip": { + "p50": 626.2400150299072, + "p90": 644.4479823112488, + "p95": 648.4479904174805, + "p99": 660.2879762649536 + }, + "isolatedSum": { + "p50": 639.071986079216, + "p90": 660.8960032463074, + "p95": 667.3920005559921, + "p99": 685.823991894722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 595.2960252761841, + "p90": 632.0319771766663, + "p95": 635.968029499054, + "p99": 646.6879844665527 + }, + "combine": { + "p50": 333.3120048046112, + "p90": 338.9120101928711, + "p95": 341.3439989089966, + "p99": 347.80800342559814 + }, + "roundtrip": { + "p50": 922.2400188446045, + "p90": 956.4800262451172, + "p95": 960.9599709510803, + "p99": 969.1200256347656 + }, + "isolatedSum": { + "p50": 928.6080300807953, + "p90": 
970.9439873695374, + "p95": 977.3120284080505, + "p99": 994.4959878921509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 883.1679821014404, + "p90": 899.8399972915649, + "p95": 903.1040072441101, + "p99": 908.8320136070251 + }, + "combine": { + "p50": 604.3519973754883, + "p90": 608.735978603363, + "p95": 610.8480095863342, + "p99": 617.2800064086914 + }, + "roundtrip": { + "p50": 1474.33602809906, + "p90": 1490.3680086135864, + "p95": 1495.0079917907715, + "p99": 1516.800045967102 + }, + "isolatedSum": { + "p50": 1487.5199794769287, + "p90": 1508.575975894928, + "p95": 1513.9520168304443, + "p99": 1526.1120200157166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a181a208", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286", + "colorKey": "gb300_166aaf5e", + "comparisonKey": "2fe41e5710cbe486", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:23:35.689202+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": 
"DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "291e5ce62735286", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 360.83200573921204, + "p90": 384.799987077713, + "p95": 397.0879912376404, + "p99": 431.5840005874634 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 87.48800307512283, + "p95": 99.48799759149551, + "p99": 125.69600343704224 + }, + "roundtrip": { + "p50": 412.60799765586853, + "p90": 451.32800936698914, + "p95": 469.9839949607849, + "p99": 480.54400086402893 + }, + "isolatedSum": { + "p50": 429.53600734472275, + "p90": 472.28799015283585, + "p95": 496.5759888291359, + "p99": 557.2800040245056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + 
"fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 360.1920008659363, + "p90": 383.4879994392395, + "p95": 390.81600308418274, + "p99": 412.3840034008026 + }, + "combine": { + "p50": 82.75199681520462, + "p90": 89.24800157546997, + "p95": 92.67199784517288, + "p99": 101.40799731016159 + }, + "roundtrip": { + "p50": 431.2959909439087, + "p90": 451.4879882335663, + "p95": 459.80799198150635, + "p99": 480.25599122047424 + }, + "isolatedSum": { + "p50": 442.9439976811409, + "p90": 472.7360010147095, + "p95": 483.4880009293556, + "p99": 513.7920007109642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 371.4880049228668, + "p90": 395.9679901599884, + "p95": 404.57600355148315, + "p99": 430.33599853515625 + }, + "combine": { + "p50": 118.1119978427887, + "p90": 131.80799782276154, + "p95": 145.02400159835815, + "p99": 158.07999670505524 + }, + "roundtrip": { + "p50": 480.80000281333923, + "p90": 505.5040121078491, + "p95": 513.1840109825134, + "p99": 538.6559963226318 + }, + "isolatedSum": { + "p50": 489.6000027656555, + "p90": 527.7759879827499, + "p95": 549.6000051498413, + "p99": 588.4159952402115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 425.6640076637268, + "p90": 448.4480023384094, + "p95": 458.2720100879669, + "p99": 502.3679733276367 + }, + "combine": { + 
"p50": 193.7279999256134, + "p90": 201.56799256801605, + "p95": 208.8640034198761, + "p99": 249.59999322891235 + }, + "roundtrip": { + "p50": 609.3760132789612, + "p90": 639.3600106239319, + "p95": 646.0480093955994, + "p99": 667.2959923744202 + }, + "isolatedSum": { + "p50": 619.3920075893402, + "p90": 650.0159949064255, + "p95": 667.136013507843, + "p99": 751.9679665565491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 596.0959792137146, + "p90": 617.792010307312, + "p95": 624.671995639801, + "p99": 642.0159935951233 + }, + "combine": { + "p50": 328.8320004940033, + "p90": 334.49599146842957, + "p95": 337.15200424194336, + "p99": 362.9760146141052 + }, + "roundtrip": { + "p50": 916.8639779090881, + "p90": 938.4639859199524, + "p95": 944.159984588623, + "p99": 970.2399969100952 + }, + "isolatedSum": { + "p50": 924.9279797077179, + "p90": 952.2880017757416, + "p95": 961.8239998817444, + "p99": 1004.9920082092285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 883.4559917449951, + "p90": 909.3760251998901, + "p95": 917.9199934005737, + "p99": 952.4160027503967 + }, + "combine": { + "p50": 606.2080264091492, + "p90": 611.4559769630432, + "p95": 613.5680079460144, + "p99": 617.5040006637573 + }, + "roundtrip": { + "p50": 1472.864031791687, + "p90": 1493.3760166168213, + "p95": 1497.6320266723633, + "p99": 1512.384057044983 + }, + "isolatedSum": { + "p50": 1489.6640181541443, + "p90": 1520.8320021629333, + "p95": 
1531.4880013465881, + "p99": 1569.920003414154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b7fb4fac", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_fb8b2593", + "comparisonKey": "dfd73f04125c4cf4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:36.117969+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 308.896005153656, + "p90": 328.70399951934814, + "p95": 350.0800132751465, + "p99": 419.0399944782257 + }, + "combine": { + "p50": 75.23199915885925, + "p90": 80.79999685287476, + "p95": 85.88799834251404, + "p99": 134.17600095272064 + }, + "roundtrip": { + "p50": 359.51998829841614, + "p90": 374.208003282547, + "p95": 381.8559944629669, + "p99": 455.80801367759705 + }, + "isolatedSum": { + "p50": 384.12800431251526, + "p90": 409.5039963722229, + "p95": 435.9680116176605, + "p99": 553.2159954309464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 307.42400884628296, + "p90": 324.6079981327057, + "p95": 334.56000685691833, + "p99": 410.68801283836365 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 96.76799923181534, + "p95": 101.34399682283401, + "p99": 139.80799913406372 + }, + "roundtrip": { + "p50": 384.19198989868164, + "p90": 397.8559970855713, + "p95": 403.48801016807556, + "p99": 469.88800168037415 + }, + "isolatedSum": { + "p50": 397.7600112557411, + "p90": 421.375997364521, + "p95": 435.90400367975235, + "p99": 550.4960119724274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 332.96000957489014, + "p90": 347.29599952697754, + "p95": 355.80798983573914, + "p99": 405.63198924064636 + }, + "combine": { + "p50": 131.6159963607788, + "p90": 137.43999600410461, + "p95": 140.99200069904327, + "p99": 166.1120057106018 + }, + "roundtrip": { + "p50": 460.00000834465027, + "p90": 475.39201378822327, + "p95": 482.62399435043335, + "p99": 559.0720176696777 + }, + "isolatedSum": { + "p50": 464.57600593566895, + "p90": 484.73599553108215, + "p95": 496.7999905347824, + "p99": 571.7439949512482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 421.2160110473633, + "p90": 434.4319999217987, + "p95": 442.2079920768738, + "p99": 526.8160104751587 + }, + "combine": { + "p50": 222.6559966802597, + "p90": 226.97600722312927, + "p95": 232.5119972229004, + "p99": 266.7520046234131 + }, + "roundtrip": { + "p50": 655.3599834442139, + "p90": 667.743980884552, + "p95": 672.9599833488464, + "p99": 749.6960163116455 + }, + "isolatedSum": { + "p50": 643.872007727623, + "p90": 661.408007144928, + "p95": 674.7199892997742, + "p99": 793.5680150985718 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 609.4719767570496, + "p90": 625.823974609375, + "p95": 646.8480229377747, + "p99": 721.2160229682922 + }, + "combine": { + "p50": 393.7920033931732, + "p90": 397.63200283050537, + "p95": 399.1680145263672, + 
"p99": 413.9840006828308 + }, + "roundtrip": { + "p50": 1018.5920000076294, + "p90": 1031.8399667739868, + "p95": 1048.7040281295776, + "p99": 1110.368013381958 + }, + "isolatedSum": { + "p50": 1003.2639801502228, + "p90": 1023.4559774398804, + "p95": 1046.0160374641418, + "p99": 1135.200023651123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1000.2239942550659, + "p90": 1012.0639801025391, + "p95": 1018.3999538421631, + "p99": 1079.7439813613892 + }, + "combine": { + "p50": 744.2880272865295, + "p90": 749.0559816360474, + "p95": 750.2080202102661, + "p99": 756.3520073890686 + }, + "roundtrip": { + "p50": 1755.2640438079834, + "p90": 1769.0240144729614, + "p95": 1775.7439613342285, + "p99": 1859.0079545974731 + }, + "isolatedSum": { + "p50": 1744.5120215415955, + "p90": 1761.1199617385864, + "p95": 1768.6079740524292, + "p99": 1836.0959887504578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-020ea054", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||8183e404f63b100", + "colorKey": "gb300_dca7bfa9", + "comparisonKey": "2173a1958cd92198", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:12.789063+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8183e404f63b100", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 285.12001037597656, + "p90": 303.16799879074097, + "p95": 314.59200382232666, + "p99": 355.48800230026245 + }, + "combine": { + "p50": 69.023996591568, + "p90": 97.95200079679489, + "p95": 113.24799805879593, + "p99": 139.615997672081 + }, + "roundtrip": { + "p50": 330.3999900817871, + "p90": 355.103999376297, + "p95": 376.22401118278503, + "p99": 419.48801279067993 + }, + "isolatedSum": { + "p50": 
354.14400696754456, + "p90": 401.11999958753586, + "p95": 427.8400018811226, + "p99": 495.10399997234344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 288.9600098133087, + "p90": 310.11199951171875, + "p95": 323.168009519577, + "p99": 351.77600383758545 + }, + "combine": { + "p50": 85.60000360012054, + "p90": 91.16800129413605, + "p95": 98.75199943780899, + "p99": 118.97599697113037 + }, + "roundtrip": { + "p50": 357.2160005569458, + "p90": 377.4400055408478, + "p95": 393.3440148830414, + "p99": 434.7519874572754 + }, + "isolatedSum": { + "p50": 374.56001341342926, + "p90": 401.2800008058548, + "p95": 421.920008957386, + "p99": 470.7520008087158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 330.55999875068665, + "p90": 352.03200578689575, + "p95": 368.0320084095001, + "p99": 395.04000544548035 + }, + "combine": { + "p50": 125.59999525547028, + "p90": 134.17600095272064, + "p95": 146.7519998550415, + "p99": 176.41599476337433 + }, + "roundtrip": { + "p50": 451.9039988517761, + "p90": 473.2159972190857, + "p95": 482.87999629974365, + "p99": 521.6320157051086 + }, + "isolatedSum": { + "p50": 456.1599940061569, + "p90": 486.2080067396164, + "p95": 514.7840082645416, + "p99": 571.4560002088547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 410.68801283836365, + "p90": 428.25600504875183, + "p95": 446.49600982666016, + "p99": 485.5999946594238 + }, + "combine": { + "p50": 211.90400421619415, + "p90": 225.2800017595291, + "p95": 231.64799809455872, + "p99": 242.01600253582 + }, + "roundtrip": { + "p50": 638.2399797439575, + "p90": 659.775972366333, + "p95": 679.8719763755798, + "p99": 706.4639925956726 + }, + "isolatedSum": { + "p50": 622.5920170545578, + "p90": 653.536006808281, + "p95": 678.1440079212189, + "p99": 727.6159971952438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 603.7120223045349, + "p90": 619.7119951248169, + "p95": 625.3119707107544, + "p99": 640.6400203704834 + }, + "combine": { + "p50": 377.56800651550293, + "p90": 384.8319947719574, + "p95": 389.21600580215454, + "p99": 396.5759873390198 + }, + "roundtrip": { + "p50": 1002.3360252380371, + "p90": 1028.0319452285767, + "p95": 1042.2719717025757, + "p99": 1065.8559799194336 + }, + "isolatedSum": { + "p50": 981.2800288200378, + "p90": 1004.5439898967743, + "p95": 1014.5279765129089, + "p99": 1037.2160077095032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 992.6400184631348, + "p90": 1004.7359466552734, + "p95": 1008.895993232727, + "p99": 1017.3759460449219 + }, + "combine": { + "p50": 711.1039757728577, + "p90": 715.2000069618225, + "p95": 716.7999744415283, + "p99": 722.432017326355 + }, + 
"roundtrip": { + "p50": 1718.5920476913452, + "p90": 1740.0319576263428, + "p95": 1760.5760097503662, + "p99": 1785.5679988861084 + }, + "isolatedSum": { + "p50": 1703.7439942359924, + "p90": 1719.935953617096, + "p95": 1725.6959676742554, + "p99": 1739.8079633712769 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dde4b067", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72", + "colorKey": "gb300_ae0bd665", + "comparisonKey": "60bfffa411bb785f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:24.239545+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0e6b07a25691d72", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 372.44799733161926, + "p90": 398.9120125770569, + "p95": 406.1119854450226, + "p99": 417.1839952468872 + }, + "combine": { + "p50": 72.15999811887741, + "p90": 78.91199737787247, + "p95": 82.46400207281113, + "p99": 95.77599912881851 + }, + "roundtrip": { + "p50": 424.3519902229309, + "p90": 452.8000056743622, + "p95": 457.5999975204468, + "p99": 475.8400022983551 + }, + "isolatedSum": { + "p50": 444.6079954504967, + "p90": 477.82400995492935, + "p95": 488.5759875178337, + "p99": 512.9599943757057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 380.47999143600464, + "p90": 404.1920006275177, + "p95": 411.9360148906708, + "p99": 436.6399943828583 + }, + "combine": { + "p50": 87.61599659919739, + "p90": 94.65599805116653, + "p95": 97.9200005531311, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 453.7599980831146, + "p90": 482.62399435043335, + "p95": 488.2879853248596, + "p99": 500.35202503204346 + }, + "isolatedSum": { + "p50": 468.095988035202, 
+ "p90": 498.84799867868423, + "p95": 509.8560154438019, + "p99": 539.8399978876114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 380.2880048751831, + "p90": 396.479994058609, + "p95": 399.9040126800537, + "p99": 412.992000579834 + }, + "combine": { + "p50": 120.95999717712402, + "p90": 126.8479973077774, + "p95": 131.29599392414093, + "p99": 138.3039951324463 + }, + "roundtrip": { + "p50": 487.67998814582825, + "p90": 506.4319968223572, + "p95": 512.0319724082947, + "p99": 524.511992931366 + }, + "isolatedSum": { + "p50": 501.24800205230713, + "p90": 523.3279913663864, + "p95": 531.2000066041946, + "p99": 551.2959957122803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 444.41598653793335, + "p90": 463.6160135269165, + "p95": 470.20798921585083, + "p99": 486.04801297187805 + }, + "combine": { + "p50": 196.3520050048828, + "p90": 203.87199521064758, + "p95": 206.33600652217865, + "p99": 210.11200547218323 + }, + "roundtrip": { + "p50": 628.1920075416565, + "p90": 655.2320122718811, + "p95": 662.4959707260132, + "p99": 672.544002532959 + }, + "isolatedSum": { + "p50": 640.7679915428162, + "p90": 667.4880087375641, + "p95": 676.5439957380295, + "p99": 696.1600184440613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 611.5840077400208, + "p90": 640.4799818992615, + "p95": 646.943986415863, + "p99": 654.7200083732605 + }, + "combine": { + "p50": 336.7359936237335, + "p90": 343.23200583457947, + "p95": 345.2480137348175, + "p99": 352.1920144557953 + }, + "roundtrip": { + "p50": 935.9359741210938, + "p90": 972.8959798812866, + "p95": 978.335976600647, + "p99": 993.6959743499756 + }, + "isolatedSum": { + "p50": 948.3200013637543, + "p90": 983.7119877338409, + "p95": 992.1920001506805, + "p99": 1006.9120228290558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 903.5199880599976, + "p90": 917.7600145339966, + "p95": 921.9840168952942, + "p99": 934.6240162849426 + }, + "combine": { + "p50": 618.8480257987976, + "p90": 623.4560012817383, + "p95": 624.9279975891113, + "p99": 629.3439865112305 + }, + "roundtrip": { + "p50": 1515.9679651260376, + "p90": 1534.559965133667, + "p95": 1539.6480560302734, + "p99": 1554.7840595245361 + }, + "isolatedSum": { + "p50": 1522.3680138587952, + "p90": 1541.2160158157349, + "p95": 1546.9120144844055, + "p99": 1563.968002796173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-faa78cef", + "identity": "gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7", + "colorKey": "gb300_fc0eaec4", + "comparisonKey": "80121ce7c03e2b09", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:25:57.650590+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a39eeb7c2dc6ca7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 369.6640133857727, + "p90": 388.0000114440918, + "p95": 393.312007188797, + "p99": 399.04001355171204 + }, + "combine": { + "p50": 75.16799867153168, + "p90": 
82.20800012350082, + "p95": 87.52000331878662, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 425.8880019187927, + "p90": 442.4319863319397, + "p95": 447.1679925918579, + "p99": 454.17600870132446 + }, + "isolatedSum": { + "p50": 444.8320120573044, + "p90": 470.2080115675926, + "p95": 480.8320105075836, + "p99": 495.7440122961998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 373.82400035858154, + "p90": 391.03999733924866, + "p95": 396.7039883136749, + "p99": 405.56800365448 + }, + "combine": { + "p50": 95.13600170612335, + "p90": 102.04800218343735, + "p95": 104.99200224876404, + "p99": 114.27199840545654 + }, + "roundtrip": { + "p50": 448.7999975681305, + "p90": 465.6960070133209, + "p95": 469.5360064506531, + "p99": 478.5600006580353 + }, + "isolatedSum": { + "p50": 468.9600020647049, + "p90": 493.087999522686, + "p95": 501.69599056243896, + "p99": 519.8400020599365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 378.464013338089, + "p90": 395.7119882106781, + "p95": 401.66398882865906, + "p99": 411.6159975528717 + }, + "combine": { + "p50": 132.25600123405457, + "p90": 140.47999680042267, + "p95": 143.5839980840683, + "p99": 149.21599626541138 + }, + "roundtrip": { + "p50": 499.6800124645233, + "p90": 515.6480073928833, + "p95": 520.8320021629333, + "p99": 532.1919918060303 + }, + "isolatedSum": { + "p50": 510.72001457214355, + "p90": 536.1919850111008, + "p95": 545.2479869127274, + "p99": 560.8319938182831 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 461.9840085506439, + "p90": 480.0960123538971, + "p95": 483.7760031223297, + "p99": 489.21599984169006 + }, + "combine": { + "p50": 224.19199347496033, + "p90": 228.67199778556824, + "p95": 231.90400004386902, + "p99": 237.34399676322937 + }, + "roundtrip": { + "p50": 693.4720277786255, + "p90": 708.5440158843994, + "p95": 713.2800221443176, + "p99": 719.8399901390076 + }, + "isolatedSum": { + "p50": 686.1760020256042, + "p90": 708.7680101394653, + "p95": 715.6800031661987, + "p99": 726.5599966049194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 625.8879899978638, + "p90": 647.9039788246155, + "p95": 656.8319797515869, + "p99": 9929.632186889648 + }, + "combine": { + "p50": 393.2799994945526, + "p90": 397.37600088119507, + "p95": 399.616003036499, + "p99": 404.4159948825836 + }, + "roundtrip": { + "p50": 1052.832007408142, + "p90": 1065.9840106964111, + "p95": 1070.847988128662, + "p99": 1079.5520544052124 + }, + "isolatedSum": { + "p50": 1019.1679894924164, + "p90": 1045.2799797058105, + "p95": 1056.447982788086, + "p99": 10334.048181772232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
978.9440035820007, + "p90": 988.1920218467712, + "p95": 991.5519952774048, + "p99": 996.4159727096558 + }, + "combine": { + "p50": 737.4719977378845, + "p90": 743.2000041007996, + "p95": 745.7600235939026, + "p99": 749.0879893302917 + }, + "roundtrip": { + "p50": 1770.1120376586914, + "p90": 1785.5679988861084, + "p95": 1788.4800434112549, + "p99": 1802.6880025863647 + }, + "isolatedSum": { + "p50": 1716.4160013198853, + "p90": 1731.3920259475708, + "p95": 1737.3120188713074, + "p99": 1745.5039620399475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7103c2a8", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe", + "colorKey": "gb300_8c3da06a", + "comparisonKey": "bc42c7ebea44fe67", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:26:19.423727+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3eb2f0d7bdba0fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 382.78400897979736, + "p90": 399.9040126800537, + "p95": 404.5119881629944, + "p99": 412.60799765586853 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 75.6480023264885, + "p95": 81.02399855852127, + "p99": 91.64799749851227 + }, + "roundtrip": { + "p50": 436.67200207710266, + "p90": 453.0560076236725, + "p95": 459.23200249671936, + "p99": 469.85599398612976 + }, + "isolatedSum": { + "p50": 453.12000811100006, + "p90": 475.5520150065422, + "p95": 485.53598672151566, + "p99": 504.2559951543808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 391.87198877334595, + "p90": 409.40800309181213, + "p95": 414.5280122756958, + "p99": 446.30399346351624 + }, + "combine": { + "p50": 84.79999750852585, + "p90": 92.6399976015091, + 
"p95": 95.93600034713745, + "p99": 108.15999656915665 + }, + "roundtrip": { + "p50": 460.1919949054718, + "p90": 476.1280119419098, + "p95": 481.6960096359253, + "p99": 494.2080080509186 + }, + "isolatedSum": { + "p50": 476.6719862818718, + "p90": 502.0480006933212, + "p95": 510.46401262283325, + "p99": 554.4639900326729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 396.2880074977875, + "p90": 416.57599806785583, + "p95": 421.1519956588745, + "p99": 435.93600392341614 + }, + "combine": { + "p50": 119.26399916410446, + "p90": 125.15200674533844, + "p95": 127.48800218105316, + "p99": 134.43200290203094 + }, + "roundtrip": { + "p50": 504.2880177497864, + "p90": 523.4240293502808, + "p95": 529.4079780578613, + "p99": 540.6399965286255 + }, + "isolatedSum": { + "p50": 515.5520066618919, + "p90": 541.7280048131943, + "p95": 548.6399978399277, + "p99": 570.3680068254471 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 452.41600275039673, + "p90": 468.4160053730011, + "p95": 473.1200039386749, + "p99": 482.40000009536743 + }, + "combine": { + "p50": 193.7279999256134, + "p90": 199.52000677585602, + "p95": 202.68799364566803, + "p99": 212.19199895858765 + }, + "roundtrip": { + "p50": 633.8880062103271, + "p90": 651.1359810829163, + "p95": 655.5520296096802, + "p99": 666.815996170044 + }, + "isolatedSum": { + "p50": 646.1440026760101, + "p90": 667.9360121488571, + "p95": 675.807997584343, + "p99": 694.5919990539551 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 619.9679970741272, + "p90": 635.2319717407227, + "p95": 640.9279704093933, + "p99": 669.0880060195923 + }, + "combine": { + "p50": 332.92800188064575, + "p90": 338.3040130138397, + "p95": 340.7039940357208, + "p99": 348.54400157928467 + }, + "roundtrip": { + "p50": 946.5600252151489, + "p90": 962.0159864425659, + "p95": 968.3840274810791, + "p99": 979.423999786377 + }, + "isolatedSum": { + "p50": 952.895998954773, + "p90": 973.5359847545624, + "p95": 981.6319644451141, + "p99": 1017.632007598877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 902.1440148353577, + "p90": 916.3839817047119, + "p95": 922.655999660492, + "p99": 950.5599737167358 + }, + "combine": { + "p50": 608.0319881439209, + "p90": 613.8240098953247, + "p95": 617.2159910202026, + "p99": 634.4320178031921 + }, + "roundtrip": { + "p50": 1493.0880069732666, + "p90": 1509.1520547866821, + "p95": 1514.7199630737305, + "p99": 1525.6320238113403 + }, + "isolatedSum": { + "p50": 1510.1760029792786, + "p90": 1530.2079916000366, + "p95": 1539.8719906806946, + "p99": 1584.991991519928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-babdb81a", + "identity": 
"gb300|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_6e070cf3", + "comparisonKey": "4b05db5c90e0f4ac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:20.544519+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + 
"sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 334.6880078315735, + "p90": 361.91999912261963, + "p95": 367.93598532676697, + "p99": 404.03199195861816 + }, + "combine": { + "p50": 75.3600001335144, + "p90": 81.56800270080566, + "p95": 86.40000224113464, + "p99": 100.76799988746643 + }, + "roundtrip": { + "p50": 393.92000436782837, + "p90": 416.73600673675537, + "p95": 423.39199781417847, + "p99": 454.49599623680115 + }, + "isolatedSum": { + "p50": 410.0480079650879, + "p90": 443.4880018234253, + "p95": 454.3359875679016, + "p99": 504.7999918460846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 341.6000008583069, + "p90": 364.3839955329895, + "p95": 369.28001046180725, + "p99": 378.84798645973206 + }, + "combine": { + "p50": 90.17600119113922, + "p90": 97.28000313043594, + "p95": 101.88800096511841, + "p99": 106.81600123643875 + }, + "roundtrip": { + "p50": 424.5760142803192, + "p90": 444.2239999771118, + "p95": 448.7999975681305, + "p99": 459.77601408958435 + }, + "isolatedSum": { + "p50": 431.7760020494461, + "p90": 461.66399866342545, + "p95": 471.16801142692566, + "p99": 485.6639876961708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 359.45600271224976, + "p90": 385.5679929256439, + "p95": 392.15999841690063, + "p99": 408.5119962692261 + }, + "combine": { + "p50": 130.72000443935394, + "p90": 137.43999600410461, + "p95": 
142.17600226402283, + "p99": 174.81599748134613 + }, + "roundtrip": { + "p50": 491.7440116405487, + "p90": 516.7999863624573, + "p95": 524.4479775428772, + "p99": 559.2960119247437 + }, + "isolatedSum": { + "p50": 490.1760071516037, + "p90": 523.0079889297485, + "p95": 534.3360006809235, + "p99": 583.3279937505722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 454.49599623680115, + "p90": 478.1759977340698, + "p95": 484.0640127658844, + "p99": 495.10401487350464 + }, + "combine": { + "p50": 220.96000611782074, + "p90": 224.8000055551529, + "p95": 229.5680046081543, + "p99": 252.8960108757019 + }, + "roundtrip": { + "p50": 701.088011264801, + "p90": 718.8479900360107, + "p95": 723.3920097351074, + "p99": 734.4319820404053 + }, + "isolatedSum": { + "p50": 675.4560023546219, + "p90": 702.9760032892227, + "p95": 713.6320173740387, + "p99": 748.0000257492065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 611.1999750137329, + "p90": 633.4720253944397, + "p95": 640.3520107269287, + "p99": 650.1439809799194 + }, + "combine": { + "p50": 392.2879993915558, + "p90": 395.77600359916687, + "p95": 397.40800857543945, + "p99": 401.12000703811646 + }, + "roundtrip": { + "p50": 1048.4479665756226, + "p90": 1073.248028755188, + "p95": 1078.4640312194824, + "p99": 1101.6960144042969 + }, + "isolatedSum": { + "p50": 1003.4879744052887, + "p90": 1029.2480289936066, + "p95": 1037.7600193023682, + "p99": 1051.263988018036 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1002.6559829711914, + "p90": 1013.3440494537354, + "p95": 1017.0880556106567, + "p99": 1028.4160375595093 + }, + "combine": { + "p50": 743.1679964065552, + "p90": 747.6800084114075, + "p95": 749.1199970245361, + "p99": 752.2240281105042 + }, + "roundtrip": { + "p50": 1803.0719757080078, + "p90": 1821.3759660720825, + "p95": 1826.5279531478882, + "p99": 1834.2080116271973 + }, + "isolatedSum": { + "p50": 1745.8239793777466, + "p90": 1761.0240578651428, + "p95": 1766.2080526351929, + "p99": 1780.6400656700134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b372f9c1", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_74bc362b", + "comparisonKey": "51fa1e12bd9c933a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:27:42.905829+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + 
"topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 376.44800543785095, + "p90": 393.69601011276245, + "p95": 398.9120125770569, + "p99": 410.3040099143982 + }, + "combine": { + "p50": 71.74400240182877, + "p90": 76.60800218582153, + "p95": 78.36800068616867, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 432.76798725128174, + "p90": 450.23998618125916, + "p95": 455.9679925441742, + "p99": 470.97599506378174 + }, + "isolatedSum": { + "p50": 448.1920078396797, + "p90": 470.304012298584, + "p95": 477.28001326322556, + "p99": 499.29600954055786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 376.3200044631958, + "p90": 397.72799611091614, + "p95": 404.7040045261383, + "p99": 424.2880046367645 + }, + "combine": { + "p50": 86.94399893283844, + "p90": 92.25600212812424, + "p95": 97.02400118112564, + "p99": 136.31999492645264 + }, + "roundtrip": { + "p50": 450.5600035190582, + "p90": 472.1919894218445, + "p95": 477.34400629997253, + "p99": 497.3120093345642 + }, + "isolatedSum": { + "p50": 463.26400339603424, + "p90": 489.9839982390404, + "p95": 501.72800570726395, + "p99": 560.6079995632172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 390.1120126247406, + "p90": 413.6959910392761, + "p95": 418.33600401878357, + "p99": 431.36000633239746 + }, + "combine": { + "p50": 115.99999666213989, + "p90": 122.30399996042252, + "p95": 125.34399330615997, + "p99": 135.13599336147308 + }, + "roundtrip": { + "p50": 487.2640073299408, + "p90": 507.4880123138428, + "p95": 511.58398389816284, + "p99": 531.711995601654 + }, + "isolatedSum": { + "p50": 506.1120092868805, + "p90": 535.9999909996986, + "p95": 543.6799973249435, + "p99": 566.4959996938705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 443.5200095176697, + "p90": 465.1840031147003, + "p95": 470.68798542022705, + "p99": 480.44800758361816 + }, + "combine": { + "p50": 190.33600389957428, + "p90": 196.51199877262115, 
+ "p95": 199.23199713230133, + "p99": 208.92800390720367 + }, + "roundtrip": { + "p50": 622.6879954338074, + "p90": 643.9359784126282, + "p95": 648.4159827232361, + "p99": 662.3039841651917 + }, + "isolatedSum": { + "p50": 633.856013417244, + "p90": 661.6960018873215, + "p95": 669.9199825525284, + "p99": 689.3760114908218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 610.0800037384033, + "p90": 632.0000290870667, + "p95": 638.2719874382019, + "p99": 656.4159989356995 + }, + "combine": { + "p50": 331.13598823547363, + "p90": 336.2880051136017, + "p95": 338.49599957466125, + "p99": 344.63998675346375 + }, + "roundtrip": { + "p50": 934.9439740180969, + "p90": 953.216016292572, + "p95": 959.1360092163086, + "p99": 972.5120067596436 + }, + "isolatedSum": { + "p50": 941.215991973877, + "p90": 968.2880342006683, + "p95": 976.7679870128632, + "p99": 1001.0559856891632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 896.6400027275085, + "p90": 913.7279987335205, + "p95": 917.2800183296204, + "p99": 931.2000274658203 + }, + "combine": { + "p50": 605.3439974784851, + "p90": 610.8160018920898, + "p95": 612.6400232315063, + "p99": 617.247998714447 + }, + "roundtrip": { + "p50": 1490.3680086135864, + "p90": 1509.2480182647705, + "p95": 1515.328049659729, + "p99": 1530.8799743652344 + }, + "isolatedSum": { + "p50": 1501.9840002059937, + "p90": 1524.5440006256104, + "p95": 1529.9200415611267, + "p99": 1548.4480261802673 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ccb60b21", + "identity": "gb300|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_d80745cb", + "comparisonKey": "00e5d1bbd6729252", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:55.670319+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": 
"hybrid-ep", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 399.55198764801025, + "p90": 417.34400391578674, + "p95": 421.9839870929718, + "p99": 433.4399998188019 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 77.56800204515457, + "p95": 80.92799782752991, + "p99": 89.91999924182892 + }, + "roundtrip": { + "p50": 448.2559859752655, + "p90": 466.0159945487976, + "p95": 470.5600142478943, + "p99": 486.01600527763367 + }, + "isolatedSum": { + "p50": 470.6239849328995, + "p90": 494.9120059609413, + "p95": 502.9119849205017, + "p99": 523.3599990606308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 403.872013092041, + "p90": 421.6960072517395, + "p95": 427.0719885826111, + "p99": 441.567987203598 + }, + "combine": { + "p50": 86.20800077915192, + "p90": 92.32000261545181, + "p95": 95.42399644851685, + "p99": 104.38399761915207 + }, + "roundtrip": { + "p50": 474.11200404167175, + "p90": 490.81599712371826, + "p95": 494.9760138988495, + "p99": 504.2240023612976 + }, + "isolatedSum": { + "p50": 490.08001387119293, + "p90": 514.0160098671913, + "p95": 522.4959850311279, + "p99": 545.9519848227501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 406.7839980125427, + "p90": 424.99199509620667, + "p95": 429.4080138206482, + "p99": 452.4480104446411 + }, + "combine": { + "p50": 118.78400295972824, + "p90": 123.9359974861145, + "p95": 125.56800246238708, + "p99": 131.84000551700592 + }, + "roundtrip": { + "p50": 509.0879797935486, + "p90": 526.3040065765381, + "p95": 530.8160185813904, + "p99": 537.9520058631897 + }, + "isolatedSum": { + "p50": 525.568000972271, + "p90": 548.9279925823212, + "p95": 554.9760162830353, + "p99": 584.288015961647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 462.3999893665314, + "p90": 480.51199316978455, + "p95": 486.27200722694397, + "p99": 506.75201416015625 + }, + "combine": { + "p50": 190.528005361557, + "p90": 196.60800695419312, + "p95": 198.08000326156616, + "p99": 203.67999374866486 + }, + "roundtrip": { + "p50": 644.1919803619385, + "p90": 659.9360108375549, + "p95": 663.8399958610535, + "p99": 669.4719791412354 + }, + "isolatedSum": { + "p50": 652.9279947280884, + "p90": 677.1200001239777, + "p95": 684.3520104885101, + "p99": 710.4320079088211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 636.9600296020508, + "p90": 653.9199948310852, + "p95": 658.8799953460693, + "p99": 668.4799790382385 + }, + "combine": { + "p50": 331.6799998283386, + "p90": 338.1440043449402, + "p95": 339.87200260162354, + "p99": 345.5680012702942 + 
}, + "roundtrip": { + "p50": 962.336003780365, + "p90": 976.2880206108093, + "p95": 980.7999730110168, + "p99": 1017.7919864654541 + }, + "isolatedSum": { + "p50": 968.6400294303894, + "p90": 992.0639991760254, + "p95": 998.7519979476929, + "p99": 1014.0479803085327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 913.0240082740784, + "p90": 927.295982837677, + "p95": 930.2399754524231, + "p99": 989.2160296440125 + }, + "combine": { + "p50": 605.9839725494385, + "p90": 610.368013381958, + "p95": 612.064003944397, + "p99": 617.8240180015564 + }, + "roundtrip": { + "p50": 1512.5759840011597, + "p90": 1527.135968208313, + "p95": 1531.3600301742554, + "p99": 1539.903998374939 + }, + "isolatedSum": { + "p50": 1519.0079808235168, + "p90": 1537.663996219635, + "p95": 1542.30397939682, + "p99": 1607.0400476455688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f30f4895", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_7255c6ac", + "comparisonKey": "e27bccc0f6bf4d4b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:10.843771+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 99.67999905347824, + "p90": 109.92000252008438, + "p95": 114.17599767446518, + "p99": 119.23199892044067 + }, + "combine": { + "p50": 99.67999905347824, + "p90": 109.92000252008438, + "p95": 114.17599767446518, + "p99": 119.23199892044067 + }, + "roundtrip": { + "p50": 99.67999905347824, + "p90": 109.92000252008438, + "p95": 114.17599767446518, + "p99": 119.23199892044067 + }, + "isolatedSum": { + "p50": 199.35999810695648, + "p90": 219.84000504016876, + "p95": 228.35199534893036, + 
"p99": 238.46399784088135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 100.19200295209885, + "p90": 109.6000000834465, + "p95": 114.1119971871376, + "p99": 120.92799693346024 + }, + "combine": { + "p50": 100.19200295209885, + "p90": 109.6000000834465, + "p95": 114.1119971871376, + "p99": 120.92799693346024 + }, + "roundtrip": { + "p50": 100.19200295209885, + "p90": 109.6000000834465, + "p95": 114.1119971871376, + "p99": 120.92799693346024 + }, + "isolatedSum": { + "p50": 200.3840059041977, + "p90": 219.200000166893, + "p95": 228.2239943742752, + "p99": 241.85599386692047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 100.63999891281128, + "p90": 110.52799969911575, + "p95": 114.23999816179276, + "p99": 120.06399780511856 + }, + "combine": { + "p50": 100.63999891281128, + "p90": 110.52799969911575, + "p95": 114.23999816179276, + "p99": 120.06399780511856 + }, + "roundtrip": { + "p50": 100.63999891281128, + "p90": 110.52799969911575, + "p95": 114.23999816179276, + "p99": 120.06399780511856 + }, + "isolatedSum": { + "p50": 201.27999782562256, + "p90": 221.0559993982315, + "p95": 228.4799963235855, + "p99": 240.12799561023712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 100.28800368309021, + "p90": 
109.72800105810165, + "p95": 114.33599889278412, + "p99": 120.83200365304947 + }, + "combine": { + "p50": 100.28800368309021, + "p90": 109.72800105810165, + "p95": 114.33599889278412, + "p99": 120.83200365304947 + }, + "roundtrip": { + "p50": 100.28800368309021, + "p90": 109.72800105810165, + "p95": 114.33599889278412, + "p99": 120.83200365304947 + }, + "isolatedSum": { + "p50": 200.57600736618042, + "p90": 219.4560021162033, + "p95": 228.67199778556824, + "p99": 241.66400730609894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 101.34399682283401, + "p90": 110.17599701881409, + "p95": 114.81600254774094, + "p99": 121.8239963054657 + }, + "combine": { + "p50": 101.34399682283401, + "p90": 110.17599701881409, + "p95": 114.81600254774094, + "p99": 121.8239963054657 + }, + "roundtrip": { + "p50": 101.34399682283401, + "p90": 110.17599701881409, + "p95": 114.81600254774094, + "p99": 121.8239963054657 + }, + "isolatedSum": { + "p50": 202.68799364566803, + "p90": 220.35199403762817, + "p95": 229.63200509548187, + "p99": 243.6479926109314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 101.34399682283401, + "p90": 109.0880036354065, + "p95": 114.30399864912033, + "p99": 118.43200027942657 + }, + "combine": { + "p50": 101.34399682283401, + "p90": 109.0880036354065, + "p95": 114.30399864912033, + "p99": 118.43200027942657 + }, + "roundtrip": { + "p50": 101.34399682283401, + "p90": 109.0880036354065, + "p95": 114.30399864912033, + "p99": 118.43200027942657 + }, + 
"isolatedSum": { + "p50": 202.68799364566803, + "p90": 218.176007270813, + "p95": 228.60799729824066, + "p99": 236.86400055885315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 103.80800068378448, + "p90": 115.61600118875504, + "p95": 118.43200027942657, + "p99": 125.98399817943573 + }, + "combine": { + "p50": 103.80800068378448, + "p90": 115.61600118875504, + "p95": 118.43200027942657, + "p99": 125.98399817943573 + }, + "roundtrip": { + "p50": 103.80800068378448, + "p90": 115.61600118875504, + "p95": 118.43200027942657, + "p99": 125.98399817943573 + }, + "isolatedSum": { + "p50": 207.61600136756897, + "p90": 231.23200237751007, + "p95": 236.86400055885315, + "p99": 251.96799635887146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 104.92800176143646, + "p90": 114.52800035476685, + "p95": 118.94399672746658, + "p99": 126.20800733566284 + }, + "combine": { + "p50": 104.92800176143646, + "p90": 114.52800035476685, + "p95": 118.94399672746658, + "p99": 126.20800733566284 + }, + "roundtrip": { + "p50": 104.92800176143646, + "p90": 114.52800035476685, + "p95": 118.94399672746658, + "p99": 126.20800733566284 + }, + "isolatedSum": { + "p50": 209.85600352287292, + "p90": 229.0560007095337, + "p95": 237.88799345493317, + "p99": 252.41601467132568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-921e164e", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|decode|normal|none|none|0|tuned||bb358a3c2e68578", + "colorKey": "gb300_44bf7fb4", + "comparisonKey": "5080dac8648fafd5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:53.644815+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "bb358a3c2e68578", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 101.75999999046326, + "p90": 110.01600325107574, + "p95": 114.20799791812897, + "p99": 118.17599833011627 + }, + "combine": { + "p50": 101.75999999046326, + "p90": 110.01600325107574, + "p95": 114.20799791812897, + "p99": 118.17599833011627 + }, + "roundtrip": { + "p50": 101.75999999046326, + "p90": 110.01600325107574, + "p95": 114.20799791812897, + "p99": 118.17599833011627 + }, + "isolatedSum": { + "p50": 203.5199999809265, + "p90": 220.0320065021515, + "p95": 228.41599583625793, + "p99": 236.35199666023254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 101.95200145244598, + "p90": 111.35999858379364, + "p95": 115.58400094509125, + "p99": 119.03999745845795 + }, + "combine": { + "p50": 101.95200145244598, + "p90": 111.35999858379364, + "p95": 115.58400094509125, + "p99": 119.03999745845795 + }, + "roundtrip": { + "p50": 101.95200145244598, + "p90": 111.35999858379364, + "p95": 115.58400094509125, + "p99": 119.03999745845795 + }, + "isolatedSum": { + "p50": 203.90400290489197, + "p90": 222.71999716758728, + "p95": 231.1680018901825, + "p99": 238.0799949169159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 102.91200131177902, + "p90": 111.29599809646606, + "p95": 115.03999680280685, + "p99": 121.47200107574463 + }, + 
"combine": { + "p50": 102.91200131177902, + "p90": 111.29599809646606, + "p95": 115.03999680280685, + "p99": 121.47200107574463 + }, + "roundtrip": { + "p50": 102.91200131177902, + "p90": 111.29599809646606, + "p95": 115.03999680280685, + "p99": 121.47200107574463 + }, + "isolatedSum": { + "p50": 205.82400262355804, + "p90": 222.59199619293213, + "p95": 230.0799936056137, + "p99": 242.94400215148926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 101.6319990158081, + "p90": 108.83200168609619, + "p95": 114.20799791812897, + "p99": 119.26399916410446 + }, + "combine": { + "p50": 101.6319990158081, + "p90": 108.83200168609619, + "p95": 114.20799791812897, + "p99": 119.26399916410446 + }, + "roundtrip": { + "p50": 101.6319990158081, + "p90": 108.83200168609619, + "p95": 114.20799791812897, + "p99": 119.26399916410446 + }, + "isolatedSum": { + "p50": 203.2639980316162, + "p90": 217.66400337219238, + "p95": 228.41599583625793, + "p99": 238.52799832820892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 102.01600193977356, + "p90": 109.56799983978271, + "p95": 113.88800293207169, + "p99": 121.98399752378464 + }, + "combine": { + "p50": 102.01600193977356, + "p90": 109.56799983978271, + "p95": 113.88800293207169, + "p99": 121.98399752378464 + }, + "roundtrip": { + "p50": 102.01600193977356, + "p90": 109.56799983978271, + "p95": 113.88800293207169, + "p99": 121.98399752378464 + }, + "isolatedSum": { + "p50": 204.03200387954712, + "p90": 219.13599967956543, + "p95": 
227.77600586414337, + "p99": 243.96799504756927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 101.59999877214432, + "p90": 110.17599701881409, + "p95": 114.3999993801117, + "p99": 119.71200257539749 + }, + "combine": { + "p50": 101.59999877214432, + "p90": 110.17599701881409, + "p95": 114.3999993801117, + "p99": 119.71200257539749 + }, + "roundtrip": { + "p50": 101.59999877214432, + "p90": 110.17599701881409, + "p95": 114.3999993801117, + "p99": 119.71200257539749 + }, + "isolatedSum": { + "p50": 203.19999754428864, + "p90": 220.35199403762817, + "p95": 228.7999987602234, + "p99": 239.42400515079498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 103.58399897813797, + "p90": 113.37599903345108, + "p95": 117.40799993276596, + "p99": 123.19999933242798 + }, + "combine": { + "p50": 103.58399897813797, + "p90": 113.37599903345108, + "p95": 117.40799993276596, + "p99": 123.19999933242798 + }, + "roundtrip": { + "p50": 103.58399897813797, + "p90": 113.37599903345108, + "p95": 117.40799993276596, + "p99": 123.19999933242798 + }, + "isolatedSum": { + "p50": 207.16799795627594, + "p90": 226.75199806690216, + "p95": 234.81599986553192, + "p99": 246.39999866485596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 
103.93600165843964, + "p90": 112.28799819946289, + "p95": 115.68000167608261, + "p99": 124.44800138473511 + }, + "combine": { + "p50": 103.93600165843964, + "p90": 112.28799819946289, + "p95": 115.68000167608261, + "p99": 124.44800138473511 + }, + "roundtrip": { + "p50": 103.93600165843964, + "p90": 112.28799819946289, + "p95": 115.68000167608261, + "p99": 124.44800138473511 + }, + "isolatedSum": { + "p50": 207.87200331687927, + "p90": 224.57599639892578, + "p95": 231.36000335216522, + "p99": 248.89600276947021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3ffb06ae", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|decode|normal|none|none|0|tuned||c9bbf5a132d7fdf", + "colorKey": "gb300_d825256f", + "comparisonKey": "66e4e880b4e47cc3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:13.329127+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9bbf5a132d7fdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 101.05600208044052, + "p90": 111.80800199508667, + "p95": 115.13599753379822, + "p99": 120.35199999809265 + }, + "combine": { + "p50": 101.05600208044052, + "p90": 111.80800199508667, + "p95": 115.13599753379822, + "p99": 120.35199999809265 + }, + "roundtrip": { + "p50": 101.05600208044052, + "p90": 111.80800199508667, + "p95": 115.13599753379822, + "p99": 120.35199999809265 + }, + "isolatedSum": { + "p50": 202.11200416088104, + "p90": 223.61600399017334, + "p95": 230.27199506759644, + "p99": 240.7039999961853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 57344, + "combineLogicalBytes": 57344, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 102.52799838781357, + "p90": 113.53600025177002, + "p95": 118.68800222873688, + "p99": 127.00800597667694 + }, + "combine": { + "p50": 102.52799838781357, + "p90": 113.53600025177002, + "p95": 118.68800222873688, + "p99": 
127.00800597667694 + }, + "roundtrip": { + "p50": 102.52799838781357, + "p90": 113.53600025177002, + "p95": 118.68800222873688, + "p99": 127.00800597667694 + }, + "isolatedSum": { + "p50": 205.05599677562714, + "p90": 227.07200050354004, + "p95": 237.37600445747375, + "p99": 254.01601195335388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 104.38399761915207, + "p90": 116.80000275373459, + "p95": 121.63200229406357, + "p99": 133.66399705410004 + }, + "combine": { + "p50": 104.38399761915207, + "p90": 116.80000275373459, + "p95": 121.63200229406357, + "p99": 133.66399705410004 + }, + "roundtrip": { + "p50": 104.38399761915207, + "p90": 116.80000275373459, + "p95": 121.63200229406357, + "p99": 133.66399705410004 + }, + "isolatedSum": { + "p50": 208.76799523830414, + "p90": 233.60000550746918, + "p95": 243.26400458812714, + "p99": 267.3279941082001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 102.01600193977356, + "p90": 113.69600147008896, + "p95": 118.52800101041794, + "p99": 136.09600067138672 + }, + "combine": { + "p50": 102.01600193977356, + "p90": 113.69600147008896, + "p95": 118.52800101041794, + "p99": 136.09600067138672 + }, + "roundtrip": { + "p50": 102.01600193977356, + "p90": 113.69600147008896, + "p95": 118.52800101041794, + "p99": 136.09600067138672 + }, + "isolatedSum": { + "p50": 204.03200387954712, + "p90": 227.39200294017792, + "p95": 237.05600202083588, + "p99": 272.19200134277344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dae4e385", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|decode|normal|none|none|0|tuned||4dc6cbd03327f4e", + "colorKey": "gb300_248d58da", + "comparisonKey": "0ee88add816b0495", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:53.877990+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "4dc6cbd03327f4e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 107.45599865913391, + "p90": 119.64800208806992, + "p95": 124.1919994354248, + "p99": 132.28799402713776 + }, + "combine": { + "p50": 107.45599865913391, + "p90": 119.64800208806992, + "p95": 124.1919994354248, + "p99": 132.28799402713776 + }, + "roundtrip": { + "p50": 107.45599865913391, + "p90": 119.64800208806992, + "p95": 124.1919994354248, + "p99": 132.28799402713776 + }, + "isolatedSum": { + "p50": 214.91199731826782, + "p90": 239.29600417613983, + "p95": 248.3839988708496, + "p99": 264.5759880542755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 2, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 107.04000294208527, + "p90": 119.6800023317337, + "p95": 123.10399860143661, + "p99": 128.48000228405 + }, + "combine": { + "p50": 107.04000294208527, + "p90": 119.6800023317337, + "p95": 123.10399860143661, + "p99": 128.48000228405 + }, + "roundtrip": { + "p50": 107.04000294208527, + "p90": 119.6800023317337, + "p95": 123.10399860143661, + "p99": 128.48000228405 + }, + "isolatedSum": { + "p50": 214.08000588417053, + "p90": 239.3600046634674, + "p95": 246.20799720287323, + "p99": 256.9600045681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 107.96800255775452, + "p90": 120.83200365304947, + 
"p95": 126.11199915409088, + "p99": 134.20799374580383 + }, + "combine": { + "p50": 107.96800255775452, + "p90": 120.83200365304947, + "p95": 126.11199915409088, + "p99": 134.20799374580383 + }, + "roundtrip": { + "p50": 107.96800255775452, + "p90": 120.83200365304947, + "p95": 126.11199915409088, + "p99": 134.20799374580383 + }, + "isolatedSum": { + "p50": 215.93600511550903, + "p90": 241.66400730609894, + "p95": 252.22399830818176, + "p99": 268.41598749160767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 107.744000852108, + "p90": 120.51200121641159, + "p95": 124.57600235939026, + "p99": 131.8719983100891 + }, + "combine": { + "p50": 107.744000852108, + "p90": 120.51200121641159, + "p95": 124.57600235939026, + "p99": 131.8719983100891 + }, + "roundtrip": { + "p50": 107.744000852108, + "p90": 120.51200121641159, + "p95": 124.57600235939026, + "p99": 131.8719983100891 + }, + "isolatedSum": { + "p50": 215.488001704216, + "p90": 241.02400243282318, + "p95": 249.15200471878052, + "p99": 263.7439966201782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 104.80000078678131, + "p90": 116.83200299739838, + "p95": 121.18399888277054, + "p99": 126.78399682044983 + }, + "combine": { + "p50": 104.80000078678131, + "p90": 116.83200299739838, + "p95": 121.18399888277054, + "p99": 126.78399682044983 + }, + "roundtrip": { + "p50": 104.80000078678131, + "p90": 116.83200299739838, + "p95": 121.18399888277054, + "p99": 126.78399682044983 + }, + "isolatedSum": { + "p50": 
209.60000157356262, + "p90": 233.66400599479675, + "p95": 242.36799776554108, + "p99": 253.56799364089966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 106.65600001811981, + "p90": 118.40000003576279, + "p95": 123.07199835777283, + "p99": 127.96799838542938 + }, + "combine": { + "p50": 106.65600001811981, + "p90": 118.40000003576279, + "p95": 123.07199835777283, + "p99": 127.96799838542938 + }, + "roundtrip": { + "p50": 106.65600001811981, + "p90": 118.40000003576279, + "p95": 123.07199835777283, + "p99": 127.96799838542938 + }, + "isolatedSum": { + "p50": 213.31200003623962, + "p90": 236.80000007152557, + "p95": 246.14399671554565, + "p99": 255.93599677085876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2752512, + "combineLogicalBytes": 2752512, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 108.5439994931221, + "p90": 123.80799651145935, + "p95": 127.42400169372559, + "p99": 134.33599472045898 + }, + "combine": { + "p50": 108.5439994931221, + "p90": 123.80799651145935, + "p95": 127.42400169372559, + "p99": 134.33599472045898 + }, + "roundtrip": { + "p50": 108.5439994931221, + "p90": 123.80799651145935, + "p95": 127.42400169372559, + "p99": 134.33599472045898 + }, + "isolatedSum": { + "p50": 217.0879989862442, + "p90": 247.6159930229187, + "p95": 254.84800338745117, + "p99": 268.67198944091797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5505024, + "combineLogicalBytes": 5505024, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
128, + "globalTokens": 512, + "dispatch": { + "p50": 106.97600245475769, + "p90": 120.2239990234375, + "p95": 124.32000041007996, + "p99": 132.60799646377563 + }, + "combine": { + "p50": 106.97600245475769, + "p90": 120.2239990234375, + "p95": 124.32000041007996, + "p99": 132.60799646377563 + }, + "roundtrip": { + "p50": 106.97600245475769, + "p90": 120.2239990234375, + "p95": 124.32000041007996, + "p99": 132.60799646377563 + }, + "isolatedSum": { + "p50": 213.95200490951538, + "p90": 240.447998046875, + "p95": 248.6400008201599, + "p99": 265.21599292755127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f35f7890", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|decode|normal|none|none|0|tuned||0d921f8a9d2cb27", + "colorKey": "gb300_0c631e36", + "comparisonKey": "b45a9de02747dc6f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:12.207438+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "0d921f8a9d2cb27", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 101.27999633550644, + "p90": 112.0000034570694, + "p95": 115.7120019197464, + "p99": 120.7360029220581 + }, + "combine": { + "p50": 101.27999633550644, + "p90": 112.0000034570694, + "p95": 115.7120019197464, + "p99": 120.7360029220581 + }, + "roundtrip": { + "p50": 101.27999633550644, + "p90": 112.0000034570694, + "p95": 115.7120019197464, + "p99": 120.7360029220581 + }, + "isolatedSum": { + "p50": 202.55999267101288, + "p90": 224.0000069141388, + "p95": 231.4240038394928, + "p99": 241.4720058441162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 200704, + "combineLogicalBytes": 200704, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 101.43999755382538, + "p90": 112.38399893045425, + "p95": 115.167997777462, + "p99": 119.48800086975098 + }, + "combine": { + "p50": 101.43999755382538, + "p90": 112.38399893045425, + "p95": 115.167997777462, + 
"p99": 119.48800086975098 + }, + "roundtrip": { + "p50": 101.43999755382538, + "p90": 112.38399893045425, + "p95": 115.167997777462, + "p99": 119.48800086975098 + }, + "isolatedSum": { + "p50": 202.87999510765076, + "p90": 224.7679978609085, + "p95": 230.335995554924, + "p99": 238.97600173950195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 101.59999877214432, + "p90": 112.22399771213531, + "p95": 116.64000153541565, + "p99": 120.89599668979645 + }, + "combine": { + "p50": 101.59999877214432, + "p90": 112.22399771213531, + "p95": 116.64000153541565, + "p99": 120.89599668979645 + }, + "roundtrip": { + "p50": 101.59999877214432, + "p90": 112.22399771213531, + "p95": 116.64000153541565, + "p99": 120.89599668979645 + }, + "isolatedSum": { + "p50": 203.19999754428864, + "p90": 224.44799542427063, + "p95": 233.2800030708313, + "p99": 241.7919933795929 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 102.24000364542007, + "p90": 112.28799819946289, + "p95": 116.44800007343292, + "p99": 121.5360015630722 + }, + "combine": { + "p50": 102.24000364542007, + "p90": 112.28799819946289, + "p95": 116.44800007343292, + "p99": 121.5360015630722 + }, + "roundtrip": { + "p50": 102.24000364542007, + "p90": 112.28799819946289, + "p95": 116.44800007343292, + "p99": 121.5360015630722 + }, + "isolatedSum": { + "p50": 204.48000729084015, + "p90": 224.57599639892578, + "p95": 232.89600014686584, + "p99": 243.0720031261444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + 
"combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 102.91200131177902, + "p90": 113.47199976444244, + "p95": 117.0559972524643, + "p99": 123.19999933242798 + }, + "combine": { + "p50": 102.91200131177902, + "p90": 113.47199976444244, + "p95": 117.0559972524643, + "p99": 123.19999933242798 + }, + "roundtrip": { + "p50": 102.91200131177902, + "p90": 113.47199976444244, + "p95": 117.0559972524643, + "p99": 123.19999933242798 + }, + "isolatedSum": { + "p50": 205.82400262355804, + "p90": 226.9439995288849, + "p95": 234.1119945049286, + "p99": 246.39999866485596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3282944, + "combineLogicalBytes": 3282944, + "fanoutMean": 3.578125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 102.59199887514114, + "p90": 112.03200370073318, + "p95": 115.58400094509125, + "p99": 122.78400361537933 + }, + "combine": { + "p50": 102.59199887514114, + "p90": 112.03200370073318, + "p95": 115.58400094509125, + "p99": 122.78400361537933 + }, + "roundtrip": { + "p50": 102.59199887514114, + "p90": 112.03200370073318, + "p95": 115.58400094509125, + "p99": 122.78400361537933 + }, + "isolatedSum": { + "p50": 205.1839977502823, + "p90": 224.06400740146637, + "p95": 231.1680018901825, + "p99": 245.56800723075867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6694912, + "combineLogicalBytes": 6694912, + "fanoutMean": 3.6484375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 104.60799932479858, + "p90": 116.31999909877777, + "p95": 120.25599926710129, + "p99": 128.7039965391159 + }, + 
"combine": { + "p50": 104.60799932479858, + "p90": 116.31999909877777, + "p95": 120.25599926710129, + "p99": 128.7039965391159 + }, + "roundtrip": { + "p50": 104.60799932479858, + "p90": 116.31999909877777, + "p95": 120.25599926710129, + "p99": 128.7039965391159 + }, + "isolatedSum": { + "p50": 209.21599864959717, + "p90": 232.63999819755554, + "p95": 240.51199853420258, + "p99": 257.4079930782318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13318144, + "combineLogicalBytes": 13318144, + "fanoutMean": 3.62890625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 105.50399869680405, + "p90": 114.46399986743927, + "p95": 118.65600198507309, + "p99": 124.41600114107132 + }, + "combine": { + "p50": 105.50399869680405, + "p90": 114.46399986743927, + "p95": 118.65600198507309, + "p99": 124.41600114107132 + }, + "roundtrip": { + "p50": 105.50399869680405, + "p90": 114.46399986743927, + "p95": 118.65600198507309, + "p99": 124.41600114107132 + }, + "isolatedSum": { + "p50": 211.0079973936081, + "p90": 228.92799973487854, + "p95": 237.31200397014618, + "p99": 248.83200228214264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-50c38ba1", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|decode|normal|none|none|0|tuned||cc5ad1cb2e95ef6", + "colorKey": "gb300_b2c46ef8", + "comparisonKey": "5fb6abfcf2797f2d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:15.626687+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cc5ad1cb2e95ef6", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.3408203125, + "eplbImbalanceAfter": 1.000390625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 95.36000341176987, + "p90": 108.60799998044968, + "p95": 122.23999947309494, + "p99": 166.87999665737152 + }, + "combine": { + "p50": 95.36000341176987, + "p90": 108.60799998044968, + "p95": 122.23999947309494, + "p99": 166.87999665737152 + }, + "roundtrip": { + "p50": 95.36000341176987, + "p90": 
108.60799998044968, + "p95": 122.23999947309494, + "p99": 166.87999665737152 + }, + "isolatedSum": { + "p50": 190.72000682353973, + "p90": 217.21599996089935, + "p95": 244.47999894618988, + "p99": 333.75999331474304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 97.37599641084671, + "p90": 110.36799848079681, + "p95": 120.35199999809265, + "p99": 164.57599401474 + }, + "combine": { + "p50": 97.37599641084671, + "p90": 110.36799848079681, + "p95": 120.35199999809265, + "p99": 164.57599401474 + }, + "roundtrip": { + "p50": 97.37599641084671, + "p90": 110.36799848079681, + "p95": 120.35199999809265, + "p99": 164.57599401474 + }, + "isolatedSum": { + "p50": 194.75199282169342, + "p90": 220.73599696159363, + "p95": 240.7039999961853, + "p99": 329.15198802948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 97.05600142478943, + "p90": 115.77600240707397, + "p95": 133.05599987506866, + "p99": 167.04000532627106 + }, + "combine": { + "p50": 97.05600142478943, + "p90": 115.77600240707397, + "p95": 133.05599987506866, + "p99": 167.04000532627106 + }, + "roundtrip": { + "p50": 97.05600142478943, + "p90": 115.77600240707397, + "p95": 133.05599987506866, + "p99": 167.04000532627106 + }, + "isolatedSum": { + "p50": 194.11200284957886, + "p90": 231.55200481414795, + "p95": 266.11199975013733, + "p99": 334.0800106525421 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 860160, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 96.70399874448776, + "p90": 113.27999830245972, + "p95": 128.48000228405, + "p99": 182.72000551223755 + }, + "combine": { + "p50": 96.70399874448776, + "p90": 113.27999830245972, + "p95": 128.48000228405, + "p99": 182.72000551223755 + }, + "roundtrip": { + "p50": 96.70399874448776, + "p90": 113.27999830245972, + "p95": 128.48000228405, + "p99": 182.72000551223755 + }, + "isolatedSum": { + "p50": 193.40799748897552, + "p90": 226.55999660491943, + "p95": 256.9600045681, + "p99": 365.4400110244751 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1705984, + "combineLogicalBytes": 1705984, + "fanoutMean": 3.71875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 97.18400239944458, + "p90": 111.16799712181091, + "p95": 126.24000012874603, + "p99": 161.72799468040466 + }, + "combine": { + "p50": 97.18400239944458, + "p90": 111.16799712181091, + "p95": 126.24000012874603, + "p99": 161.72799468040466 + }, + "roundtrip": { + "p50": 97.18400239944458, + "p90": 111.16799712181091, + "p95": 126.24000012874603, + "p99": 161.72799468040466 + }, + "isolatedSum": { + "p50": 194.36800479888916, + "p90": 222.33599424362183, + "p95": 252.48000025749207, + "p99": 323.4559893608093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3411968, + "combineLogicalBytes": 3411968, + "fanoutMean": 3.71875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 98.01600128412247, + "p90": 112.60800063610077, + "p95": 131.3599944114685, + "p99": 164.000004529953 + }, + "combine": { + "p50": 98.01600128412247, + "p90": 112.60800063610077, + "p95": 131.3599944114685, + "p99": 164.000004529953 + 
}, + "roundtrip": { + "p50": 98.01600128412247, + "p90": 112.60800063610077, + "p95": 131.3599944114685, + "p99": 164.000004529953 + }, + "isolatedSum": { + "p50": 196.03200256824493, + "p90": 225.21600127220154, + "p95": 262.719988822937, + "p99": 328.000009059906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6680576, + "combineLogicalBytes": 6680576, + "fanoutMean": 3.640625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 99.32799637317657, + "p90": 116.95999652147293, + "p95": 142.81600713729858, + "p99": 173.72800409793854 + }, + "combine": { + "p50": 99.32799637317657, + "p90": 116.95999652147293, + "p95": 142.81600713729858, + "p99": 173.72800409793854 + }, + "roundtrip": { + "p50": 99.32799637317657, + "p90": 116.95999652147293, + "p95": 142.81600713729858, + "p99": 173.72800409793854 + }, + "isolatedSum": { + "p50": 198.65599274635315, + "p90": 233.91999304294586, + "p95": 285.63201427459717, + "p99": 347.4560081958771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13432832, + "combineLogicalBytes": 13432832, + "fanoutMean": 3.66015625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 100.63999891281128, + "p90": 114.3679991364479, + "p95": 130.17599284648895, + "p99": 161.69600188732147 + }, + "combine": { + "p50": 100.63999891281128, + "p90": 114.3679991364479, + "p95": 130.17599284648895, + "p99": 161.69600188732147 + }, + "roundtrip": { + "p50": 100.63999891281128, + "p90": 114.3679991364479, + "p95": 130.17599284648895, + "p99": 161.69600188732147 + }, + "isolatedSum": { + "p50": 201.27999782562256, + "p90": 228.7359982728958, + "p95": 260.3519856929779, + "p99": 323.39200377464294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26464256, + 
"combineLogicalBytes": 26464256, + "fanoutMean": 3.60546875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0711f89f", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|decode|normal|none|none|0|tuned||c186e8c8d66ece3", + "colorKey": "gb300_81a322c2", + "comparisonKey": "32dfde5ef8348331", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:32.886244+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c186e8c8d66ece3", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.091796875, + "eplbImbalanceAfter": 1.00146484375, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 102.36799716949463, + "p90": 113.21599781513214, + "p95": 117.91999638080597, + "p99": 124.25599992275238 + }, + "combine": { + "p50": 102.36799716949463, + "p90": 113.21599781513214, + "p95": 117.91999638080597, + "p99": 124.25599992275238 + }, + "roundtrip": { + "p50": 102.36799716949463, + "p90": 113.21599781513214, + "p95": 117.91999638080597, + "p99": 124.25599992275238 + }, + "isolatedSum": { + "p50": 204.73599433898926, + "p90": 226.43199563026428, + "p95": 235.83999276161194, + "p99": 248.51199984550476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 101.82400047779083, + "p90": 112.0000034570694, + "p95": 115.52000045776367, + "p99": 121.76000326871872 + }, + "combine": { + "p50": 101.82400047779083, + "p90": 112.0000034570694, + "p95": 115.52000045776367, + "p99": 121.76000326871872 + }, + "roundtrip": { + "p50": 101.82400047779083, + "p90": 112.0000034570694, + "p95": 115.52000045776367, + "p99": 121.76000326871872 + }, + "isolatedSum": { + "p50": 203.64800095558167, + "p90": 224.0000069141388, + "p95": 231.04000091552734, + "p99": 243.52000653743744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, 
+ "globalTokens": 16, + "dispatch": { + "p50": 102.49599814414978, + "p90": 112.83200234174728, + "p95": 117.08799749612808, + "p99": 123.80799651145935 + }, + "combine": { + "p50": 102.49599814414978, + "p90": 112.83200234174728, + "p95": 117.08799749612808, + "p99": 123.80799651145935 + }, + "roundtrip": { + "p50": 102.49599814414978, + "p90": 112.83200234174728, + "p95": 117.08799749612808, + "p99": 123.80799651145935 + }, + "isolatedSum": { + "p50": 204.99199628829956, + "p90": 225.66400468349457, + "p95": 234.17599499225616, + "p99": 247.6159930229187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 745472, + "combineLogicalBytes": 745472, + "fanoutMean": 3.25, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 102.9760017991066, + "p90": 113.69600147008896, + "p95": 117.79200285673141, + "p99": 121.50400131940842 + }, + "combine": { + "p50": 102.9760017991066, + "p90": 113.69600147008896, + "p95": 117.79200285673141, + "p99": 121.50400131940842 + }, + "roundtrip": { + "p50": 102.9760017991066, + "p90": 113.69600147008896, + "p95": 117.79200285673141, + "p99": 121.50400131940842 + }, + "isolatedSum": { + "p50": 205.9520035982132, + "p90": 227.39200294017792, + "p95": 235.58400571346283, + "p99": 243.00800263881683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 102.9760017991066, + "p90": 112.38399893045425, + "p95": 117.0239970088005, + "p99": 122.17599898576736 + }, + "combine": { + "p50": 102.9760017991066, + "p90": 112.38399893045425, + "p95": 117.0239970088005, + "p99": 122.17599898576736 + }, + "roundtrip": { + "p50": 102.9760017991066, + "p90": 112.38399893045425, + 
"p95": 117.0239970088005, + "p99": 122.17599898576736 + }, + "isolatedSum": { + "p50": 205.9520035982132, + "p90": 224.7679978609085, + "p95": 234.047994017601, + "p99": 244.35199797153473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3225600, + "combineLogicalBytes": 3225600, + "fanoutMean": 3.515625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 103.58399897813797, + "p90": 114.23999816179276, + "p95": 117.40799993276596, + "p99": 124.86399710178375 + }, + "combine": { + "p50": 103.58399897813797, + "p90": 114.23999816179276, + "p95": 117.40799993276596, + "p99": 124.86399710178375 + }, + "roundtrip": { + "p50": 103.58399897813797, + "p90": 114.23999816179276, + "p95": 117.40799993276596, + "p99": 124.86399710178375 + }, + "isolatedSum": { + "p50": 207.16799795627594, + "p90": 228.4799963235855, + "p95": 234.81599986553192, + "p99": 249.7279942035675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6522880, + "combineLogicalBytes": 6522880, + "fanoutMean": 3.5546875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 105.02400249242783, + "p90": 116.44800007343292, + "p95": 119.99999731779099, + "p99": 124.9919980764389 + }, + "combine": { + "p50": 105.02400249242783, + "p90": 116.44800007343292, + "p95": 119.99999731779099, + "p99": 124.9919980764389 + }, + "roundtrip": { + "p50": 105.02400249242783, + "p90": 116.44800007343292, + "p95": 119.99999731779099, + "p99": 124.9919980764389 + }, + "isolatedSum": { + "p50": 210.04800498485565, + "p90": 232.89600014686584, + "p95": 239.99999463558197, + "p99": 249.9839961528778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13160448, + "combineLogicalBytes": 13160448, + "fanoutMean": 3.5859375, + "recvTokensMax": 4, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 106.36799782514572, + "p90": 115.9679964184761, + "p95": 120.28799951076508, + "p99": 124.86399710178375 + }, + "combine": { + "p50": 106.36799782514572, + "p90": 115.9679964184761, + "p95": 120.28799951076508, + "p99": 124.86399710178375 + }, + "roundtrip": { + "p50": 106.36799782514572, + "p90": 115.9679964184761, + "p95": 120.28799951076508, + "p99": 124.86399710178375 + }, + "isolatedSum": { + "p50": 212.73599565029144, + "p90": 231.9359928369522, + "p95": 240.57599902153015, + "p99": 249.7279942035675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26406912, + "combineLogicalBytes": 26406912, + "fanoutMean": 3.59765625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f12e4e42", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_8ea28bc7", + "comparisonKey": "4a85853c44cd5702", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:32.746949+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 100.35199671983719, + "p90": 110.30399799346924, + "p95": 114.94400352239609, + "p99": 119.26399916410446 + }, + "combine": { + "p50": 100.35199671983719, + "p90": 110.30399799346924, + "p95": 114.94400352239609, + "p99": 119.26399916410446 + }, + "roundtrip": { + "p50": 100.35199671983719, + "p90": 110.30399799346924, + "p95": 114.94400352239609, + "p99": 119.26399916410446 + }, + "isolatedSum": { + "p50": 200.70399343967438, + "p90": 220.60799598693848, + "p95": 229.88800704479218, + "p99": 238.52799832820892 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 100.38399696350098, + "p90": 109.43999886512756, + "p95": 114.01599645614624, + "p99": 120.09599804878235 + }, + "combine": { + "p50": 
100.38399696350098, + "p90": 109.43999886512756, + "p95": 114.01599645614624, + "p99": 120.09599804878235 + }, + "roundtrip": { + "p50": 100.38399696350098, + "p90": 109.43999886512756, + "p95": 114.01599645614624, + "p99": 120.09599804878235 + }, + "isolatedSum": { + "p50": 200.76799392700195, + "p90": 218.87999773025513, + "p95": 228.03199291229248, + "p99": 240.1919960975647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 99.10400211811066, + "p90": 108.64000022411346, + "p95": 112.83200234174728, + "p99": 117.76000261306763 + }, + "combine": { + "p50": 99.10400211811066, + "p90": 108.64000022411346, + "p95": 112.83200234174728, + "p99": 117.76000261306763 + }, + "roundtrip": { + "p50": 99.10400211811066, + "p90": 108.64000022411346, + "p95": 112.83200234174728, + "p99": 117.76000261306763 + }, + "isolatedSum": { + "p50": 198.2080042362213, + "p90": 217.28000044822693, + "p95": 225.66400468349457, + "p99": 235.52000522613525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 100.54399818181992, + "p90": 109.43999886512756, + "p95": 113.0559965968132, + "p99": 119.45600062608719 + }, + "combine": { + "p50": 100.54399818181992, + "p90": 109.43999886512756, + "p95": 113.0559965968132, + "p99": 119.45600062608719 + }, + "roundtrip": { + "p50": 100.54399818181992, + "p90": 109.43999886512756, + "p95": 113.0559965968132, + "p99": 119.45600062608719 + }, + "isolatedSum": { + "p50": 201.08799636363983, + "p90": 218.87999773025513, + "p95": 226.1119931936264, + "p99": 
238.91200125217438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 100.47999769449234, + "p90": 109.92000252008438, + "p95": 115.26399850845337, + "p99": 117.91999638080597 + }, + "combine": { + "p50": 100.47999769449234, + "p90": 109.92000252008438, + "p95": 115.26399850845337, + "p99": 117.91999638080597 + }, + "roundtrip": { + "p50": 100.47999769449234, + "p90": 109.92000252008438, + "p95": 115.26399850845337, + "p99": 117.91999638080597 + }, + "isolatedSum": { + "p50": 200.95999538898468, + "p90": 219.84000504016876, + "p95": 230.52799701690674, + "p99": 235.83999276161194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 100.38399696350098, + "p90": 108.22399705648422, + "p95": 112.70400136709213, + "p99": 117.34399944543839 + }, + "combine": { + "p50": 100.38399696350098, + "p90": 108.22399705648422, + "p95": 112.70400136709213, + "p99": 117.34399944543839 + }, + "roundtrip": { + "p50": 100.38399696350098, + "p90": 108.22399705648422, + "p95": 112.70400136709213, + "p99": 117.34399944543839 + }, + "isolatedSum": { + "p50": 200.76799392700195, + "p90": 216.44799411296844, + "p95": 225.40800273418427, + "p99": 234.68799889087677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 102.36799716949463, 
+ "p90": 112.5119999051094, + "p95": 115.87200313806534, + "p99": 121.69600278139114 + }, + "combine": { + "p50": 102.36799716949463, + "p90": 112.5119999051094, + "p95": 115.87200313806534, + "p99": 121.69600278139114 + }, + "roundtrip": { + "p50": 102.36799716949463, + "p90": 112.5119999051094, + "p95": 115.87200313806534, + "p99": 121.69600278139114 + }, + "isolatedSum": { + "p50": 204.73599433898926, + "p90": 225.0239998102188, + "p95": 231.74400627613068, + "p99": 243.3920055627823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 102.36799716949463, + "p90": 111.16799712181091, + "p95": 115.167997777462, + "p99": 119.74400281906128 + }, + "combine": { + "p50": 102.36799716949463, + "p90": 111.16799712181091, + "p95": 115.167997777462, + "p99": 119.74400281906128 + }, + "roundtrip": { + "p50": 102.36799716949463, + "p90": 111.16799712181091, + "p95": 115.167997777462, + "p99": 119.74400281906128 + }, + "isolatedSum": { + "p50": 204.73599433898926, + "p90": 222.33599424362183, + "p95": 230.335995554924, + "p99": 239.48800563812256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-75b2d15b", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|decode|normal|none|none|0|tuned||3f8ffeba9f65629", + "colorKey": "gb300_511d7c55", + "comparisonKey": "3b859939edf6358a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:53.538664+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + 
"backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3f8ffeba9f65629", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 97.6639986038208, + "p90": 107.71200060844421, + "p95": 111.455999314785, + "p99": 116.7680025100708 + }, + "combine": { + "p50": 97.6639986038208, + "p90": 107.71200060844421, + "p95": 111.455999314785, + "p99": 116.7680025100708 + }, + "roundtrip": { + "p50": 97.6639986038208, + "p90": 
107.71200060844421, + "p95": 111.455999314785, + "p99": 116.7680025100708 + }, + "isolatedSum": { + "p50": 195.3279972076416, + "p90": 215.42400121688843, + "p95": 222.91199862957, + "p99": 233.5360050201416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 71680, + "combineLogicalBytes": 71680, + "fanoutMean": 1.25, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 97.21600264310837, + "p90": 106.33599758148193, + "p95": 109.72800105810165, + "p99": 116.28799885511398 + }, + "combine": { + "p50": 97.21600264310837, + "p90": 106.33599758148193, + "p95": 109.72800105810165, + "p99": 116.28799885511398 + }, + "roundtrip": { + "p50": 97.21600264310837, + "p90": 106.33599758148193, + "p95": 109.72800105810165, + "p99": 116.28799885511398 + }, + "isolatedSum": { + "p50": 194.43200528621674, + "p90": 212.67199516296387, + "p95": 219.4560021162033, + "p99": 232.57599771022797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 1.375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 96.70399874448776, + "p90": 106.30399733781815, + "p95": 110.52799969911575, + "p99": 117.18399822711945 + }, + "combine": { + "p50": 96.70399874448776, + "p90": 106.30399733781815, + "p95": 110.52799969911575, + "p99": 117.18399822711945 + }, + "roundtrip": { + "p50": 96.70399874448776, + "p90": 106.30399733781815, + "p95": 110.52799969911575, + "p99": 117.18399822711945 + }, + "isolatedSum": { + "p50": 193.40799748897552, + "p90": 212.6079946756363, + "p95": 221.0559993982315, + "p99": 234.3679964542389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 286720, + "combineLogicalBytes": 286720, + "fanoutMean": 1.25, + "recvTokensMax": 4, + "stragglerRank": 
0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 97.72799909114838, + "p90": 107.93600231409073, + "p95": 112.19199746847153, + "p99": 118.97599697113037 + }, + "combine": { + "p50": 97.72799909114838, + "p90": 107.93600231409073, + "p95": 112.19199746847153, + "p99": 118.97599697113037 + }, + "roundtrip": { + "p50": 97.72799909114838, + "p90": 107.93600231409073, + "p95": 112.19199746847153, + "p99": 118.97599697113037 + }, + "isolatedSum": { + "p50": 195.45599818229675, + "p90": 215.87200462818146, + "p95": 224.38399493694305, + "p99": 237.95199394226074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 1.21875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 97.28000313043594, + "p90": 107.13600367307663, + "p95": 110.3999987244606, + "p99": 117.24799871444702 + }, + "combine": { + "p50": 97.28000313043594, + "p90": 107.13600367307663, + "p95": 110.3999987244606, + "p99": 117.24799871444702 + }, + "roundtrip": { + "p50": 97.28000313043594, + "p90": 107.13600367307663, + "p95": 110.3999987244606, + "p99": 117.24799871444702 + }, + "isolatedSum": { + "p50": 194.5600062608719, + "p90": 214.27200734615326, + "p95": 220.7999974489212, + "p99": 234.49599742889404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1161216, + "combineLogicalBytes": 1161216, + "fanoutMean": 1.265625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 97.6639986038208, + "p90": 108.35199803113937, + "p95": 111.64800077676773, + "p99": 117.91999638080597 + }, + "combine": { + "p50": 97.6639986038208, + "p90": 108.35199803113937, + "p95": 111.64800077676773, + "p99": 
117.91999638080597 + }, + "roundtrip": { + "p50": 97.6639986038208, + "p90": 108.35199803113937, + "p95": 111.64800077676773, + "p99": 117.91999638080597 + }, + "isolatedSum": { + "p50": 195.3279972076416, + "p90": 216.70399606227875, + "p95": 223.29600155353546, + "p99": 235.83999276161194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2279424, + "combineLogicalBytes": 2279424, + "fanoutMean": 1.2421875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 99.61599856615067, + "p90": 110.43199896812439, + "p95": 114.30399864912033, + "p99": 122.40000069141388 + }, + "combine": { + "p50": 99.61599856615067, + "p90": 110.43199896812439, + "p95": 114.30399864912033, + "p99": 122.40000069141388 + }, + "roundtrip": { + "p50": 99.61599856615067, + "p90": 110.43199896812439, + "p95": 114.30399864912033, + "p99": 122.40000069141388 + }, + "isolatedSum": { + "p50": 199.23199713230133, + "p90": 220.86399793624878, + "p95": 228.60799729824066, + "p99": 244.80000138282776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4587520, + "combineLogicalBytes": 4587520, + "fanoutMean": 1.25, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 99.74399954080582, + "p90": 108.15999656915665, + "p95": 112.8000020980835, + "p99": 117.60000139474869 + }, + "combine": { + "p50": 99.74399954080582, + "p90": 108.15999656915665, + "p95": 112.8000020980835, + "p99": 117.60000139474869 + }, + "roundtrip": { + "p50": 99.74399954080582, + "p90": 108.15999656915665, + "p95": 112.8000020980835, + "p99": 117.60000139474869 + }, + "isolatedSum": { + "p50": 199.48799908161163, + "p90": 216.3199931383133, + "p95": 225.600004196167, + "p99": 235.20000278949738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-297572f6", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|decode|normal|none|none|0|tuned||e9a6e5febe08793", + "colorKey": "gb300_30e19049", + "comparisonKey": "eee44a47a281f693", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:53.878385+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e9a6e5febe08793", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86328125, + "eplbImbalanceAfter": 1.0003348214285714, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 97.69599884748459, + "p90": 107.2319969534874, + "p95": 111.455999314785, + "p99": 118.07999759912491 + }, + "combine": { + "p50": 97.69599884748459, + "p90": 107.2319969534874, + "p95": 111.455999314785, + "p99": 118.07999759912491 + }, + "roundtrip": { + "p50": 97.69599884748459, + "p90": 107.2319969534874, + "p95": 111.455999314785, + "p99": 118.07999759912491 + }, + "isolatedSum": { + "p50": 195.39199769496918, + "p90": 214.4639939069748, + "p95": 222.91199862957, + "p99": 236.15999519824982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 97.69599884748459, + "p90": 106.78400099277496, + "p95": 110.68800091743469, + "p99": 116.31999909877777 + }, + "combine": { + "p50": 97.69599884748459, + "p90": 106.78400099277496, + "p95": 110.68800091743469, + "p99": 116.31999909877777 + }, + "roundtrip": { + "p50": 97.69599884748459, + "p90": 106.78400099277496, + "p95": 110.68800091743469, + "p99": 116.31999909877777 + }, + "isolatedSum": { + "p50": 195.39199769496918, + "p90": 213.56800198554993, + "p95": 221.37600183486938, + "p99": 232.63999819755554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 16, + "dispatch": { + "p50": 99.74399954080582, + "p90": 108.2879975438118, + "p95": 112.67200112342834, + "p99": 118.8800036907196 + }, + "combine": { + "p50": 99.74399954080582, + "p90": 108.2879975438118, + "p95": 112.67200112342834, + "p99": 118.8800036907196 + }, + "roundtrip": { + "p50": 99.74399954080582, + "p90": 108.2879975438118, + "p95": 112.67200112342834, + "p99": 118.8800036907196 + }, + "isolatedSum": { + "p50": 199.48799908161163, + "p90": 216.5759950876236, + "p95": 225.3440022468567, + "p99": 237.7600073814392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 788480, + "combineLogicalBytes": 788480, + "fanoutMean": 3.4375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 100.03200173377991, + "p90": 108.76800119876862, + "p95": 113.08799684047699, + "p99": 124.15999919176102 + }, + "combine": { + "p50": 100.03200173377991, + "p90": 108.76800119876862, + "p95": 113.08799684047699, + "p99": 124.15999919176102 + }, + "roundtrip": { + "p50": 100.03200173377991, + "p90": 108.76800119876862, + "p95": 113.08799684047699, + "p99": 124.15999919176102 + }, + "isolatedSum": { + "p50": 200.06400346755981, + "p90": 217.53600239753723, + "p95": 226.17599368095398, + "p99": 248.31999838352203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 99.55199807882309, + "p90": 109.79200154542923, + "p95": 113.69600147008896, + "p99": 119.87199634313583 + }, + "combine": { + "p50": 99.55199807882309, + "p90": 109.79200154542923, + "p95": 113.69600147008896, + "p99": 119.87199634313583 + }, + "roundtrip": { + "p50": 99.55199807882309, + "p90": 109.79200154542923, + 
"p95": 113.69600147008896, + "p99": 119.87199634313583 + }, + "isolatedSum": { + "p50": 199.10399615764618, + "p90": 219.58400309085846, + "p95": 227.39200294017792, + "p99": 239.74399268627167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3196928, + "combineLogicalBytes": 3196928, + "fanoutMean": 3.484375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 100.47999769449234, + "p90": 109.3439981341362, + "p95": 112.5119999051094, + "p99": 117.69600212574005 + }, + "combine": { + "p50": 100.47999769449234, + "p90": 109.3439981341362, + "p95": 112.5119999051094, + "p99": 117.69600212574005 + }, + "roundtrip": { + "p50": 100.47999769449234, + "p90": 109.3439981341362, + "p95": 112.5119999051094, + "p99": 117.69600212574005 + }, + "isolatedSum": { + "p50": 200.95999538898468, + "p90": 218.6879962682724, + "p95": 225.0239998102188, + "p99": 235.3920042514801 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6408192, + "combineLogicalBytes": 6408192, + "fanoutMean": 3.4921875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 101.47199779748917, + "p90": 110.33599823713303, + "p95": 115.80800265073776, + "p99": 124.03199821710587 + }, + "combine": { + "p50": 101.47199779748917, + "p90": 110.33599823713303, + "p95": 115.80800265073776, + "p99": 124.03199821710587 + }, + "roundtrip": { + "p50": 101.47199779748917, + "p90": 110.33599823713303, + "p95": 115.80800265073776, + "p99": 124.03199821710587 + }, + "isolatedSum": { + "p50": 202.94399559497833, + "p90": 220.67199647426605, + "p95": 231.61600530147552, + "p99": 248.06399643421173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12773376, + "combineLogicalBytes": 12773376, + "fanoutMean": 3.48046875, + "recvTokensMax": 4, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 102.1760031580925, + "p90": 110.17599701881409, + "p95": 115.10399729013443, + "p99": 121.18399888277054 + }, + "combine": { + "p50": 102.1760031580925, + "p90": 110.17599701881409, + "p95": 115.10399729013443, + "p99": 121.18399888277054 + }, + "roundtrip": { + "p50": 102.1760031580925, + "p90": 110.17599701881409, + "p95": 115.10399729013443, + "p99": 121.18399888277054 + }, + "isolatedSum": { + "p50": 204.352006316185, + "p90": 220.35199403762817, + "p95": 230.20799458026886, + "p99": 242.36799776554108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25661440, + "combineLogicalBytes": 25661440, + "fanoutMean": 3.49609375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c4eb484a", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|decode|normal|none|none|0|tuned||e596902aaaeb56c", + "colorKey": "gb300_95126280", + "comparisonKey": "164b2756abc7b0ff", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:34.598058+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e596902aaaeb56c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 98.36799651384354, + "p90": 107.80800133943558, + "p95": 111.04000359773636, + "p99": 117.18399822711945 + }, + "combine": { + "p50": 98.36799651384354, + "p90": 107.80800133943558, + "p95": 111.04000359773636, + "p99": 117.18399822711945 + }, + "roundtrip": { + "p50": 98.36799651384354, + "p90": 107.80800133943558, + "p95": 111.04000359773636, + "p99": 117.18399822711945 + }, + "isolatedSum": { + "p50": 196.73599302768707, + "p90": 215.61600267887115, + "p95": 222.08000719547272, + "p99": 234.3679964542389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 99.35999661684036, + "p90": 108.31999778747559, + "p95": 111.77600175142288, + "p99": 117.66400188207626 + }, + 
"combine": { + "p50": 99.35999661684036, + "p90": 108.31999778747559, + "p95": 111.77600175142288, + "p99": 117.66400188207626 + }, + "roundtrip": { + "p50": 99.35999661684036, + "p90": 108.31999778747559, + "p95": 111.77600175142288, + "p99": 117.66400188207626 + }, + "isolatedSum": { + "p50": 198.71999323368073, + "p90": 216.63999557495117, + "p95": 223.55200350284576, + "p99": 235.32800376415253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 387072, + "combineLogicalBytes": 387072, + "fanoutMean": 3.375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 99.23200309276581, + "p90": 109.72800105810165, + "p95": 112.99200356006622, + "p99": 118.17599833011627 + }, + "combine": { + "p50": 99.23200309276581, + "p90": 109.72800105810165, + "p95": 112.99200356006622, + "p99": 118.17599833011627 + }, + "roundtrip": { + "p50": 99.23200309276581, + "p90": 109.72800105810165, + "p95": 112.99200356006622, + "p99": 118.17599833011627 + }, + "isolatedSum": { + "p50": 198.46400618553162, + "p90": 219.4560021162033, + "p95": 225.98400712013245, + "p99": 236.35199666023254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 731136, + "combineLogicalBytes": 731136, + "fanoutMean": 3.1875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 99.96800124645233, + "p90": 110.55999994277954, + "p95": 114.56000059843063, + "p99": 119.55200135707855 + }, + "combine": { + "p50": 99.96800124645233, + "p90": 110.55999994277954, + "p95": 114.56000059843063, + "p99": 119.55200135707855 + }, + "roundtrip": { + "p50": 99.96800124645233, + "p90": 110.55999994277954, + "p95": 114.56000059843063, + "p99": 119.55200135707855 + }, + "isolatedSum": { + "p50": 199.93600249290466, + "p90": 221.11999988555908, + "p95": 
229.12000119686127, + "p99": 239.1040027141571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1519616, + "combineLogicalBytes": 1519616, + "fanoutMean": 3.3125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 99.04000163078308, + "p90": 108.03200304508209, + "p95": 112.03200370073318, + "p99": 120.19199877977371 + }, + "combine": { + "p50": 99.04000163078308, + "p90": 108.03200304508209, + "p95": 112.03200370073318, + "p99": 120.19199877977371 + }, + "roundtrip": { + "p50": 99.04000163078308, + "p90": 108.03200304508209, + "p95": 112.03200370073318, + "p99": 120.19199877977371 + }, + "isolatedSum": { + "p50": 198.08000326156616, + "p90": 216.06400609016418, + "p95": 224.06400740146637, + "p99": 240.38399755954742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3082240, + "combineLogicalBytes": 3082240, + "fanoutMean": 3.359375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 98.52799773216248, + "p90": 108.19199681282043, + "p95": 111.61600053310394, + "p99": 115.23199826478958 + }, + "combine": { + "p50": 98.52799773216248, + "p90": 108.19199681282043, + "p95": 111.61600053310394, + "p99": 115.23199826478958 + }, + "roundtrip": { + "p50": 98.52799773216248, + "p90": 108.19199681282043, + "p95": 111.61600053310394, + "p99": 115.23199826478958 + }, + "isolatedSum": { + "p50": 197.05599546432495, + "p90": 216.38399362564087, + "p95": 223.23200106620789, + "p99": 230.46399652957916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6121472, + "combineLogicalBytes": 6121472, + "fanoutMean": 3.3359375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + 
"p50": 101.53599828481674, + "p90": 111.80800199508667, + "p95": 115.80800265073776, + "p99": 124.4800016283989 + }, + "combine": { + "p50": 101.53599828481674, + "p90": 111.80800199508667, + "p95": 115.80800265073776, + "p99": 124.4800016283989 + }, + "roundtrip": { + "p50": 101.53599828481674, + "p90": 111.80800199508667, + "p95": 115.80800265073776, + "p99": 124.4800016283989 + }, + "isolatedSum": { + "p50": 203.07199656963348, + "p90": 223.61600399017334, + "p95": 231.61600530147552, + "p99": 248.9600032567978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12271616, + "combineLogicalBytes": 12271616, + "fanoutMean": 3.34375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 102.20800340175629, + "p90": 111.29599809646606, + "p95": 114.52800035476685, + "p99": 121.24799937009811 + }, + "combine": { + "p50": 102.20800340175629, + "p90": 111.29599809646606, + "p95": 114.52800035476685, + "p99": 121.24799937009811 + }, + "roundtrip": { + "p50": 102.20800340175629, + "p90": 111.29599809646606, + "p95": 114.52800035476685, + "p99": 121.24799937009811 + }, + "isolatedSum": { + "p50": 204.41600680351257, + "p90": 222.59199619293213, + "p95": 229.0560007095337, + "p99": 242.49599874019623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1c8dfbf1", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|decode|normal|none|none|0|tuned||194008255dcd869", + "colorKey": "gb300_e4bc70ce", + "comparisonKey": "0c208a7876e41e0a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:44.651636+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "194008255dcd869", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.865234375, + "eplbImbalanceAfter": 1.0003580729166668, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 101.15200281143188, + "p90": 110.46399921178818, + "p95": 116.15999788045883, + "p99": 120.41600048542023 + }, + "combine": { + "p50": 101.15200281143188, + "p90": 110.46399921178818, + "p95": 116.15999788045883, + 
"p99": 120.41600048542023 + }, + "roundtrip": { + "p50": 101.15200281143188, + "p90": 110.46399921178818, + "p95": 116.15999788045883, + "p99": 120.41600048542023 + }, + "isolatedSum": { + "p50": 202.30400562286377, + "p90": 220.92799842357635, + "p95": 232.31999576091766, + "p99": 240.83200097084045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 100.54399818181992, + "p90": 110.30399799346924, + "p95": 115.84000289440155, + "p99": 121.05599790811539 + }, + "combine": { + "p50": 100.54399818181992, + "p90": 110.30399799346924, + "p95": 115.84000289440155, + "p99": 121.05599790811539 + }, + "roundtrip": { + "p50": 100.54399818181992, + "p90": 110.30399799346924, + "p95": 115.84000289440155, + "p99": 121.05599790811539 + }, + "isolatedSum": { + "p50": 201.08799636363983, + "p90": 220.60799598693848, + "p95": 231.6800057888031, + "p99": 242.11199581623077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 3, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 101.95200145244598, + "p90": 113.3119985461235, + "p95": 116.5120005607605, + "p99": 121.91999703645706 + }, + "combine": { + "p50": 101.95200145244598, + "p90": 113.3119985461235, + "p95": 116.5120005607605, + "p99": 121.91999703645706 + }, + "roundtrip": { + "p50": 101.95200145244598, + "p90": 113.3119985461235, + "p95": 116.5120005607605, + "p99": 121.91999703645706 + }, + "isolatedSum": { + "p50": 203.90400290489197, + "p90": 226.623997092247, + "p95": 233.024001121521, + "p99": 243.83999407291412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 774144, + 
"combineLogicalBytes": 774144, + "fanoutMean": 3.375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 102.14400291442871, + "p90": 113.02399635314941, + "p95": 116.15999788045883, + "p99": 121.05599790811539 + }, + "combine": { + "p50": 102.14400291442871, + "p90": 113.02399635314941, + "p95": 116.15999788045883, + "p99": 121.05599790811539 + }, + "roundtrip": { + "p50": 102.14400291442871, + "p90": 113.02399635314941, + "p95": 116.15999788045883, + "p99": 121.05599790811539 + }, + "isolatedSum": { + "p50": 204.28800582885742, + "p90": 226.04799270629883, + "p95": 232.31999576091766, + "p99": 242.11199581623077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 101.9200012087822, + "p90": 112.28799819946289, + "p95": 115.61600118875504, + "p99": 124.9919980764389 + }, + "combine": { + "p50": 101.9200012087822, + "p90": 112.28799819946289, + "p95": 115.61600118875504, + "p99": 124.9919980764389 + }, + "roundtrip": { + "p50": 101.9200012087822, + "p90": 112.28799819946289, + "p95": 115.61600118875504, + "p99": 124.9919980764389 + }, + "isolatedSum": { + "p50": 203.8400024175644, + "p90": 224.57599639892578, + "p95": 231.23200237751007, + "p99": 249.9839961528778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3268608, + "combineLogicalBytes": 3268608, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 101.75999999046326, + "p90": 110.84800213575363, + "p95": 115.7120019197464, + "p99": 120.25599926710129 + }, + "combine": { + 
"p50": 101.75999999046326, + "p90": 110.84800213575363, + "p95": 115.7120019197464, + "p99": 120.25599926710129 + }, + "roundtrip": { + "p50": 101.75999999046326, + "p90": 110.84800213575363, + "p95": 115.7120019197464, + "p99": 120.25599926710129 + }, + "isolatedSum": { + "p50": 203.5199999809265, + "p90": 221.69600427150726, + "p95": 231.4240038394928, + "p99": 240.51199853420258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6393856, + "combineLogicalBytes": 6393856, + "fanoutMean": 3.484375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 104.35199737548828, + "p90": 114.68800157308578, + "p95": 117.8240031003952, + "p99": 123.3920007944107 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 114.68800157308578, + "p95": 117.8240031003952, + "p99": 123.3920007944107 + }, + "roundtrip": { + "p50": 104.35199737548828, + "p90": 114.68800157308578, + "p95": 117.8240031003952, + "p99": 123.3920007944107 + }, + "isolatedSum": { + "p50": 208.70399475097656, + "p90": 229.37600314617157, + "p95": 235.6480062007904, + "p99": 246.7840015888214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13045760, + "combineLogicalBytes": 13045760, + "fanoutMean": 3.5546875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 105.15200346708298, + "p90": 115.13599753379822, + "p95": 119.00799721479416, + "p99": 123.58400225639343 + }, + "combine": { + "p50": 105.15200346708298, + "p90": 115.13599753379822, + "p95": 119.00799721479416, + "p99": 123.58400225639343 + }, + "roundtrip": { + "p50": 105.15200346708298, + "p90": 115.13599753379822, + "p95": 119.00799721479416, + "p99": 123.58400225639343 + }, + "isolatedSum": { + "p50": 210.30400693416595, + "p90": 230.27199506759644, + "p95": 
238.01599442958832, + "p99": 247.16800451278687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26263552, + "combineLogicalBytes": 26263552, + "fanoutMean": 3.578125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-659ee8b4", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_3fb75847", + "comparisonKey": "0c0924d2bc3f7c84", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:11.097261+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 100.8640006184578, + "p90": 110.59200018644333, + "p95": 116.22399836778641, + "p99": 119.6800023317337 + }, + "combine": { + "p50": 100.8640006184578, + "p90": 110.59200018644333, + "p95": 116.22399836778641, + "p99": 119.6800023317337 + }, + "roundtrip": { + "p50": 100.8640006184578, + "p90": 110.59200018644333, + "p95": 116.22399836778641, + "p99": 119.6800023317337 + }, + "isolatedSum": { + "p50": 201.7280012369156, + "p90": 221.18400037288666, + "p95": 232.44799673557281, + "p99": 239.3600046634674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 101.59999877214432, + "p90": 111.58400028944016, + "p95": 114.81600254774094, + "p99": 119.61600184440613 + }, + "combine": { + "p50": 101.59999877214432, + "p90": 111.58400028944016, + "p95": 114.81600254774094, + "p99": 119.61600184440613 + }, + "roundtrip": { + "p50": 101.59999877214432, + "p90": 111.58400028944016, + "p95": 114.81600254774094, + "p99": 119.61600184440613 + }, + "isolatedSum": { + "p50": 203.19999754428864, + "p90": 223.1680005788803, + "p95": 229.63200509548187, + "p99": 239.23200368881226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 102.01600193977356, + "p90": 113.53600025177002, + "p95": 118.367999792099, + "p99": 127.10399925708771 + }, + "combine": { + "p50": 102.01600193977356, + "p90": 113.53600025177002, + "p95": 118.367999792099, + "p99": 127.10399925708771 + }, + "roundtrip": { + "p50": 102.01600193977356, + "p90": 113.53600025177002, + "p95": 118.367999792099, + "p99": 127.10399925708771 + }, + "isolatedSum": { + "p50": 204.03200387954712, + "p90": 227.07200050354004, + "p95": 236.735999584198, + "p99": 254.20799851417542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 102.46399790048599, + "p90": 112.64000087976456, + "p95": 116.48000031709671, + "p99": 121.11999839544296 + }, + "combine": { + "p50": 102.46399790048599, + "p90": 112.64000087976456, + "p95": 116.48000031709671, + "p99": 121.11999839544296 + }, + "roundtrip": { + "p50": 102.46399790048599, + "p90": 112.64000087976456, + "p95": 116.48000031709671, + "p99": 121.11999839544296 + }, + "isolatedSum": { + "p50": 204.92799580097198, + "p90": 225.2800017595291, + "p95": 232.96000063419342, + "p99": 242.23999679088593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 103.07200253009796, + "p90": 112.19199746847153, + "p95": 116.12799763679504, + "p99": 121.69600278139114 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 112.19199746847153, + "p95": 116.12799763679504, + "p99": 121.69600278139114 + }, + "roundtrip": { + "p50": 
103.07200253009796, + "p90": 112.19199746847153, + "p95": 116.12799763679504, + "p99": 121.69600278139114 + }, + "isolatedSum": { + "p50": 206.14400506019592, + "p90": 224.38399493694305, + "p95": 232.2559952735901, + "p99": 243.3920055627823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 103.07200253009796, + "p90": 112.83200234174728, + "p95": 116.57600104808807, + "p99": 122.14399874210358 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 112.83200234174728, + "p95": 116.57600104808807, + "p99": 122.14399874210358 + }, + "roundtrip": { + "p50": 103.07200253009796, + "p90": 112.83200234174728, + "p95": 116.57600104808807, + "p99": 122.14399874210358 + }, + "isolatedSum": { + "p50": 206.14400506019592, + "p90": 225.66400468349457, + "p95": 233.15200209617615, + "p99": 244.28799748420715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 105.56799918413162, + "p90": 118.81600320339203, + "p95": 121.76000326871872, + "p99": 129.98400628566742 + }, + "combine": { + "p50": 105.56799918413162, + "p90": 118.81600320339203, + "p95": 121.76000326871872, + "p99": 129.98400628566742 + }, + "roundtrip": { + "p50": 105.56799918413162, + "p90": 118.81600320339203, + "p95": 121.76000326871872, + "p99": 129.98400628566742 + }, + "isolatedSum": { + "p50": 211.13599836826324, + "p90": 237.63200640678406, + "p95": 243.52000653743744, + "p99": 259.96801257133484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + 
"fanoutMean": 2.52734375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 105.05600273609161, + "p90": 115.64800143241882, + "p95": 119.32799965143204, + "p99": 127.55200266838074 + }, + "combine": { + "p50": 105.05600273609161, + "p90": 115.64800143241882, + "p95": 119.32799965143204, + "p99": 127.55200266838074 + }, + "roundtrip": { + "p50": 105.05600273609161, + "p90": 115.64800143241882, + "p95": 119.32799965143204, + "p99": 127.55200266838074 + }, + "isolatedSum": { + "p50": 210.11200547218323, + "p90": 231.29600286483765, + "p95": 238.65599930286407, + "p99": 255.10400533676147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0b1562b6", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_2c292f17", + "comparisonKey": "e3c444970935cee2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:21.809564+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 96.3520035147667, + "p90": 105.72800040245056, + "p95": 109.21599715948105, + "p99": 117.88800358772278 + }, + "combine": { + "p50": 96.3520035147667, + "p90": 105.72800040245056, + "p95": 109.21599715948105, + "p99": 117.88800358772278 + }, + "roundtrip": { + "p50": 96.3520035147667, + "p90": 105.72800040245056, + "p95": 109.21599715948105, + "p99": 117.88800358772278 + }, + "isolatedSum": { + "p50": 192.7040070295334, + "p90": 211.45600080490112, + "p95": 218.4319943189621, + "p99": 235.77600717544556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 
96.28800302743912, + "p90": 106.30399733781815, + "p95": 109.56799983978271, + "p99": 114.04799669981003 + }, + "combine": { + "p50": 96.28800302743912, + "p90": 106.30399733781815, + "p95": 109.56799983978271, + "p99": 114.04799669981003 + }, + "roundtrip": { + "p50": 96.28800302743912, + "p90": 106.30399733781815, + "p95": 109.56799983978271, + "p99": 114.04799669981003 + }, + "isolatedSum": { + "p50": 192.57600605487823, + "p90": 212.6079946756363, + "p95": 219.13599967956543, + "p99": 228.09599339962006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 97.6639986038208, + "p90": 107.4879989027977, + "p95": 111.48799955844879, + "p99": 120.41600048542023 + }, + "combine": { + "p50": 97.6639986038208, + "p90": 107.4879989027977, + "p95": 111.48799955844879, + "p99": 120.41600048542023 + }, + "roundtrip": { + "p50": 97.6639986038208, + "p90": 107.4879989027977, + "p95": 111.48799955844879, + "p99": 120.41600048542023 + }, + "isolatedSum": { + "p50": 195.3279972076416, + "p90": 214.9759978055954, + "p95": 222.97599911689758, + "p99": 240.83200097084045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 96.47999703884125, + "p90": 106.52799904346466, + "p95": 110.1439967751503, + "p99": 116.83200299739838 + }, + "combine": { + "p50": 96.47999703884125, + "p90": 106.52799904346466, + "p95": 110.1439967751503, + "p99": 116.83200299739838 + }, + "roundtrip": { + "p50": 96.47999703884125, + "p90": 106.52799904346466, + "p95": 110.1439967751503, + "p99": 116.83200299739838 + 
}, + "isolatedSum": { + "p50": 192.9599940776825, + "p90": 213.05599808692932, + "p95": 220.2879935503006, + "p99": 233.66400599479675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 97.05600142478943, + "p90": 105.43999820947647, + "p95": 109.95200276374817, + "p99": 115.48800021409988 + }, + "combine": { + "p50": 97.05600142478943, + "p90": 105.43999820947647, + "p95": 109.95200276374817, + "p99": 115.48800021409988 + }, + "roundtrip": { + "p50": 97.05600142478943, + "p90": 105.43999820947647, + "p95": 109.95200276374817, + "p99": 115.48800021409988 + }, + "isolatedSum": { + "p50": 194.11200284957886, + "p90": 210.87999641895294, + "p95": 219.90400552749634, + "p99": 230.97600042819977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 98.84800016880035, + "p90": 106.91200196743011, + "p95": 110.72000116109848, + "p99": 118.33599954843521 + }, + "combine": { + "p50": 98.84800016880035, + "p90": 106.91200196743011, + "p95": 110.72000116109848, + "p99": 118.33599954843521 + }, + "roundtrip": { + "p50": 98.84800016880035, + "p90": 106.91200196743011, + "p95": 110.72000116109848, + "p99": 118.33599954843521 + }, + "isolatedSum": { + "p50": 197.6960003376007, + "p90": 213.82400393486023, + "p95": 221.44000232219696, + "p99": 236.67199909687042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 100.47999769449234, + "p90": 109.56799983978271, + "p95": 113.34399878978729, + "p99": 120.7680031657219 + }, + "combine": { + "p50": 100.47999769449234, + "p90": 109.56799983978271, + "p95": 113.34399878978729, + "p99": 120.7680031657219 + }, + "roundtrip": { + "p50": 100.47999769449234, + "p90": 109.56799983978271, + "p95": 113.34399878978729, + "p99": 120.7680031657219 + }, + "isolatedSum": { + "p50": 200.95999538898468, + "p90": 219.13599967956543, + "p95": 226.68799757957458, + "p99": 241.5360063314438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 101.18400305509567, + "p90": 108.2879975438118, + "p95": 112.76800185441971, + "p99": 119.29599940776825 + }, + "combine": { + "p50": 101.18400305509567, + "p90": 108.2879975438118, + "p95": 112.76800185441971, + "p99": 119.29599940776825 + }, + "roundtrip": { + "p50": 101.18400305509567, + "p90": 108.2879975438118, + "p95": 112.76800185441971, + "p99": 119.29599940776825 + }, + "isolatedSum": { + "p50": 202.36800611019135, + "p90": 216.5759950876236, + "p95": 225.53600370883942, + "p99": 238.5919988155365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0b97e0b6", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_1d19e997", + "comparisonKey": "041e3a9f4e41733e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:13.877136+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 100.00000149011612, + "p90": 111.7440015077591, + "p95": 116.2559986114502, + "p99": 123.80799651145935 + }, + "combine": { + "p50": 
100.00000149011612, + "p90": 111.7440015077591, + "p95": 116.2559986114502, + "p99": 123.80799651145935 + }, + "roundtrip": { + "p50": 100.00000149011612, + "p90": 111.7440015077591, + "p95": 116.2559986114502, + "p99": 123.80799651145935 + }, + "isolatedSum": { + "p50": 200.00000298023224, + "p90": 223.4880030155182, + "p95": 232.5119972229004, + "p99": 247.6159930229187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 101.85600072145462, + "p90": 115.00799655914307, + "p95": 119.52000111341476, + "p99": 131.8719983100891 + }, + "combine": { + "p50": 101.85600072145462, + "p90": 115.00799655914307, + "p95": 119.52000111341476, + "p99": 131.8719983100891 + }, + "roundtrip": { + "p50": 101.85600072145462, + "p90": 115.00799655914307, + "p95": 119.52000111341476, + "p99": 131.8719983100891 + }, + "isolatedSum": { + "p50": 203.71200144290924, + "p90": 230.01599311828613, + "p95": 239.04000222682953, + "p99": 263.7439966201782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 101.82400047779083, + "p90": 114.78400230407715, + "p95": 119.48800086975098, + "p99": 130.43199479579926 + }, + "combine": { + "p50": 101.82400047779083, + "p90": 114.78400230407715, + "p95": 119.48800086975098, + "p99": 130.43199479579926 + }, + "roundtrip": { + "p50": 101.82400047779083, + "p90": 114.78400230407715, + "p95": 119.48800086975098, + "p99": 130.43199479579926 + }, + "isolatedSum": { + "p50": 203.64800095558167, + "p90": 229.5680046081543, + "p95": 238.97600173950195, + "p99": 
260.8639895915985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 101.21600329875946, + "p90": 112.70400136709213, + "p95": 117.18399822711945, + "p99": 127.71199643611908 + }, + "combine": { + "p50": 101.21600329875946, + "p90": 112.70400136709213, + "p95": 117.18399822711945, + "p99": 127.71199643611908 + }, + "roundtrip": { + "p50": 101.21600329875946, + "p90": 112.70400136709213, + "p95": 117.18399822711945, + "p99": 127.71199643611908 + }, + "isolatedSum": { + "p50": 202.43200659751892, + "p90": 225.40800273418427, + "p95": 234.3679964542389, + "p99": 255.42399287223816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 99.87200051546097, + "p90": 111.87200248241425, + "p95": 116.19199812412262, + "p99": 123.03999811410904 + }, + "combine": { + "p50": 99.87200051546097, + "p90": 111.87200248241425, + "p95": 116.19199812412262, + "p99": 123.03999811410904 + }, + "roundtrip": { + "p50": 99.87200051546097, + "p90": 111.87200248241425, + "p95": 116.19199812412262, + "p99": 123.03999811410904 + }, + "isolatedSum": { + "p50": 199.74400103092194, + "p90": 223.7440049648285, + "p95": 232.38399624824524, + "p99": 246.07999622821808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 101.9200012087822, + "p90": 
111.87200248241425, + "p95": 117.47200042009354, + "p99": 121.8239963054657 + }, + "combine": { + "p50": 101.9200012087822, + "p90": 111.87200248241425, + "p95": 117.47200042009354, + "p99": 121.8239963054657 + }, + "roundtrip": { + "p50": 101.9200012087822, + "p90": 111.87200248241425, + "p95": 117.47200042009354, + "p99": 121.8239963054657 + }, + "isolatedSum": { + "p50": 203.8400024175644, + "p90": 223.7440049648285, + "p95": 234.94400084018707, + "p99": 243.6479926109314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 103.55199873447418, + "p90": 115.52000045776367, + "p95": 120.2239990234375, + "p99": 127.16799974441528 + }, + "combine": { + "p50": 103.55199873447418, + "p90": 115.52000045776367, + "p95": 120.2239990234375, + "p99": 127.16799974441528 + }, + "roundtrip": { + "p50": 103.55199873447418, + "p90": 115.52000045776367, + "p95": 120.2239990234375, + "p99": 127.16799974441528 + }, + "isolatedSum": { + "p50": 207.10399746894836, + "p90": 231.04000091552734, + "p95": 240.447998046875, + "p99": 254.33599948883057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 103.93600165843964, + "p90": 116.67200177907944, + "p95": 120.92799693346024, + "p99": 132.03200697898865 + }, + "combine": { + "p50": 103.93600165843964, + "p90": 116.67200177907944, + "p95": 120.92799693346024, + "p99": 132.03200697898865 + }, + "roundtrip": { + "p50": 103.93600165843964, + "p90": 116.67200177907944, + "p95": 120.92799693346024, + "p99": 132.03200697898865 + }, + 
"isolatedSum": { + "p50": 207.87200331687927, + "p90": 233.34400355815887, + "p95": 241.85599386692047, + "p99": 264.0640139579773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8365af8e", + "identity": "gb300|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_c23c12d7", + "comparisonKey": "cf513264361603db", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:33.382861+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 112.5440001487732, + "p90": 122.94399738311768, + "p95": 125.88800489902496, + "p99": 134.0479999780655 + }, + "combine": { + "p50": 112.5440001487732, + "p90": 122.94399738311768, + "p95": 125.88800489902496, + "p99": 134.0479999780655 + }, + "roundtrip": { + "p50": 112.5440001487732, + "p90": 122.94399738311768, + "p95": 125.88800489902496, + "p99": 134.0479999780655 + }, + "isolatedSum": { + "p50": 225.0880002975464, + "p90": 245.88799476623535, + "p95": 251.77600979804993, + "p99": 268.095999956131 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 107520, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 111.77600175142288, + "p90": 120.44800072908401, + "p95": 124.60800260305405, + "p99": 130.3360015153885 + }, + "combine": { + "p50": 111.77600175142288, + "p90": 120.44800072908401, + "p95": 124.60800260305405, + "p99": 130.3360015153885 + }, + "roundtrip": { + "p50": 111.77600175142288, + "p90": 120.44800072908401, + "p95": 124.60800260305405, + "p99": 130.3360015153885 + }, + "isolatedSum": { + "p50": 223.55200350284576, + "p90": 240.89600145816803, + "p95": 249.2160052061081, + "p99": 260.672003030777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 4, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 113.18399757146835, + "p90": 122.40000069141388, + "p95": 126.01600587368011, + "p99": 133.37600231170654 + }, + "combine": { + "p50": 113.18399757146835, + "p90": 122.40000069141388, + "p95": 126.01600587368011, + "p99": 133.37600231170654 + }, + "roundtrip": { + "p50": 113.18399757146835, + "p90": 122.40000069141388, + "p95": 126.01600587368011, + "p99": 133.37600231170654 + }, + "isolatedSum": { + "p50": 226.3679951429367, + "p90": 244.80000138282776, + "p95": 252.03201174736023, + "p99": 266.7520046234131 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 415744, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 111.93600296974182, + "p90": 121.76000326871872, + "p95": 126.39999389648438, + "p99": 137.56799697875977 + }, + "combine": { + "p50": 111.93600296974182, + "p90": 121.76000326871872, + "p95": 126.39999389648438, + "p99": 137.56799697875977 + }, + "roundtrip": { + "p50": 111.93600296974182, + "p90": 121.76000326871872, + "p95": 126.39999389648438, + "p99": 137.56799697875977 + }, + "isolatedSum": { + "p50": 223.87200593948364, + "p90": 243.52000653743744, + "p95": 252.79998779296875, + "p99": 275.13599395751953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 113.24799805879593, + "p90": 122.6240023970604, + "p95": 126.62400305271149, + "p99": 134.68800485134125 + }, + "combine": { + "p50": 113.24799805879593, + "p90": 122.6240023970604, + "p95": 
126.62400305271149, + "p99": 134.68800485134125 + }, + "roundtrip": { + "p50": 113.24799805879593, + "p90": 122.6240023970604, + "p95": 126.62400305271149, + "p99": 134.68800485134125 + }, + "isolatedSum": { + "p50": 226.49599611759186, + "p90": 245.2480047941208, + "p95": 253.24800610542297, + "p99": 269.3760097026825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 113.3119985461235, + "p90": 122.20799922943115, + "p95": 126.75200402736664, + "p99": 135.45599579811096 + }, + "combine": { + "p50": 113.3119985461235, + "p90": 122.20799922943115, + "p95": 126.75200402736664, + "p99": 135.45599579811096 + }, + "roundtrip": { + "p50": 113.3119985461235, + "p90": 122.20799922943115, + "p95": 126.75200402736664, + "p99": 135.45599579811096 + }, + "isolatedSum": { + "p50": 226.623997092247, + "p90": 244.4159984588623, + "p95": 253.50400805473328, + "p99": 270.9119915962219 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 116.41599982976913, + "p90": 127.10399925708771, + "p95": 130.048006772995, + "p99": 136.3839954137802 + }, + "combine": { + "p50": 116.41599982976913, + "p90": 127.10399925708771, + "p95": 130.048006772995, + "p99": 136.3839954137802 + }, + "roundtrip": { + "p50": 116.41599982976913, + "p90": 127.10399925708771, + "p95": 130.048006772995, + "p99": 136.3839954137802 + }, + "isolatedSum": { + "p50": 232.83199965953827, + "p90": 254.20799851417542, + "p95": 260.09601354599, + "p99": 272.7679908275604 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 6759424, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 116.2559986114502, + "p90": 125.11999905109406, + "p95": 128.4160017967224, + "p99": 135.00800728797913 + }, + "combine": { + "p50": 116.2559986114502, + "p90": 125.11999905109406, + "p95": 128.4160017967224, + "p99": 135.00800728797913 + }, + "roundtrip": { + "p50": 116.2559986114502, + "p90": 125.11999905109406, + "p95": 128.4160017967224, + "p99": 135.00800728797913 + }, + "isolatedSum": { + "p50": 232.5119972229004, + "p90": 250.2399981021881, + "p95": 256.8320035934448, + "p99": 270.01601457595825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e20a6b4b", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_53d33fc0", + "comparisonKey": "1c9470d442ee3891", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:12.838893+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 126.94400548934937, + "p90": 142.68800616264343, + "p95": 148.47999811172485, + "p99": 163.4880006313324 + }, + "combine": { + "p50": 126.94400548934937, + "p90": 142.68800616264343, + "p95": 148.47999811172485, + "p99": 163.4880006313324 + }, + "roundtrip": { + "p50": 126.94400548934937, + "p90": 142.68800616264343, + "p95": 148.47999811172485, + "p99": 163.4880006313324 + }, + "isolatedSum": { + "p50": 253.88801097869873, + "p90": 285.37601232528687, + "p95": 296.9599962234497, + "p99": 326.9760012626648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 123.64800274372101, + 
"p90": 140.9599930047989, + "p95": 147.5519984960556, + "p99": 155.39200603961945 + }, + "combine": { + "p50": 123.64800274372101, + "p90": 140.9599930047989, + "p95": 147.5519984960556, + "p99": 155.39200603961945 + }, + "roundtrip": { + "p50": 123.64800274372101, + "p90": 140.9599930047989, + "p95": 147.5519984960556, + "p99": 155.39200603961945 + }, + "isolatedSum": { + "p50": 247.29600548744202, + "p90": 281.9199860095978, + "p95": 295.1039969921112, + "p99": 310.7840120792389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 123.07199835777283, + "p90": 142.11200177669525, + "p95": 149.4400054216385, + "p99": 158.33599865436554 + }, + "combine": { + "p50": 123.07199835777283, + "p90": 142.11200177669525, + "p95": 149.4400054216385, + "p99": 158.33599865436554 + }, + "roundtrip": { + "p50": 123.07199835777283, + "p90": 142.11200177669525, + "p95": 149.4400054216385, + "p99": 158.33599865436554 + }, + "isolatedSum": { + "p50": 246.14399671554565, + "p90": 284.2240035533905, + "p95": 298.880010843277, + "p99": 316.6719973087311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.85599720478058, + "p90": 143.5520052909851, + "p95": 149.08799529075623, + "p99": 177.5359958410263 + }, + "combine": { + "p50": 125.85599720478058, + "p90": 143.5520052909851, + "p95": 149.08799529075623, + "p99": 177.5359958410263 + }, + "roundtrip": { + "p50": 125.85599720478058, + "p90": 143.5520052909851, + "p95": 149.08799529075623, + "p99": 177.5359958410263 + }, + 
"isolatedSum": { + "p50": 251.71199440956116, + "p90": 287.1040105819702, + "p95": 298.17599058151245, + "p99": 355.0719916820526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 126.30400061607361, + "p90": 143.13599467277527, + "p95": 148.54399859905243, + "p99": 165.98400473594666 + }, + "combine": { + "p50": 126.30400061607361, + "p90": 143.13599467277527, + "p95": 148.54399859905243, + "p99": 165.98400473594666 + }, + "roundtrip": { + "p50": 126.30400061607361, + "p90": 143.13599467277527, + "p95": 148.54399859905243, + "p99": 165.98400473594666 + }, + "isolatedSum": { + "p50": 252.60800123214722, + "p90": 286.27198934555054, + "p95": 297.08799719810486, + "p99": 331.9680094718933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.4800016283989, + "p90": 142.68800616264343, + "p95": 149.56800639629364, + "p99": 163.55200111865997 + }, + "combine": { + "p50": 124.4800016283989, + "p90": 142.68800616264343, + "p95": 149.56800639629364, + "p99": 163.55200111865997 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 142.68800616264343, + "p95": 149.56800639629364, + "p99": 163.55200111865997 + }, + "isolatedSum": { + "p50": 248.9600032567978, + "p90": 285.37601232528687, + "p95": 299.1360127925873, + "p99": 327.10400223731995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 124.95999783277512, + "p90": 144.9279934167862, + "p95": 153.888002038002, + "p99": 182.559996843338 + }, + "combine": { + "p50": 124.95999783277512, + "p90": 144.9279934167862, + "p95": 153.888002038002, + "p99": 182.559996843338 + }, + "roundtrip": { + "p50": 124.95999783277512, + "p90": 144.9279934167862, + "p95": 153.888002038002, + "p99": 182.559996843338 + }, + "isolatedSum": { + "p50": 249.91999566555023, + "p90": 289.8559868335724, + "p95": 307.776004076004, + "p99": 365.119993686676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.56800246238708, + "p90": 143.2960033416748, + "p95": 149.1840034723282, + "p99": 161.24799847602844 + }, + "combine": { + "p50": 125.56800246238708, + "p90": 143.2960033416748, + "p95": 149.1840034723282, + "p99": 161.24799847602844 + }, + "roundtrip": { + "p50": 125.56800246238708, + "p90": 143.2960033416748, + "p95": 149.1840034723282, + "p99": 161.24799847602844 + }, + "isolatedSum": { + "p50": 251.13600492477417, + "p90": 286.5920066833496, + "p95": 298.3680069446564, + "p99": 322.4959969520569 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-563a565a", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||f1c99f5cf8ca9ed", + "colorKey": "gb300_120a7978", + "comparisonKey": "111b97426349f513", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:50.765848+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f1c99f5cf8ca9ed", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 118.30399930477142, + "p90": 132.9919993877411, + "p95": 136.9280070066452, + "p99": 148.54399859905243 + }, + "combine": { + "p50": 118.30399930477142, + "p90": 132.9919993877411, + "p95": 
136.9280070066452, + "p99": 148.54399859905243 + }, + "roundtrip": { + "p50": 118.30399930477142, + "p90": 132.9919993877411, + "p95": 136.9280070066452, + "p99": 148.54399859905243 + }, + "isolatedSum": { + "p50": 236.60799860954285, + "p90": 265.9839987754822, + "p95": 273.8560140132904, + "p99": 297.08799719810486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.21599847078323, + "p90": 131.8719983100891, + "p95": 136.57599687576294, + "p99": 147.35999703407288 + }, + "combine": { + "p50": 117.21599847078323, + "p90": 131.8719983100891, + "p95": 136.57599687576294, + "p99": 147.35999703407288 + }, + "roundtrip": { + "p50": 117.21599847078323, + "p90": 131.8719983100891, + "p95": 136.57599687576294, + "p99": 147.35999703407288 + }, + "isolatedSum": { + "p50": 234.43199694156647, + "p90": 263.7439966201782, + "p95": 273.1519937515259, + "p99": 294.71999406814575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 117.66400188207626, + "p90": 132.9279989004135, + "p95": 136.00000739097595, + "p99": 147.0080018043518 + }, + "combine": { + "p50": 117.66400188207626, + "p90": 132.9279989004135, + "p95": 136.00000739097595, + "p99": 147.0080018043518 + }, + "roundtrip": { + "p50": 117.66400188207626, + "p90": 132.9279989004135, + "p95": 136.00000739097595, + "p99": 147.0080018043518 + }, + "isolatedSum": { + "p50": 235.32800376415253, + "p90": 265.855997800827, + "p95": 272.0000147819519, + "p99": 294.0160036087036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 117.66400188207626, + "p90": 134.43200290203094, + "p95": 138.20800185203552, + "p99": 145.91999351978302 + }, + "combine": { + "p50": 117.66400188207626, + "p90": 134.43200290203094, + "p95": 138.20800185203552, + "p99": 145.91999351978302 + }, + "roundtrip": { + "p50": 117.66400188207626, + "p90": 134.43200290203094, + "p95": 138.20800185203552, + "p99": 145.91999351978302 + }, + "isolatedSum": { + "p50": 235.32800376415253, + "p90": 268.8640058040619, + "p95": 276.41600370407104, + "p99": 291.83998703956604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 118.52800101041794, + "p90": 134.2719942331314, + "p95": 139.16799426078796, + "p99": 151.19999647140503 + }, + "combine": { + "p50": 118.52800101041794, + "p90": 134.2719942331314, + "p95": 139.16799426078796, + "p99": 151.19999647140503 + }, + "roundtrip": { + "p50": 118.52800101041794, + "p90": 134.2719942331314, + "p95": 139.16799426078796, + "p99": 151.19999647140503 + }, + "isolatedSum": { + "p50": 237.05600202083588, + "p90": 268.5439884662628, + "p95": 278.3359885215759, + "p99": 302.39999294281006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 117.63200163841248, + "p90": 133.27999413013458, + "p95": 139.00800049304962, + "p99": 152.41600573062897 + }, + 
"combine": { + "p50": 117.63200163841248, + "p90": 133.27999413013458, + "p95": 139.00800049304962, + "p99": 152.41600573062897 + }, + "roundtrip": { + "p50": 117.63200163841248, + "p90": 133.27999413013458, + "p95": 139.00800049304962, + "p99": 152.41600573062897 + }, + "isolatedSum": { + "p50": 235.26400327682495, + "p90": 266.55998826026917, + "p95": 278.01600098609924, + "p99": 304.83201146125793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 120.4800009727478, + "p90": 137.02400028705597, + "p95": 142.46399700641632, + "p99": 151.0400027036667 + }, + "combine": { + "p50": 120.4800009727478, + "p90": 137.02400028705597, + "p95": 142.46399700641632, + "p99": 151.0400027036667 + }, + "roundtrip": { + "p50": 120.4800009727478, + "p90": 137.02400028705597, + "p95": 142.46399700641632, + "p99": 151.0400027036667 + }, + "isolatedSum": { + "p50": 240.9600019454956, + "p90": 274.04800057411194, + "p95": 284.92799401283264, + "p99": 302.0800054073334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.36800110340118, + "p90": 141.59999787807465, + "p95": 145.53600549697876, + "p99": 157.3760062456131 + }, + "combine": { + "p50": 126.36800110340118, + "p90": 141.59999787807465, + "p95": 145.53600549697876, + "p99": 157.3760062456131 + }, + "roundtrip": { + "p50": 126.36800110340118, + "p90": 141.59999787807465, + "p95": 145.53600549697876, + "p99": 157.3760062456131 + }, + "isolatedSum": { + "p50": 252.73600220680237, + "p90": 283.1999957561493, + "p95": 
291.0720109939575, + "p99": 314.7520124912262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ac0532e4", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70", + "colorKey": "gb300_df41b65b", + "comparisonKey": "c12df1a302503cd6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:11.064983+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0bc700e9998f70", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 139.16799426078796, + "p90": 156.6080003976822, + "p95": 160.73599457740784, + "p99": 171.74400389194489 + }, + "combine": { + "p50": 139.16799426078796, + "p90": 156.6080003976822, + "p95": 160.73599457740784, + "p99": 171.74400389194489 + }, + "roundtrip": { + "p50": 139.16799426078796, + "p90": 156.6080003976822, + "p95": 160.73599457740784, + "p99": 171.74400389194489 + }, + "isolatedSum": { + "p50": 278.3359885215759, + "p90": 313.2160007953644, + "p95": 321.4719891548157, + "p99": 343.48800778388977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 137.53600418567657, + "p90": 157.53600001335144, + "p95": 164.0319973230362, + "p99": 173.69599640369415 + }, + "combine": { + "p50": 137.53600418567657, + "p90": 157.53600001335144, + "p95": 164.0319973230362, + "p99": 173.69599640369415 + }, + "roundtrip": { + "p50": 137.53600418567657, + "p90": 157.53600001335144, + "p95": 164.0319973230362, + "p99": 173.69599640369415 + }, + "isolatedSum": { + "p50": 275.07200837135315, + "p90": 315.0720000267029, + "p95": 328.0639946460724, + "p99": 347.3919928073883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 138.40000331401825, + "p90": 159.36000645160675, + "p95": 167.13599860668182, + "p99": 176.03200674057007 + }, + "combine": { + "p50": 138.40000331401825, + "p90": 159.36000645160675, + "p95": 167.13599860668182, + "p99": 176.03200674057007 + }, + "roundtrip": { + "p50": 138.40000331401825, + "p90": 159.36000645160675, + "p95": 167.13599860668182, + "p99": 176.03200674057007 + }, + "isolatedSum": { + "p50": 276.8000066280365, + "p90": 318.7200129032135, + "p95": 334.27199721336365, + "p99": 352.06401348114014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.60000467300415, + "p90": 156.47999942302704, + "p95": 162.432000041008, + "p99": 177.85599827766418 + }, + "combine": { + "p50": 137.60000467300415, + "p90": 156.47999942302704, + "p95": 162.432000041008, + "p99": 177.85599827766418 + }, + "roundtrip": { + "p50": 137.60000467300415, + "p90": 156.47999942302704, + "p95": 162.432000041008, + "p99": 177.85599827766418 + }, + "isolatedSum": { + "p50": 275.2000093460083, + "p90": 312.9599988460541, + "p95": 324.864000082016, + "p99": 355.71199655532837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2af15004", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f", + "colorKey": "gb300_d844a8d6", + "comparisonKey": "76d46229dd1baead", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:34:49.440404+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0456df9778e5c0f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.68000167608261, + "p90": 128.38399410247803, + "p95": 132.25600123405457, + "p99": 142.7839994430542 + }, + "combine": { + 
"p50": 115.68000167608261, + "p90": 128.38399410247803, + "p95": 132.25600123405457, + "p99": 142.7839994430542 + }, + "roundtrip": { + "p50": 115.68000167608261, + "p90": 128.38399410247803, + "p95": 132.25600123405457, + "p99": 142.7839994430542 + }, + "isolatedSum": { + "p50": 231.36000335216522, + "p90": 256.76798820495605, + "p95": 264.51200246810913, + "p99": 285.5679988861084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.48800021409988, + "p90": 128.89599800109863, + "p95": 134.68800485134125, + "p99": 151.93599462509155 + }, + "combine": { + "p50": 115.48800021409988, + "p90": 128.89599800109863, + "p95": 134.68800485134125, + "p99": 151.93599462509155 + }, + "roundtrip": { + "p50": 115.48800021409988, + "p90": 128.89599800109863, + "p95": 134.68800485134125, + "p99": 151.93599462509155 + }, + "isolatedSum": { + "p50": 230.97600042819977, + "p90": 257.79199600219727, + "p95": 269.3760097026825, + "p99": 303.8719892501831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.23999816179276, + "p90": 127.71199643611908, + "p95": 131.3599944114685, + "p99": 140.47999680042267 + }, + "combine": { + "p50": 114.23999816179276, + "p90": 127.71199643611908, + "p95": 131.3599944114685, + "p99": 140.47999680042267 + }, + "roundtrip": { + "p50": 114.23999816179276, + "p90": 127.71199643611908, + "p95": 131.3599944114685, + "p99": 140.47999680042267 + }, + "isolatedSum": { + "p50": 228.4799963235855, + "p90": 255.42399287223816, + "p95": 262.719988822937, + "p99": 
280.95999360084534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 115.64800143241882, + "p90": 129.5360028743744, + "p95": 133.95200669765472, + "p99": 155.4879993200302 + }, + "combine": { + "p50": 115.64800143241882, + "p90": 129.5360028743744, + "p95": 133.95200669765472, + "p99": 155.4879993200302 + }, + "roundtrip": { + "p50": 115.64800143241882, + "p90": 129.5360028743744, + "p95": 133.95200669765472, + "p99": 155.4879993200302 + }, + "isolatedSum": { + "p50": 231.29600286483765, + "p90": 259.0720057487488, + "p95": 267.90401339530945, + "p99": 310.9759986400604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 115.39199948310852, + "p90": 127.6479959487915, + "p95": 133.7279975414276, + "p99": 153.9520025253296 + }, + "combine": { + "p50": 115.39199948310852, + "p90": 127.6479959487915, + "p95": 133.7279975414276, + "p99": 153.9520025253296 + }, + "roundtrip": { + "p50": 115.39199948310852, + "p90": 127.6479959487915, + "p95": 133.7279975414276, + "p99": 153.9520025253296 + }, + "isolatedSum": { + "p50": 230.78399896621704, + "p90": 255.295991897583, + "p95": 267.4559950828552, + "p99": 307.9040050506592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 115.03999680280685, + "p90": 129.08799946308136, + "p95": 
133.31200182437897, + "p99": 152.28800475597382 + }, + "combine": { + "p50": 115.03999680280685, + "p90": 129.08799946308136, + "p95": 133.31200182437897, + "p99": 152.28800475597382 + }, + "roundtrip": { + "p50": 115.03999680280685, + "p90": 129.08799946308136, + "p95": 133.31200182437897, + "p99": 152.28800475597382 + }, + "isolatedSum": { + "p50": 230.0799936056137, + "p90": 258.1759989261627, + "p95": 266.62400364875793, + "p99": 304.57600951194763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 115.13599753379822, + "p90": 130.048006772995, + "p95": 134.91199910640717, + "p99": 174.84800517559052 + }, + "combine": { + "p50": 115.13599753379822, + "p90": 130.048006772995, + "p95": 134.91199910640717, + "p99": 174.84800517559052 + }, + "roundtrip": { + "p50": 115.13599753379822, + "p90": 130.048006772995, + "p95": 134.91199910640717, + "p99": 174.84800517559052 + }, + "isolatedSum": { + "p50": 230.27199506759644, + "p90": 260.09601354599, + "p95": 269.82399821281433, + "p99": 349.69601035118103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.74400216341019, + "p90": 130.36799430847168, + "p95": 133.69600474834442, + "p99": 147.64800667762756 + }, + "combine": { + "p50": 115.74400216341019, + "p90": 130.36799430847168, + "p95": 133.69600474834442, + "p99": 147.64800667762756 + }, + "roundtrip": { + "p50": 115.74400216341019, + "p90": 130.36799430847168, + "p95": 133.69600474834442, + "p99": 147.64800667762756 + }, + "isolatedSum": { + "p50": 
231.48800432682037, + "p90": 260.73598861694336, + "p95": 267.39200949668884, + "p99": 295.2960133552551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-82f05fcb", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||15404c7c0ec01b5", + "colorKey": "gb300_b98e69e2", + "comparisonKey": "f06bb34f8bf5aca5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:06.631233+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "15404c7c0ec01b5", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 133.7919980287552, + "p90": 164.70399498939514, + "p95": 176.54399573802948, + "p99": 203.3279985189438 + }, + "combine": { + "p50": 133.7919980287552, + "p90": 164.70399498939514, + "p95": 176.54399573802948, + "p99": 203.3279985189438 + }, + "roundtrip": { + "p50": 133.7919980287552, + "p90": 164.70399498939514, + "p95": 176.54399573802948, + "p99": 203.3279985189438 + }, + "isolatedSum": { + "p50": 267.5839960575104, + "p90": 329.4079899787903, + "p95": 353.08799147605896, + "p99": 406.6559970378876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 133.27999413013458, + "p90": 166.72000288963318, + "p95": 181.05599284172058, + "p99": 213.3760005235672 + }, + "combine": { + "p50": 133.27999413013458, + "p90": 166.72000288963318, + "p95": 181.05599284172058, + "p99": 213.3760005235672 + }, + "roundtrip": { + "p50": 133.27999413013458, + "p90": 166.72000288963318, + "p95": 181.05599284172058, + "p99": 213.3760005235672 + }, + "isolatedSum": { + "p50": 266.55998826026917, + "p90": 333.44000577926636, + "p95": 362.11198568344116, + "p99": 426.7520010471344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 132.7359974384308, + "p90": 164.60800170898438, + "p95": 176.9919991493225, + "p99": 202.81599462032318 + }, + "combine": { + "p50": 132.7359974384308, + "p90": 164.60800170898438, + "p95": 176.9919991493225, + "p99": 202.81599462032318 + }, + "roundtrip": { + "p50": 132.7359974384308, + "p90": 164.60800170898438, + "p95": 176.9919991493225, + "p99": 202.81599462032318 + }, + "isolatedSum": { + "p50": 265.4719948768616, + "p90": 329.21600341796875, + "p95": 353.983998298645, + "p99": 405.63198924064636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 133.7279975414276, + "p90": 160.12799739837646, + "p95": 167.52000153064728, + "p99": 194.815993309021 + }, + "combine": { + "p50": 133.7279975414276, + "p90": 160.12799739837646, + "p95": 167.52000153064728, + "p99": 194.815993309021 + }, + "roundtrip": { + "p50": 133.7279975414276, + "p90": 160.12799739837646, + "p95": 167.52000153064728, + "p99": 194.815993309021 + }, + "isolatedSum": { + "p50": 267.4559950828552, + "p90": 320.25599479675293, + "p95": 335.04000306129456, + "p99": 389.631986618042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 133.98399949073792, + "p90": 164.8319959640503, + "p95": 178.17600071430206, + "p99": 196.28800451755524 + }, + "combine": { + "p50": 133.98399949073792, + "p90": 164.8319959640503, + "p95": 178.17600071430206, + 
"p99": 196.28800451755524 + }, + "roundtrip": { + "p50": 133.98399949073792, + "p90": 164.8319959640503, + "p95": 178.17600071430206, + "p99": 196.28800451755524 + }, + "isolatedSum": { + "p50": 267.96799898147583, + "p90": 329.6639919281006, + "p95": 356.3520014286041, + "p99": 392.5760090351105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 137.37599551677704, + "p90": 172.0000058412552, + "p95": 181.05599284172058, + "p99": 199.0080028772354 + }, + "combine": { + "p50": 137.37599551677704, + "p90": 172.0000058412552, + "p95": 181.05599284172058, + "p99": 199.0080028772354 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 172.0000058412552, + "p95": 181.05599284172058, + "p99": 199.0080028772354 + }, + "isolatedSum": { + "p50": 274.7519910335541, + "p90": 344.0000116825104, + "p95": 362.11198568344116, + "p99": 398.0160057544708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.37599551677704, + "p90": 172.57599532604218, + "p95": 182.01600015163422, + "p99": 197.1839964389801 + }, + "combine": { + "p50": 137.37599551677704, + "p90": 172.57599532604218, + "p95": 182.01600015163422, + "p99": 197.1839964389801 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 172.57599532604218, + "p95": 182.01600015163422, + "p99": 197.1839964389801 + }, + "isolatedSum": { + "p50": 274.7519910335541, + "p90": 345.15199065208435, + "p95": 364.03200030326843, + "p99": 394.3679928779602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 135.3279948234558, + "p90": 166.30400717258453, + "p95": 180.2240014076233, + "p99": 202.14399695396423 + }, + "combine": { + "p50": 135.3279948234558, + "p90": 166.30400717258453, + "p95": 180.2240014076233, + "p99": 202.14399695396423 + }, + "roundtrip": { + "p50": 135.3279948234558, + "p90": 166.30400717258453, + "p95": 180.2240014076233, + "p99": 202.14399695396423 + }, + "isolatedSum": { + "p50": 270.6559896469116, + "p90": 332.60801434516907, + "p95": 360.4480028152466, + "p99": 404.28799390792847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4fabc27d", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c", + "colorKey": "gb300_8a9bcfac", + "comparisonKey": "3850e4466123cf24", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:57.771776+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": 
"hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5793a02d08aaa9c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.00799721479416, + "p90": 133.18400084972382, + "p95": 137.95199990272522, + "p99": 148.92800152301788 + }, + "combine": { + "p50": 119.00799721479416, + "p90": 133.18400084972382, + "p95": 137.95199990272522, + "p99": 148.92800152301788 + }, + "roundtrip": { + "p50": 119.00799721479416, + "p90": 133.18400084972382, + "p95": 137.95199990272522, + "p99": 148.92800152301788 + }, + "isolatedSum": { + "p50": 238.01599442958832, + "p90": 266.36800169944763, + "p95": 275.90399980545044, + "p99": 297.85600304603577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 118.27199906110764, + "p90": 130.3039938211441, + "p95": 135.0719928741455, + "p99": 147.10399508476257 + }, + "combine": { + "p50": 118.27199906110764, + "p90": 130.3039938211441, + "p95": 135.0719928741455, + "p99": 147.10399508476257 + }, + "roundtrip": { + "p50": 118.27199906110764, + "p90": 130.3039938211441, + "p95": 135.0719928741455, + "p99": 147.10399508476257 + }, + "isolatedSum": { + "p50": 236.54399812221527, + "p90": 260.6079876422882, + "p95": 270.143985748291, + "p99": 294.20799016952515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 120.06399780511856, + "p90": 135.26399433612823, + "p95": 142.07999408245087, + "p99": 152.0320028066635 + }, + "combine": { + "p50": 120.06399780511856, + "p90": 135.26399433612823, + "p95": 142.07999408245087, + "p99": 152.0320028066635 + }, + "roundtrip": { + "p50": 120.06399780511856, + "p90": 135.26399433612823, + "p95": 142.07999408245087, + "p99": 152.0320028066635 + }, + "isolatedSum": { + "p50": 240.12799561023712, + "p90": 270.52798867225647, + "p95": 284.15998816490173, + "p99": 304.064005613327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 118.68800222873688, + "p90": 130.8159977197647, + "p95": 133.53599607944489, + "p99": 143.8400000333786 + }, + "combine": { + "p50": 118.68800222873688, + "p90": 130.8159977197647, + "p95": 133.53599607944489, + "p99": 143.8400000333786 + }, + "roundtrip": { + "p50": 118.68800222873688, + "p90": 
130.8159977197647, + "p95": 133.53599607944489, + "p99": 143.8400000333786 + }, + "isolatedSum": { + "p50": 237.37600445747375, + "p90": 261.6319954395294, + "p95": 267.07199215888977, + "p99": 287.6800000667572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 118.8800036907196, + "p90": 132.89600610733032, + "p95": 136.57599687576294, + "p99": 163.7759953737259 + }, + "combine": { + "p50": 118.8800036907196, + "p90": 132.89600610733032, + "p95": 136.57599687576294, + "p99": 163.7759953737259 + }, + "roundtrip": { + "p50": 118.8800036907196, + "p90": 132.89600610733032, + "p95": 136.57599687576294, + "p99": 163.7759953737259 + }, + "isolatedSum": { + "p50": 237.7600073814392, + "p90": 265.79201221466064, + "p95": 273.1519937515259, + "p99": 327.5519907474518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 119.48800086975098, + "p90": 131.55199587345123, + "p95": 136.28800213336945, + "p99": 188.80000710487366 + }, + "combine": { + "p50": 119.48800086975098, + "p90": 131.55199587345123, + "p95": 136.28800213336945, + "p99": 188.80000710487366 + }, + "roundtrip": { + "p50": 119.48800086975098, + "p90": 131.55199587345123, + "p95": 136.28800213336945, + "p99": 188.80000710487366 + }, + "isolatedSum": { + "p50": 238.97600173950195, + "p90": 263.10399174690247, + "p95": 272.5760042667389, + "p99": 377.6000142097473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + 
"recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 122.36800044775009, + "p90": 134.8479986190796, + "p95": 139.42399621009827, + "p99": 153.6320000886917 + }, + "combine": { + "p50": 122.36800044775009, + "p90": 134.8479986190796, + "p95": 139.42399621009827, + "p99": 153.6320000886917 + }, + "roundtrip": { + "p50": 122.36800044775009, + "p90": 134.8479986190796, + "p95": 139.42399621009827, + "p99": 153.6320000886917 + }, + "isolatedSum": { + "p50": 244.73600089550018, + "p90": 269.6959972381592, + "p95": 278.84799242019653, + "p99": 307.2640001773834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.69600278139114, + "p90": 135.3919953107834, + "p95": 139.26400244235992, + "p99": 149.88799393177032 + }, + "combine": { + "p50": 121.69600278139114, + "p90": 135.3919953107834, + "p95": 139.26400244235992, + "p99": 149.88799393177032 + }, + "roundtrip": { + "p50": 121.69600278139114, + "p90": 135.3919953107834, + "p95": 139.26400244235992, + "p99": 149.88799393177032 + }, + "isolatedSum": { + "p50": 243.3920055627823, + "p90": 270.7839906215668, + "p95": 278.52800488471985, + "p99": 299.77598786354065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-00ef2300", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0", + "colorKey": "gb300_790c1f8e", + "comparisonKey": 
"46b43de4b9e906f5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:29.574725+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a572344820478f0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 129.82399761676788, + "p90": 148.03199470043182, + 
"p95": 154.2080044746399, + "p99": 170.84799706935883 + }, + "combine": { + "p50": 129.82399761676788, + "p90": 148.03199470043182, + "p95": 154.2080044746399, + "p99": 170.84799706935883 + }, + "roundtrip": { + "p50": 129.82399761676788, + "p90": 148.03199470043182, + "p95": 154.2080044746399, + "p99": 170.84799706935883 + }, + "isolatedSum": { + "p50": 259.64799523353577, + "p90": 296.06398940086365, + "p95": 308.4160089492798, + "p99": 341.69599413871765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 130.0799995660782, + "p90": 147.23199605941772, + "p95": 151.2320041656494, + "p99": 159.8079949617386 + }, + "combine": { + "p50": 130.0799995660782, + "p90": 147.23199605941772, + "p95": 151.2320041656494, + "p99": 159.8079949617386 + }, + "roundtrip": { + "p50": 130.0799995660782, + "p90": 147.23199605941772, + "p95": 151.2320041656494, + "p99": 159.8079949617386 + }, + "isolatedSum": { + "p50": 260.1599991321564, + "p90": 294.46399211883545, + "p95": 302.4640083312988, + "p99": 319.6159899234772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 128.22400033473969, + "p90": 146.33600413799286, + "p95": 153.31199765205383, + "p99": 165.79200327396393 + }, + "combine": { + "p50": 128.22400033473969, + "p90": 146.33600413799286, + "p95": 153.31199765205383, + "p99": 165.79200327396393 + }, + "roundtrip": { + "p50": 128.22400033473969, + "p90": 146.33600413799286, + "p95": 153.31199765205383, + "p99": 165.79200327396393 + }, + "isolatedSum": { + "p50": 
256.44800066947937, + "p90": 292.6720082759857, + "p95": 306.62399530410767, + "p99": 331.58400654792786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 130.5599957704544, + "p90": 148.70400726795197, + "p95": 154.4959992170334, + "p99": 167.90400445461273 + }, + "combine": { + "p50": 130.5599957704544, + "p90": 148.70400726795197, + "p95": 154.4959992170334, + "p99": 167.90400445461273 + }, + "roundtrip": { + "p50": 130.5599957704544, + "p90": 148.70400726795197, + "p95": 154.4959992170334, + "p99": 167.90400445461273 + }, + "isolatedSum": { + "p50": 261.1199915409088, + "p90": 297.40801453590393, + "p95": 308.9919984340668, + "p99": 335.80800890922546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 128.22400033473969, + "p90": 146.2080031633377, + "p95": 152.0960032939911, + "p99": 183.1039935350418 + }, + "combine": { + "p50": 128.22400033473969, + "p90": 146.2080031633377, + "p95": 152.0960032939911, + "p99": 183.1039935350418 + }, + "roundtrip": { + "p50": 128.22400033473969, + "p90": 146.2080031633377, + "p95": 152.0960032939911, + "p99": 183.1039935350418 + }, + "isolatedSum": { + "p50": 256.44800066947937, + "p90": 292.4160063266754, + "p95": 304.1920065879822, + "p99": 366.2079870700836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + 
"globalTokens": 256, + "dispatch": { + "p50": 128.67200374603271, + "p90": 144.9279934167862, + "p95": 150.94399452209473, + "p99": 174.0799993276596 + }, + "combine": { + "p50": 128.67200374603271, + "p90": 144.9279934167862, + "p95": 150.94399452209473, + "p99": 174.0799993276596 + }, + "roundtrip": { + "p50": 128.67200374603271, + "p90": 144.9279934167862, + "p95": 150.94399452209473, + "p99": 174.0799993276596 + }, + "isolatedSum": { + "p50": 257.34400749206543, + "p90": 289.8559868335724, + "p95": 301.88798904418945, + "p99": 348.1599986553192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 130.75199723243713, + "p90": 148.6400067806244, + "p95": 153.9199948310852, + "p99": 161.95200383663177 + }, + "combine": { + "p50": 130.75199723243713, + "p90": 148.6400067806244, + "p95": 153.9199948310852, + "p99": 161.95200383663177 + }, + "roundtrip": { + "p50": 130.75199723243713, + "p90": 148.6400067806244, + "p95": 153.9199948310852, + "p99": 161.95200383663177 + }, + "isolatedSum": { + "p50": 261.50399446487427, + "p90": 297.2800135612488, + "p95": 307.8399896621704, + "p99": 323.90400767326355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.32800161838531, + "p90": 149.34399724006653, + "p95": 154.2080044746399, + "p99": 170.6240028142929 + }, + "combine": { + "p50": 131.32800161838531, + "p90": 149.34399724006653, + "p95": 154.2080044746399, + "p99": 170.6240028142929 + }, + "roundtrip": { + "p50": 131.32800161838531, + "p90": 
149.34399724006653, + "p95": 154.2080044746399, + "p99": 170.6240028142929 + }, + "isolatedSum": { + "p50": 262.65600323677063, + "p90": 298.68799448013306, + "p95": 308.4160089492798, + "p99": 341.2480056285858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7ac3232d", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_f4728223", + "comparisonKey": "fc49a9eebdfe9161", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:29:41.322423+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": 
"1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 122.27199971675873, + "p90": 138.65600526332855, + "p95": 144.16000247001648, + "p99": 185.95199286937714 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 138.65600526332855, + "p95": 144.16000247001648, + "p99": 185.95199286937714 + }, + "roundtrip": { + "p50": 122.27199971675873, + "p90": 138.65600526332855, + "p95": 144.16000247001648, + "p99": 185.95199286937714 + }, + "isolatedSum": { + "p50": 244.54399943351746, + "p90": 277.3120105266571, + "p95": 288.32000494003296, + "p99": 371.9039857387543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 120.7360029220581, + "p90": 136.06399297714233, + "p95": 141.02399349212646, + "p99": 148.54399859905243 + }, + "combine": { + "p50": 120.7360029220581, + "p90": 136.06399297714233, + "p95": 141.02399349212646, + "p99": 148.54399859905243 + }, + "roundtrip": { + "p50": 120.7360029220581, + "p90": 136.06399297714233, + "p95": 141.02399349212646, + "p99": 148.54399859905243 + }, + "isolatedSum": { + "p50": 241.4720058441162, + "p90": 272.12798595428467, + "p95": 282.04798698425293, + "p99": 297.08799719810486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + 
"combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 120.99199742078781, + "p90": 137.1839940547943, + "p95": 142.56000518798828, + "p99": 161.79199516773224 + }, + "combine": { + "p50": 120.99199742078781, + "p90": 137.1839940547943, + "p95": 142.56000518798828, + "p99": 161.79199516773224 + }, + "roundtrip": { + "p50": 120.99199742078781, + "p90": 137.1839940547943, + "p95": 142.56000518798828, + "p99": 161.79199516773224 + }, + "isolatedSum": { + "p50": 241.98399484157562, + "p90": 274.3679881095886, + "p95": 285.12001037597656, + "p99": 323.5839903354645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 121.66400253772736, + "p90": 137.2479945421219, + "p95": 143.36000382900238, + "p99": 177.88800597190857 + }, + "combine": { + "p50": 121.66400253772736, + "p90": 137.2479945421219, + "p95": 143.36000382900238, + "p99": 177.88800597190857 + }, + "roundtrip": { + "p50": 121.66400253772736, + "p90": 137.2479945421219, + "p95": 143.36000382900238, + "p99": 177.88800597190857 + }, + "isolatedSum": { + "p50": 243.3280050754547, + "p90": 274.4959890842438, + "p95": 286.72000765800476, + "p99": 355.77601194381714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 121.5360015630722, + "p90": 136.03200018405914, + "p95": 140.47999680042267, + "p99": 152.51199901103973 + }, + "combine": { + 
"p50": 121.5360015630722, + "p90": 136.03200018405914, + "p95": 140.47999680042267, + "p99": 152.51199901103973 + }, + "roundtrip": { + "p50": 121.5360015630722, + "p90": 136.03200018405914, + "p95": 140.47999680042267, + "p99": 152.51199901103973 + }, + "isolatedSum": { + "p50": 243.0720031261444, + "p90": 272.0640003681183, + "p95": 280.95999360084534, + "p99": 305.02399802207947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 120.57600170373917, + "p90": 134.783998131752, + "p95": 138.91200721263885, + "p99": 150.91200172901154 + }, + "combine": { + "p50": 120.57600170373917, + "p90": 134.783998131752, + "p95": 138.91200721263885, + "p99": 150.91200172901154 + }, + "roundtrip": { + "p50": 120.57600170373917, + "p90": 134.783998131752, + "p95": 138.91200721263885, + "p99": 150.91200172901154 + }, + "isolatedSum": { + "p50": 241.15200340747833, + "p90": 269.567996263504, + "p95": 277.8240144252777, + "p99": 301.82400345802307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 123.19999933242798, + "p90": 139.16799426078796, + "p95": 145.02400159835815, + "p99": 156.67200088500977 + }, + "combine": { + "p50": 123.19999933242798, + "p90": 139.16799426078796, + "p95": 145.02400159835815, + "p99": 156.67200088500977 + }, + "roundtrip": { + "p50": 123.19999933242798, + "p90": 139.16799426078796, + "p95": 145.02400159835815, + "p99": 156.67200088500977 + }, + "isolatedSum": { + "p50": 246.39999866485596, + "p90": 278.3359885215759, + "p95": 
290.0480031967163, + "p99": 313.34400177001953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.62400305271149, + "p90": 141.76000654697418, + "p95": 145.6959992647171, + "p99": 152.54400670528412 + }, + "combine": { + "p50": 126.62400305271149, + "p90": 141.76000654697418, + "p95": 145.6959992647171, + "p99": 152.54400670528412 + }, + "roundtrip": { + "p50": 126.62400305271149, + "p90": 141.76000654697418, + "p95": 145.6959992647171, + "p99": 152.54400670528412 + }, + "isolatedSum": { + "p50": 253.24800610542297, + "p90": 283.52001309394836, + "p95": 291.3919985294342, + "p99": 305.08801341056824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63faeb52", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fc79fe5fdca4c", + "colorKey": "gb300_ecdb41f9", + "comparisonKey": "665cbf003da748b3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:45.110640+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fc79fe5fdca4c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.93600362539291, + "p90": 132.4159950017929, + "p95": 137.05599308013916, + "p99": 152.44799852371216 + }, + "combine": { + "p50": 115.93600362539291, + "p90": 132.4159950017929, + "p95": 137.05599308013916, + "p99": 152.44799852371216 + }, + "roundtrip": { + "p50": 115.93600362539291, + "p90": 132.4159950017929, + "p95": 137.05599308013916, + "p99": 152.44799852371216 + }, + "isolatedSum": { + "p50": 231.87200725078583, + "p90": 264.8319900035858, + "p95": 274.1119861602783, + "p99": 304.8959970474243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 116.57600104808807, + "p90": 132.1599930524826, + "p95": 137.7280056476593, + "p99": 162.6559942960739 + }, + "combine": { + "p50": 116.57600104808807, + "p90": 132.1599930524826, + "p95": 137.7280056476593, + "p99": 162.6559942960739 + }, + "roundtrip": { + "p50": 116.57600104808807, + "p90": 132.1599930524826, + "p95": 137.7280056476593, + "p99": 162.6559942960739 + }, + "isolatedSum": { + "p50": 233.15200209617615, + "p90": 264.3199861049652, + "p95": 275.4560112953186, + "p99": 325.3119885921478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 116.64000153541565, + "p90": 134.65599715709686, + "p95": 139.615997672081, + "p99": 148.3200043439865 + }, + "combine": { + "p50": 116.64000153541565, + "p90": 134.65599715709686, + "p95": 139.615997672081, + "p99": 148.3200043439865 + }, + "roundtrip": { + "p50": 116.64000153541565, + "p90": 134.65599715709686, + "p95": 139.615997672081, + "p99": 148.3200043439865 + }, + "isolatedSum": { + "p50": 233.2800030708313, + "p90": 269.3119943141937, + "p95": 279.231995344162, + "p99": 296.640008687973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 115.80800265073776, + "p90": 132.32000172138214, + "p95": 137.11999356746674, + "p99": 147.5519984960556 + }, + "combine": { + "p50": 115.80800265073776, + "p90": 132.32000172138214, + "p95": 137.11999356746674, + "p99": 147.5519984960556 + }, + "roundtrip": { + "p50": 115.80800265073776, + 
"p90": 132.32000172138214, + "p95": 137.11999356746674, + "p99": 147.5519984960556 + }, + "isolatedSum": { + "p50": 231.61600530147552, + "p90": 264.6400034427643, + "p95": 274.2399871349335, + "p99": 295.1039969921112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 117.08799749612808, + "p90": 132.9919993877411, + "p95": 137.7599984407425, + "p99": 143.99999380111694 + }, + "combine": { + "p50": 117.08799749612808, + "p90": 132.9919993877411, + "p95": 137.7599984407425, + "p99": 143.99999380111694 + }, + "roundtrip": { + "p50": 117.08799749612808, + "p90": 132.9919993877411, + "p95": 137.7599984407425, + "p99": 143.99999380111694 + }, + "isolatedSum": { + "p50": 234.17599499225616, + "p90": 265.9839987754822, + "p95": 275.519996881485, + "p99": 287.9999876022339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 117.50400066375732, + "p90": 133.98399949073792, + "p95": 139.1039937734604, + "p99": 162.56000101566315 + }, + "combine": { + "p50": 117.50400066375732, + "p90": 133.98399949073792, + "p95": 139.1039937734604, + "p99": 162.56000101566315 + }, + "roundtrip": { + "p50": 117.50400066375732, + "p90": 133.98399949073792, + "p95": 139.1039937734604, + "p99": 162.56000101566315 + }, + "isolatedSum": { + "p50": 235.00800132751465, + "p90": 267.96799898147583, + "p95": 278.2079875469208, + "p99": 325.1200020313263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + 
"recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 118.72000247240067, + "p90": 134.783998131752, + "p95": 139.0720009803772, + "p99": 147.93600142002106 + }, + "combine": { + "p50": 118.72000247240067, + "p90": 134.783998131752, + "p95": 139.0720009803772, + "p99": 147.93600142002106 + }, + "roundtrip": { + "p50": 118.72000247240067, + "p90": 134.783998131752, + "p95": 139.0720009803772, + "p99": 147.93600142002106 + }, + "isolatedSum": { + "p50": 237.44000494480133, + "p90": 269.567996263504, + "p95": 278.1440019607544, + "p99": 295.8720028400421 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.35199999809265, + "p90": 136.73600554466248, + "p95": 142.0159935951233, + "p99": 160.3199988603592 + }, + "combine": { + "p50": 120.35199999809265, + "p90": 136.73600554466248, + "p95": 142.0159935951233, + "p99": 160.3199988603592 + }, + "roundtrip": { + "p50": 120.35199999809265, + "p90": 136.73600554466248, + "p95": 142.0159935951233, + "p99": 160.3199988603592 + }, + "isolatedSum": { + "p50": 240.7039999961853, + "p90": 273.47201108932495, + "p95": 284.0319871902466, + "p99": 320.6399977207184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7c28da4d", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da", + "colorKey": "gb300_844c1f75", + "comparisonKey": 
"d09c2a4ca029c110", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:37.054194+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "39778bd75f046da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.47200042009354, + "p90": 
129.88799810409546, + "p95": 133.44000279903412, + "p99": 137.7280056476593 + }, + "combine": { + "p50": 117.47200042009354, + "p90": 129.88799810409546, + "p95": 133.44000279903412, + "p99": 137.7280056476593 + }, + "roundtrip": { + "p50": 117.47200042009354, + "p90": 129.88799810409546, + "p95": 133.44000279903412, + "p99": 137.7280056476593 + }, + "isolatedSum": { + "p50": 234.94400084018707, + "p90": 259.7759962081909, + "p95": 266.88000559806824, + "p99": 275.4560112953186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 119.29599940776825, + "p90": 132.32000172138214, + "p95": 136.00000739097595, + "p99": 143.16800236701965 + }, + "combine": { + "p50": 119.29599940776825, + "p90": 132.32000172138214, + "p95": 136.00000739097595, + "p99": 143.16800236701965 + }, + "roundtrip": { + "p50": 119.29599940776825, + "p90": 132.32000172138214, + "p95": 136.00000739097595, + "p99": 143.16800236701965 + }, + "isolatedSum": { + "p50": 238.5919988155365, + "p90": 264.6400034427643, + "p95": 272.0000147819519, + "p99": 286.3360047340393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 118.81600320339203, + "p90": 132.7040046453476, + "p95": 136.99199259281158, + "p99": 150.33599734306335 + }, + "combine": { + "p50": 118.81600320339203, + "p90": 132.7040046453476, + "p95": 136.99199259281158, + "p99": 150.33599734306335 + }, + "roundtrip": { + "p50": 118.81600320339203, + "p90": 132.7040046453476, + "p95": 136.99199259281158, + "p99": 150.33599734306335 + }, + 
"isolatedSum": { + "p50": 237.63200640678406, + "p90": 265.4080092906952, + "p95": 273.98398518562317, + "p99": 300.6719946861267 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 119.13599818944931, + "p90": 132.06399977207184, + "p95": 134.94400680065155, + "p99": 140.06400108337402 + }, + "combine": { + "p50": 119.13599818944931, + "p90": 132.06399977207184, + "p95": 134.94400680065155, + "p99": 140.06400108337402 + }, + "roundtrip": { + "p50": 119.13599818944931, + "p90": 132.06399977207184, + "p95": 134.94400680065155, + "p99": 140.06400108337402 + }, + "isolatedSum": { + "p50": 238.27199637889862, + "p90": 264.1279995441437, + "p95": 269.8880136013031, + "p99": 280.12800216674805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 117.98399686813354, + "p90": 131.71200454235077, + "p95": 135.3600025177002, + "p99": 144.70399916172028 + }, + "combine": { + "p50": 117.98399686813354, + "p90": 131.71200454235077, + "p95": 135.3600025177002, + "p99": 144.70399916172028 + }, + "roundtrip": { + "p50": 117.98399686813354, + "p90": 131.71200454235077, + "p95": 135.3600025177002, + "p99": 144.70399916172028 + }, + "isolatedSum": { + "p50": 235.9679937362671, + "p90": 263.42400908470154, + "p95": 270.7200050354004, + "p99": 289.40799832344055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 119.93599683046341, + "p90": 133.91999900341034, + "p95": 137.82399892807007, + "p99": 175.99999904632568 + }, + "combine": { + "p50": 119.93599683046341, + "p90": 133.91999900341034, + "p95": 137.82399892807007, + "p99": 175.99999904632568 + }, + "roundtrip": { + "p50": 119.93599683046341, + "p90": 133.91999900341034, + "p95": 137.82399892807007, + "p99": 175.99999904632568 + }, + "isolatedSum": { + "p50": 239.87199366092682, + "p90": 267.8399980068207, + "p95": 275.64799785614014, + "p99": 351.99999809265137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 121.34400010108948, + "p90": 134.75200533866882, + "p95": 138.2399946451187, + "p99": 150.65599977970123 + }, + "combine": { + "p50": 121.34400010108948, + "p90": 134.75200533866882, + "p95": 138.2399946451187, + "p99": 150.65599977970123 + }, + "roundtrip": { + "p50": 121.34400010108948, + "p90": 134.75200533866882, + "p95": 138.2399946451187, + "p99": 150.65599977970123 + }, + "isolatedSum": { + "p50": 242.68800020217896, + "p90": 269.50401067733765, + "p95": 276.4799892902374, + "p99": 301.31199955940247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.6800029873848, + "p90": 138.40000331401825, + "p95": 141.53599739074707, + "p99": 148.67199957370758 + }, + "combine": { + "p50": 123.6800029873848, + "p90": 138.40000331401825, + "p95": 141.53599739074707, + "p99": 148.67199957370758 + }, + "roundtrip": 
{ + "p50": 123.6800029873848, + "p90": 138.40000331401825, + "p95": 141.53599739074707, + "p99": 148.67199957370758 + }, + "isolatedSum": { + "p50": 247.3600059747696, + "p90": 276.8000066280365, + "p95": 283.07199478149414, + "p99": 297.34399914741516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6a924f57", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717", + "colorKey": "gb300_91fe36f4", + "comparisonKey": "2c9d775511d9204d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:13.200350+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "a3b13bb200bb717", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 114.01599645614624, + "p90": 131.45600259304047, + "p95": 139.16799426078796, + "p99": 151.87199413776398 + }, + "combine": { + "p50": 114.01599645614624, + "p90": 131.45600259304047, + "p95": 139.16799426078796, + "p99": 151.87199413776398 + }, + "roundtrip": { + "p50": 114.01599645614624, + "p90": 131.45600259304047, + "p95": 139.16799426078796, + "p99": 151.87199413776398 + }, + "isolatedSum": { + "p50": 228.03199291229248, + "p90": 262.91200518608093, + "p95": 278.3359885215759, + "p99": 303.74398827552795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 113.3119985461235, + "p90": 132.192000746727, + "p95": 139.0399932861328, + "p99": 152.0639955997467 + }, + "combine": { + "p50": 113.3119985461235, + "p90": 132.192000746727, + "p95": 139.0399932861328, + "p99": 152.0639955997467 + }, + "roundtrip": { + "p50": 113.3119985461235, + "p90": 132.192000746727, + "p95": 139.0399932861328, + "p99": 152.0639955997467 + }, + "isolatedSum": { + "p50": 226.623997092247, + "p90": 264.384001493454, + "p95": 278.0799865722656, + "p99": 304.1279911994934 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 115.00799655914307, + "p90": 133.7919980287552, + "p95": 140.60799777507782, + "p99": 153.18399667739868 + }, + "combine": { + "p50": 115.00799655914307, + "p90": 133.7919980287552, + "p95": 140.60799777507782, + "p99": 153.18399667739868 + }, + "roundtrip": { + "p50": 115.00799655914307, + "p90": 133.7919980287552, + "p95": 140.60799777507782, + "p99": 153.18399667739868 + }, + "isolatedSum": { + "p50": 230.01599311828613, + "p90": 267.5839960575104, + "p95": 281.21599555015564, + "p99": 306.36799335479736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 114.9120032787323, + "p90": 130.14400005340576, + "p95": 135.1040005683899, + "p99": 155.29599785804749 + }, + "combine": { + "p50": 114.9120032787323, + "p90": 130.14400005340576, + "p95": 135.1040005683899, + "p99": 155.29599785804749 + }, + "roundtrip": { + "p50": 114.9120032787323, + "p90": 130.14400005340576, + "p95": 135.1040005683899, + "p99": 155.29599785804749 + }, + "isolatedSum": { + "p50": 229.8240065574646, + "p90": 260.2880001068115, + "p95": 270.2080011367798, + "p99": 310.59199571609497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 114.23999816179276, + "p90": 134.46399569511414, + "p95": 141.92000031471252, + "p99": 
151.2639969587326 + }, + "combine": { + "p50": 114.23999816179276, + "p90": 134.46399569511414, + "p95": 141.92000031471252, + "p99": 151.2639969587326 + }, + "roundtrip": { + "p50": 114.23999816179276, + "p90": 134.46399569511414, + "p95": 141.92000031471252, + "p99": 151.2639969587326 + }, + "isolatedSum": { + "p50": 228.4799963235855, + "p90": 268.92799139022827, + "p95": 283.84000062942505, + "p99": 302.5279939174652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 113.95200341939926, + "p90": 132.9919993877411, + "p95": 139.29599523544312, + "p99": 161.02400422096252 + }, + "combine": { + "p50": 113.95200341939926, + "p90": 132.9919993877411, + "p95": 139.29599523544312, + "p99": 161.02400422096252 + }, + "roundtrip": { + "p50": 113.95200341939926, + "p90": 132.9919993877411, + "p95": 139.29599523544312, + "p99": 161.02400422096252 + }, + "isolatedSum": { + "p50": 227.90400683879852, + "p90": 265.9839987754822, + "p95": 278.59199047088623, + "p99": 322.04800844192505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.60800063610077, + "p90": 129.2800009250641, + "p95": 133.7919980287552, + "p99": 148.3200043439865 + }, + "combine": { + "p50": 112.60800063610077, + "p90": 129.2800009250641, + "p95": 133.7919980287552, + "p99": 148.3200043439865 + }, + "roundtrip": { + "p50": 112.60800063610077, + "p90": 129.2800009250641, + "p95": 133.7919980287552, + "p99": 148.3200043439865 + }, + "isolatedSum": { + "p50": 225.21600127220154, + "p90": 
258.5600018501282, + "p95": 267.5839960575104, + "p99": 296.640008687973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.68800222873688, + "p90": 135.80800592899323, + "p95": 143.00799369812012, + "p99": 162.27200627326965 + }, + "combine": { + "p50": 118.68800222873688, + "p90": 135.80800592899323, + "p95": 143.00799369812012, + "p99": 162.27200627326965 + }, + "roundtrip": { + "p50": 118.68800222873688, + "p90": 135.80800592899323, + "p95": 143.00799369812012, + "p99": 162.27200627326965 + }, + "isolatedSum": { + "p50": 237.37600445747375, + "p90": 271.61601185798645, + "p95": 286.01598739624023, + "p99": 324.5440125465393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-01933049", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b", + "colorKey": "gb300_7320de9a", + "comparisonKey": "2cbae436b4163578", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:22.503967+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB 
physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ab982093c4eac2b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 118.6240017414093, + "p90": 135.68000495433807, + "p95": 142.14399456977844, + "p99": 151.16800367832184 + }, + "combine": { + "p50": 118.6240017414093, + "p90": 135.68000495433807, + "p95": 142.14399456977844, + "p99": 151.16800367832184 + }, + "roundtrip": { + "p50": 118.6240017414093, + "p90": 135.68000495433807, + "p95": 142.14399456977844, + "p99": 151.16800367832184 + }, + "isolatedSum": { + "p50": 237.2480034828186, + "p90": 271.36000990867615, + "p95": 284.2879891395569, + "p99": 302.3360073566437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + 
"recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.34399944543839, + "p90": 134.24000144004822, + "p95": 139.67999815940857, + "p99": 161.50400042533875 + }, + "combine": { + "p50": 117.34399944543839, + "p90": 134.24000144004822, + "p95": 139.67999815940857, + "p99": 161.50400042533875 + }, + "roundtrip": { + "p50": 117.34399944543839, + "p90": 134.24000144004822, + "p95": 139.67999815940857, + "p99": 161.50400042533875 + }, + "isolatedSum": { + "p50": 234.68799889087677, + "p90": 268.48000288009644, + "p95": 279.35999631881714, + "p99": 323.0080008506775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 117.66400188207626, + "p90": 134.8479986190796, + "p95": 138.87999951839447, + "p99": 147.64800667762756 + }, + "combine": { + "p50": 117.66400188207626, + "p90": 134.8479986190796, + "p95": 138.87999951839447, + "p99": 147.64800667762756 + }, + "roundtrip": { + "p50": 117.66400188207626, + "p90": 134.8479986190796, + "p95": 138.87999951839447, + "p99": 147.64800667762756 + }, + "isolatedSum": { + "p50": 235.32800376415253, + "p90": 269.6959972381592, + "p95": 277.75999903678894, + "p99": 295.2960133552551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 118.14399808645248, + "p90": 134.65599715709686, + "p95": 140.09599387645721, + "p99": 154.59200739860535 + }, + "combine": { + "p50": 118.14399808645248, + "p90": 134.65599715709686, 
+ "p95": 140.09599387645721, + "p99": 154.59200739860535 + }, + "roundtrip": { + "p50": 118.14399808645248, + "p90": 134.65599715709686, + "p95": 140.09599387645721, + "p99": 154.59200739860535 + }, + "isolatedSum": { + "p50": 236.28799617290497, + "p90": 269.3119943141937, + "p95": 280.19198775291443, + "p99": 309.1840147972107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 117.3119992017746, + "p90": 132.9919993877411, + "p95": 138.84800672531128, + "p99": 162.1759980916977 + }, + "combine": { + "p50": 117.3119992017746, + "p90": 132.9919993877411, + "p95": 138.84800672531128, + "p99": 162.1759980916977 + }, + "roundtrip": { + "p50": 117.3119992017746, + "p90": 132.9919993877411, + "p95": 138.84800672531128, + "p99": 162.1759980916977 + }, + "isolatedSum": { + "p50": 234.6239984035492, + "p90": 265.9839987754822, + "p95": 277.69601345062256, + "p99": 324.3519961833954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 116.15999788045883, + "p90": 132.76800513267517, + "p95": 137.60000467300415, + "p99": 148.99200201034546 + }, + "combine": { + "p50": 116.15999788045883, + "p90": 132.76800513267517, + "p95": 137.60000467300415, + "p99": 148.99200201034546 + }, + "roundtrip": { + "p50": 116.15999788045883, + "p90": 132.76800513267517, + "p95": 137.60000467300415, + "p99": 148.99200201034546 + }, + "isolatedSum": { + "p50": 232.31999576091766, + "p90": 265.53601026535034, + "p95": 275.2000093460083, + "p99": 297.9840040206909 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 118.56000125408173, + "p90": 134.39999520778656, + "p95": 138.5280042886734, + "p99": 157.24800527095795 + }, + "combine": { + "p50": 118.56000125408173, + "p90": 134.39999520778656, + "p95": 138.5280042886734, + "p99": 157.24800527095795 + }, + "roundtrip": { + "p50": 118.56000125408173, + "p90": 134.39999520778656, + "p95": 138.5280042886734, + "p99": 157.24800527095795 + }, + "isolatedSum": { + "p50": 237.12000250816345, + "p90": 268.7999904155731, + "p95": 277.0560085773468, + "p99": 314.4960105419159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.44800072908401, + "p90": 137.15200126171112, + "p95": 142.17600226402283, + "p99": 162.6559942960739 + }, + "combine": { + "p50": 120.44800072908401, + "p90": 137.15200126171112, + "p95": 142.17600226402283, + "p99": 162.6559942960739 + }, + "roundtrip": { + "p50": 120.44800072908401, + "p90": 137.15200126171112, + "p95": 142.17600226402283, + "p99": 162.6559942960739 + }, + "isolatedSum": { + "p50": 240.89600145816803, + "p90": 274.30400252342224, + "p95": 284.35200452804565, + "p99": 325.3119885921478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0122f0e3", + "identity": 
"gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_e889c3e3", + "comparisonKey": "7370bef62d8a8d66", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:53.935895+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 136.1600011587143, + "p90": 151.93599462509155, + "p95": 158.1760048866272, + "p99": 166.97600483894348 + }, + "combine": { + "p50": 136.1600011587143, + "p90": 151.93599462509155, + "p95": 158.1760048866272, + "p99": 166.97600483894348 + }, + "roundtrip": { + "p50": 136.1600011587143, + "p90": 151.93599462509155, + "p95": 158.1760048866272, + "p99": 166.97600483894348 + }, + "isolatedSum": { + "p50": 272.3200023174286, + "p90": 303.8719892501831, + "p95": 316.3520097732544, + "p99": 333.95200967788696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 133.98399949073792, + "p90": 149.6960073709488, + "p95": 154.4640064239502, + "p99": 164.8000031709671 + }, + "combine": { + "p50": 133.98399949073792, + "p90": 149.6960073709488, + "p95": 154.4640064239502, + "p99": 164.8000031709671 + }, + "roundtrip": { + "p50": 133.98399949073792, + "p90": 149.6960073709488, + "p95": 154.4640064239502, + "p99": 164.8000031709671 + }, + "isolatedSum": { + "p50": 267.96799898147583, + "p90": 299.3920147418976, + "p95": 308.9280128479004, + "p99": 329.6000063419342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 131.00799918174744, + "p90": 145.85599303245544, + "p95": 149.9200016260147, + "p99": 160.67199409008026 + }, + "combine": { + "p50": 131.00799918174744, + "p90": 145.85599303245544, + "p95": 149.9200016260147, + "p99": 160.67199409008026 + }, + 
"roundtrip": { + "p50": 131.00799918174744, + "p90": 145.85599303245544, + "p95": 149.9200016260147, + "p99": 160.67199409008026 + }, + "isolatedSum": { + "p50": 262.0159983634949, + "p90": 291.7119860649109, + "p95": 299.8400032520294, + "p99": 321.3439881801605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 135.3279948234558, + "p90": 150.84800124168396, + "p95": 156.22399747371674, + "p99": 166.9120043516159 + }, + "combine": { + "p50": 135.3279948234558, + "p90": 150.84800124168396, + "p95": 156.22399747371674, + "p99": 166.9120043516159 + }, + "roundtrip": { + "p50": 135.3279948234558, + "p90": 150.84800124168396, + "p95": 156.22399747371674, + "p99": 166.9120043516159 + }, + "isolatedSum": { + "p50": 270.6559896469116, + "p90": 301.6960024833679, + "p95": 312.4479949474335, + "p99": 333.8240087032318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 133.98399949073792, + "p90": 149.6639996767044, + "p95": 155.13600409030914, + "p99": 171.26399278640747 + }, + "combine": { + "p50": 133.98399949073792, + "p90": 149.6639996767044, + "p95": 155.13600409030914, + "p99": 171.26399278640747 + }, + "roundtrip": { + "p50": 133.98399949073792, + "p90": 149.6639996767044, + "p95": 155.13600409030914, + "p99": 171.26399278640747 + }, + "isolatedSum": { + "p50": 267.96799898147583, + "p90": 299.3279993534088, + "p95": 310.2720081806183, + "p99": 342.52798557281494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 
6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 131.67999684810638, + "p90": 147.039994597435, + "p95": 151.74399316310883, + "p99": 159.32799875736237 + }, + "combine": { + "p50": 131.67999684810638, + "p90": 147.039994597435, + "p95": 151.74399316310883, + "p99": 159.32799875736237 + }, + "roundtrip": { + "p50": 131.67999684810638, + "p90": 147.039994597435, + "p95": 151.74399316310883, + "p99": 159.32799875736237 + }, + "isolatedSum": { + "p50": 263.35999369621277, + "p90": 294.07998919487, + "p95": 303.48798632621765, + "p99": 318.65599751472473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 134.71999764442444, + "p90": 151.10400319099426, + "p95": 154.94400262832642, + "p99": 165.50399363040924 + }, + "combine": { + "p50": 134.71999764442444, + "p90": 151.10400319099426, + "p95": 154.94400262832642, + "p99": 165.50399363040924 + }, + "roundtrip": { + "p50": 134.71999764442444, + "p90": 151.10400319099426, + "p95": 154.94400262832642, + "p99": 165.50399363040924 + }, + "isolatedSum": { + "p50": 269.4399952888489, + "p90": 302.2080063819885, + "p95": 309.88800525665283, + "p99": 331.0079872608185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 135.8720064163208, + "p90": 150.14399588108063, + "p95": 155.29599785804749, + "p99": 174.14399981498718 + }, + "combine": { + "p50": 
135.8720064163208, + "p90": 150.14399588108063, + "p95": 155.29599785804749, + "p99": 174.14399981498718 + }, + "roundtrip": { + "p50": 135.8720064163208, + "p90": 150.14399588108063, + "p95": 155.29599785804749, + "p99": 174.14399981498718 + }, + "isolatedSum": { + "p50": 271.7440128326416, + "p90": 300.28799176216125, + "p95": 310.59199571609497, + "p99": 348.28799962997437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-124e16e4", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_c3b32f1b", + "comparisonKey": "4e53a8c9bf87fe91", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:03.602136+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": 
"not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 122.14399874210358, + "p90": 134.0160071849823, + "p95": 136.63999736309052, + "p99": 143.5520052909851 + }, + "combine": { + "p50": 122.14399874210358, + "p90": 134.0160071849823, + "p95": 136.63999736309052, + "p99": 143.5520052909851 + }, + "roundtrip": { + "p50": 122.14399874210358, + "p90": 134.0160071849823, + "p95": 136.63999736309052, + "p99": 143.5520052909851 + }, + "isolatedSum": { + "p50": 244.28799748420715, + "p90": 268.0320143699646, + "p95": 273.27999472618103, + "p99": 287.1040105819702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 121.05599790811539, + "p90": 131.8719983100891, + "p95": 135.23200154304504, + "p99": 184.38400328159332 + }, + "combine": { + "p50": 121.05599790811539, + "p90": 131.8719983100891, + "p95": 135.23200154304504, + "p99": 184.38400328159332 + }, + "roundtrip": { + "p50": 121.05599790811539, + "p90": 131.8719983100891, + "p95": 135.23200154304504, + 
"p99": 184.38400328159332 + }, + "isolatedSum": { + "p50": 242.11199581623077, + "p90": 263.7439966201782, + "p95": 270.4640030860901, + "p99": 368.76800656318665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.40000069141388, + "p90": 135.45599579811096, + "p95": 139.16799426078796, + "p99": 156.44800662994385 + }, + "combine": { + "p50": 122.40000069141388, + "p90": 135.45599579811096, + "p95": 139.16799426078796, + "p99": 156.44800662994385 + }, + "roundtrip": { + "p50": 122.40000069141388, + "p90": 135.45599579811096, + "p95": 139.16799426078796, + "p99": 156.44800662994385 + }, + "isolatedSum": { + "p50": 244.80000138282776, + "p90": 270.9119915962219, + "p95": 278.3359885215759, + "p99": 312.8960132598877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 122.17599898576736, + "p90": 134.97599959373474, + "p95": 138.5599970817566, + "p99": 150.01599490642548 + }, + "combine": { + "p50": 122.17599898576736, + "p90": 134.97599959373474, + "p95": 138.5599970817566, + "p99": 150.01599490642548 + }, + "roundtrip": { + "p50": 122.17599898576736, + "p90": 134.97599959373474, + "p95": 138.5599970817566, + "p99": 150.01599490642548 + }, + "isolatedSum": { + "p50": 244.35199797153473, + "p90": 269.9519991874695, + "p95": 277.1199941635132, + "p99": 300.03198981285095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 121.8239963054657, + "p90": 140.22399485111237, + "p95": 268.3520019054413, + "p99": 860.1599931716919 + }, + "combine": { + "p50": 121.8239963054657, + "p90": 140.22399485111237, + "p95": 268.3520019054413, + "p99": 860.1599931716919 + }, + "roundtrip": { + "p50": 121.8239963054657, + "p90": 140.22399485111237, + "p95": 268.3520019054413, + "p99": 860.1599931716919 + }, + "isolatedSum": { + "p50": 243.6479926109314, + "p90": 280.44798970222473, + "p95": 536.7040038108826, + "p99": 1720.3199863433838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 119.35999989509583, + "p90": 135.6160044670105, + "p95": 139.8719996213913, + "p99": 166.1120057106018 + }, + "combine": { + "p50": 119.35999989509583, + "p90": 135.6160044670105, + "p95": 139.8719996213913, + "p99": 166.1120057106018 + }, + "roundtrip": { + "p50": 119.35999989509583, + "p90": 135.6160044670105, + "p95": 139.8719996213913, + "p99": 166.1120057106018 + }, + "isolatedSum": { + "p50": 238.71999979019165, + "p90": 271.232008934021, + "p95": 279.7439992427826, + "p99": 332.2240114212036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 122.43200093507767, + "p90": 136.9599997997284, + "p95": 140.57600498199463, + "p99": 151.87199413776398 + }, + "combine": { + "p50": 122.43200093507767, + "p90": 136.9599997997284, + "p95": 140.57600498199463, + "p99": 151.87199413776398 + }, + 
"roundtrip": { + "p50": 122.43200093507767, + "p90": 136.9599997997284, + "p95": 140.57600498199463, + "p99": 151.87199413776398 + }, + "isolatedSum": { + "p50": 244.86400187015533, + "p90": 273.9199995994568, + "p95": 281.15200996398926, + "p99": 303.74398827552795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.4800016283989, + "p90": 138.43199610710144, + "p95": 141.56800508499146, + "p99": 148.44800531864166 + }, + "combine": { + "p50": 124.4800016283989, + "p90": 138.43199610710144, + "p95": 141.56800508499146, + "p99": 148.44800531864166 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 138.43199610710144, + "p95": 141.56800508499146, + "p99": 148.44800531864166 + }, + "isolatedSum": { + "p50": 248.9600032567978, + "p90": 276.8639922142029, + "p95": 283.1360101699829, + "p99": 296.8960106372833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-12d094e4", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_228227db", + "comparisonKey": "fc4150e66800755c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:09.776227+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 121.47200107574463, + "p90": 139.93600010871887, + "p95": 146.94400131702423, + "p99": 217.28000044822693 + }, + "combine": { + "p50": 121.47200107574463, + "p90": 139.93600010871887, + "p95": 146.94400131702423, + "p99": 217.28000044822693 + }, + "roundtrip": { + "p50": 121.47200107574463, + "p90": 139.93600010871887, + "p95": 146.94400131702423, + "p99": 217.28000044822693 + }, + "isolatedSum": { + "p50": 242.94400215148926, + "p90": 279.87200021743774, + "p95": 
293.88800263404846, + "p99": 434.56000089645386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 122.56000190973282, + "p90": 138.7840062379837, + "p95": 143.74400675296783, + "p99": 155.32800555229187 + }, + "combine": { + "p50": 122.56000190973282, + "p90": 138.7840062379837, + "p95": 143.74400675296783, + "p99": 155.32800555229187 + }, + "roundtrip": { + "p50": 122.56000190973282, + "p90": 138.7840062379837, + "p95": 143.74400675296783, + "p99": 155.32800555229187 + }, + "isolatedSum": { + "p50": 245.12000381946564, + "p90": 277.5680124759674, + "p95": 287.48801350593567, + "p99": 310.65601110458374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.84799665212631, + "p90": 139.615997672081, + "p95": 144.3520039319992, + "p99": 157.60000050067902 + }, + "combine": { + "p50": 122.84799665212631, + "p90": 139.615997672081, + "p95": 144.3520039319992, + "p99": 157.60000050067902 + }, + "roundtrip": { + "p50": 122.84799665212631, + "p90": 139.615997672081, + "p95": 144.3520039319992, + "p99": 157.60000050067902 + }, + "isolatedSum": { + "p50": 245.69599330425262, + "p90": 279.231995344162, + "p95": 288.7040078639984, + "p99": 315.20000100135803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 123.45600128173828, 
+ "p90": 139.00800049304962, + "p95": 144.0960019826889, + "p99": 157.9200029373169 + }, + "combine": { + "p50": 123.45600128173828, + "p90": 139.00800049304962, + "p95": 144.0960019826889, + "p99": 157.9200029373169 + }, + "roundtrip": { + "p50": 123.45600128173828, + "p90": 139.00800049304962, + "p95": 144.0960019826889, + "p99": 157.9200029373169 + }, + "isolatedSum": { + "p50": 246.91200256347656, + "p90": 278.01600098609924, + "p95": 288.1920039653778, + "p99": 315.8400058746338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 120.67200243473053, + "p90": 136.63999736309052, + "p95": 143.23200285434723, + "p99": 151.5199989080429 + }, + "combine": { + "p50": 120.67200243473053, + "p90": 136.63999736309052, + "p95": 143.23200285434723, + "p99": 151.5199989080429 + }, + "roundtrip": { + "p50": 120.67200243473053, + "p90": 136.63999736309052, + "p95": 143.23200285434723, + "p99": 151.5199989080429 + }, + "isolatedSum": { + "p50": 241.34400486946106, + "p90": 273.27999472618103, + "p95": 286.46400570869446, + "p99": 303.0399978160858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.40000069141388, + "p90": 139.26400244235992, + "p95": 145.60000598430634, + "p99": 163.13600540161133 + }, + "combine": { + "p50": 122.40000069141388, + "p90": 139.26400244235992, + "p95": 145.60000598430634, + "p99": 163.13600540161133 + }, + "roundtrip": { + "p50": 122.40000069141388, + "p90": 139.26400244235992, + "p95": 145.60000598430634, + "p99": 
163.13600540161133 + }, + "isolatedSum": { + "p50": 244.80000138282776, + "p90": 278.52800488471985, + "p95": 291.20001196861267, + "p99": 326.27201080322266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 122.40000069141388, + "p90": 139.90400731563568, + "p95": 145.75999975204468, + "p99": 166.24000668525696 + }, + "combine": { + "p50": 122.40000069141388, + "p90": 139.90400731563568, + "p95": 145.75999975204468, + "p99": 166.24000668525696 + }, + "roundtrip": { + "p50": 122.40000069141388, + "p90": 139.90400731563568, + "p95": 145.75999975204468, + "p99": 166.24000668525696 + }, + "isolatedSum": { + "p50": 244.80000138282776, + "p90": 279.80801463127136, + "p95": 291.51999950408936, + "p99": 332.4800133705139 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.95999783277512, + "p90": 140.8960074186325, + "p95": 147.20000326633453, + "p99": 203.2639980316162 + }, + "combine": { + "p50": 124.95999783277512, + "p90": 140.8960074186325, + "p95": 147.20000326633453, + "p99": 203.2639980316162 + }, + "roundtrip": { + "p50": 124.95999783277512, + "p90": 140.8960074186325, + "p95": 147.20000326633453, + "p99": 203.2639980316162 + }, + "isolatedSum": { + "p50": 249.91999566555023, + "p90": 281.792014837265, + "p95": 294.40000653266907, + "p99": 406.5279960632324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 3, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-82609e8c", + "identity": "gb300|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_404dcc83", + "comparisonKey": "71eb805a9522ffac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:43.258065+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 146.5280055999756, + "p90": 158.6879938840866, + "p95": 164.35199975967407, + "p99": 176.15999281406403 + }, + "combine": { + "p50": 146.5280055999756, + "p90": 158.6879938840866, + "p95": 164.35199975967407, + "p99": 176.15999281406403 + }, + "roundtrip": { + "p50": 146.5280055999756, + "p90": 158.6879938840866, + "p95": 164.35199975967407, + "p99": 176.15999281406403 + }, + "isolatedSum": { + "p50": 293.0560111999512, + "p90": 317.3759877681732, + "p95": 328.70399951934814, + "p99": 352.31998562812805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 146.2080031633377, + "p90": 157.79200196266174, + "p95": 162.9759967327118, + "p99": 170.3999936580658 + }, + "combine": { + "p50": 146.2080031633377, + "p90": 157.79200196266174, + "p95": 162.9759967327118, + "p99": 170.3999936580658 + }, + "roundtrip": { + "p50": 146.2080031633377, + "p90": 157.79200196266174, + "p95": 162.9759967327118, + "p99": 170.3999936580658 + }, + "isolatedSum": { + "p50": 292.4160063266754, + "p90": 315.5840039253235, + "p95": 325.9519934654236, + "p99": 340.7999873161316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 147.87200093269348, + "p90": 161.95200383663177, + "p95": 166.1120057106018, + "p99": 183.4239959716797 + }, + "combine": { + 
"p50": 147.87200093269348, + "p90": 161.95200383663177, + "p95": 166.1120057106018, + "p99": 183.4239959716797 + }, + "roundtrip": { + "p50": 147.87200093269348, + "p90": 161.95200383663177, + "p95": 166.1120057106018, + "p99": 183.4239959716797 + }, + "isolatedSum": { + "p50": 295.74400186538696, + "p90": 323.90400767326355, + "p95": 332.2240114212036, + "p99": 366.8479919433594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 147.07200229167938, + "p90": 160.60799360275269, + "p95": 165.0560051202774, + "p99": 189.37599658966064 + }, + "combine": { + "p50": 147.07200229167938, + "p90": 160.60799360275269, + "p95": 165.0560051202774, + "p99": 189.37599658966064 + }, + "roundtrip": { + "p50": 147.07200229167938, + "p90": 160.60799360275269, + "p95": 165.0560051202774, + "p99": 189.37599658966064 + }, + "isolatedSum": { + "p50": 294.14400458335876, + "p90": 321.21598720550537, + "p95": 330.1120102405548, + "p99": 378.7519931793213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 145.53600549697876, + "p90": 160.3199988603592, + "p95": 163.96799683570862, + "p99": 192.32000410556793 + }, + "combine": { + "p50": 145.53600549697876, + "p90": 160.3199988603592, + "p95": 163.96799683570862, + "p99": 192.32000410556793 + }, + "roundtrip": { + "p50": 145.53600549697876, + "p90": 160.3199988603592, + "p95": 163.96799683570862, + "p99": 192.32000410556793 + }, + "isolatedSum": { + "p50": 291.0720109939575, + "p90": 320.6399977207184, + "p95": 327.93599367141724, + 
"p99": 384.64000821113586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 145.7280069589615, + "p90": 160.67199409008026, + "p95": 165.79200327396393, + "p99": 177.88800597190857 + }, + "combine": { + "p50": 145.7280069589615, + "p90": 160.67199409008026, + "p95": 165.79200327396393, + "p99": 177.88800597190857 + }, + "roundtrip": { + "p50": 145.7280069589615, + "p90": 160.67199409008026, + "p95": 165.79200327396393, + "p99": 177.88800597190857 + }, + "isolatedSum": { + "p50": 291.456013917923, + "p90": 321.3439881801605, + "p95": 331.58400654792786, + "p99": 355.77601194381714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 148.0959951877594, + "p90": 162.1759980916977, + "p95": 166.62399470806122, + "p99": 180.89599907398224 + }, + "combine": { + "p50": 148.0959951877594, + "p90": 162.1759980916977, + "p95": 166.62399470806122, + "p99": 180.89599907398224 + }, + "roundtrip": { + "p50": 148.0959951877594, + "p90": 162.1759980916977, + "p95": 166.62399470806122, + "p99": 180.89599907398224 + }, + "isolatedSum": { + "p50": 296.1919903755188, + "p90": 324.3519961833954, + "p95": 333.24798941612244, + "p99": 361.7919981479645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 147.2640037536621, + 
"p90": 161.98399662971497, + "p95": 165.8879965543747, + "p99": 178.847998380661 + }, + "combine": { + "p50": 147.2640037536621, + "p90": 161.98399662971497, + "p95": 165.8879965543747, + "p99": 178.847998380661 + }, + "roundtrip": { + "p50": 147.2640037536621, + "p90": 161.98399662971497, + "p95": 165.8879965543747, + "p99": 178.847998380661 + }, + "isolatedSum": { + "p50": 294.5280075073242, + "p90": 323.96799325942993, + "p95": 331.7759931087494, + "p99": 357.695996761322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6da1934c", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_7255c6ac", + "comparisonKey": "c324a10c61c75ad0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:21.837125+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": 
"fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 121.69600278139114, + "p90": 148.19200336933136, + "p95": 156.25600516796112, + "p99": 169.69600319862366 + }, + "combine": { + "p50": 121.69600278139114, + "p90": 148.19200336933136, + "p95": 156.25600516796112, + "p99": 169.69600319862366 + }, + "roundtrip": { + "p50": 121.69600278139114, + "p90": 148.19200336933136, + "p95": 156.25600516796112, + "p99": 169.69600319862366 + }, + "isolatedSum": { + "p50": 243.3920055627823, + "p90": 296.3840067386627, + "p95": 312.51201033592224, + "p99": 339.3920063972473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 123.9359974861145, + "p90": 148.3519971370697, + "p95": 161.05599701404572, + "p99": 176.5120029449463 + }, + "combine": { + "p50": 123.9359974861145, + "p90": 148.3519971370697, + "p95": 161.05599701404572, + "p99": 176.5120029449463 + }, + "roundtrip": { + "p50": 123.9359974861145, + "p90": 
148.3519971370697, + "p95": 161.05599701404572, + "p99": 176.5120029449463 + }, + "isolatedSum": { + "p50": 247.871994972229, + "p90": 296.7039942741394, + "p95": 322.11199402809143, + "p99": 353.0240058898926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 137.79200613498688, + "p90": 176.28799378871918, + "p95": 186.20799481868744, + "p99": 197.9839950799942 + }, + "combine": { + "p50": 137.79200613498688, + "p90": 176.28799378871918, + "p95": 186.20799481868744, + "p99": 197.9839950799942 + }, + "roundtrip": { + "p50": 137.79200613498688, + "p90": 176.28799378871918, + "p95": 186.20799481868744, + "p99": 197.9839950799942 + }, + "isolatedSum": { + "p50": 275.58401226997375, + "p90": 352.57598757743835, + "p95": 372.4159896373749, + "p99": 395.9679901599884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 196.79999351501465, + "p90": 229.50400412082672, + "p95": 232.41600394248962, + "p99": 248.76800179481506 + }, + "combine": { + "p50": 196.79999351501465, + "p90": 229.50400412082672, + "p95": 232.41600394248962, + "p99": 248.76800179481506 + }, + "roundtrip": { + "p50": 196.79999351501465, + "p90": 229.50400412082672, + "p95": 232.41600394248962, + "p99": 248.76800179481506 + }, + "isolatedSum": { + "p50": 393.5999870300293, + "p90": 459.00800824165344, + "p95": 464.83200788497925, + "p99": 497.5360035896301 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 
3.6171875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 339.07198905944824, + "p90": 375.4560053348541, + "p95": 381.5999925136566, + "p99": 386.59200072288513 + }, + "combine": { + "p50": 339.07198905944824, + "p90": 375.4560053348541, + "p95": 381.5999925136566, + "p99": 386.59200072288513 + }, + "roundtrip": { + "p50": 339.07198905944824, + "p90": 375.4560053348541, + "p95": 381.5999925136566, + "p99": 386.59200072288513 + }, + "isolatedSum": { + "p50": 678.1439781188965, + "p90": 750.9120106697083, + "p95": 763.1999850273132, + "p99": 773.1840014457703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 586.2079858779907, + "p90": 634.7519755363464, + "p95": 642.1759724617004, + "p99": 648.7039923667908 + }, + "combine": { + "p50": 586.2079858779907, + "p90": 634.7519755363464, + "p95": 642.1759724617004, + "p99": 648.7039923667908 + }, + "roundtrip": { + "p50": 586.2079858779907, + "p90": 634.7519755363464, + "p95": 642.1759724617004, + "p99": 648.7039923667908 + }, + "isolatedSum": { + "p50": 1172.4159717559814, + "p90": 1269.5039510726929, + "p95": 1284.3519449234009, + "p99": 1297.4079847335815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d27e4783", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|prefill|normal|none|none|0|tuned||1104ab83732593b", + "colorKey": "gb300_44bf7fb4", + 
"comparisonKey": "4f0d9f449e5eda1b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:04.240767+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1104ab83732593b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 94.81599926948547, + "p90": 104.92800176143646, + "p95": 
108.83200168609619, + "p99": 114.04799669981003 + }, + "combine": { + "p50": 94.81599926948547, + "p90": 104.92800176143646, + "p95": 108.83200168609619, + "p99": 114.04799669981003 + }, + "roundtrip": { + "p50": 94.81599926948547, + "p90": 104.92800176143646, + "p95": 108.83200168609619, + "p99": 114.04799669981003 + }, + "isolatedSum": { + "p50": 189.63199853897095, + "p90": 209.85600352287292, + "p95": 217.66400337219238, + "p99": 228.09599339962006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 103.07200253009796, + "p90": 110.46399921178818, + "p95": 113.88800293207169, + "p99": 120.83200365304947 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 110.46399921178818, + "p95": 113.88800293207169, + "p99": 120.83200365304947 + }, + "roundtrip": { + "p50": 103.07200253009796, + "p90": 110.46399921178818, + "p95": 113.88800293207169, + "p99": 120.83200365304947 + }, + "isolatedSum": { + "p50": 206.14400506019592, + "p90": 220.92799842357635, + "p95": 227.77600586414337, + "p99": 241.66400730609894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 124.12799894809723, + "p90": 130.75199723243713, + "p95": 135.77599823474884, + "p99": 147.10399508476257 + }, + "combine": { + "p50": 124.12799894809723, + "p90": 130.75199723243713, + "p95": 135.77599823474884, + "p99": 147.10399508476257 + }, + "roundtrip": { + "p50": 124.12799894809723, + "p90": 130.75199723243713, + "p95": 135.77599823474884, + "p99": 147.10399508476257 + }, + "isolatedSum": { + "p50": 
248.25599789619446, + "p90": 261.50399446487427, + "p95": 271.5519964694977, + "p99": 294.20799016952515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 190.68799912929535, + "p90": 193.24800372123718, + "p95": 194.07999515533447, + "p99": 196.22400403022766 + }, + "combine": { + "p50": 190.68799912929535, + "p90": 193.24800372123718, + "p95": 194.07999515533447, + "p99": 196.22400403022766 + }, + "roundtrip": { + "p50": 190.68799912929535, + "p90": 193.24800372123718, + "p95": 194.07999515533447, + "p99": 196.22400403022766 + }, + "isolatedSum": { + "p50": 381.3759982585907, + "p90": 386.49600744247437, + "p95": 388.15999031066895, + "p99": 392.4480080604553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 338.8479948043823, + "p90": 341.2480056285858, + "p95": 342.20799803733826, + "p99": 348.09601306915283 + }, + "combine": { + "p50": 338.8479948043823, + "p90": 341.2480056285858, + "p95": 342.20799803733826, + "p99": 348.09601306915283 + }, + "roundtrip": { + "p50": 338.8479948043823, + "p90": 341.2480056285858, + "p95": 342.20799803733826, + "p99": 348.09601306915283 + }, + "isolatedSum": { + "p50": 677.6959896087646, + "p90": 682.4960112571716, + "p95": 684.4159960746765, + "p99": 696.1920261383057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 632.8960061073303, + "p90": 636.1600160598755, + "p95": 637.3440027236938, + "p99": 642.7519917488098 + }, + "combine": { + "p50": 632.8960061073303, + "p90": 636.1600160598755, + "p95": 637.3440027236938, + "p99": 642.7519917488098 + }, + "roundtrip": { + "p50": 632.8960061073303, + "p90": 636.1600160598755, + "p95": 637.3440027236938, + "p99": 642.7519917488098 + }, + "isolatedSum": { + "p50": 1265.7920122146606, + "p90": 1272.320032119751, + "p95": 1274.6880054473877, + "p99": 1285.5039834976196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 4, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e851860b", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|prefill|normal|none|none|0|tuned||e15d35cfeaea91f", + "colorKey": "gb300_d825256f", + "comparisonKey": "d3543b978ec4eacf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:22.581110+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e15d35cfeaea91f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 93.75999867916107, + "p90": 103.55199873447418, + "p95": 107.77600109577179, + "p99": 112.12799698114395 + }, + "combine": { + "p50": 93.75999867916107, + "p90": 103.55199873447418, + "p95": 107.77600109577179, + "p99": 112.12799698114395 + }, + "roundtrip": { + "p50": 93.75999867916107, + "p90": 103.55199873447418, + "p95": 107.77600109577179, + "p99": 112.12799698114395 + }, + "isolatedSum": { + "p50": 187.51999735832214, + "p90": 207.10399746894836, + "p95": 215.55200219154358, + "p99": 224.2559939622879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 96.83199971914291, + "p90": 126.52799487113953, + "p95": 143.5520052909851, + "p99": 157.72800147533417 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 
126.52799487113953, + "p95": 143.5520052909851, + "p99": 157.72800147533417 + }, + "roundtrip": { + "p50": 96.83199971914291, + "p90": 126.52799487113953, + "p95": 143.5520052909851, + "p99": 157.72800147533417 + }, + "isolatedSum": { + "p50": 193.66399943828583, + "p90": 253.05598974227905, + "p95": 287.1040105819702, + "p99": 315.45600295066833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 129.08799946308136, + "p90": 146.91199362277985, + "p95": 162.432000041008, + "p99": 181.95199966430664 + }, + "combine": { + "p50": 129.08799946308136, + "p90": 146.91199362277985, + "p95": 162.432000041008, + "p99": 181.95199966430664 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 146.91199362277985, + "p95": 162.432000041008, + "p99": 181.95199966430664 + }, + "isolatedSum": { + "p50": 258.1759989261627, + "p90": 293.8239872455597, + "p95": 324.864000082016, + "p99": 363.9039993286133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-741952a9", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|prefill|normal|none|none|0|tuned||33484f7e5b87248", + "colorKey": "gb300_248d58da", + "comparisonKey": "340f4632639cb549", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:04.240978+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "33484f7e5b87248", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 99.48799759149551, + "p90": 109.31199789047241, + "p95": 113.40799927711487, + "p99": 124.57600235939026 + }, + "combine": { + "p50": 99.48799759149551, + "p90": 109.31199789047241, + "p95": 113.40799927711487, + "p99": 124.57600235939026 + }, + "roundtrip": { + "p50": 99.48799759149551, + "p90": 109.31199789047241, + "p95": 113.40799927711487, + "p99": 124.57600235939026 + }, + "isolatedSum": { + "p50": 
198.97599518299103, + "p90": 218.62399578094482, + "p95": 226.81599855422974, + "p99": 249.15200471878052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 100.67199915647507, + "p90": 110.97600311040878, + "p95": 113.69600147008896, + "p99": 122.5920021533966 + }, + "combine": { + "p50": 100.67199915647507, + "p90": 110.97600311040878, + "p95": 113.69600147008896, + "p99": 122.5920021533966 + }, + "roundtrip": { + "p50": 100.67199915647507, + "p90": 110.97600311040878, + "p95": 113.69600147008896, + "p99": 122.5920021533966 + }, + "isolatedSum": { + "p50": 201.34399831295013, + "p90": 221.95200622081757, + "p95": 227.39200294017792, + "p99": 245.1840043067932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22020096, + "combineLogicalBytes": 22020096, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 105.02400249242783, + "p90": 115.64800143241882, + "p95": 118.6240017414093, + "p99": 134.14399325847626 + }, + "combine": { + "p50": 105.02400249242783, + "p90": 115.64800143241882, + "p95": 118.6240017414093, + "p99": 134.14399325847626 + }, + "roundtrip": { + "p50": 105.02400249242783, + "p90": 115.64800143241882, + "p95": 118.6240017414093, + "p99": 134.14399325847626 + }, + "isolatedSum": { + "p50": 210.04800498485565, + "p90": 231.29600286483765, + "p95": 237.2480034828186, + "p99": 268.2879865169525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44040192, + "combineLogicalBytes": 44040192, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 126.3359934091568, + "p90": 134.5600038766861, + "p95": 138.5280042886734, + "p99": 143.5520052909851 + }, + "combine": { + "p50": 126.3359934091568, + "p90": 134.5600038766861, + "p95": 138.5280042886734, + "p99": 143.5520052909851 + }, + "roundtrip": { + "p50": 126.3359934091568, + "p90": 134.5600038766861, + "p95": 138.5280042886734, + "p99": 143.5520052909851 + }, + "isolatedSum": { + "p50": 252.6719868183136, + "p90": 269.1200077533722, + "p95": 277.0560085773468, + "p99": 287.1040105819702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 88080384, + "combineLogicalBytes": 88080384, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 183.48799645900726, + "p90": 185.66399812698364, + "p95": 186.62400543689728, + "p99": 193.92000138759613 + }, + "combine": { + "p50": 183.48799645900726, + "p90": 185.66399812698364, + "p95": 186.62400543689728, + "p99": 193.92000138759613 + }, + "roundtrip": { + "p50": 183.48799645900726, + "p90": 185.66399812698364, + "p95": 186.62400543689728, + "p99": 193.92000138759613 + }, + "isolatedSum": { + "p50": 366.9759929180145, + "p90": 371.3279962539673, + "p95": 373.24801087379456, + "p99": 387.84000277519226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 176160768, + "combineLogicalBytes": 176160768, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 327.7440071105957, + "p90": 330.6879997253418, + "p95": 332.12798833847046, + "p99": 339.1680121421814 + }, + "combine": { + "p50": 327.7440071105957, + "p90": 330.6879997253418, + "p95": 332.12798833847046, + "p99": 339.1680121421814 + }, + "roundtrip": { + "p50": 327.7440071105957, + 
"p90": 330.6879997253418, + "p95": 332.12798833847046, + "p99": 339.1680121421814 + }, + "isolatedSum": { + "p50": 655.4880142211914, + "p90": 661.3759994506836, + "p95": 664.2559766769409, + "p99": 678.3360242843628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352321536, + "combineLogicalBytes": 352321536, + "fanoutMean": 1.5, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aa017b51", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|prefill|normal|none|none|0|tuned||b8e52e92c6d3379", + "colorKey": "gb300_0c631e36", + "comparisonKey": "35eb1cb55100a6e0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:23.165910+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": 
true, + "traceSignature": "b8e52e92c6d3379", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 114.3999993801117, + "p90": 141.88799262046814, + "p95": 148.99200201034546, + "p99": 160.60799360275269 + }, + "combine": { + "p50": 114.3999993801117, + "p90": 141.88799262046814, + "p95": 148.99200201034546, + "p99": 160.60799360275269 + }, + "roundtrip": { + "p50": 114.3999993801117, + "p90": 141.88799262046814, + "p95": 148.99200201034546, + "p99": 160.60799360275269 + }, + "isolatedSum": { + "p50": 228.7999987602234, + "p90": 283.7759852409363, + "p95": 297.9840040206909, + "p99": 321.21598720550537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 115.10399729013443, + "p90": 133.7279975414276, + "p95": 142.33599603176117, + "p99": 176.4480024576187 + }, + "combine": { + "p50": 115.10399729013443, + "p90": 133.7279975414276, + "p95": 142.33599603176117, + "p99": 176.4480024576187 + }, + "roundtrip": { + "p50": 115.10399729013443, + "p90": 133.7279975414276, + "p95": 142.33599603176117, + "p99": 176.4480024576187 + }, + "isolatedSum": { + "p50": 230.20799458026886, + "p90": 267.4559950828552, + "p95": 284.67199206352234, + "p99": 352.8960049152374 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 53100544, + "combineLogicalBytes": 53100544, + "fanoutMean": 3.6171875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 136.31999492645264, + "p90": 162.56000101566315, + "p95": 183.61599743366241, + "p99": 197.63199985027313 + }, + "combine": { + "p50": 136.31999492645264, + "p90": 162.56000101566315, + "p95": 183.61599743366241, + "p99": 197.63199985027313 + }, + "roundtrip": { + "p50": 136.31999492645264, + "p90": 162.56000101566315, + "p95": 183.61599743366241, + "p99": 197.63199985027313 + }, + "isolatedSum": { + "p50": 272.6399898529053, + "p90": 325.1200020313263, + "p95": 367.23199486732483, + "p99": 395.26399970054626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106373120, + "combineLogicalBytes": 106373120, + "fanoutMean": 3.623046875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 191.45600497722626, + "p90": 227.52000391483307, + "p95": 234.8479926586151, + "p99": 264.8319900035858 + }, + "combine": { + "p50": 191.45600497722626, + "p90": 227.52000391483307, + "p95": 234.8479926586151, + "p99": 264.8319900035858 + }, + "roundtrip": { + "p50": 191.45600497722626, + "p90": 227.52000391483307, + "p95": 234.8479926586151, + "p99": 264.8319900035858 + }, + "isolatedSum": { + "p50": 382.9120099544525, + "p90": 455.04000782966614, + "p95": 469.6959853172302, + "p99": 529.6639800071716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 341.0559892654419, + "p90": 360.76799035072327, + "p95": 
381.5680146217346, + "p99": 394.5919871330261 + }, + "combine": { + "p50": 341.0559892654419, + "p90": 360.76799035072327, + "p95": 381.5680146217346, + "p99": 394.5919871330261 + }, + "roundtrip": { + "p50": 341.0559892654419, + "p90": 360.76799035072327, + "p95": 381.5680146217346, + "p99": 394.5919871330261 + }, + "isolatedSum": { + "p50": 682.1119785308838, + "p90": 721.5359807014465, + "p95": 763.1360292434692, + "p99": 789.1839742660522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423958528, + "combineLogicalBytes": 423958528, + "fanoutMean": 3.6099853515625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 625.4400014877319, + "p90": 644.6719765663147, + "p95": 651.3919830322266, + "p99": 672.2239851951599 + }, + "combine": { + "p50": 625.4400014877319, + "p90": 644.6719765663147, + "p95": 651.3919830322266, + "p99": 672.2239851951599 + }, + "roundtrip": { + "p50": 625.4400014877319, + "p90": 644.6719765663147, + "p95": 651.3919830322266, + "p99": 672.2239851951599 + }, + "isolatedSum": { + "p50": 1250.8800029754639, + "p90": 1289.3439531326294, + "p95": 1302.7839660644531, + "p99": 1344.4479703903198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847988736, + "combineLogicalBytes": 847988736, + "fanoutMean": 3.61029052734375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7286f152", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|prefill|normal|none|none|0|tuned||5f9878f45872329", + "colorKey": "gb300_b2c46ef8", + "comparisonKey": "e6866cd1b1c4aaea", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:28.106483+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": 
"flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "5f9878f45872329", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.358123779296875, + "eplbImbalanceAfter": 1.000026818477746, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 113.79200220108032, + "p90": 122.27199971675873, + "p95": 126.3359934091568, + "p99": 132.32000172138214 + }, + "combine": { + "p50": 113.79200220108032, + "p90": 122.27199971675873, + "p95": 126.3359934091568, + "p99": 
132.32000172138214 + }, + "roundtrip": { + "p50": 113.79200220108032, + "p90": 122.27199971675873, + "p95": 126.3359934091568, + "p99": 132.32000172138214 + }, + "isolatedSum": { + "p50": 227.58400440216064, + "p90": 244.54399943351746, + "p95": 252.6719868183136, + "p99": 264.6400034427643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26664960, + "combineLogicalBytes": 26664960, + "fanoutMean": 3.6328125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 118.27199906110764, + "p90": 126.68800354003906, + "p95": 129.37599420547485, + "p99": 134.5279961824417 + }, + "combine": { + "p50": 118.27199906110764, + "p90": 126.68800354003906, + "p95": 129.37599420547485, + "p99": 134.5279961824417 + }, + "roundtrip": { + "p50": 118.27199906110764, + "p90": 126.68800354003906, + "p95": 129.37599420547485, + "p99": 134.5279961824417 + }, + "isolatedSum": { + "p50": 236.54399812221527, + "p90": 253.37600708007812, + "p95": 258.7519884109497, + "p99": 269.0559923648834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53143552, + "combineLogicalBytes": 53143552, + "fanoutMean": 3.6201171875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 136.7039978504181, + "p90": 144.0960019826889, + "p95": 147.74399995803833, + "p99": 152.8639942407608 + }, + "combine": { + "p50": 136.7039978504181, + "p90": 144.0960019826889, + "p95": 147.74399995803833, + "p99": 152.8639942407608 + }, + "roundtrip": { + "p50": 136.7039978504181, + "p90": 144.0960019826889, + "p95": 147.74399995803833, + "p99": 152.8639942407608 + }, + "isolatedSum": { + "p50": 273.4079957008362, + "p90": 288.1920039653778, + "p95": 295.48799991607666, + "p99": 305.7279884815216 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 106258432, + "combineLogicalBytes": 106258432, + "fanoutMean": 3.619140625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 188.92799317836761, + "p90": 191.9039934873581, + "p95": 193.4400051832199, + "p99": 199.0080028772354 + }, + "combine": { + "p50": 188.92799317836761, + "p90": 191.9039934873581, + "p95": 193.4400051832199, + "p99": 199.0080028772354 + }, + "roundtrip": { + "p50": 188.92799317836761, + "p90": 191.9039934873581, + "p95": 193.4400051832199, + "p99": 199.0080028772354 + }, + "isolatedSum": { + "p50": 377.85598635673523, + "p90": 383.8079869747162, + "p95": 386.8800103664398, + "p99": 398.0160057544708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212645888, + "combineLogicalBytes": 212645888, + "fanoutMean": 3.621337890625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 322.4959969520569, + "p90": 325.8880078792572, + "p95": 327.13600993156433, + "p99": 331.36001229286194 + }, + "combine": { + "p50": 322.4959969520569, + "p90": 325.8880078792572, + "p95": 327.13600993156433, + "p99": 331.36001229286194 + }, + "roundtrip": { + "p50": 322.4959969520569, + "p90": 325.8880078792572, + "p95": 327.13600993156433, + "p99": 331.36001229286194 + }, + "isolatedSum": { + "p50": 644.9919939041138, + "p90": 651.7760157585144, + "p95": 654.2720198631287, + "p99": 662.7200245857239 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424775680, + "combineLogicalBytes": 424775680, + "fanoutMean": 3.616943359375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 587.7760052680969, + "p90": 591.5520191192627, + "p95": 
592.6719903945923, + "p99": 596.5440273284912 + }, + "combine": { + "p50": 587.7760052680969, + "p90": 591.5520191192627, + "p95": 592.6719903945923, + "p99": 596.5440273284912 + }, + "roundtrip": { + "p50": 587.7760052680969, + "p90": 591.5520191192627, + "p95": 592.6719903945923, + "p99": 596.5440273284912 + }, + "isolatedSum": { + "p50": 1175.5520105361938, + "p90": 1183.1040382385254, + "p95": 1185.3439807891846, + "p99": 1193.0880546569824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 848547840, + "combineLogicalBytes": 848547840, + "fanoutMean": 3.6126708984375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1f9959ad", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|prefill|normal|none|none|0|tuned||ed21345b2de53e0", + "colorKey": "gb300_81a322c2", + "comparisonKey": "e10a9f77ab3e813b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:43.737192+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + 
"resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ed21345b2de53e0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.003448486328125, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 104.67199981212616, + "p90": 114.30399864912033, + "p95": 117.3119992017746, + "p99": 124.95999783277512 + }, + "combine": { + "p50": 104.67199981212616, + "p90": 114.30399864912033, + "p95": 117.3119992017746, + "p99": 124.95999783277512 + }, + "roundtrip": { + "p50": 104.67199981212616, + "p90": 114.30399864912033, + "p95": 117.3119992017746, + "p99": 124.95999783277512 + }, + "isolatedSum": { + "p50": 209.34399962425232, + "p90": 228.60799729824066, + "p95": 234.6239984035492, + "p99": 249.91999566555023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 112.28799819946289, + "p90": 119.77600306272507, + "p95": 124.83199685811996, + "p99": 131.84000551700592 + }, + "combine": { + "p50": 112.28799819946289, + "p90": 119.77600306272507, + "p95": 124.83199685811996, + "p99": 131.84000551700592 + }, + "roundtrip": { + "p50": 
112.28799819946289, + "p90": 119.77600306272507, + "p95": 124.83199685811996, + "p99": 131.84000551700592 + }, + "isolatedSum": { + "p50": 224.57599639892578, + "p90": 239.55200612545013, + "p95": 249.66399371623993, + "p99": 263.68001103401184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52570112, + "combineLogicalBytes": 52570112, + "fanoutMean": 3.5810546875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 130.36799430847168, + "p90": 139.64800536632538, + "p95": 143.327996134758, + "p99": 147.5519984960556 + }, + "combine": { + "p50": 130.36799430847168, + "p90": 139.64800536632538, + "p95": 143.327996134758, + "p99": 147.5519984960556 + }, + "roundtrip": { + "p50": 130.36799430847168, + "p90": 139.64800536632538, + "p95": 143.327996134758, + "p99": 147.5519984960556 + }, + "isolatedSum": { + "p50": 260.73598861694336, + "p90": 279.29601073265076, + "p95": 286.655992269516, + "p99": 295.1039969921112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105613312, + "combineLogicalBytes": 105613312, + "fanoutMean": 3.59716796875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 183.4239959716797, + "p90": 186.17600202560425, + "p95": 187.29600310325623, + "p99": 194.46399807929993 + }, + "combine": { + "p50": 183.4239959716797, + "p90": 186.17600202560425, + "p95": 187.29600310325623, + "p99": 194.46399807929993 + }, + "roundtrip": { + "p50": 183.4239959716797, + "p90": 186.17600202560425, + "p95": 187.29600310325623, + "p99": 194.46399807929993 + }, + "isolatedSum": { + "p50": 366.8479919433594, + "p90": 372.3520040512085, + "p95": 374.59200620651245, + "p99": 388.92799615859985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211843072, + "combineLogicalBytes": 
211843072, + "fanoutMean": 3.607666015625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 317.21600890159607, + "p90": 320.67200541496277, + "p95": 321.9519853591919, + "p99": 327.1679878234863 + }, + "combine": { + "p50": 317.21600890159607, + "p90": 320.67200541496277, + "p95": 321.9519853591919, + "p99": 327.1679878234863 + }, + "roundtrip": { + "p50": 317.21600890159607, + "p90": 320.67200541496277, + "p95": 321.9519853591919, + "p99": 327.1679878234863 + }, + "isolatedSum": { + "p50": 634.4320178031921, + "p90": 641.3440108299255, + "p95": 643.9039707183838, + "p99": 654.3359756469727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423198720, + "combineLogicalBytes": 423198720, + "fanoutMean": 3.603515625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 582.4959874153137, + "p90": 585.6959819793701, + "p95": 587.4879956245422, + "p99": 594.6879982948303 + }, + "combine": { + "p50": 582.4959874153137, + "p90": 585.6959819793701, + "p95": 587.4879956245422, + "p99": 594.6879982948303 + }, + "roundtrip": { + "p50": 582.4959874153137, + "p90": 585.6959819793701, + "p95": 587.4879956245422, + "p99": 594.6879982948303 + }, + "isolatedSum": { + "p50": 1164.9919748306274, + "p90": 1171.3919639587402, + "p95": 1174.9759912490845, + "p99": 1189.3759965896606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 846024704, + "combineLogicalBytes": 846024704, + "fanoutMean": 3.6019287109375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-287d8f11", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": 
"gb300_8ea28bc7", + "comparisonKey": "d460b384313cb3ad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:43.565794+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 106.84800148010254, + "p90": 118.04799735546112, + 
"p95": 121.88799679279327, + "p99": 127.13600695133209 + }, + "combine": { + "p50": 106.84800148010254, + "p90": 118.04799735546112, + "p95": 121.88799679279327, + "p99": 127.13600695133209 + }, + "roundtrip": { + "p50": 106.84800148010254, + "p90": 118.04799735546112, + "p95": 121.88799679279327, + "p99": 127.13600695133209 + }, + "isolatedSum": { + "p50": 213.69600296020508, + "p90": 236.09599471092224, + "p95": 243.77599358558655, + "p99": 254.27201390266418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 111.16799712181091, + "p90": 121.95199728012085, + "p95": 126.81600451469421, + "p99": 133.5040032863617 + }, + "combine": { + "p50": 111.16799712181091, + "p90": 121.95199728012085, + "p95": 126.81600451469421, + "p99": 133.5040032863617 + }, + "roundtrip": { + "p50": 111.16799712181091, + "p90": 121.95199728012085, + "p95": 126.81600451469421, + "p99": 133.5040032863617 + }, + "isolatedSum": { + "p50": 222.33599424362183, + "p90": 243.9039945602417, + "p95": 253.63200902938843, + "p99": 267.0080065727234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 127.6479959487915, + "p90": 136.80000603199005, + "p95": 142.17600226402283, + "p99": 150.59199929237366 + }, + "combine": { + "p50": 127.6479959487915, + "p90": 136.80000603199005, + "p95": 142.17600226402283, + "p99": 150.59199929237366 + }, + "roundtrip": { + "p50": 127.6479959487915, + "p90": 136.80000603199005, + "p95": 142.17600226402283, + "p99": 150.59199929237366 + }, + 
"isolatedSum": { + "p50": 255.295991897583, + "p90": 273.6000120639801, + "p95": 284.35200452804565, + "p99": 301.1839985847473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 182.43199586868286, + "p90": 185.2159947156906, + "p95": 186.39999628067017, + "p99": 196.06399536132812 + }, + "combine": { + "p50": 182.43199586868286, + "p90": 185.2159947156906, + "p95": 186.39999628067017, + "p99": 196.06399536132812 + }, + "roundtrip": { + "p50": 182.43199586868286, + "p90": 185.2159947156906, + "p95": 186.39999628067017, + "p99": 196.06399536132812 + }, + "isolatedSum": { + "p50": 364.8639917373657, + "p90": 370.4319894313812, + "p95": 372.79999256134033, + "p99": 392.12799072265625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 328.0960023403168, + "p90": 331.0079872608185, + "p95": 334.1439962387085, + "p99": 340.831995010376 + }, + "combine": { + "p50": 328.0960023403168, + "p90": 331.0079872608185, + "p95": 334.1439962387085, + "p99": 340.831995010376 + }, + "roundtrip": { + "p50": 328.0960023403168, + "p90": 331.0079872608185, + "p95": 334.1439962387085, + "p99": 340.831995010376 + }, + "isolatedSum": { + "p50": 656.1920046806335, + "p90": 662.015974521637, + "p95": 668.287992477417, + "p99": 681.663990020752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 654.591977596283, + "p90": 659.2959761619568, + "p95": 660.7040166854858, + "p99": 666.5599942207336 + }, + "combine": { + "p50": 654.591977596283, + "p90": 659.2959761619568, + "p95": 660.7040166854858, + "p99": 666.5599942207336 + }, + "roundtrip": { + "p50": 654.591977596283, + "p90": 659.2959761619568, + "p95": 660.7040166854858, + "p99": 666.5599942207336 + }, + "isolatedSum": { + "p50": 1309.183955192566, + "p90": 1318.5919523239136, + "p95": 1321.4080333709717, + "p99": 1333.1199884414673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4352031f", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|prefill|normal|none|none|0|tuned||25840dd8241ba10", + "colorKey": "gb300_511d7c55", + "comparisonKey": "e22d5281197d14ca", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:04.489539+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "25840dd8241ba10", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 99.13600236177444, + "p90": 108.51199924945831, + "p95": 111.93600296974182, + "p99": 116.38399958610535 + }, + "combine": { + "p50": 99.13600236177444, + "p90": 108.51199924945831, + "p95": 111.93600296974182, + "p99": 116.38399958610535 + }, + "roundtrip": { + "p50": 99.13600236177444, + "p90": 108.51199924945831, + "p95": 111.93600296974182, + "p99": 116.38399958610535 + }, + "isolatedSum": { + "p50": 198.2720047235489, + "p90": 217.02399849891663, + "p95": 223.87200593948364, + "p99": 232.7679991722107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 103.96800190210342, + "p90": 112.83200234174728, + "p95": 117.37599968910217, + "p99": 126.46399438381195 + }, + "combine": { + "p50": 103.96800190210342, + "p90": 
112.83200234174728, + "p95": 117.37599968910217, + "p99": 126.46399438381195 + }, + "roundtrip": { + "p50": 103.96800190210342, + "p90": 112.83200234174728, + "p95": 117.37599968910217, + "p99": 126.46399438381195 + }, + "isolatedSum": { + "p50": 207.93600380420685, + "p90": 225.66400468349457, + "p95": 234.75199937820435, + "p99": 252.9279887676239 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18335744, + "combineLogicalBytes": 18335744, + "fanoutMean": 1.2490234375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 117.53600090742111, + "p90": 126.27199292182922, + "p95": 132.86399841308594, + "p99": 140.70400595664978 + }, + "combine": { + "p50": 117.53600090742111, + "p90": 126.27199292182922, + "p95": 132.86399841308594, + "p99": 140.70400595664978 + }, + "roundtrip": { + "p50": 117.53600090742111, + "p90": 126.27199292182922, + "p95": 132.86399841308594, + "p99": 140.70400595664978 + }, + "isolatedSum": { + "p50": 235.07200181484222, + "p90": 252.54398584365845, + "p95": 265.7279968261719, + "p99": 281.40801191329956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36900864, + "combineLogicalBytes": 36900864, + "fanoutMean": 1.2568359375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 175.35999417304993, + "p90": 180.00000715255737, + "p95": 183.23199450969696, + "p99": 187.99999356269836 + }, + "combine": { + "p50": 175.35999417304993, + "p90": 180.00000715255737, + "p95": 183.23199450969696, + "p99": 187.99999356269836 + }, + "roundtrip": { + "p50": 175.35999417304993, + "p90": 180.00000715255737, + "p95": 183.23199450969696, + "p99": 187.99999356269836 + }, + "isolatedSum": { + "p50": 350.71998834609985, + "p90": 360.00001430511475, + "p95": 366.4639890193939, + 
"p99": 375.99998712539673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73730048, + "combineLogicalBytes": 73730048, + "fanoutMean": 1.255615234375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 327.64801383018494, + "p90": 330.3680121898651, + "p95": 332.73598551750183, + "p99": 338.27200531959534 + }, + "combine": { + "p50": 327.64801383018494, + "p90": 330.3680121898651, + "p95": 332.73598551750183, + "p99": 338.27200531959534 + }, + "roundtrip": { + "p50": 327.64801383018494, + "p90": 330.3680121898651, + "p95": 332.73598551750183, + "p99": 338.27200531959534 + }, + "isolatedSum": { + "p50": 655.2960276603699, + "p90": 660.7360243797302, + "p95": 665.4719710350037, + "p99": 676.5440106391907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147746816, + "combineLogicalBytes": 147746816, + "fanoutMean": 1.258056640625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 610.8480095863342, + "p90": 614.7199869155884, + "p95": 615.9039735794067, + "p99": 619.1040277481079 + }, + "combine": { + "p50": 610.8480095863342, + "p90": 614.7199869155884, + "p95": 615.9039735794067, + "p99": 619.1040277481079 + }, + "roundtrip": { + "p50": 610.8480095863342, + "p90": 614.7199869155884, + "p95": 615.9039735794067, + "p99": 619.1040277481079 + }, + "isolatedSum": { + "p50": 1221.6960191726685, + "p90": 1229.4399738311768, + "p95": 1231.8079471588135, + "p99": 1238.2080554962158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295278592, + "combineLogicalBytes": 295278592, + "fanoutMean": 1.25714111328125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cdd9e1f5", + "identity": 
"gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|prefill|normal|none|none|0|tuned||cabb28c468fd7cf", + "colorKey": "gb300_30e19049", + "comparisonKey": "b438f4f181bcc6b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:22:04.963469+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cabb28c468fd7cf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86505126953125, + "eplbImbalanceAfter": 1.0000149681454613, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 116.35199934244156, + "p90": 125.37600100040436, + "p95": 128.12800705432892, + "p99": 133.08799266815186 + }, + "combine": { + "p50": 116.35199934244156, + "p90": 125.37600100040436, + "p95": 128.12800705432892, + "p99": 133.08799266815186 + }, + "roundtrip": { + "p50": 116.35199934244156, + "p90": 125.37600100040436, + "p95": 128.12800705432892, + "p99": 133.08799266815186 + }, + "isolatedSum": { + "p50": 232.70399868488312, + "p90": 250.75200200080872, + "p95": 256.25601410865784, + "p99": 266.1759853363037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25862144, + "combineLogicalBytes": 25862144, + "fanoutMean": 3.5234375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 120.51200121641159, + "p90": 128.03199887275696, + "p95": 131.48799538612366, + "p99": 135.83999872207642 + }, + "combine": { + "p50": 120.51200121641159, + "p90": 128.03199887275696, + "p95": 131.48799538612366, + "p99": 135.83999872207642 + }, + "roundtrip": { + "p50": 120.51200121641159, + "p90": 128.03199887275696, + "p95": 131.48799538612366, + "p99": 135.83999872207642 + }, + "isolatedSum": { + "p50": 241.02400243282318, + "p90": 256.0639977455139, + "p95": 262.9759907722473, + "p99": 271.67999744415283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 51509248, + "combineLogicalBytes": 51509248, + "fanoutMean": 3.5087890625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 137.05599308013916, + "p90": 144.6399986743927, + "p95": 149.31200444698334, + "p99": 156.63999319076538 + }, + "combine": { + "p50": 137.05599308013916, + "p90": 
144.6399986743927, + "p95": 149.31200444698334, + "p99": 156.63999319076538 + }, + "roundtrip": { + "p50": 137.05599308013916, + "p90": 144.6399986743927, + "p95": 149.31200444698334, + "p99": 156.63999319076538 + }, + "isolatedSum": { + "p50": 274.1119861602783, + "p90": 289.2799973487854, + "p95": 298.6240088939667, + "p99": 313.27998638153076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 102688768, + "combineLogicalBytes": 102688768, + "fanoutMean": 3.49755859375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 186.46399676799774, + "p90": 190.0479942560196, + "p95": 192.09599494934082, + "p99": 197.60000705718994 + }, + "combine": { + "p50": 186.46399676799774, + "p90": 190.0479942560196, + "p95": 192.09599494934082, + "p99": 197.60000705718994 + }, + "roundtrip": { + "p50": 186.46399676799774, + "p90": 190.0479942560196, + "p95": 192.09599494934082, + "p99": 197.60000705718994 + }, + "isolatedSum": { + "p50": 372.9279935359955, + "p90": 380.0959885120392, + "p95": 384.19198989868164, + "p99": 395.2000141143799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 205520896, + "combineLogicalBytes": 205520896, + "fanoutMean": 3.5, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 317.8560137748718, + "p90": 321.3439881801605, + "p95": 322.4959969520569, + "p99": 327.5519907474518 + }, + "combine": { + "p50": 317.8560137748718, + "p90": 321.3439881801605, + "p95": 322.4959969520569, + "p99": 327.5519907474518 + }, + "roundtrip": { + "p50": 317.8560137748718, + "p90": 321.3439881801605, + "p95": 322.4959969520569, + "p99": 327.5519907474518 + }, + "isolatedSum": { + "p50": 635.7120275497437, + "p90": 642.687976360321, + "p95": 644.9919939041138, + "p99": 655.1039814949036 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 412016640, + "combineLogicalBytes": 412016640, + "fanoutMean": 3.50830078125, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 579.039990901947, + "p90": 583.1999778747559, + "p95": 584.4799876213074, + "p99": 587.7119898796082 + }, + "combine": { + "p50": 579.039990901947, + "p90": 583.1999778747559, + "p95": 584.4799876213074, + "p99": 587.7119898796082 + }, + "roundtrip": { + "p50": 579.039990901947, + "p90": 583.1999778747559, + "p95": 584.4799876213074, + "p99": 587.7119898796082 + }, + "isolatedSum": { + "p50": 1158.079981803894, + "p90": 1166.3999557495117, + "p95": 1168.9599752426147, + "p99": 1175.4239797592163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 824119296, + "combineLogicalBytes": 824119296, + "fanoutMean": 3.5086669921875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-390a8a9a", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|prefill|normal|none|none|0|tuned||370c8dd16f08e2c", + "colorKey": "gb300_95126280", + "comparisonKey": "1f3d1ee2acb6565d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:55.050060+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + 
"routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "370c8dd16f08e2c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 106.20799660682678, + "p90": 116.73600226640701, + "p95": 119.61600184440613, + "p99": 123.9359974861145 + }, + "combine": { + "p50": 106.20799660682678, + "p90": 116.73600226640701, + "p95": 119.61600184440613, + "p99": 123.9359974861145 + }, + "roundtrip": { + "p50": 106.20799660682678, + "p90": 116.73600226640701, + "p95": 119.61600184440613, + "p99": 123.9359974861145 + }, + "isolatedSum": { + "p50": 212.41599321365356, + "p90": 233.47200453281403, + "p95": 239.23200368881226, + "p99": 247.871994972229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 114.49600011110306, + "p90": 122.27199971675873, + "p95": 126.65599584579468, + "p99": 132.4480026960373 + }, + "combine": { + "p50": 114.49600011110306, + "p90": 122.27199971675873, + "p95": 126.65599584579468, + "p99": 132.4480026960373 + }, + "roundtrip": { + "p50": 114.49600011110306, + "p90": 122.27199971675873, + "p95": 126.65599584579468, + "p99": 132.4480026960373 + }, + "isolatedSum": { + "p50": 228.99200022220612, + "p90": 244.54399943351746, + "p95": 253.31199169158936, + "p99": 264.8960053920746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49057792, + "combineLogicalBytes": 49057792, + "fanoutMean": 3.341796875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 131.8719983100891, + "p90": 139.3599957227707, + "p95": 144.1279947757721, + "p99": 149.82399344444275 + }, + "combine": { + "p50": 131.8719983100891, + "p90": 139.3599957227707, + "p95": 144.1279947757721, + "p99": 149.82399344444275 + }, + "roundtrip": { + "p50": 131.8719983100891, + "p90": 139.3599957227707, + "p95": 144.1279947757721, + "p99": 149.82399344444275 + }, + "isolatedSum": { + "p50": 263.7439966201782, + "p90": 278.7199914455414, + "p95": 288.2559895515442, + "p99": 299.6479868888855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 98344960, + "combineLogicalBytes": 98344960, + "fanoutMean": 3.349609375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 186.3359957933426, + "p90": 188.76799941062927, + "p95": 190.59200584888458, + "p99": 197.66399264335632 + }, + "combine": { + "p50": 186.3359957933426, + "p90": 188.76799941062927, + "p95": 190.59200584888458, + "p99": 197.66399264335632 + }, + "roundtrip": { + "p50": 
186.3359957933426, + "p90": 188.76799941062927, + "p95": 190.59200584888458, + "p99": 197.66399264335632 + }, + "isolatedSum": { + "p50": 372.6719915866852, + "p90": 377.53599882125854, + "p95": 381.18401169776917, + "p99": 395.32798528671265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 196704256, + "combineLogicalBytes": 196704256, + "fanoutMean": 3.349853515625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 332.73598551750183, + "p90": 335.32801270484924, + "p95": 336.38399839401245, + "p99": 344.0000116825104 + }, + "combine": { + "p50": 332.73598551750183, + "p90": 335.32801270484924, + "p95": 336.38399839401245, + "p99": 344.0000116825104 + }, + "roundtrip": { + "p50": 332.73598551750183, + "p90": 335.32801270484924, + "p95": 336.38399839401245, + "p99": 344.0000116825104 + }, + "isolatedSum": { + "p50": 665.4719710350037, + "p90": 670.6560254096985, + "p95": 672.7679967880249, + "p99": 688.0000233650208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 393351168, + "combineLogicalBytes": 393351168, + "fanoutMean": 3.349365234375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 616.0320043563843, + "p90": 619.0080046653748, + "p95": 620.2560067176819, + "p99": 627.0080208778381 + }, + "combine": { + "p50": 616.0320043563843, + "p90": 619.0080046653748, + "p95": 620.2560067176819, + "p99": 627.0080208778381 + }, + "roundtrip": { + "p50": 616.0320043563843, + "p90": 619.0080046653748, + "p95": 620.2560067176819, + "p99": 627.0080208778381 + }, + "isolatedSum": { + "p50": 1232.0640087127686, + "p90": 1238.0160093307495, + "p95": 1240.5120134353638, + "p99": 1254.0160417556763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 785469440, + 
"combineLogicalBytes": 785469440, + "fanoutMean": 3.3441162109375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-959bb2fd", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|prefill|normal|none|none|0|tuned||624fdceae193d94", + "colorKey": "gb300_e4bc70ce", + "comparisonKey": "49df24201a9a39e8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:05.790468+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "624fdceae193d94", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.888397216796875, + "eplbImbalanceAfter": 1.00013427734375, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 104.76800054311752, + "p90": 113.95200341939926, + "p95": 119.29599940776825, + "p99": 128.89599800109863 + }, + "combine": { + "p50": 104.76800054311752, + "p90": 113.95200341939926, + "p95": 119.29599940776825, + "p99": 128.89599800109863 + }, + "roundtrip": { + "p50": 104.76800054311752, + "p90": 113.95200341939926, + "p95": 119.29599940776825, + "p99": 128.89599800109863 + }, + "isolatedSum": { + "p50": 209.53600108623505, + "p90": 227.90400683879852, + "p95": 238.5919988155365, + "p99": 257.79199600219727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 112.83200234174728, + "p90": 120.70400267839432, + "p95": 125.88800489902496, + "p99": 130.3039938211441 + }, + "combine": { + "p50": 112.83200234174728, + "p90": 120.70400267839432, + "p95": 125.88800489902496, + "p99": 130.3039938211441 + }, + "roundtrip": { + "p50": 112.83200234174728, + "p90": 120.70400267839432, + "p95": 125.88800489902496, + "p99": 130.3039938211441 + }, + "isolatedSum": { + "p50": 225.66400468349457, + "p90": 241.40800535678864, + "p95": 251.77600979804993, + "p99": 260.6079876422882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52441088, + "combineLogicalBytes": 52441088, + "fanoutMean": 3.572265625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 131.20000064373016, + "p90": 138.17599415779114, + "p95": 143.8080072402954, + "p99": 149.1840034723282 + }, + "combine": { + "p50": 131.20000064373016, + "p90": 138.17599415779114, + "p95": 143.8080072402954, + "p99": 149.1840034723282 + }, + "roundtrip": { + "p50": 131.20000064373016, + "p90": 138.17599415779114, + "p95": 143.8080072402954, + "p99": 149.1840034723282 + }, + "isolatedSum": { + "p50": 262.4000012874603, + "p90": 276.3519883155823, + "p95": 287.6160144805908, + "p99": 298.3680069446564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105670656, + "combineLogicalBytes": 105670656, + "fanoutMean": 3.59912109375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 183.3599954843521, + "p90": 186.14399433135986, + "p95": 187.391996383667, + "p99": 192.99200177192688 + }, + "combine": { + "p50": 183.3599954843521, + "p90": 186.14399433135986, + "p95": 187.391996383667, + "p99": 192.99200177192688 + }, + "roundtrip": { + "p50": 183.3599954843521, + "p90": 186.14399433135986, + "p95": 187.391996383667, + "p99": 192.99200177192688 + }, + "isolatedSum": { + "p50": 366.7199909687042, + "p90": 372.2879886627197, + "p95": 374.783992767334, + "p99": 385.98400354385376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211527680, + "combineLogicalBytes": 211527680, + "fanoutMean": 3.602294921875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 317.9199993610382, + "p90": 320.95998525619507, + "p95": 322.33598828315735, + "p99": 328.3520042896271 + }, + "combine": { + "p50": 317.9199993610382, + "p90": 320.95998525619507, + "p95": 322.33598828315735, + "p99": 328.3520042896271 + }, + "roundtrip": { 
+ "p50": 317.9199993610382, + "p90": 320.95998525619507, + "p95": 322.33598828315735, + "p99": 328.3520042896271 + }, + "isolatedSum": { + "p50": 635.8399987220764, + "p90": 641.9199705123901, + "p95": 644.6719765663147, + "p99": 656.7040085792542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423284736, + "combineLogicalBytes": 423284736, + "fanoutMean": 3.604248046875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 584.3520164489746, + "p90": 587.6479744911194, + "p95": 589.024007320404, + "p99": 593.2160019874573 + }, + "combine": { + "p50": 584.3520164489746, + "p90": 587.6479744911194, + "p95": 589.024007320404, + "p99": 593.2160019874573 + }, + "roundtrip": { + "p50": 584.3520164489746, + "p90": 587.6479744911194, + "p95": 589.024007320404, + "p99": 593.2160019874573 + }, + "isolatedSum": { + "p50": 1168.7040328979492, + "p90": 1175.2959489822388, + "p95": 1178.048014640808, + "p99": 1186.4320039749146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847745024, + "combineLogicalBytes": 847745024, + "fanoutMean": 3.6092529296875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-140b150f", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_3fb75847", + "comparisonKey": "46ce8a16c8c8d777", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:33.278934+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 99.32799637317657, + "p90": 113.63200098276138, + "p95": 118.49600076675415, + "p99": 135.8720064163208 + }, + "combine": { + "p50": 99.32799637317657, + "p90": 113.63200098276138, + "p95": 118.49600076675415, + "p99": 135.8720064163208 + }, + "roundtrip": { + "p50": 99.32799637317657, + "p90": 113.63200098276138, + "p95": 118.49600076675415, + "p99": 135.8720064163208 + }, + "isolatedSum": { + "p50": 198.65599274635315, + "p90": 227.26400196552277, + "p95": 236.9920015335083, + "p99": 
271.7440128326416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 104.44799810647964, + "p90": 114.81600254774094, + "p95": 119.13599818944931, + "p99": 159.13599729537964 + }, + "combine": { + "p50": 104.44799810647964, + "p90": 114.81600254774094, + "p95": 119.13599818944931, + "p99": 159.13599729537964 + }, + "roundtrip": { + "p50": 104.44799810647964, + "p90": 114.81600254774094, + "p95": 119.13599818944931, + "p99": 159.13599729537964 + }, + "isolatedSum": { + "p50": 208.8959962129593, + "p90": 229.63200509548187, + "p95": 238.27199637889862, + "p99": 318.2719945907593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 120.2239990234375, + "p90": 127.93600559234619, + "p95": 135.26399433612823, + "p99": 158.1760048866272 + }, + "combine": { + "p50": 120.2239990234375, + "p90": 127.93600559234619, + "p95": 135.26399433612823, + "p99": 158.1760048866272 + }, + "roundtrip": { + "p50": 120.2239990234375, + "p90": 127.93600559234619, + "p95": 135.26399433612823, + "p99": 158.1760048866272 + }, + "isolatedSum": { + "p50": 240.447998046875, + "p90": 255.87201118469238, + "p95": 270.52798867225647, + "p99": 316.3520097732544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 
182.0479929447174, + "p90": 184.60799753665924, + "p95": 186.0799938440323, + "p99": 193.37600469589233 + }, + "combine": { + "p50": 182.0479929447174, + "p90": 184.60799753665924, + "p95": 186.0799938440323, + "p99": 193.37600469589233 + }, + "roundtrip": { + "p50": 182.0479929447174, + "p90": 184.60799753665924, + "p95": 186.0799938440323, + "p99": 193.37600469589233 + }, + "isolatedSum": { + "p50": 364.0959858894348, + "p90": 369.2159950733185, + "p95": 372.1599876880646, + "p99": 386.75200939178467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 327.42398977279663, + "p90": 330.01598715782166, + "p95": 331.4880132675171, + "p99": 339.58399295806885 + }, + "combine": { + "p50": 327.42398977279663, + "p90": 330.01598715782166, + "p95": 331.4880132675171, + "p99": 339.58399295806885 + }, + "roundtrip": { + "p50": 327.42398977279663, + "p90": 330.01598715782166, + "p95": 331.4880132675171, + "p99": 339.58399295806885 + }, + "isolatedSum": { + "p50": 654.8479795455933, + "p90": 660.0319743156433, + "p95": 662.9760265350342, + "p99": 679.1679859161377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 651.2960195541382, + "p90": 656.000018119812, + "p95": 657.3439836502075, + "p99": 662.4959707260132 + }, + "combine": { + "p50": 651.2960195541382, + "p90": 656.000018119812, + "p95": 657.3439836502075, + "p99": 662.4959707260132 + }, + "roundtrip": { + "p50": 651.2960195541382, + "p90": 656.000018119812, + "p95": 657.3439836502075, + 
"p99": 662.4959707260132 + }, + "isolatedSum": { + "p50": 1302.5920391082764, + "p90": 1312.000036239624, + "p95": 1314.687967300415, + "p99": 1324.9919414520264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f30e2623", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_2c292f17", + "comparisonKey": "cf43e9b32fa16211", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:21:44.215363+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + 
"traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 103.4879982471466, + "p90": 118.9119964838028, + "p95": 127.96799838542938, + "p99": 165.0560051202774 + }, + "combine": { + "p50": 103.4879982471466, + "p90": 118.9119964838028, + "p95": 127.96799838542938, + "p99": 165.0560051202774 + }, + "roundtrip": { + "p50": 103.4879982471466, + "p90": 118.9119964838028, + "p95": 127.96799838542938, + "p99": 165.0560051202774 + }, + "isolatedSum": { + "p50": 206.9759964942932, + "p90": 237.8239929676056, + "p95": 255.93599677085876, + "p99": 330.1120102405548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 111.29599809646606, + "p90": 120.03199756145477, + "p95": 125.08800625801086, + "p99": 140.28799533843994 + }, + "combine": { + "p50": 111.29599809646606, + "p90": 120.03199756145477, + "p95": 125.08800625801086, + "p99": 140.28799533843994 + }, + "roundtrip": { + "p50": 111.29599809646606, + "p90": 120.03199756145477, + "p95": 125.08800625801086, + "p99": 140.28799533843994 + }, + "isolatedSum": { + "p50": 222.59199619293213, + "p90": 240.06399512290955, + "p95": 250.17601251602173, + "p99": 280.5759906768799 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 129.63199615478516, + "p90": 140.54399728775024, + "p95": 148.3519971370697, + "p99": 186.17600202560425 + }, + "combine": { + "p50": 129.63199615478516, + "p90": 140.54399728775024, + "p95": 148.3519971370697, + "p99": 186.17600202560425 + }, + "roundtrip": { + "p50": 129.63199615478516, + "p90": 140.54399728775024, + "p95": 148.3519971370697, + "p99": 186.17600202560425 + }, + "isolatedSum": { + "p50": 259.2639923095703, + "p90": 281.0879945755005, + "p95": 296.7039942741394, + "p99": 372.3520040512085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 182.23999440670013, + "p90": 185.88800728321075, + "p95": 192.51200556755066, + "p99": 220.15999257564545 + }, + "combine": { + "p50": 182.23999440670013, + "p90": 185.88800728321075, + "p95": 192.51200556755066, + "p99": 220.15999257564545 + }, + "roundtrip": { + "p50": 182.23999440670013, + "p90": 185.88800728321075, + "p95": 192.51200556755066, + "p99": 220.15999257564545 + }, + "isolatedSum": { + "p50": 364.47998881340027, + "p90": 371.7760145664215, + "p95": 385.0240111351013, + "p99": 440.3199851512909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 315.90399146080017, + 
"p90": 319.42400336265564, + "p95": 324.0639865398407, + "p99": 340.7680094242096 + }, + "combine": { + "p50": 315.90399146080017, + "p90": 319.42400336265564, + "p95": 324.0639865398407, + "p99": 340.7680094242096 + }, + "roundtrip": { + "p50": 315.90399146080017, + "p90": 319.42400336265564, + "p95": 324.0639865398407, + "p99": 340.7680094242096 + }, + "isolatedSum": { + "p50": 631.8079829216003, + "p90": 638.8480067253113, + "p95": 648.1279730796814, + "p99": 681.5360188484192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 582.3360085487366, + "p90": 586.0159993171692, + "p95": 587.552011013031, + "p99": 596.5440273284912 + }, + "combine": { + "p50": 582.3360085487366, + "p90": 586.0159993171692, + "p95": 587.552011013031, + "p99": 596.5440273284912 + }, + "roundtrip": { + "p50": 582.3360085487366, + "p90": 586.0159993171692, + "p95": 587.552011013031, + "p99": 596.5440273284912 + }, + "isolatedSum": { + "p50": 1164.6720170974731, + "p90": 1172.0319986343384, + "p95": 1175.104022026062, + "p99": 1193.0880546569824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c07a8023", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_1d19e997", + "comparisonKey": "0067ad4d2bd626c6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:24.577627+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + 
"backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 104.5759990811348, + "p90": 114.3679991364479, + "p95": 118.33599954843521, + "p99": 124.15999919176102 + }, + "combine": { + "p50": 104.5759990811348, + "p90": 114.3679991364479, + "p95": 118.33599954843521, + "p99": 124.15999919176102 + }, + 
"roundtrip": { + "p50": 104.5759990811348, + "p90": 114.3679991364479, + "p95": 118.33599954843521, + "p99": 124.15999919176102 + }, + "isolatedSum": { + "p50": 209.1519981622696, + "p90": 228.7359982728958, + "p95": 236.67199909687042, + "p99": 248.31999838352203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 112.5440001487732, + "p90": 120.4800009727478, + "p95": 125.791996717453, + "p99": 135.903999209404 + }, + "combine": { + "p50": 112.5440001487732, + "p90": 120.4800009727478, + "p95": 125.791996717453, + "p99": 135.903999209404 + }, + "roundtrip": { + "p50": 112.5440001487732, + "p90": 120.4800009727478, + "p95": 125.791996717453, + "p99": 135.903999209404 + }, + "isolatedSum": { + "p50": 225.0880002975464, + "p90": 240.9600019454956, + "p95": 251.583993434906, + "p99": 271.807998418808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 130.3360015153885, + "p90": 138.94400000572205, + "p95": 144.16000247001648, + "p99": 151.90400183200836 + }, + "combine": { + "p50": 130.3360015153885, + "p90": 138.94400000572205, + "p95": 144.16000247001648, + "p99": 151.90400183200836 + }, + "roundtrip": { + "p50": 130.3360015153885, + "p90": 138.94400000572205, + "p95": 144.16000247001648, + "p99": 151.90400183200836 + }, + "isolatedSum": { + "p50": 260.672003030777, + "p90": 277.8880000114441, + "p95": 288.32000494003296, + "p99": 303.8080036640167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 
105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 182.78400599956512, + "p90": 185.56800484657288, + "p95": 186.5600049495697, + "p99": 196.86399400234222 + }, + "combine": { + "p50": 182.78400599956512, + "p90": 185.56800484657288, + "p95": 186.5600049495697, + "p99": 196.86399400234222 + }, + "roundtrip": { + "p50": 182.78400599956512, + "p90": 185.56800484657288, + "p95": 186.5600049495697, + "p99": 196.86399400234222 + }, + "isolatedSum": { + "p50": 365.56801199913025, + "p90": 371.13600969314575, + "p95": 373.1200098991394, + "p99": 393.72798800468445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 316.3520097732544, + "p90": 319.0079927444458, + "p95": 320.99199295043945, + "p99": 328.7679851055145 + }, + "combine": { + "p50": 316.3520097732544, + "p90": 319.0079927444458, + "p95": 320.99199295043945, + "p99": 328.7679851055145 + }, + "roundtrip": { + "p50": 316.3520097732544, + "p90": 319.0079927444458, + "p95": 320.99199295043945, + "p99": 328.7679851055145 + }, + "isolatedSum": { + "p50": 632.7040195465088, + "p90": 638.0159854888916, + "p95": 641.9839859008789, + "p99": 657.535970211029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 582.8480124473572, + "p90": 585.7279896736145, + "p95": 586.8480205535889, + "p99": 590.8480286598206 + }, + 
"combine": { + "p50": 582.8480124473572, + "p90": 585.7279896736145, + "p95": 586.8480205535889, + "p99": 590.8480286598206 + }, + "roundtrip": { + "p50": 582.8480124473572, + "p90": 585.7279896736145, + "p95": 586.8480205535889, + "p99": 590.8480286598206 + }, + "isolatedSum": { + "p50": 1165.6960248947144, + "p90": 1171.455979347229, + "p95": 1173.6960411071777, + "p99": 1181.696057319641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-14521c4d", + "identity": "gb300|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_c23c12d7", + "comparisonKey": "8b50bc0c8a92f2a9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:32:44.199527+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 120.15999853610992, + "p90": 130.94399869441986, + "p95": 135.77599823474884, + "p99": 140.60799777507782 + }, + "combine": { + "p50": 120.15999853610992, + "p90": 130.94399869441986, + "p95": 135.77599823474884, + "p99": 140.60799777507782 + }, + "roundtrip": { + "p50": 120.15999853610992, + "p90": 130.94399869441986, + "p95": 135.77599823474884, + "p99": 140.60799777507782 + }, + "isolatedSum": { + "p50": 240.31999707221985, + "p90": 261.8879973888397, + "p95": 271.5519964694977, + "p99": 281.21599555015564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13382656, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 127.20000743865967, + "p90": 136.00000739097595, + "p95": 143.51999759674072, + "p99": 148.51200580596924 + }, + "combine": { + "p50": 127.20000743865967, + "p90": 136.00000739097595, + "p95": 143.51999759674072, + "p99": 148.51200580596924 + }, + "roundtrip": { + "p50": 127.20000743865967, + "p90": 136.00000739097595, + "p95": 143.51999759674072, + "p99": 148.51200580596924 + }, + 
"isolatedSum": { + "p50": 254.40001487731934, + "p90": 272.0000147819519, + "p95": 287.03999519348145, + "p99": 297.0240116119385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26629120, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 145.4080045223236, + "p90": 154.27200496196747, + "p95": 160.16000509262085, + "p99": 164.99200463294983 + }, + "combine": { + "p50": 145.4080045223236, + "p90": 154.27200496196747, + "p95": 160.16000509262085, + "p99": 164.99200463294983 + }, + "roundtrip": { + "p50": 145.4080045223236, + "p90": 154.27200496196747, + "p95": 160.16000509262085, + "p99": 164.99200463294983 + }, + "isolatedSum": { + "p50": 290.8160090446472, + "p90": 308.54400992393494, + "p95": 320.3200101852417, + "p99": 329.98400926589966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53122048, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 188.1600022315979, + "p90": 197.53600656986237, + "p95": 200.95999538898468, + "p99": 207.23199844360352 + }, + "combine": { + "p50": 188.1600022315979, + "p90": 197.53600656986237, + "p95": 200.95999538898468, + "p99": 207.23199844360352 + }, + "roundtrip": { + "p50": 188.1600022315979, + "p90": 197.53600656986237, + "p95": 200.95999538898468, + "p99": 207.23199844360352 + }, + "isolatedSum": { + "p50": 376.3200044631958, + "p90": 395.07201313972473, + "p95": 401.91999077796936, + "p99": 414.46399688720703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106201088, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 269.3760097026825, + "p90": 277.2800028324127, + "p95": 282.27201104164124, + "p99": 290.2719974517822 + }, + "combine": { + "p50": 269.3760097026825, + "p90": 277.2800028324127, + "p95": 282.27201104164124, + "p99": 290.2719974517822 + }, + "roundtrip": { + "p50": 269.3760097026825, + "p90": 277.2800028324127, + "p95": 282.27201104164124, + "p99": 290.2719974517822 + }, + "isolatedSum": { + "p50": 538.752019405365, + "p90": 554.5600056648254, + "p95": 564.5440220832825, + "p99": 580.5439949035645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212595712, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 474.047988653183, + "p90": 477.05599665641785, + "p95": 477.9840111732483, + "p99": 479.42399978637695 + }, + "combine": { + "p50": 474.047988653183, + "p90": 477.05599665641785, + "p95": 477.9840111732483, + "p99": 479.42399978637695 + }, + "roundtrip": { + "p50": 474.047988653183, + "p90": 477.05599665641785, + "p95": 477.9840111732483, + "p99": 479.42399978637695 + }, + "isolatedSum": { + "p50": 948.095977306366, + "p90": 954.1119933128357, + "p95": 955.9680223464966, + "p99": 958.8479995727539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424639488, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8712fbe1", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_53d33fc0", + "comparisonKey": "409a86325c90b0ee", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:37:36.753895+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.04800349473953, + "p90": 122.91199713945389, + "p95": 126.71999633312225, + "p99": 140.09599387645721 + }, + "combine": { + "p50": 
110.04800349473953, + "p90": 122.91199713945389, + "p95": 126.71999633312225, + "p99": 140.09599387645721 + }, + "roundtrip": { + "p50": 110.04800349473953, + "p90": 122.91199713945389, + "p95": 126.71999633312225, + "p99": 140.09599387645721 + }, + "isolatedSum": { + "p50": 220.09600698947906, + "p90": 245.82399427890778, + "p95": 253.4399926662445, + "p99": 280.19198775291443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 124.67200309038162, + "p90": 137.472003698349, + "p95": 140.86399972438812, + "p99": 148.0959951877594 + }, + "combine": { + "p50": 124.67200309038162, + "p90": 137.472003698349, + "p95": 140.86399972438812, + "p99": 148.0959951877594 + }, + "roundtrip": { + "p50": 124.67200309038162, + "p90": 137.472003698349, + "p95": 140.86399972438812, + "p99": 148.0959951877594 + }, + "isolatedSum": { + "p50": 249.34400618076324, + "p90": 274.944007396698, + "p95": 281.72799944877625, + "p99": 296.1919903755188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 165.21599888801575, + "p90": 177.34399437904358, + "p95": 187.00799345970154, + "p99": 204.3199986219406 + }, + "combine": { + "p50": 165.21599888801575, + "p90": 177.34399437904358, + "p95": 187.00799345970154, + "p99": 204.3199986219406 + }, + "roundtrip": { + "p50": 165.21599888801575, + "p90": 177.34399437904358, + "p95": 187.00799345970154, + "p99": 204.3199986219406 + }, + "isolatedSum": { + "p50": 330.4319977760315, + "p90": 354.68798875808716, + "p95": 
374.0159869194031, + "p99": 408.6399972438812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 281.69599175453186, + "p90": 288.86398673057556, + "p95": 292.4799919128418, + "p99": 301.9520044326782 + }, + "combine": { + "p50": 281.69599175453186, + "p90": 288.86398673057556, + "p95": 292.4799919128418, + "p99": 301.9520044326782 + }, + "roundtrip": { + "p50": 281.69599175453186, + "p90": 288.86398673057556, + "p95": 292.4799919128418, + "p99": 301.9520044326782 + }, + "isolatedSum": { + "p50": 563.3919835090637, + "p90": 577.7279734611511, + "p95": 584.9599838256836, + "p99": 603.9040088653564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 509.18400287628174, + "p90": 529.2479991912842, + "p95": 548.6400127410889, + "p99": 564.9600028991699 + }, + "combine": { + "p50": 509.18400287628174, + "p90": 529.2479991912842, + "p95": 548.6400127410889, + "p99": 564.9600028991699 + }, + "roundtrip": { + "p50": 509.18400287628174, + "p90": 529.2479991912842, + "p95": 548.6400127410889, + "p99": 564.9600028991699 + }, + "isolatedSum": { + "p50": 1018.3680057525635, + "p90": 1058.4959983825684, + "p95": 1097.2800254821777, + "p99": 1129.9200057983398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 
32768, + "dispatch": { + "p50": 953.1199932098389, + "p90": 959.9679708480835, + "p95": 963.487982749939, + "p99": 976.639986038208 + }, + "combine": { + "p50": 953.1199932098389, + "p90": 959.9679708480835, + "p95": 963.487982749939, + "p99": 976.639986038208 + }, + "roundtrip": { + "p50": 953.1199932098389, + "p90": 959.9679708480835, + "p95": 963.487982749939, + "p99": 976.639986038208 + }, + "isolatedSum": { + "p50": 1906.2399864196777, + "p90": 1919.935941696167, + "p95": 1926.975965499878, + "p99": 1953.279972076416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-61329453", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||03799dfc4e73d7f", + "colorKey": "gb300_120a7978", + "comparisonKey": "a175a5639605e59f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:30:02.111560+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, 
+ "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03799dfc4e73d7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.87200313806534, + "p90": 127.23200023174286, + "p95": 130.43199479579926, + "p99": 140.35199582576752 + }, + "combine": { + "p50": 115.87200313806534, + "p90": 127.23200023174286, + "p95": 130.43199479579926, + "p99": 140.35199582576752 + }, + "roundtrip": { + "p50": 115.87200313806534, + "p90": 127.23200023174286, + "p95": 130.43199479579926, + "p99": 140.35199582576752 + }, + "isolatedSum": { + "p50": 231.74400627613068, + "p90": 254.46400046348572, + "p95": 260.8639895915985, + "p99": 280.70399165153503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 138.59200477600098, + "p90": 148.8640010356903, + "p95": 151.64799988269806, + "p99": 158.9760035276413 + }, + "combine": { + "p50": 138.59200477600098, + "p90": 148.8640010356903, + "p95": 151.64799988269806, + "p99": 158.9760035276413 + }, + 
"roundtrip": { + "p50": 138.59200477600098, + "p90": 148.8640010356903, + "p95": 151.64799988269806, + "p99": 158.9760035276413 + }, + "isolatedSum": { + "p50": 277.18400955200195, + "p90": 297.7280020713806, + "p95": 303.2959997653961, + "p99": 317.9520070552826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 222.71999716758728, + "p90": 228.67199778556824, + "p95": 231.80800676345825, + "p99": 239.29600417613983 + }, + "combine": { + "p50": 222.71999716758728, + "p90": 228.67199778556824, + "p95": 231.80800676345825, + "p99": 239.29600417613983 + }, + "roundtrip": { + "p50": 222.71999716758728, + "p90": 228.67199778556824, + "p95": 231.80800676345825, + "p99": 239.29600417613983 + }, + "isolatedSum": { + "p50": 445.43999433517456, + "p90": 457.3439955711365, + "p95": 463.6160135269165, + "p99": 478.59200835227966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 395.4559862613678, + "p90": 399.4239866733551, + "p95": 401.98400616645813, + "p99": 408.8959991931915 + }, + "combine": { + "p50": 395.4559862613678, + "p90": 399.4239866733551, + "p95": 401.98400616645813, + "p99": 408.8959991931915 + }, + "roundtrip": { + "p50": 395.4559862613678, + "p90": 399.4239866733551, + "p95": 401.98400616645813, + "p99": 408.8959991931915 + }, + "isolatedSum": { + "p50": 790.9119725227356, + "p90": 798.8479733467102, + "p95": 803.9680123329163, + "p99": 817.791998386383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 
939524096, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 730.5920124053955, + "p90": 735.0720167160034, + "p95": 736.9599938392639, + "p99": 743.2960271835327 + }, + "combine": { + "p50": 730.5920124053955, + "p90": 735.0720167160034, + "p95": 736.9599938392639, + "p99": 743.2960271835327 + }, + "roundtrip": { + "p50": 730.5920124053955, + "p90": 735.0720167160034, + "p95": 736.9599938392639, + "p99": 743.2960271835327 + }, + "isolatedSum": { + "p50": 1461.184024810791, + "p90": 1470.1440334320068, + "p95": 1473.9199876785278, + "p99": 1486.5920543670654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1401.695966720581, + "p90": 1405.5999517440796, + "p95": 1406.6879749298096, + "p99": 1412.0639562606812 + }, + "combine": { + "p50": 1401.695966720581, + "p90": 1405.5999517440796, + "p95": 1406.6879749298096, + "p99": 1412.0639562606812 + }, + "roundtrip": { + "p50": 1401.695966720581, + "p90": 1405.5999517440796, + "p95": 1406.6879749298096, + "p99": 1412.0639562606812 + }, + "isolatedSum": { + "p50": 2803.391933441162, + "p90": 2811.199903488159, + "p95": 2813.375949859619, + "p99": 2824.1279125213623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9c4742a2", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c", + "colorKey": "gb300_df41b65b", 
+ "comparisonKey": "58944802337a5c5f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:23.741002+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7f1ea4cf569d12c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.82400113344193, + 
"p90": 118.14399808645248, + "p95": 122.81599640846252, + "p99": 148.51200580596924 + }, + "combine": { + "p50": 105.82400113344193, + "p90": 118.14399808645248, + "p95": 122.81599640846252, + "p99": 148.51200580596924 + }, + "roundtrip": { + "p50": 105.82400113344193, + "p90": 118.14399808645248, + "p95": 122.81599640846252, + "p99": 148.51200580596924 + }, + "isolatedSum": { + "p50": 211.64800226688385, + "p90": 236.28799617290497, + "p95": 245.63199281692505, + "p99": 297.0240116119385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 106.27199709415436, + "p90": 119.93599683046341, + "p95": 125.95200538635254, + "p99": 148.25600385665894 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 119.93599683046341, + "p95": 125.95200538635254, + "p99": 148.25600385665894 + }, + "roundtrip": { + "p50": 106.27199709415436, + "p90": 119.93599683046341, + "p95": 125.95200538635254, + "p99": 148.25600385665894 + }, + "isolatedSum": { + "p50": 212.54399418830872, + "p90": 239.87199366092682, + "p95": 251.90401077270508, + "p99": 296.51200771331787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 144.51199769973755, + "p90": 156.44800662994385, + "p95": 159.743994474411, + "p99": 189.91999328136444 + }, + "combine": { + "p50": 144.51199769973755, + "p90": 156.44800662994385, + "p95": 159.743994474411, + "p99": 189.91999328136444 + }, + "roundtrip": { + "p50": 144.51199769973755, + "p90": 156.44800662994385, + "p95": 159.743994474411, + "p99": 189.91999328136444 
+ }, + "isolatedSum": { + "p50": 289.0239953994751, + "p90": 312.8960132598877, + "p95": 319.487988948822, + "p99": 379.8399865627289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d150b7a0", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405", + "colorKey": "gb300_d844a8d6", + "comparisonKey": "aa2bb31a8511d593", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:00.001037+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7ac30b0a39b1405", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.31199789047241, + "p90": 121.56800180673599, + "p95": 124.64000284671783, + "p99": 133.15199315547943 + }, + "combine": { + "p50": 109.31199789047241, + "p90": 121.56800180673599, + "p95": 124.64000284671783, + "p99": 133.15199315547943 + }, + "roundtrip": { + "p50": 109.31199789047241, + "p90": 121.56800180673599, + "p95": 124.64000284671783, + "p99": 133.15199315547943 + }, + "isolatedSum": { + "p50": 218.62399578094482, + "p90": 243.13600361347198, + "p95": 249.28000569343567, + "p99": 266.30398631095886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 114.52800035476685, + "p90": 127.93600559234619, + "p95": 132.25600123405457, + "p99": 138.84800672531128 + }, + "combine": { + "p50": 114.52800035476685, + "p90": 127.93600559234619, + "p95": 132.25600123405457, + "p99": 138.84800672531128 + }, + "roundtrip": { + "p50": 114.52800035476685, + "p90": 127.93600559234619, + "p95": 132.25600123405457, + "p99": 138.84800672531128 + }, + "isolatedSum": { + "p50": 229.0560007095337, + "p90": 255.87201118469238, + "p95": 264.51200246810913, + "p99": 277.69601345062256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, 
+ "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 129.63199615478516, + "p90": 142.46399700641632, + "p95": 145.24799585342407, + "p99": 156.95999562740326 + }, + "combine": { + "p50": 129.63199615478516, + "p90": 142.46399700641632, + "p95": 145.24799585342407, + "p99": 156.95999562740326 + }, + "roundtrip": { + "p50": 129.63199615478516, + "p90": 142.46399700641632, + "p95": 145.24799585342407, + "p99": 156.95999562740326 + }, + "isolatedSum": { + "p50": 259.2639923095703, + "p90": 284.92799401283264, + "p95": 290.49599170684814, + "p99": 313.9199912548065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 167.71200299263, + "p90": 173.47200214862823, + "p95": 176.09600722789764, + "p99": 183.20000171661377 + }, + "combine": { + "p50": 167.71200299263, + "p90": 173.47200214862823, + "p95": 176.09600722789764, + "p99": 183.20000171661377 + }, + "roundtrip": { + "p50": 167.71200299263, + "p90": 173.47200214862823, + "p95": 176.09600722789764, + "p99": 183.20000171661377 + }, + "isolatedSum": { + "p50": 335.42400598526, + "p90": 346.94400429725647, + "p95": 352.1920144557953, + "p99": 366.40000343322754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 292.9919958114624, + "p90": 297.08799719810486, + "p95": 299.74400997161865, + "p99": 304.7359883785248 + }, + "combine": { + "p50": 292.9919958114624, + 
"p90": 297.08799719810486, + "p95": 299.74400997161865, + "p99": 304.7359883785248 + }, + "roundtrip": { + "p50": 292.9919958114624, + "p90": 297.08799719810486, + "p95": 299.74400997161865, + "p99": 304.7359883785248 + }, + "isolatedSum": { + "p50": 585.9839916229248, + "p90": 594.1759943962097, + "p95": 599.4880199432373, + "p99": 609.4719767570496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 544.5119738578796, + "p90": 549.0880012512207, + "p95": 552.0640015602112, + "p99": 559.1359734535217 + }, + "combine": { + "p50": 544.5119738578796, + "p90": 549.0880012512207, + "p95": 552.0640015602112, + "p99": 559.1359734535217 + }, + "roundtrip": { + "p50": 544.5119738578796, + "p90": 549.0880012512207, + "p95": 552.0640015602112, + "p99": 559.1359734535217 + }, + "isolatedSum": { + "p50": 1089.0239477157593, + "p90": 1098.1760025024414, + "p95": 1104.1280031204224, + "p99": 1118.2719469070435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9c0f9173", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||e3707ddc343088b", + "colorKey": "gb300_b98e69e2", + "comparisonKey": "571b084e40f2c3f9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:19.018430+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e3707ddc343088b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.656001329422, + "p90": 126.75200402736664, + "p95": 129.40800189971924, + "p99": 134.49600338935852 + }, + "combine": { + "p50": 114.656001329422, + "p90": 126.75200402736664, + "p95": 129.40800189971924, + "p99": 134.49600338935852 + }, + "roundtrip": { + "p50": 114.656001329422, + "p90": 126.75200402736664, + "p95": 129.40800189971924, + "p99": 134.49600338935852 + }, + "isolatedSum": { + "p50": 
229.312002658844, + "p90": 253.50400805473328, + "p95": 258.8160037994385, + "p99": 268.99200677871704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.65599715709686, + "p90": 145.6959992647171, + "p95": 147.8080004453659, + "p99": 158.4639996290207 + }, + "combine": { + "p50": 134.65599715709686, + "p90": 145.6959992647171, + "p95": 147.8080004453659, + "p99": 158.4639996290207 + }, + "roundtrip": { + "p50": 134.65599715709686, + "p90": 145.6959992647171, + "p95": 147.8080004453659, + "p99": 158.4639996290207 + }, + "isolatedSum": { + "p50": 269.3119943141937, + "p90": 291.3919985294342, + "p95": 295.6160008907318, + "p99": 316.9279992580414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.43200659751892, + "p90": 204.83200252056122, + "p95": 205.6639939546585, + "p99": 211.64800226688385 + }, + "combine": { + "p50": 202.43200659751892, + "p90": 204.83200252056122, + "p95": 205.6639939546585, + "p99": 211.64800226688385 + }, + "roundtrip": { + "p50": 202.43200659751892, + "p90": 204.83200252056122, + "p95": 205.6639939546585, + "p99": 211.64800226688385 + }, + "isolatedSum": { + "p50": 404.86401319503784, + "p90": 409.66400504112244, + "p95": 411.327987909317, + "p99": 423.2960045337677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 370.3039884567261, + "p90": 372.8640079498291, + "p95": 373.50401282310486, + "p99": 375.13598799705505 + }, + "combine": { + "p50": 370.3039884567261, + "p90": 372.8640079498291, + "p95": 373.50401282310486, + "p99": 375.13598799705505 + }, + "roundtrip": { + "p50": 370.3039884567261, + "p90": 372.8640079498291, + "p95": 373.50401282310486, + "p99": 375.13598799705505 + }, + "isolatedSum": { + "p50": 740.6079769134521, + "p90": 745.7280158996582, + "p95": 747.0080256462097, + "p99": 750.2719759941101 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 726.4000177383423, + "p90": 731.1999797821045, + "p95": 733.024001121521, + "p99": 736.2560033798218 + }, + "combine": { + "p50": 726.4000177383423, + "p90": 731.1999797821045, + "p95": 733.024001121521, + "p99": 736.2560033798218 + }, + "roundtrip": { + "p50": 726.4000177383423, + "p90": 731.1999797821045, + "p95": 733.024001121521, + "p99": 736.2560033798218 + }, + "isolatedSum": { + "p50": 1452.8000354766846, + "p90": 1462.399959564209, + "p95": 1466.048002243042, + "p99": 1472.5120067596436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1474.4960069656372, + "p90": 1478.7839651107788, + "p95": 1480.3839921951294, + "p99": 1484.4800233840942 + }, + "combine": { + "p50": 1474.4960069656372, + "p90": 1478.7839651107788, + "p95": 1480.3839921951294, + "p99": 1484.4800233840942 + }, + "roundtrip": { + 
"p50": 1474.4960069656372, + "p90": 1478.7839651107788, + "p95": 1480.3839921951294, + "p99": 1484.4800233840942 + }, + "isolatedSum": { + "p50": 2948.9920139312744, + "p90": 2957.5679302215576, + "p95": 2960.767984390259, + "p99": 2968.9600467681885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e3a71226", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add", + "colorKey": "gb300_8a9bcfac", + "comparisonKey": "8a50a3478fccb357", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:38:09.603960+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + 
"nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6248b19ef786add", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.35199803113937, + "p90": 120.51200121641159, + "p95": 123.36000055074692, + "p99": 132.9279989004135 + }, + "combine": { + "p50": 108.35199803113937, + "p90": 120.51200121641159, + "p95": 123.36000055074692, + "p99": 132.9279989004135 + }, + "roundtrip": { + "p50": 108.35199803113937, + "p90": 120.51200121641159, + "p95": 123.36000055074692, + "p99": 132.9279989004135 + }, + "isolatedSum": { + "p50": 216.70399606227875, + "p90": 241.02400243282318, + "p95": 246.72000110149384, + "p99": 265.855997800827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 122.43200093507767, + "p90": 134.68800485134125, + "p95": 138.0160003900528, + "p99": 154.65599298477173 + }, + "combine": { + "p50": 122.43200093507767, + "p90": 134.68800485134125, + "p95": 138.0160003900528, + "p99": 154.65599298477173 + }, + "roundtrip": { + "p50": 122.43200093507767, + "p90": 134.68800485134125, + "p95": 138.0160003900528, + "p99": 154.65599298477173 + }, + "isolatedSum": { + "p50": 244.86400187015533, + "p90": 
269.3760097026825, + "p95": 276.0320007801056, + "p99": 309.31198596954346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 162.56000101566315, + "p90": 168.2240068912506, + "p95": 171.36000096797943, + "p99": 184.4480037689209 + }, + "combine": { + "p50": 162.56000101566315, + "p90": 168.2240068912506, + "p95": 171.36000096797943, + "p99": 184.4480037689209 + }, + "roundtrip": { + "p50": 162.56000101566315, + "p90": 168.2240068912506, + "p95": 171.36000096797943, + "p99": 184.4480037689209 + }, + "isolatedSum": { + "p50": 325.1200020313263, + "p90": 336.4480137825012, + "p95": 342.72000193595886, + "p99": 368.8960075378418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 279.231995344162, + "p90": 283.84000062942505, + "p95": 287.1679961681366, + "p99": 302.68800258636475 + }, + "combine": { + "p50": 279.231995344162, + "p90": 283.84000062942505, + "p95": 287.1679961681366, + "p99": 302.68800258636475 + }, + "roundtrip": { + "p50": 279.231995344162, + "p90": 283.84000062942505, + "p95": 287.1679961681366, + "p99": 302.68800258636475 + }, + "isolatedSum": { + "p50": 558.463990688324, + "p90": 567.6800012588501, + "p95": 574.3359923362732, + "p99": 605.3760051727295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
2048, + "globalTokens": 16384, + "dispatch": { + "p50": 502.24000215530396, + "p90": 506.52801990509033, + "p95": 509.4720125198364, + "p99": 531.328022480011 + }, + "combine": { + "p50": 502.24000215530396, + "p90": 506.52801990509033, + "p95": 509.4720125198364, + "p99": 531.328022480011 + }, + "roundtrip": { + "p50": 502.24000215530396, + "p90": 506.52801990509033, + "p95": 509.4720125198364, + "p99": 531.328022480011 + }, + "isolatedSum": { + "p50": 1004.4800043106079, + "p90": 1013.0560398101807, + "p95": 1018.9440250396729, + "p99": 1062.656044960022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 947.4239945411682, + "p90": 952.672004699707, + "p95": 955.0399780273438, + "p99": 960.1600170135498 + }, + "combine": { + "p50": 947.4239945411682, + "p90": 952.672004699707, + "p95": 955.0399780273438, + "p99": 960.1600170135498 + }, + "roundtrip": { + "p50": 947.4239945411682, + "p90": 952.672004699707, + "p95": 955.0399780273438, + "p99": 960.1600170135498 + }, + "isolatedSum": { + "p50": 1894.8479890823364, + "p90": 1905.344009399414, + "p95": 1910.0799560546875, + "p99": 1920.3200340270996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6157c6d8", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286", + "colorKey": "gb300_790c1f8e", + "comparisonKey": "aebcb923ab506859", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:34:40.496461+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "291e5ce62735286", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.31199985742569, + "p90": 138.11199367046356, + "p95": 144.19199526309967, + "p99": 182.40000307559967 + }, + "combine": { + "p50": 
121.31199985742569, + "p90": 138.11199367046356, + "p95": 144.19199526309967, + "p99": 182.40000307559967 + }, + "roundtrip": { + "p50": 121.31199985742569, + "p90": 138.11199367046356, + "p95": 144.19199526309967, + "p99": 182.40000307559967 + }, + "isolatedSum": { + "p50": 242.62399971485138, + "p90": 276.2239873409271, + "p95": 288.38399052619934, + "p99": 364.80000615119934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.85599851608276, + "p90": 148.60799908638, + "p95": 153.6320000886917, + "p99": 182.3039948940277 + }, + "combine": { + "p50": 133.85599851608276, + "p90": 148.60799908638, + "p95": 153.6320000886917, + "p99": 182.3039948940277 + }, + "roundtrip": { + "p50": 133.85599851608276, + "p90": 148.60799908638, + "p95": 153.6320000886917, + "p99": 182.3039948940277 + }, + "isolatedSum": { + "p50": 267.7119970321655, + "p90": 297.21599817276, + "p95": 307.2640001773834, + "p99": 364.6079897880554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 166.84800386428833, + "p90": 180.92800676822662, + "p95": 190.23999571800232, + "p99": 220.06399929523468 + }, + "combine": { + "p50": 166.84800386428833, + "p90": 180.92800676822662, + "p95": 190.23999571800232, + "p99": 220.06399929523468 + }, + "roundtrip": { + "p50": 166.84800386428833, + "p90": 180.92800676822662, + "p95": 190.23999571800232, + "p99": 220.06399929523468 + }, + "isolatedSum": { + "p50": 333.69600772857666, + "p90": 361.85601353645325, + "p95": 
380.47999143600464, + "p99": 440.12799859046936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 280.16000986099243, + "p90": 287.1040105819702, + "p95": 290.78400135040283, + "p99": 332.99198746681213 + }, + "combine": { + "p50": 280.16000986099243, + "p90": 287.1040105819702, + "p95": 290.78400135040283, + "p99": 332.99198746681213 + }, + "roundtrip": { + "p50": 280.16000986099243, + "p90": 287.1040105819702, + "p95": 290.78400135040283, + "p99": 332.99198746681213 + }, + "isolatedSum": { + "p50": 560.3200197219849, + "p90": 574.2080211639404, + "p95": 581.5680027008057, + "p99": 665.9839749336243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 509.11998748779297, + "p90": 515.999972820282, + "p95": 517.952024936676, + "p99": 543.8399910926819 + }, + "combine": { + "p50": 509.11998748779297, + "p90": 515.999972820282, + "p95": 517.952024936676, + "p99": 543.8399910926819 + }, + "roundtrip": { + "p50": 509.11998748779297, + "p90": 515.999972820282, + "p95": 517.952024936676, + "p99": 543.8399910926819 + }, + "isolatedSum": { + "p50": 1018.2399749755859, + "p90": 1031.999945640564, + "p95": 1035.904049873352, + "p99": 1087.6799821853638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 
32768, + "dispatch": { + "p50": 953.3759951591492, + "p90": 957.8880071640015, + "p95": 960.2559804916382, + "p99": 972.9599952697754 + }, + "combine": { + "p50": 953.3759951591492, + "p90": 957.8880071640015, + "p95": 960.2559804916382, + "p99": 972.9599952697754 + }, + "roundtrip": { + "p50": 953.3759951591492, + "p90": 957.8880071640015, + "p95": 960.2559804916382, + "p99": 972.9599952697754 + }, + "isolatedSum": { + "p50": 1906.7519903182983, + "p90": 1915.776014328003, + "p95": 1920.5119609832764, + "p99": 1945.9199905395508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-37f5077c", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_f4728223", + "comparisonKey": "b60ab44e0888c7f7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:38:00.231453+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + 
"configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.0239970088005, + "p90": 132.9279989004135, + "p95": 137.7280056476593, + "p99": 145.75999975204468 + }, + "combine": { + "p50": 117.0239970088005, + "p90": 132.9279989004135, + "p95": 137.7280056476593, + "p99": 145.75999975204468 + }, + "roundtrip": { + "p50": 117.0239970088005, + "p90": 132.9279989004135, + "p95": 137.7280056476593, + "p99": 145.75999975204468 + }, + "isolatedSum": { + "p50": 234.047994017601, + "p90": 265.855997800827, + "p95": 275.4560112953186, + "p99": 291.51999950408936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.88000631332397, + "p90": 148.00000190734863, + "p95": 152.70400047302246, + "p99": 162.27200627326965 + }, + "combine": { + "p50": 134.88000631332397, + "p90": 148.00000190734863, + "p95": 152.70400047302246, + "p99": 162.27200627326965 + }, + 
"roundtrip": { + "p50": 134.88000631332397, + "p90": 148.00000190734863, + "p95": 152.70400047302246, + "p99": 162.27200627326965 + }, + "isolatedSum": { + "p50": 269.76001262664795, + "p90": 296.00000381469727, + "p95": 305.4080009460449, + "p99": 324.5440125465393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 197.50399887561798, + "p90": 199.42399859428406, + "p95": 199.93600249290466, + "p99": 202.4639993906021 + }, + "combine": { + "p50": 197.50399887561798, + "p90": 199.42399859428406, + "p95": 199.93600249290466, + "p99": 202.4639993906021 + }, + "roundtrip": { + "p50": 197.50399887561798, + "p90": 199.42399859428406, + "p95": 199.93600249290466, + "p99": 202.4639993906021 + }, + "isolatedSum": { + "p50": 395.00799775123596, + "p90": 398.8479971885681, + "p95": 399.8720049858093, + "p99": 404.9279987812042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 378.3999979496002, + "p90": 382.07998871803284, + "p95": 383.04001092910767, + "p99": 384.4799995422363 + }, + "combine": { + "p50": 378.3999979496002, + "p90": 382.07998871803284, + "p95": 383.04001092910767, + "p99": 384.4799995422363 + }, + "roundtrip": { + "p50": 378.3999979496002, + "p90": 382.07998871803284, + "p95": 383.04001092910767, + "p99": 384.4799995422363 + }, + "isolatedSum": { + "p50": 756.7999958992004, + "p90": 764.1599774360657, + "p95": 766.0800218582153, + "p99": 768.9599990844727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + 
"combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 775.4560112953186, + "p90": 778.0159711837769, + "p95": 778.9440155029297, + "p99": 780.7040214538574 + }, + "combine": { + "p50": 775.4560112953186, + "p90": 778.0159711837769, + "p95": 778.9440155029297, + "p99": 780.7040214538574 + }, + "roundtrip": { + "p50": 775.4560112953186, + "p90": 778.0159711837769, + "p95": 778.9440155029297, + "p99": 780.7040214538574 + }, + "isolatedSum": { + "p50": 1550.9120225906372, + "p90": 1556.0319423675537, + "p95": 1557.8880310058594, + "p99": 1561.4080429077148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1499.2320537567139, + "p90": 1502.4000406265259, + "p95": 1503.5200119018555, + "p99": 1505.568027496338 + }, + "combine": { + "p50": 1499.2320537567139, + "p90": 1502.4000406265259, + "p95": 1503.5200119018555, + "p99": 1505.568027496338 + }, + "roundtrip": { + "p50": 1499.2320537567139, + "p90": 1502.4000406265259, + "p95": 1503.5200119018555, + "p99": 1505.568027496338 + }, + "isolatedSum": { + "p50": 2998.4641075134277, + "p90": 3004.8000812530518, + "p95": 3007.040023803711, + "p99": 3011.136054992676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d2696c4", + "identity": 
"gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||8183e404f63b100", + "colorKey": "gb300_ecdb41f9", + "comparisonKey": "ea2443aeeec7750c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:33:57.089442+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8183e404f63b100", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.30399799346924, + "p90": 122.46400117874146, + "p95": 126.68800354003906, + "p99": 135.26399433612823 + }, + "combine": { + "p50": 110.30399799346924, + "p90": 122.46400117874146, + "p95": 126.68800354003906, + "p99": 135.26399433612823 + }, + "roundtrip": { + "p50": 110.30399799346924, + "p90": 122.46400117874146, + "p95": 126.68800354003906, + "p99": 135.26399433612823 + }, + "isolatedSum": { + "p50": 220.60799598693848, + "p90": 244.9280023574829, + "p95": 253.37600708007812, + "p99": 270.52798867225647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 126.43200159072876, + "p90": 137.88799941539764, + "p95": 141.02399349212646, + "p99": 167.1680063009262 + }, + "combine": { + "p50": 126.43200159072876, + "p90": 137.88799941539764, + "p95": 141.02399349212646, + "p99": 167.1680063009262 + }, + "roundtrip": { + "p50": 126.43200159072876, + "p90": 137.88799941539764, + "p95": 141.02399349212646, + "p99": 167.1680063009262 + }, + "isolatedSum": { + "p50": 252.86400318145752, + "p90": 275.7759988307953, + "p95": 282.04798698425293, + "p99": 334.3360126018524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 192.35199689865112, + "p90": 194.17600333690643, + "p95": 194.62400674819946, + "p99": 201.75999402999878 + }, + "combine": { + "p50": 192.35199689865112, + "p90": 194.17600333690643, + "p95": 
194.62400674819946, + "p99": 201.75999402999878 + }, + "roundtrip": { + "p50": 192.35199689865112, + "p90": 194.17600333690643, + "p95": 194.62400674819946, + "p99": 201.75999402999878 + }, + "isolatedSum": { + "p50": 384.70399379730225, + "p90": 388.35200667381287, + "p95": 389.2480134963989, + "p99": 403.51998805999756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 364.8639917373657, + "p90": 367.19998717308044, + "p95": 367.7760064601898, + "p99": 369.4719970226288 + }, + "combine": { + "p50": 364.8639917373657, + "p90": 367.19998717308044, + "p95": 367.7760064601898, + "p99": 369.4719970226288 + }, + "roundtrip": { + "p50": 364.8639917373657, + "p90": 367.19998717308044, + "p95": 367.7760064601898, + "p99": 369.4719970226288 + }, + "isolatedSum": { + "p50": 729.7279834747314, + "p90": 734.3999743461609, + "p95": 735.5520129203796, + "p99": 738.9439940452576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 686.7520213127136, + "p90": 689.0559792518616, + "p95": 689.6640062332153, + "p99": 691.2959814071655 + }, + "combine": { + "p50": 686.7520213127136, + "p90": 689.0559792518616, + "p95": 689.6640062332153, + "p99": 691.2959814071655 + }, + "roundtrip": { + "p50": 686.7520213127136, + "p90": 689.0559792518616, + "p95": 689.6640062332153, + "p99": 691.2959814071655 + }, + "isolatedSum": { + "p50": 1373.5040426254272, + "p90": 1378.1119585037231, + "p95": 1379.3280124664307, + "p99": 1382.591962814331 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1328.927993774414, + "p90": 1331.231951713562, + "p95": 1331.9679498672485, + "p99": 1333.184003829956 + }, + "combine": { + "p50": 1328.927993774414, + "p90": 1331.231951713562, + "p95": 1331.9679498672485, + "p99": 1333.184003829956 + }, + "roundtrip": { + "p50": 1328.927993774414, + "p90": 1331.231951713562, + "p95": 1331.9679498672485, + "p99": 1333.184003829956 + }, + "isolatedSum": { + "p50": 2657.855987548828, + "p90": 2662.463903427124, + "p95": 2663.935899734497, + "p99": 2666.368007659912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c7764837", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72", + "colorKey": "gb300_844c1f75", + "comparisonKey": "9cc1365d62adc1cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:48.330074+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + 
"routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0e6b07a25691d72", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.81600320339203, + "p90": 143.48800480365753, + "p95": 149.27999675273895, + "p99": 167.71200299263 + }, + "combine": { + "p50": 118.81600320339203, + "p90": 143.48800480365753, + "p95": 149.27999675273895, + "p99": 167.71200299263 + }, + "roundtrip": { + "p50": 118.81600320339203, + "p90": 143.48800480365753, + "p95": 149.27999675273895, + "p99": 167.71200299263 + }, + "isolatedSum": { + "p50": 237.63200640678406, + "p90": 286.97600960731506, + "p95": 298.5599935054779, + "p99": 335.42400598526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 128.28800082206726, + "p90": 147.07200229167938, + "p95": 165.0879979133606, + "p99": 188.09600174427032 + }, + "combine": { + "p50": 128.28800082206726, + "p90": 147.07200229167938, + "p95": 165.0879979133606, + "p99": 188.09600174427032 + }, + "roundtrip": { + "p50": 128.28800082206726, + "p90": 147.07200229167938, + "p95": 165.0879979133606, + "p99": 188.09600174427032 + }, + "isolatedSum": { + "p50": 256.5760016441345, + "p90": 294.14400458335876, + "p95": 330.1759958267212, + "p99": 376.19200348854065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 167.64800250530243, + "p90": 192.35199689865112, + "p95": 206.4639925956726, + "p99": 226.01599991321564 + }, + "combine": { + "p50": 167.64800250530243, + "p90": 192.35199689865112, + "p95": 206.4639925956726, + "p99": 226.01599991321564 + }, + "roundtrip": { + "p50": 167.64800250530243, + "p90": 192.35199689865112, + "p95": 206.4639925956726, + "p99": 226.01599991321564 + }, + "isolatedSum": { + "p50": 335.29600501060486, + "p90": 384.70399379730225, + "p95": 412.9279851913452, + "p99": 452.0319998264313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 288.9919877052307, + "p90": 307.99999833106995, + "p95": 321.0560083389282, + "p99": 351.23199224472046 + }, + "combine": { + "p50": 288.9919877052307, + "p90": 307.99999833106995, + "p95": 321.0560083389282, + 
"p99": 351.23199224472046 + }, + "roundtrip": { + "p50": 288.9919877052307, + "p90": 307.99999833106995, + "p95": 321.0560083389282, + "p99": 351.23199224472046 + }, + "isolatedSum": { + "p50": 577.9839754104614, + "p90": 615.9999966621399, + "p95": 642.1120166778564, + "p99": 702.4639844894409 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 522.6240158081055, + "p90": 554.9439787864685, + "p95": 561.3440275192261, + "p99": 579.0719985961914 + }, + "combine": { + "p50": 522.6240158081055, + "p90": 554.9439787864685, + "p95": 561.3440275192261, + "p99": 579.0719985961914 + }, + "roundtrip": { + "p50": 522.6240158081055, + "p90": 554.9439787864685, + "p95": 561.3440275192261, + "p99": 579.0719985961914 + }, + "isolatedSum": { + "p50": 1045.248031616211, + "p90": 1109.887957572937, + "p95": 1122.6880550384521, + "p99": 1158.1439971923828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 991.8720126152039, + "p90": 1019.9999809265137, + "p95": 1035.8400344848633, + "p99": 1046.3039875030518 + }, + "combine": { + "p50": 991.8720126152039, + "p90": 1019.9999809265137, + "p95": 1035.8400344848633, + "p99": 1046.3039875030518 + }, + "roundtrip": { + "p50": 991.8720126152039, + "p90": 1019.9999809265137, + "p95": 1035.8400344848633, + "p99": 1046.3039875030518 + }, + "isolatedSum": { + "p50": 1983.7440252304077, + "p90": 2039.9999618530273, + "p95": 2071.6800689697266, + "p99": 2092.6079750061035 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-137e41ff", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7", + "colorKey": "gb300_91fe36f4", + "comparisonKey": "69f248ff5703252f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:34.024316+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a39eeb7c2dc6ca7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.7120012640953, + "p90": 123.58400225639343, + "p95": 125.72799623012543, + "p99": 133.215993642807 + }, + "combine": { + "p50": 111.7120012640953, + "p90": 123.58400225639343, + "p95": 125.72799623012543, + "p99": 133.215993642807 + }, + "roundtrip": { + "p50": 111.7120012640953, + "p90": 123.58400225639343, + "p95": 125.72799623012543, + "p99": 133.215993642807 + }, + "isolatedSum": { + "p50": 223.4240025281906, + "p90": 247.16800451278687, + "p95": 251.45599246025085, + "p99": 266.431987285614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 130.5920034646988, + "p90": 140.9599930047989, + "p95": 143.51999759674072, + "p99": 149.82399344444275 + }, + "combine": { + "p50": 130.5920034646988, + "p90": 140.9599930047989, + "p95": 143.51999759674072, + "p99": 149.82399344444275 + }, + "roundtrip": { + "p50": 130.5920034646988, + "p90": 140.9599930047989, + "p95": 143.51999759674072, + "p99": 149.82399344444275 + }, + "isolatedSum": { + "p50": 261.1840069293976, + "p90": 281.9199860095978, + "p95": 287.03999519348145, + "p99": 299.6479868888855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 195.93599438667297, + "p90": 197.6960003376007, + "p95": 198.40000569820404, + "p99": 199.8720020055771 + }, + "combine": { + "p50": 195.93599438667297, + "p90": 197.6960003376007, + "p95": 198.40000569820404, + "p99": 199.8720020055771 + }, + "roundtrip": { + "p50": 195.93599438667297, + "p90": 197.6960003376007, + "p95": 198.40000569820404, + "p99": 199.8720020055771 + }, + "isolatedSum": { + "p50": 391.87198877334595, + "p90": 395.3920006752014, + "p95": 396.8000113964081, + "p99": 399.7440040111542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 354.14400696754456, + "p90": 356.79998993873596, + "p95": 357.66398906707764, + "p99": 358.72000455856323 + }, + "combine": { + "p50": 354.14400696754456, + "p90": 356.79998993873596, + "p95": 357.66398906707764, + "p99": 358.72000455856323 + }, + "roundtrip": { + "p50": 354.14400696754456, + "p90": 356.79998993873596, + "p95": 357.66398906707764, + "p99": 358.72000455856323 + }, + "isolatedSum": { + "p50": 708.2880139350891, + "p90": 713.5999798774719, + "p95": 715.3279781341553, + "p99": 717.4400091171265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 706.9439888000488, + "p90": 711.6479873657227, + "p95": 712.9920125007629, + "p99": 716.4160013198853 + }, + "combine": { + "p50": 706.9439888000488, + "p90": 711.6479873657227, + "p95": 712.9920125007629, + "p99": 716.4160013198853 + }, + "roundtrip": { + 
"p50": 706.9439888000488, + "p90": 711.6479873657227, + "p95": 712.9920125007629, + "p99": 716.4160013198853 + }, + "isolatedSum": { + "p50": 1413.8879776000977, + "p90": 1423.2959747314453, + "p95": 1425.9840250015259, + "p99": 1432.8320026397705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1427.839994430542, + "p90": 1432.927966117859, + "p95": 1434.6239566802979, + "p99": 1437.7280473709106 + }, + "combine": { + "p50": 1427.839994430542, + "p90": 1432.927966117859, + "p95": 1434.6239566802979, + "p99": 1437.7280473709106 + }, + "roundtrip": { + "p50": 1427.839994430542, + "p90": 1432.927966117859, + "p95": 1434.6239566802979, + "p99": 1437.7280473709106 + }, + "isolatedSum": { + "p50": 2855.679988861084, + "p90": 2865.855932235718, + "p95": 2869.2479133605957, + "p99": 2875.4560947418213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cfea1818", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe", + "colorKey": "gb300_7320de9a", + "comparisonKey": "940983c86f5f8335", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:36:44.958783+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3eb2f0d7bdba0fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.76000261306763, + "p90": 145.21600306034088, + "p95": 151.5199989080429, + "p99": 194.815993309021 + }, + "combine": { + "p50": 117.76000261306763, + "p90": 145.21600306034088, + "p95": 151.5199989080429, + "p99": 194.815993309021 + }, + "roundtrip": { + "p50": 117.76000261306763, + "p90": 145.21600306034088, + "p95": 151.5199989080429, + "p99": 194.815993309021 + }, + "isolatedSum": { + "p50": 235.52000522613525, + "p90": 290.43200612068176, 
+ "p95": 303.0399978160858, + "p99": 389.631986618042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.56800377368927, + "p90": 167.84000396728516, + "p95": 180.9920072555542, + "p99": 189.7599995136261 + }, + "combine": { + "p50": 133.56800377368927, + "p90": 167.84000396728516, + "p95": 180.9920072555542, + "p99": 189.7599995136261 + }, + "roundtrip": { + "p50": 133.56800377368927, + "p90": 167.84000396728516, + "p95": 180.9920072555542, + "p99": 189.7599995136261 + }, + "isolatedSum": { + "p50": 267.13600754737854, + "p90": 335.6800079345703, + "p95": 361.9840145111084, + "p99": 379.5199990272522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 181.18399381637573, + "p90": 217.40800142288208, + "p95": 223.51999580860138, + "p99": 231.4240038394928 + }, + "combine": { + "p50": 181.18399381637573, + "p90": 217.40800142288208, + "p95": 223.51999580860138, + "p99": 231.4240038394928 + }, + "roundtrip": { + "p50": 181.18399381637573, + "p90": 217.40800142288208, + "p95": 223.51999580860138, + "p99": 231.4240038394928 + }, + "isolatedSum": { + "p50": 362.36798763275146, + "p90": 434.81600284576416, + "p95": 447.03999161720276, + "p99": 462.8480076789856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 
8192, + "dispatch": { + "p50": 283.26401114463806, + "p90": 317.6319897174835, + "p95": 324.319988489151, + "p99": 345.95200419425964 + }, + "combine": { + "p50": 283.26401114463806, + "p90": 317.6319897174835, + "p95": 324.319988489151, + "p99": 345.95200419425964 + }, + "roundtrip": { + "p50": 283.26401114463806, + "p90": 317.6319897174835, + "p95": 324.319988489151, + "p99": 345.95200419425964 + }, + "isolatedSum": { + "p50": 566.5280222892761, + "p90": 635.263979434967, + "p95": 648.639976978302, + "p99": 691.9040083885193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 508.67199897766113, + "p90": 545.9200143814087, + "p95": 554.8480153083801, + "p99": 565.887987613678 + }, + "combine": { + "p50": 508.67199897766113, + "p90": 545.9200143814087, + "p95": 554.8480153083801, + "p99": 565.887987613678 + }, + "roundtrip": { + "p50": 508.67199897766113, + "p90": 545.9200143814087, + "p95": 554.8480153083801, + "p99": 565.887987613678 + }, + "isolatedSum": { + "p50": 1017.3439979553223, + "p90": 1091.8400287628174, + "p95": 1109.6960306167603, + "p99": 1131.775975227356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 956.991970539093, + "p90": 987.5839948654175, + "p95": 994.2079782485962, + "p99": 1009.7919702529907 + }, + "combine": { + "p50": 956.991970539093, + "p90": 987.5839948654175, + "p95": 994.2079782485962, + "p99": 1009.7919702529907 + }, + "roundtrip": { + "p50": 956.991970539093, + "p90": 987.5839948654175, 
+ "p95": 994.2079782485962, + "p99": 1009.7919702529907 + }, + "isolatedSum": { + "p50": 1913.983941078186, + "p90": 1975.167989730835, + "p95": 1988.4159564971924, + "p99": 2019.5839405059814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-afd0820a", + "identity": "gb300|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_e889c3e3", + "comparisonKey": "3a82a61a6b6aa95a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:15.522352+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.0000034570694, + "p90": 125.11999905109406, + "p95": 127.74400413036346, + "p99": 133.27999413013458 + }, + "combine": { + "p50": 112.0000034570694, + "p90": 125.11999905109406, + "p95": 127.74400413036346, + "p99": 133.27999413013458 + }, + "roundtrip": { + "p50": 112.0000034570694, + "p90": 125.11999905109406, + "p95": 127.74400413036346, + "p99": 133.27999413013458 + }, + "isolatedSum": { + "p50": 224.0000069141388, + "p90": 250.2399981021881, + "p95": 255.48800826072693, + "p99": 266.55998826026917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 129.92000579833984, + "p90": 141.50400459766388, + "p95": 144.03200149536133, + "p99": 151.5520066022873 + }, + "combine": { + "p50": 129.92000579833984, + "p90": 141.50400459766388, + "p95": 144.03200149536133, + "p99": 151.5520066022873 + }, + "roundtrip": { + "p50": 129.92000579833984, + "p90": 141.50400459766388, + "p95": 144.03200149536133, + "p99": 151.5520066022873 + }, + "isolatedSum": { + "p50": 259.8400115966797, + "p90": 283.00800919532776, + "p95": 288.06400299072266, + "p99": 303.1040132045746 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 196.44799828529358, + "p90": 198.40000569820404, + "p95": 199.072003364563, + "p99": 201.9519954919815 + }, + "combine": { + "p50": 196.44799828529358, + "p90": 198.40000569820404, + "p95": 199.072003364563, + "p99": 201.9519954919815 + }, + "roundtrip": { + "p50": 196.44799828529358, + "p90": 198.40000569820404, + "p95": 199.072003364563, + "p99": 201.9519954919815 + }, + "isolatedSum": { + "p50": 392.89599657058716, + "p90": 396.8000113964081, + "p95": 398.144006729126, + "p99": 403.903990983963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.8080086708069, + "p90": 383.8079869747162, + "p95": 384.8640024662018, + "p99": 388.8640105724335 + }, + "combine": { + "p50": 379.8080086708069, + "p90": 383.8079869747162, + "p95": 384.8640024662018, + "p99": 388.8640105724335 + }, + "roundtrip": { + "p50": 379.8080086708069, + "p90": 383.8079869747162, + "p95": 384.8640024662018, + "p99": 388.8640105724335 + }, + "isolatedSum": { + "p50": 759.6160173416138, + "p90": 767.6159739494324, + "p95": 769.7280049324036, + "p99": 777.728021144867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 775.7120132446289, + "p90": 778.656005859375, + "p95": 
779.4560194015503, + "p99": 781.4720273017883 + }, + "combine": { + "p50": 775.7120132446289, + "p90": 778.656005859375, + "p95": 779.4560194015503, + "p99": 781.4720273017883 + }, + "roundtrip": { + "p50": 775.7120132446289, + "p90": 778.656005859375, + "p95": 779.4560194015503, + "p99": 781.4720273017883 + }, + "isolatedSum": { + "p50": 1551.4240264892578, + "p90": 1557.31201171875, + "p95": 1558.9120388031006, + "p99": 1562.9440546035767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1499.6479749679565, + "p90": 1502.7519464492798, + "p95": 1503.9360523223877, + "p99": 1505.5999755859375 + }, + "combine": { + "p50": 1499.6479749679565, + "p90": 1502.7519464492798, + "p95": 1503.9360523223877, + "p99": 1505.5999755859375 + }, + "roundtrip": { + "p50": 1499.6479749679565, + "p90": 1502.7519464492798, + "p95": 1503.9360523223877, + "p99": 1505.5999755859375 + }, + "isolatedSum": { + "p50": 2999.295949935913, + "p90": 3005.5038928985596, + "p95": 3007.8721046447754, + "p99": 3011.199951171875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60b17e31", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_c3b32f1b", + "comparisonKey": "6971107dbd20109a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:37:27.553468+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": 
"flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.52799969911575, + "p90": 121.76000326871872, + "p95": 125.02400577068329, + "p99": 128.1919926404953 + }, + "combine": { + "p50": 110.52799969911575, + "p90": 121.76000326871872, + "p95": 125.02400577068329, + "p99": 128.1919926404953 
+ }, + "roundtrip": { + "p50": 110.52799969911575, + "p90": 121.76000326871872, + "p95": 125.02400577068329, + "p99": 128.1919926404953 + }, + "isolatedSum": { + "p50": 221.0559993982315, + "p90": 243.52000653743744, + "p95": 250.04801154136658, + "p99": 256.3839852809906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 125.40799379348755, + "p90": 137.82399892807007, + "p95": 149.08799529075623, + "p99": 178.94400656223297 + }, + "combine": { + "p50": 125.40799379348755, + "p90": 137.82399892807007, + "p95": 149.08799529075623, + "p99": 178.94400656223297 + }, + "roundtrip": { + "p50": 125.40799379348755, + "p90": 137.82399892807007, + "p95": 149.08799529075623, + "p99": 178.94400656223297 + }, + "isolatedSum": { + "p50": 250.8159875869751, + "p90": 275.64799785614014, + "p95": 298.17599058151245, + "p99": 357.88801312446594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 163.7759953737259, + "p90": 169.79199647903442, + "p95": 173.88799786567688, + "p99": 196.79999351501465 + }, + "combine": { + "p50": 163.7759953737259, + "p90": 169.79199647903442, + "p95": 173.88799786567688, + "p99": 196.79999351501465 + }, + "roundtrip": { + "p50": 163.7759953737259, + "p90": 169.79199647903442, + "p95": 173.88799786567688, + "p99": 196.79999351501465 + }, + "isolatedSum": { + "p50": 327.5519907474518, + "p90": 339.58399295806885, + "p95": 347.77599573135376, + "p99": 393.5999870300293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 280.60799837112427, + "p90": 286.52799129486084, + "p95": 290.912002325058, + "p99": 324.73599910736084 + }, + "combine": { + "p50": 280.60799837112427, + "p90": 286.52799129486084, + "p95": 290.912002325058, + "p99": 324.73599910736084 + }, + "roundtrip": { + "p50": 280.60799837112427, + "p90": 286.52799129486084, + "p95": 290.912002325058, + "p99": 324.73599910736084 + }, + "isolatedSum": { + "p50": 561.2159967422485, + "p90": 573.0559825897217, + "p95": 581.824004650116, + "p99": 649.4719982147217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 505.72800636291504, + "p90": 510.8799934387207, + "p95": 513.696014881134, + "p99": 521.2799906730652 + }, + "combine": { + "p50": 505.72800636291504, + "p90": 510.8799934387207, + "p95": 513.696014881134, + "p99": 521.2799906730652 + }, + "roundtrip": { + "p50": 505.72800636291504, + "p90": 510.8799934387207, + "p95": 513.696014881134, + "p99": 521.2799906730652 + }, + "isolatedSum": { + "p50": 1011.4560127258301, + "p90": 1021.7599868774414, + "p95": 1027.392029762268, + "p99": 1042.5599813461304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 957.2160243988037, + "p90": 962.1440172195435, + "p95": 964.1600251197815, 
+ "p99": 968.2239890098572 + }, + "combine": { + "p50": 957.2160243988037, + "p90": 962.1440172195435, + "p95": 964.1600251197815, + "p99": 968.2239890098572 + }, + "roundtrip": { + "p50": 957.2160243988037, + "p90": 962.1440172195435, + "p95": 964.1600251197815, + "p99": 968.2239890098572 + }, + "isolatedSum": { + "p50": 1914.4320487976074, + "p90": 1924.288034439087, + "p95": 1928.320050239563, + "p99": 1936.4479780197144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e1dd831", + "identity": "gb300|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_228227db", + "comparisonKey": "90258ad68e491fed", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:35:21.347127+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + 
"conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.77600109577179, + "p90": 120.28799951076508, + "p95": 123.4240010380745, + "p99": 131.77600502967834 + }, + "combine": { + "p50": 107.77600109577179, + "p90": 120.28799951076508, + "p95": 123.4240010380745, + "p99": 131.77600502967834 + }, + "roundtrip": { + "p50": 107.77600109577179, + "p90": 120.28799951076508, + "p95": 123.4240010380745, + "p99": 131.77600502967834 + }, + "isolatedSum": { + "p50": 215.55200219154358, + "p90": 240.57599902153015, + "p95": 246.848002076149, + "p99": 263.5520100593567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 122.3360002040863, + "p90": 133.12000036239624, + "p95": 137.60000467300415, + "p99": 143.8400000333786 + }, + "combine": { + "p50": 122.3360002040863, + "p90": 133.12000036239624, + "p95": 137.60000467300415, + "p99": 143.8400000333786 + }, + "roundtrip": { + "p50": 122.3360002040863, + "p90": 
133.12000036239624, + "p95": 137.60000467300415, + "p99": 143.8400000333786 + }, + "isolatedSum": { + "p50": 244.6720004081726, + "p90": 266.2400007247925, + "p95": 275.2000093460083, + "p99": 287.6800000667572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 165.53600132465363, + "p90": 170.78399658203125, + "p95": 175.74399709701538, + "p99": 180.9920072555542 + }, + "combine": { + "p50": 165.53600132465363, + "p90": 170.78399658203125, + "p95": 175.74399709701538, + "p99": 180.9920072555542 + }, + "roundtrip": { + "p50": 165.53600132465363, + "p90": 170.78399658203125, + "p95": 175.74399709701538, + "p99": 180.9920072555542 + }, + "isolatedSum": { + "p50": 331.07200264930725, + "p90": 341.5679931640625, + "p95": 351.48799419403076, + "p99": 361.9840145111084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 277.2800028324127, + "p90": 281.6320061683655, + "p95": 285.0239872932434, + "p99": 296.9920039176941 + }, + "combine": { + "p50": 277.2800028324127, + "p90": 281.6320061683655, + "p95": 285.0239872932434, + "p99": 296.9920039176941 + }, + "roundtrip": { + "p50": 277.2800028324127, + "p90": 281.6320061683655, + "p95": 285.0239872932434, + "p99": 296.9920039176941 + }, + "isolatedSum": { + "p50": 554.5600056648254, + "p90": 563.264012336731, + "p95": 570.0479745864868, + "p99": 593.9840078353882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 
5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 506.1439871788025, + "p90": 510.97601652145386, + "p95": 513.9520168304443, + "p99": 529.4399857521057 + }, + "combine": { + "p50": 506.1439871788025, + "p90": 510.97601652145386, + "p95": 513.9520168304443, + "p99": 529.4399857521057 + }, + "roundtrip": { + "p50": 506.1439871788025, + "p90": 510.97601652145386, + "p95": 513.9520168304443, + "p99": 529.4399857521057 + }, + "isolatedSum": { + "p50": 1012.287974357605, + "p90": 1021.9520330429077, + "p95": 1027.9040336608887, + "p99": 1058.8799715042114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 957.7280282974243, + "p90": 963.8400077819824, + "p95": 965.6640291213989, + "p99": 972.8000164031982 + }, + "combine": { + "p50": 957.7280282974243, + "p90": 963.8400077819824, + "p95": 965.6640291213989, + "p99": 972.8000164031982 + }, + "roundtrip": { + "p50": 957.7280282974243, + "p90": 963.8400077819824, + "p95": 965.6640291213989, + "p99": 972.8000164031982 + }, + "isolatedSum": { + "p50": 1915.4560565948486, + "p90": 1927.6800155639648, + "p95": 1931.3280582427979, + "p99": 1945.6000328063965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-af0a0974", + "identity": "gb300|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": 
"gb300_404dcc83", + "comparisonKey": "366111247175c88a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:28:53.687514+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 127.00800597667694, + "p90": 141.79199934005737, + 
"p95": 150.2400040626526, + "p99": 179.48800325393677 + }, + "combine": { + "p50": 127.00800597667694, + "p90": 141.79199934005737, + "p95": 150.2400040626526, + "p99": 179.48800325393677 + }, + "roundtrip": { + "p50": 127.00800597667694, + "p90": 141.79199934005737, + "p95": 150.2400040626526, + "p99": 179.48800325393677 + }, + "isolatedSum": { + "p50": 254.01601195335388, + "p90": 283.58399868011475, + "p95": 300.4800081253052, + "p99": 358.97600650787354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 140.83200693130493, + "p90": 152.8960019350052, + "p95": 156.2879979610443, + "p99": 167.4560010433197 + }, + "combine": { + "p50": 140.83200693130493, + "p90": 152.8960019350052, + "p95": 156.2879979610443, + "p99": 167.4560010433197 + }, + "roundtrip": { + "p50": 140.83200693130493, + "p90": 152.8960019350052, + "p95": 156.2879979610443, + "p99": 167.4560010433197 + }, + "isolatedSum": { + "p50": 281.66401386260986, + "p90": 305.7920038700104, + "p95": 312.5759959220886, + "p99": 334.9120020866394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 173.0239987373352, + "p90": 185.40799617767334, + "p95": 187.6160055398941, + "p99": 225.95199942588806 + }, + "combine": { + "p50": 173.0239987373352, + "p90": 185.40799617767334, + "p95": 187.6160055398941, + "p99": 225.95199942588806 + }, + "roundtrip": { + "p50": 173.0239987373352, + "p90": 185.40799617767334, + "p95": 187.6160055398941, + "p99": 225.95199942588806 + }, + "isolatedSum": { 
+ "p50": 346.0479974746704, + "p90": 370.8159923553467, + "p95": 375.2320110797882, + "p99": 451.9039988517761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 244.47999894618988, + "p90": 253.56799364089966, + "p95": 256.6399872303009, + "p99": 267.96799898147583 + }, + "combine": { + "p50": 244.47999894618988, + "p90": 253.56799364089966, + "p95": 256.6399872303009, + "p99": 267.96799898147583 + }, + "roundtrip": { + "p50": 244.47999894618988, + "p90": 253.56799364089966, + "p95": 256.6399872303009, + "p99": 267.96799898147583 + }, + "isolatedSum": { + "p50": 488.95999789237976, + "p90": 507.1359872817993, + "p95": 513.2799744606018, + "p99": 535.9359979629517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 413.536012172699, + "p90": 418.11200976371765, + "p95": 421.05600237846375, + "p99": 430.27201294898987 + }, + "combine": { + "p50": 413.536012172699, + "p90": 418.11200976371765, + "p95": 421.05600237846375, + "p99": 430.27201294898987 + }, + "roundtrip": { + "p50": 413.536012172699, + "p90": 418.11200976371765, + "p95": 421.05600237846375, + "p99": 430.27201294898987 + }, + "isolatedSum": { + "p50": 827.072024345398, + "p90": 836.2240195274353, + "p95": 842.1120047569275, + "p99": 860.5440258979797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 762.1440291404724, + "p90": 766.048014163971, + "p95": 768.2560086250305, + "p99": 782.4000120162964 + }, + "combine": { + "p50": 762.1440291404724, + "p90": 766.048014163971, + "p95": 768.2560086250305, + "p99": 782.4000120162964 + }, + "roundtrip": { + "p50": 762.1440291404724, + "p90": 766.048014163971, + "p95": 768.2560086250305, + "p99": 782.4000120162964 + }, + "isolatedSum": { + "p50": 1524.2880582809448, + "p90": 1532.096028327942, + "p95": 1536.512017250061, + "p99": 1564.8000240325928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dd3ec637", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|decode|normal|none|none|0|tuned||1f5e79371c89a8d", + "colorKey": "gb300_41ecd4d6", + "comparisonKey": "0d2a0fdab425e9be", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:40:17.642318+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1f5e79371c89a8d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 540.1600003242493, + "p90": 557.0240020751953, + "p95": 582.8800201416016, + "p99": 3045.248031616211 + }, + "combine": { + "p50": 244.47999894618988, + "p90": 258.11201333999634, + "p95": 264.2560005187988, + "p99": 2703.007936477661 + }, + "roundtrip": { + "p50": 753.8880109786987, + "p90": 790.6559705734253, + "p95": 856.5760254859924, + "p99": 3207.616090774536 + }, + "isolatedSum": { + "p50": 784.6399992704391, + "p90": 815.1360154151917, + "p95": 847.1360206604004, + "p99": 5748.255968093872 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 10, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 539.3279790878296, + "p90": 555.903971195221, + "p95": 569.8559880256653, + "p99": 3062.3040199279785 + }, + "combine": { + "p50": 244.9599951505661, + "p90": 257.7599883079529, + "p95": 266.88000559806824, + "p99": 
2740.9279346466064 + }, + "roundtrip": { + "p50": 752.7999877929688, + "p90": 789.9199724197388, + "p95": 856.1279773712158, + "p99": 3521.6639041900635 + }, + "isolatedSum": { + "p50": 784.2879742383957, + "p90": 813.6639595031738, + "p95": 836.7359936237335, + "p99": 5803.231954574585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 17, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 555.7439923286438, + "p90": 574.4320154190063, + "p95": 591.0720229148865, + "p99": 2842.3359394073486 + }, + "combine": { + "p50": 245.79200148582458, + "p90": 258.65599513053894, + "p95": 263.839989900589, + "p99": 2640.6400203704834 + }, + "roundtrip": { + "p50": 768.3519721031189, + "p90": 798.0160117149353, + "p95": 851.6799807548523, + "p99": 3416.4481163024902 + }, + "isolatedSum": { + "p50": 801.5359938144684, + "p90": 833.0880105495453, + "p95": 854.9120128154755, + "p99": 5482.975959777832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 555.4879903793335, + "p90": 572.5759863853455, + "p95": 584.5440030097961, + "p99": 2913.408041000366 + }, + "combine": { + "p50": 247.29600548744202, + "p90": 263.35999369621277, + "p95": 270.687997341156, + "p99": 2757.215976715088 + }, + "roundtrip": { + "p50": 769.2480087280273, + "p90": 820.4479813575745, + "p95": 887.8080248832703, + "p99": 3453.279972076416 + }, + "isolatedSum": { + "p50": 802.7839958667755, + "p90": 835.9359800815582, + "p95": 855.2320003509521, + "p99": 5670.624017715454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 
1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 555.4879903793335, + "p90": 568.6720013618469, + "p95": 580.3520083427429, + "p99": 2816.927909851074 + }, + "combine": { + "p50": 246.20799720287323, + "p90": 259.8400115966797, + "p95": 265.0560140609741, + "p99": 2729.343891143799 + }, + "roundtrip": { + "p50": 764.5760178565979, + "p90": 794.8480248451233, + "p95": 845.8880186080933, + "p99": 3240.6721115112305 + }, + "isolatedSum": { + "p50": 801.6959875822067, + "p90": 828.5120129585266, + "p95": 845.408022403717, + "p99": 5546.271800994873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 556.1919808387756, + "p90": 568.6079859733582, + "p95": 574.5919942855835, + "p99": 2966.207981109619 + }, + "combine": { + "p50": 247.5840002298355, + "p90": 261.34398579597473, + "p95": 268.5439884662628, + "p99": 2649.5039463043213 + }, + "roundtrip": { + "p50": 766.7840123176575, + "p90": 802.6880025863647, + "p95": 861.6639971733093, + "p99": 3287.008047103882 + }, + "isolatedSum": { + "p50": 803.7759810686111, + "p90": 829.9519717693329, + "p95": 843.1359827518463, + "p99": 5615.71192741394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6709248, + "combineLogicalBytes": 6709248, + "fanoutMean": 3.65625, + "recvTokensMax": 270, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 555.4559826850891, + "p90": 569.1840052604675, + "p95": 574.6880173683167, + "p99": 2786.303997039795 + }, + "combine": { + "p50": 249.31199848651886, + "p90": 
261.82401180267334, + "p95": 265.0879919528961, + "p99": 2562.4001026153564 + }, + "roundtrip": { + "p50": 768.3519721031189, + "p90": 799.1359829902649, + "p95": 816.0640001296997, + "p99": 3169.3758964538574 + }, + "isolatedSum": { + "p50": 804.767981171608, + "p90": 831.0080170631409, + "p95": 839.7760093212128, + "p99": 5348.704099655151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13518848, + "combineLogicalBytes": 13518848, + "fanoutMean": 3.68359375, + "recvTokensMax": 535, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 559.0720176696777, + "p90": 570.9440112113953, + "p95": 577.8560042381287, + "p99": 2877.2799968719482 + }, + "combine": { + "p50": 252.19199061393738, + "p90": 264.73599672317505, + "p95": 268.70399713516235, + "p99": 2626.3999938964844 + }, + "roundtrip": { + "p50": 767.7440047264099, + "p90": 779.9999713897705, + "p95": 837.3759984970093, + "p99": 3401.535987854004 + }, + "isolatedSum": { + "p50": 811.2640082836151, + "p90": 835.6800079345703, + "p95": 846.560001373291, + "p99": 5503.679990768433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8ac485ec", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|decode|normal|none|none|0|tuned||bb358a3c2e68578", + "colorKey": "gb300_68d1366e", + "comparisonKey": "c45395517a99b9ef", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:00.767898+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "bb358a3c2e68578", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 564.4479990005493, + "p90": 583.7119817733765, + "p95": 591.1999940872192, + "p99": 2642.7841186523438 + }, + "combine": { + "p50": 254.43199276924133, + "p90": 267.7440047264099, + "p95": 272.44800329208374, + "p99": 335.29600501060486 + }, + "roundtrip": { + "p50": 786.5920066833496, + "p90": 816.7039752006531, + "p95": 827.6479840278625, + "p99": 2947.5839138031006 + }, + "isolatedSum": { + "p50": 818.8799917697906, + "p90": 
851.4559864997864, + "p95": 863.647997379303, + "p99": 2978.0801236629486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 4, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 566.0480260848999, + "p90": 583.9999914169312, + "p95": 589.8879766464233, + "p99": 2677.664041519165 + }, + "combine": { + "p50": 255.87201118469238, + "p90": 268.22400093078613, + "p95": 273.3440101146698, + "p99": 2165.343999862671 + }, + "roundtrip": { + "p50": 789.8560166358948, + "p90": 821.120023727417, + "p95": 838.6560082435608, + "p99": 3050.0481128692627 + }, + "isolatedSum": { + "p50": 821.9200372695923, + "p90": 852.2239923477173, + "p95": 863.2319867610931, + "p99": 4843.008041381836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 4, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 579.2639851570129, + "p90": 597.1519947052002, + "p95": 605.184018611908, + "p99": 3144.256114959717 + }, + "combine": { + "p50": 255.0399899482727, + "p90": 268.2560086250305, + "p95": 272.4800109863281, + "p99": 2274.751901626587 + }, + "roundtrip": { + "p50": 805.728018283844, + "p90": 836.7679715156555, + "p95": 850.0800132751465, + "p99": 2947.999954223633 + }, + "isolatedSum": { + "p50": 834.3039751052856, + "p90": 865.4080033302307, + "p95": 877.6640295982361, + "p99": 5419.008016586304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 4, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 582.7839970588684, + 
"p90": 600.3519892692566, + "p95": 607.1040034294128, + "p99": 2763.6160850524902 + }, + "combine": { + "p50": 257.6639950275421, + "p90": 270.9439992904663, + "p95": 276.2880027294159, + "p99": 2272.3519802093506 + }, + "roundtrip": { + "p50": 807.0719838142395, + "p90": 835.42400598526, + "p95": 849.4399785995483, + "p99": 2859.744071960449 + }, + "isolatedSum": { + "p50": 840.4479920864105, + "p90": 871.2959885597229, + "p95": 883.3920061588287, + "p99": 5035.968065261841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 4, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 581.0880064964294, + "p90": 596.7680215835571, + "p95": 603.5199761390686, + "p99": 2661.695957183838 + }, + "combine": { + "p50": 257.9520046710968, + "p90": 271.263986825943, + "p95": 276.06400847435, + "p99": 2185.0879192352295 + }, + "roundtrip": { + "p50": 802.4960160255432, + "p90": 829.9840092658997, + "p95": 846.0800051689148, + "p99": 3024.415969848633 + }, + "isolatedSum": { + "p50": 839.0400111675262, + "p90": 868.0320084095001, + "p95": 879.5839846134186, + "p99": 4846.783876419067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 4, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 579.1040062904358, + "p90": 594.8799848556519, + "p95": 603.6800146102905, + "p99": 2695.744037628174 + }, + "combine": { + "p50": 258.2080066204071, + "p90": 271.2959945201874, + "p95": 275.04000067710876, + "p99": 2068.5760974884033 + }, + "roundtrip": { + "p50": 806.0799837112427, + "p90": 834.496021270752, + "p95": 849.3760228157043, + "p99": 3063.3280277252197 + }, + "isolatedSum": { + "p50": 
837.3120129108429, + "p90": 866.1759793758392, + "p95": 878.7200152873993, + "p99": 4764.320135116577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 4, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 581.6320180892944, + "p90": 596.9600081443787, + "p95": 602.4320125579834, + "p99": 2610.912084579468 + }, + "combine": { + "p50": 260.6079876422882, + "p90": 273.75999093055725, + "p95": 279.04000878334045, + "p99": 2268.320083618164 + }, + "roundtrip": { + "p50": 803.1359910964966, + "p90": 825.4079818725586, + "p95": 832.6399922370911, + "p99": 3010.751962661743 + }, + "isolatedSum": { + "p50": 842.2400057315826, + "p90": 870.7199990749359, + "p95": 881.4720213413239, + "p99": 4879.232168197632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 4, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 579.3280005455017, + "p90": 595.8399772644043, + "p95": 602.7839779853821, + "p99": 2663.872003555298 + }, + "combine": { + "p50": 260.3839933872223, + "p90": 274.59201216697693, + "p95": 277.8880000114441, + "p99": 2292.0639514923096 + }, + "roundtrip": { + "p50": 799.9680042266846, + "p90": 818.0480003356934, + "p95": 825.4719972610474, + "p99": 3358.783960342407 + }, + "isolatedSum": { + "p50": 839.711993932724, + "p90": 870.4319894313812, + "p95": 880.6719779968262, + "p99": 4955.935955047607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ee231848", + 
"identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|decode|normal|none|none|0|tuned||c9bbf5a132d7fdf", + "colorKey": "gb300_85b0db41", + "comparisonKey": "17a4454686dea420", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:37.868867+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9bbf5a132d7fdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", 
+ "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 553.1520247459412, + "p90": 573.855996131897, + "p95": 583.1040143966675, + "p99": 2984.031915664673 + }, + "combine": { + "p50": 248.06399643421173, + "p90": 261.21601462364197, + "p95": 279.231995344162, + "p99": 2396.8639373779297 + }, + "roundtrip": { + "p50": 770.3679800033569, + "p90": 804.1920065879822, + "p95": 1716.5440320968628, + "p99": 3457.5040340423584 + }, + "isolatedSum": { + "p50": 801.2160211801529, + "p90": 835.0720107555389, + "p95": 862.3360097408295, + "p99": 5380.8958530426025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 57344, + "combineLogicalBytes": 57344, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 564.2560124397278, + "p90": 583.8080048561096, + "p95": 599.9360084533691, + "p99": 3006.943941116333 + }, + "combine": { + "p50": 246.2719976902008, + "p90": 255.71200251579285, + "p95": 259.7759962081909, + "p99": 2306.3039779663086 + }, + "roundtrip": { + "p50": 783.0719947814941, + "p90": 812.3520016670227, + "p95": 2580.3520679473877, + "p99": 3521.951913833618 + }, + "isolatedSum": { + "p50": 810.5280101299286, + "p90": 839.5200073719025, + "p95": 859.7120046615601, + "p99": 5313.247919082642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 569.8879957199097, + "p90": 584.9279761314392, + "p95": 593.8559770584106, + "p99": 2862.2400760650635 + }, + "combine": { + "p50": 249.5039999485016, + "p90": 262.62399554252625, + "p95": 266.9439911842346, + "p99": 2545.664072036743 + }, + 
"roundtrip": { + "p50": 787.6160144805908, + "p90": 816.7999982833862, + "p95": 852.288007736206, + "p99": 3358.0799102783203 + }, + "isolatedSum": { + "p50": 819.3919956684113, + "p90": 847.5519716739655, + "p95": 860.7999682426453, + "p99": 5407.904148101807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 570.1760053634644, + "p90": 585.1200222969055, + "p95": 589.8879766464233, + "p99": 2776.9598960876465 + }, + "combine": { + "p50": 255.45600056648254, + "p90": 267.36000180244446, + "p95": 272.44800329208374, + "p99": 2490.367889404297 + }, + "roundtrip": { + "p50": 783.0399870872498, + "p90": 800.1919984817505, + "p95": 841.3119912147522, + "p99": 3381.7920684814453 + }, + "isolatedSum": { + "p50": 825.6320059299469, + "p90": 852.48002409935, + "p95": 862.3359799385071, + "p99": 5267.327785491943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c04ef03f", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|decode|normal|none|none|0|tuned||4dc6cbd03327f4e", + "colorKey": "gb300_2cbcb2a0", + "comparisonKey": "1d649c24139a766a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:45:11.561574+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + 
"transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "4dc6cbd03327f4e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 554.8160076141357, + "p90": 573.4080076217651, + "p95": 581.5359950065613, + "p99": 2757.5039863586426 + }, + "combine": { + "p50": 254.62400913238525, + "p90": 268.640011548996, + "p95": 287.9360020160675, + "p99": 3001.375913619995 + }, + "roundtrip": { + "p50": 779.0079712867737, + "p90": 807.4560165405273, + "p95": 915.8719778060913, + "p99": 3515.712022781372 + }, + "isolatedSum": { + "p50": 809.440016746521, + "p90": 842.0480191707611, + "p95": 869.4719970226288, + "p99": 5758.879899978638 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 2, + "recvTokensMax": 10, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 555.679976940155, + "p90": 577.9520273208618, + "p95": 600.5120277404785, + "p99": 2712.928056716919 + }, + "combine": { + "p50": 252.83199548721313, + "p90": 264.1279995441437, + "p95": 269.567996263504, + "p99": 2399.104118347168 + }, + "roundtrip": { + "p50": 779.3599963188171, + "p90": 806.8159818649292, + "p95": 838.591992855072, + "p99": 3123.0719089508057 + }, + "isolatedSum": { + "p50": 808.5119724273682, + "p90": 842.0800268650055, + "p95": 870.0800240039825, + "p99": 5112.032175064087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 20, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 570.0479745864868, + "p90": 587.1679782867432, + "p95": 594.1439867019653, + "p99": 2612.7679347991943 + }, + "combine": { + "p50": 252.86400318145752, + "p90": 265.76000452041626, + "p95": 270.55999636650085, + "p99": 2533.9839458465576 + }, + "roundtrip": { + "p50": 794.1120266914368, + "p90": 821.9199776649475, + "p95": 863.4240031242371, + "p99": 3097.6319313049316 + }, + "isolatedSum": { + "p50": 822.9119777679443, + "p90": 852.9279828071594, + "p95": 864.7039830684662, + "p99": 5146.751880645752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 40, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 570.6560015678406, + "p90": 587.8080129623413, + "p95": 594.0160155296326, + "p99": 2580.8959007263184 + }, + "combine": { + 
"p50": 251.0719895362854, + "p90": 262.7519965171814, + "p95": 267.64801144599915, + "p99": 2346.719980239868 + }, + "roundtrip": { + "p50": 795.3280210494995, + "p90": 822.272002696991, + "p95": 854.1120290756226, + "p99": 3382.9760551452637 + }, + "isolatedSum": { + "p50": 821.727991104126, + "p90": 850.5600094795227, + "p95": 861.6640269756317, + "p99": 4927.6158809661865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 80, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 569.4079995155334, + "p90": 585.8240127563477, + "p95": 593.0560231208801, + "p99": 2675.424098968506 + }, + "combine": { + "p50": 252.19199061393738, + "p90": 264.70398902893066, + "p95": 270.143985748291, + "p99": 2463.871955871582 + }, + "roundtrip": { + "p50": 796.7039942741394, + "p90": 824.2239952087402, + "p95": 858.7520122528076, + "p99": 3168.8320636749268 + }, + "isolatedSum": { + "p50": 821.5999901294708, + "p90": 850.5280017852783, + "p95": 863.2000088691711, + "p99": 5139.296054840088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 144, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 570.5599784851074, + "p90": 585.9519839286804, + "p95": 593.887984752655, + "p99": 2577.791929244995 + }, + "combine": { + "p50": 253.12000513076782, + "p90": 265.9839987754822, + "p95": 269.6320116519928, + "p99": 2361.952066421509 + }, + "roundtrip": { + "p50": 796.6399788856506, + "p90": 823.1359720230103, + "p95": 833.5999846458435, + "p99": 3085.088014602661 + }, + "isolatedSum": { + "p50": 823.6799836158752, + "p90": 851.9359827041626, + "p95": 863.5199964046478, + "p99": 4939.743995666504 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2752512, + "combineLogicalBytes": 2752512, + "fanoutMean": 1.5, + "recvTokensMax": 288, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 570.9440112113953, + "p90": 584.991991519928, + "p95": 590.399980545044, + "p99": 2565.6960010528564 + }, + "combine": { + "p50": 256.00001215934753, + "p90": 268.2560086250305, + "p95": 273.824006319046, + "p99": 2254.8160552978516 + }, + "roundtrip": { + "p50": 796.9599962234497, + "p90": 822.816014289856, + "p95": 834.6239924430847, + "p99": 3028.831958770752 + }, + "isolatedSum": { + "p50": 826.9440233707428, + "p90": 853.2480001449585, + "p95": 864.22398686409, + "p99": 4820.512056350708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5505024, + "combineLogicalBytes": 5505024, + "fanoutMean": 1.5, + "recvTokensMax": 576, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 569.7280168533325, + "p90": 583.0720067024231, + "p95": 591.2320017814636, + "p99": 2694.175958633423 + }, + "combine": { + "p50": 257.24801421165466, + "p90": 270.30399441719055, + "p95": 274.4640111923218, + "p99": 2344.831943511963 + }, + "roundtrip": { + "p50": 786.8160009384155, + "p90": 802.4320006370544, + "p95": 809.7599744796753, + "p99": 3605.4399013519287 + }, + "isolatedSum": { + "p50": 826.9760310649872, + "p90": 853.3760011196136, + "p95": 865.6960129737854, + "p99": 5039.007902145386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 1152, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6a27865e", + "identity": 
"gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|decode|normal|none|none|0|tuned||0d921f8a9d2cb27", + "colorKey": "gb300_03c0b464", + "comparisonKey": "5b6d48705aaa6056", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:43:45.138572+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "0d921f8a9d2cb27", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 553.4719824790955, + "p90": 576.8640041351318, + "p95": 592.7680134773254, + "p99": 2989.6960258483887 + }, + "combine": { + "p50": 251.74400210380554, + "p90": 270.6559896469116, + "p95": 277.536004781723, + "p99": 2130.687952041626 + }, + "roundtrip": { + "p50": 773.1199860572815, + "p90": 808.5439801216125, + "p95": 848.0960130691528, + "p99": 3517.2159671783447 + }, + "isolatedSum": { + "p50": 805.215984582901, + "p90": 847.5199937820435, + "p95": 870.3040182590485, + "p99": 5120.383977890015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 200704, + "combineLogicalBytes": 200704, + "fanoutMean": 3.5, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 545.3760027885437, + "p90": 570.8159804344177, + "p95": 583.6480259895325, + "p99": 2992.703914642334 + }, + "combine": { + "p50": 246.59200012683868, + "p90": 262.84798979759216, + "p95": 267.8079903125763, + "p99": 2507.200002670288 + }, + "roundtrip": { + "p50": 754.0799975395203, + "p90": 793.4079766273499, + "p95": 823.7760066986084, + "p99": 3182.3039054870605 + }, + "isolatedSum": { + "p50": 791.9680029153824, + "p90": 833.6639702320099, + "p95": 851.4560163021088, + "p99": 5499.903917312622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 21, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 558.7520003318787, + "p90": 582.4000239372253, + "p95": 588.8640284538269, + "p99": 2852.3199558258057 + }, + "combine": { + "p50": 245.66400051116943, + "p90": 261.6640031337738, + "p95": 269.3119943141937, + "p99": 2546.3039875030518 + }, + "roundtrip": { 
+ "p50": 767.9039835929871, + "p90": 816.4479732513428, + "p95": 863.6800050735474, + "p99": 3417.504072189331 + }, + "isolatedSum": { + "p50": 804.4160008430481, + "p90": 844.0640270709991, + "p95": 858.1760227680206, + "p99": 5398.623943328857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 831488, + "combineLogicalBytes": 831488, + "fanoutMean": 3.625, + "recvTokensMax": 35, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 555.1999807357788, + "p90": 585.8880281448364, + "p95": 596.6399908065796, + "p99": 2907.776117324829 + }, + "combine": { + "p50": 244.47999894618988, + "p90": 256.99201226234436, + "p95": 261.34398579597473, + "p99": 2380.863904953003 + }, + "roundtrip": { + "p50": 771.3279724121094, + "p90": 812.5439882278442, + "p95": 836.8319869041443, + "p99": 3131.808042526245 + }, + "isolatedSum": { + "p50": 799.6799796819687, + "p90": 842.8800404071808, + "p95": 857.9839766025543, + "p99": 5288.640022277832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1648640, + "combineLogicalBytes": 1648640, + "fanoutMean": 3.59375, + "recvTokensMax": 75, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 569.2800283432007, + "p90": 587.1359705924988, + "p95": 594.1759943962097, + "p99": 2820.159912109375 + }, + "combine": { + "p50": 250.30401349067688, + "p90": 262.9440128803253, + "p95": 268.8319981098175, + "p99": 2500.9279251098633 + }, + "roundtrip": { + "p50": 782.6880216598511, + "p90": 827.0400166511536, + "p95": 852.3520231246948, + "p99": 3301.151990890503 + }, + "isolatedSum": { + "p50": 819.5840418338776, + "p90": 850.0799834728241, + "p95": 863.0079925060272, + "p99": 5321.087837219238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3282944, + "combineLogicalBytes": 3282944, + "fanoutMean": 3.578125, + 
"recvTokensMax": 158, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 560.9599947929382, + "p90": 587.6799821853638, + "p95": 593.824028968811, + "p99": 2793.3759689331055 + }, + "combine": { + "p50": 249.63200092315674, + "p90": 263.13599944114685, + "p95": 268.15998554229736, + "p99": 2451.359987258911 + }, + "roundtrip": { + "p50": 771.3599801063538, + "p90": 819.2639946937561, + "p95": 843.2639837265015, + "p99": 3410.111904144287 + }, + "isolatedSum": { + "p50": 810.591995716095, + "p90": 850.8159816265106, + "p95": 861.9840145111084, + "p99": 5244.735956192017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6694912, + "combineLogicalBytes": 6694912, + "fanoutMean": 3.6484375, + "recvTokensMax": 331, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 562.4960064888, + "p90": 588.6399745941162, + "p95": 595.4880118370056, + "p99": 2753.024101257324 + }, + "combine": { + "p50": 250.7840096950531, + "p90": 264.67201113700867, + "p95": 271.90399169921875, + "p99": 2738.816022872925 + }, + "roundtrip": { + "p50": 771.776020526886, + "p90": 806.5279722213745, + "p95": 822.59202003479, + "p99": 3068.063974380493 + }, + "isolatedSum": { + "p50": 813.2800161838531, + "p90": 853.3119857311249, + "p95": 867.3920035362244, + "p99": 5491.840124130249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13318144, + "combineLogicalBytes": 13318144, + "fanoutMean": 3.62890625, + "recvTokensMax": 664, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 561.9519948959351, + "p90": 588.4159803390503, + "p95": 600.1920104026794, + "p99": 2934.8480701446533 + }, + "combine": { + "p50": 253.31199169158936, + "p90": 266.30398631095886, + 
"p95": 275.39199590682983, + "p99": 2629.7600269317627 + }, + "roundtrip": { + "p50": 767.4559950828552, + "p90": 806.0479760169983, + "p95": 821.5039968490601, + "p99": 3314.3680095672607 + }, + "isolatedSum": { + "p50": 815.2639865875244, + "p90": 854.7199666500092, + "p95": 875.5840063095093, + "p99": 5564.608097076416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 1373, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0798e7ff", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|decode|normal|none|none|0|tuned||cc5ad1cb2e95ef6", + "colorKey": "gb300_07142b8a", + "comparisonKey": "8601d66fe04c41b0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:50:17.695779+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false 
+ }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cc5ad1cb2e95ef6", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.3408203125, + "eplbImbalanceAfter": 1.000390625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 570.7520246505737, + "p90": 589.1199707984924, + "p95": 599.295973777771, + "p99": 2810.62388420105 + }, + "combine": { + "p50": 257.7280104160309, + "p90": 270.08000016212463, + "p95": 275.9360074996948, + "p99": 2424.0639209747314 + }, + "roundtrip": { + "p50": 795.6479787826538, + "p90": 823.8400220870972, + "p95": 930.1760196685791, + "p99": 5073.056221008301 + }, + "isolatedSum": { + "p50": 828.4800350666046, + "p90": 859.1999709606171, + "p95": 875.2319812774658, + "p99": 5234.687805175781 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 11, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 570.14399766922, + "p90": 586.3680243492126, + "p95": 600.0000238418579, + "p99": 3020.8001136779785 + }, + "combine": { + "p50": 257.88798928260803, + "p90": 270.9439992904663, + "p95": 279.07198667526245, + "p99": 2671.1039543151855 + }, + "roundtrip": { + "p50": 790.6559705734253, + "p90": 822.7519989013672, + "p95": 895.6480026245117, + "p99": 5138.879776000977 + }, + "isolatedSum": { + "p50": 828.031986951828, + "p90": 
857.312023639679, + "p95": 879.0720105171204, + "p99": 5691.904067993164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 3.75, + "recvTokensMax": 19, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 581.1840295791626, + "p90": 599.9360084533691, + "p95": 632.6720118522644, + "p99": 4581.151962280273 + }, + "combine": { + "p50": 259.3599855899811, + "p90": 272.8640139102936, + "p95": 278.2079875469208, + "p99": 2505.5999755859375 + }, + "roundtrip": { + "p50": 808.5119724273682, + "p90": 836.8319869041443, + "p95": 906.4000248908997, + "p99": 3263.4239196777344 + }, + "isolatedSum": { + "p50": 840.5440151691437, + "p90": 872.8000223636627, + "p95": 910.8799993991852, + "p99": 7086.751937866211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 860160, + "combineLogicalBytes": 860160, + "fanoutMean": 3.75, + "recvTokensMax": 38, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 582.3040008544922, + "p90": 599.5200276374817, + "p95": 623.7760186195374, + "p99": 3046.2400913238525 + }, + "combine": { + "p50": 259.48798656463623, + "p90": 271.5199887752533, + "p95": 276.0320007801056, + "p99": 2411.8399620056152 + }, + "roundtrip": { + "p50": 809.503972530365, + "p90": 840.5439853668213, + "p95": 913.4399890899658, + "p99": 3369.663953781128 + }, + "isolatedSum": { + "p50": 841.7919874191284, + "p90": 871.040016412735, + "p95": 899.808019399643, + "p99": 5458.080053329468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1705984, + "combineLogicalBytes": 1705984, + "fanoutMean": 3.71875, + "recvTokensMax": 70, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 
583.296000957489, + "p90": 601.311981678009, + "p95": 612.608015537262, + "p99": 2862.1439933776855 + }, + "combine": { + "p50": 260.25599241256714, + "p90": 274.2080092430115, + "p95": 281.6320061683655, + "p99": 2771.2318897247314 + }, + "roundtrip": { + "p50": 806.7839741706848, + "p90": 840.1280045509338, + "p95": 930.5279850959778, + "p99": 3401.3121128082275 + }, + "isolatedSum": { + "p50": 843.5519933700562, + "p90": 875.5199909210205, + "p95": 894.2400217056274, + "p99": 5633.375883102417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3411968, + "combineLogicalBytes": 3411968, + "fanoutMean": 3.71875, + "recvTokensMax": 143, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 588.5120034217834, + "p90": 607.0079803466797, + "p95": 624.7680187225342, + "p99": 2897.599935531616 + }, + "combine": { + "p50": 259.96801257133484, + "p90": 273.69600534439087, + "p95": 278.6239981651306, + "p99": 2445.919990539551 + }, + "roundtrip": { + "p50": 816.1280155181885, + "p90": 851.1999845504761, + "p95": 915.2320027351379, + "p99": 3331.712007522583 + }, + "isolatedSum": { + "p50": 848.4800159931183, + "p90": 880.7039856910706, + "p95": 903.3920168876648, + "p99": 5343.519926071167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6680576, + "combineLogicalBytes": 6680576, + "fanoutMean": 3.640625, + "recvTokensMax": 272, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 589.6000266075134, + "p90": 604.6079993247986, + "p95": 626.7520189285278, + "p99": 2813.920021057129 + }, + "combine": { + "p50": 263.90400528907776, + "p90": 277.72799134254456, + "p95": 286.01598739624023, + "p99": 2678.1439781188965 + }, + "roundtrip": { + "p50": 809.4080090522766, + "p90": 836.5439772605896, + "p95": 881.0240030288696, + "p99": 5044.672012329102 + 
}, + "isolatedSum": { + "p50": 853.5040318965912, + "p90": 882.3359906673431, + "p95": 912.7680063247681, + "p99": 5492.063999176025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13432832, + "combineLogicalBytes": 13432832, + "fanoutMean": 3.66015625, + "recvTokensMax": 517, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 586.624026298523, + "p90": 601.9520163536072, + "p95": 611.0720038414001, + "p99": 2863.9678955078125 + }, + "combine": { + "p50": 263.7439966201782, + "p90": 277.44001150131226, + "p95": 283.1040024757385, + "p99": 2569.9520111083984 + }, + "roundtrip": { + "p50": 808.1279993057251, + "p90": 828.1919956207275, + "p95": 875.1999735832214, + "p99": 4979.83980178833 + }, + "isolatedSum": { + "p50": 850.3680229187012, + "p90": 879.3920278549194, + "p95": 894.1760063171387, + "p99": 5433.919906616211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26464256, + "combineLogicalBytes": 26464256, + "fanoutMean": 3.60546875, + "recvTokensMax": 1029, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4249e579", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|decode|normal|none|none|0|tuned||c186e8c8d66ece3", + "colorKey": "gb300_99da9098", + "comparisonKey": "3f24d086f967af3d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:44:28.601180+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · uniform+eplb", + 
"model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c186e8c8d66ece3", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.091796875, + "eplbImbalanceAfter": 1.00146484375, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 547.2959876060486, + "p90": 651.7760157585144, + "p95": 675.9359836578369, + "p99": 2762.176036834717 + }, + "combine": { + "p50": 264.384001493454, + "p90": 302.94400453567505, + "p95": 312.28798627853394, + "p99": 2534.2719554901123 + }, + "roundtrip": { + "p50": 773.855984210968, + "p90": 875.2639889717102, + "p95": 909.6639752388, + "p99": 3310.9118938446045 + }, + "isolatedSum": { + "p50": 811.6799890995026, + "p90": 954.7200202941895, + "p95": 988.2239699363708, + "p99": 5296.447992324829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + 
"recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 541.6319966316223, + "p90": 563.3599758148193, + "p95": 578.3680081367493, + "p99": 2904.99210357666 + }, + "combine": { + "p50": 244.89599466323853, + "p90": 255.90398907661438, + "p95": 260.6399953365326, + "p99": 2359.0400218963623 + }, + "roundtrip": { + "p50": 762.8480195999146, + "p90": 789.792001247406, + "p95": 825.2800107002258, + "p99": 3163.167953491211 + }, + "isolatedSum": { + "p50": 786.5279912948608, + "p90": 819.2639648914337, + "p95": 839.0080034732819, + "p99": 5264.0321254730225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 22, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 560.9599947929382, + "p90": 593.887984752655, + "p95": 606.4959764480591, + "p99": 2723.8399982452393 + }, + "combine": { + "p50": 247.42400646209717, + "p90": 265.9839987754822, + "p95": 274.1119861602783, + "p99": 2367.8081035614014 + }, + "roundtrip": { + "p50": 786.4000201225281, + "p90": 860.9600067138672, + "p95": 911.4559888839722, + "p99": 3466.3360118865967 + }, + "isolatedSum": { + "p50": 808.3840012550354, + "p90": 859.8719835281372, + "p95": 880.6079626083374, + "p99": 5091.648101806641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 745472, + "combineLogicalBytes": 745472, + "fanoutMean": 3.25, + "recvTokensMax": 37, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 559.9679946899414, + "p90": 656.8319797515869, + "p95": 694.4640278816223, + "p99": 2942.8799152374268 + }, + "combine": { + "p50": 248.1279969215393, + "p90": 273.824006319046, + "p95": 295.55198550224304, + 
"p99": 2451.9360065460205 + }, + "roundtrip": { + "p50": 785.6320142745972, + "p90": 888.7040019035339, + "p95": 924.5439767837524, + "p99": 3215.0399684906006 + }, + "isolatedSum": { + "p50": 808.0959916114807, + "p90": 930.6559860706329, + "p95": 990.0160133838654, + "p99": 5394.815921783447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 72, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 559.1999888420105, + "p90": 668.1920289993286, + "p95": 695.7759857177734, + "p99": 2841.344118118286 + }, + "combine": { + "p50": 247.96800315380096, + "p90": 278.78400683403015, + "p95": 298.0479896068573, + "p99": 2364.448070526123 + }, + "roundtrip": { + "p50": 787.9359722137451, + "p90": 892.3839926719666, + "p95": 915.0400161743164, + "p99": 3147.968053817749 + }, + "isolatedSum": { + "p50": 807.1679919958115, + "p90": 946.9760358333588, + "p95": 993.8239753246307, + "p99": 5205.792188644409 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3225600, + "combineLogicalBytes": 3225600, + "fanoutMean": 3.515625, + "recvTokensMax": 144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 563.6159777641296, + "p90": 668.2239770889282, + "p95": 695.9360241889954, + "p99": 2645.8239555358887 + }, + "combine": { + "p50": 252.83199548721313, + "p90": 281.66401386260986, + "p95": 302.72001028060913, + "p99": 2447.2639560699463 + }, + "roundtrip": { + "p50": 782.6560139656067, + "p90": 885.2159976959229, + "p95": 925.1840114593506, + "p99": 3480.0639152526855 + }, + "isolatedSum": { + "p50": 816.4479732513428, + "p90": 949.8879909515381, + "p95": 998.6560344696045, + "p99": 5093.087911605835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6522880, + 
"combineLogicalBytes": 6522880, + "fanoutMean": 3.5546875, + "recvTokensMax": 268, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 558.2720041275024, + "p90": 593.887984752655, + "p95": 602.4640202522278, + "p99": 2670.559883117676 + }, + "combine": { + "p50": 251.8720030784607, + "p90": 266.33599400520325, + "p95": 272.0640003681183, + "p99": 2153.343915939331 + }, + "roundtrip": { + "p50": 787.0079874992371, + "p90": 829.5680284500122, + "p95": 845.7279801368713, + "p99": 3149.5039463043213 + }, + "isolatedSum": { + "p50": 810.1440072059631, + "p90": 860.2239787578583, + "p95": 874.5280206203461, + "p99": 4823.903799057007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13160448, + "combineLogicalBytes": 13160448, + "fanoutMean": 3.5859375, + "recvTokensMax": 523, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 558.239996433258, + "p90": 661.9840264320374, + "p95": 691.4240121841431, + "p99": 2562.4959468841553 + }, + "combine": { + "p50": 252.9599964618683, + "p90": 287.3600125312805, + "p95": 307.5520098209381, + "p99": 2434.8480701446533 + }, + "roundtrip": { + "p50": 766.6879892349243, + "p90": 884.7360014915466, + "p95": 925.2480268478394, + "p99": 3194.8800086975098 + }, + "isolatedSum": { + "p50": 811.1999928951263, + "p90": 949.3440389633179, + "p95": 998.9760220050812, + "p99": 4997.344017028809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26406912, + "combineLogicalBytes": 26406912, + "fanoutMean": 3.59765625, + "recvTokensMax": 1032, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dfe7afe0", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": 
"gb300_8e905a35", + "comparisonKey": "5cc572ee802187e8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:42:08.659381+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 553.1520247459412, + "p90": 575.6160020828247, + "p95": 
587.0400071144104, + "p99": 2938.080072402954 + }, + "combine": { + "p50": 261.56800985336304, + "p90": 273.72801303863525, + "p95": 281.5679907798767, + "p99": 2457.9200744628906 + }, + "roundtrip": { + "p50": 768.447995185852, + "p90": 796.4159846305847, + "p95": 829.0560245513916, + "p99": 3394.6239948272705 + }, + "isolatedSum": { + "p50": 814.7200345993042, + "p90": 849.34401512146, + "p95": 868.6079978942871, + "p99": 5396.000146865845 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 22, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 547.4879741668701, + "p90": 567.3919916152954, + "p95": 579.9040198326111, + "p99": 3022.4320888519287 + }, + "combine": { + "p50": 248.28800559043884, + "p90": 262.33598589897156, + "p95": 272.352010011673, + "p99": 2694.528102874756 + }, + "roundtrip": { + "p50": 765.3120160102844, + "p90": 791.5840148925781, + "p95": 838.6240005493164, + "p99": 3294.559955596924 + }, + "isolatedSum": { + "p50": 795.775979757309, + "p90": 829.727977514267, + "p95": 852.2560298442841, + "p99": 5716.960191726685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 42, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 569.6319937705994, + "p90": 596.5440273284912, + "p95": 618.5600161552429, + "p99": 2917.8240299224854 + }, + "combine": { + "p50": 249.28000569343567, + "p90": 263.7439966201782, + "p95": 267.87200570106506, + "p99": 2360.383987426758 + }, + "roundtrip": { + "p50": 787.7439856529236, + "p90": 826.6879916191101, + "p95": 870.6880211830139, + "p99": 3193.023920059204 + }, + "isolatedSum": { + "p50": 818.911999464035, + "p90": 860.2880239486694, 
+ "p95": 886.432021856308, + "p99": 5278.208017349243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 90, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 569.4079995155334, + "p90": 588.9599919319153, + "p95": 600.6399989128113, + "p99": 2836.639881134033 + }, + "combine": { + "p50": 250.8159875869751, + "p90": 264.51200246810913, + "p95": 272.19200134277344, + "p99": 2616.9919967651367 + }, + "roundtrip": { + "p50": 789.247989654541, + "p90": 826.2720108032227, + "p95": 880.5440068244934, + "p99": 3260.3518962860107 + }, + "isolatedSum": { + "p50": 820.2239871025085, + "p90": 853.4719944000244, + "p95": 872.8320002555847, + "p99": 5453.63187789917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 568.448007106781, + "p90": 590.7840132713318, + "p95": 599.7120141983032, + "p99": 2910.367965698242 + }, + "combine": { + "p50": 250.97599625587463, + "p90": 263.5839879512787, + "p95": 267.4880027770996, + "p99": 2109.15207862854 + }, + "roundtrip": { + "p50": 788.7679934501648, + "p90": 820.8960294723511, + "p95": 858.0800294876099, + "p99": 3301.85604095459 + }, + "isolatedSum": { + "p50": 819.4240033626556, + "p90": 854.3680012226105, + "p95": 867.2000169754028, + "p99": 5019.520044326782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 370, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 567.6159858703613, + 
"p90": 587.8080129623413, + "p95": 595.1039791107178, + "p99": 2656.032085418701 + }, + "combine": { + "p50": 251.48800015449524, + "p90": 264.70398902893066, + "p95": 269.1839933395386, + "p99": 2311.0079765319824 + }, + "roundtrip": { + "p50": 787.3600125312805, + "p90": 816.2239789962769, + "p95": 845.1200127601624, + "p99": 3128.351926803589 + }, + "isolatedSum": { + "p50": 819.1039860248566, + "p90": 852.512001991272, + "p95": 864.2879724502563, + "p99": 4967.040061950684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 749, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 570.2400207519531, + "p90": 590.5600190162659, + "p95": 600.1279950141907, + "p99": 2635.967969894409 + }, + "combine": { + "p50": 252.57599353790283, + "p90": 266.7520046234131, + "p95": 273.6000120639801, + "p99": 2602.6558876037598 + }, + "roundtrip": { + "p50": 790.8160090446472, + "p90": 824.5120048522949, + "p95": 859.2000007629395, + "p99": 3339.2319679260254 + }, + "isolatedSum": { + "p50": 822.816014289856, + "p90": 857.312023639679, + "p95": 873.7280070781708, + "p99": 5238.623857498169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 1509, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 571.0399746894836, + "p90": 587.1359705924988, + "p95": 594.111979007721, + "p99": 2832.832098007202 + }, + "combine": { + "p50": 259.45600867271423, + "p90": 270.59200406074524, + "p95": 278.0799865722656, + "p99": 2567.2318935394287 + }, + "roundtrip": { + "p50": 783.3920121192932, + "p90": 809.7599744796753, + "p95": 824.5120048522949, + "p99": 3327.455997467041 + }, + 
"isolatedSum": { + "p50": 830.4959833621979, + "p90": 857.727974653244, + "p95": 872.1919655799866, + "p99": 5400.063991546631 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 3014, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a5ef425", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|decode|normal|none|none|0|tuned||3f8ffeba9f65629", + "colorKey": "gb300_7b7dff47", + "comparisonKey": "cc1e639961bc04b9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:42:55.785487+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "3f8ffeba9f65629", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 559.1040253639221, + "p90": 576.9280195236206, + "p95": 593.504011631012, + "p99": 3087.35990524292 + }, + "combine": { + "p50": 251.74400210380554, + "p90": 264.0959918498993, + "p95": 268.41598749160767, + "p99": 294.5919930934906 + }, + "roundtrip": { + "p50": 779.744029045105, + "p90": 808.1279993057251, + "p95": 828.5120129585266, + "p99": 2983.583927154541 + }, + "isolatedSum": { + "p50": 810.8480274677277, + "p90": 841.0240113735199, + "p95": 861.9199991226196, + "p99": 3381.9518983364105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 71680, + "combineLogicalBytes": 71680, + "fanoutMean": 1.25, + "recvTokensMax": 31, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 554.4959902763367, + "p90": 575.4560232162476, + "p95": 593.1519865989685, + "p99": 2634.9759101867676 + }, + "combine": { + "p50": 253.02401185035706, + "p90": 266.01600646972656, + "p95": 272.67199754714966, + "p99": 2426.2399673461914 + }, + "roundtrip": { + "p50": 778.11199426651, + "p90": 809.8880052566528, + "p95": 833.0559730529785, + "p99": 3065.9520626068115 + }, + "isolatedSum": { + "p50": 807.5200021266937, + "p90": 841.4720296859741, + "p95": 865.8239841461182, + "p99": 5061.215877532959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 1.375, + "recvTokensMax": 61, + "stragglerRank": 0, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 569.9840188026428, + "p90": 588.9599919319153, + "p95": 599.5839834213257, + "p99": 2819.391965866089 + }, + "combine": { + "p50": 267.07199215888977, + "p90": 277.3759961128235, + "p95": 281.5679907798767, + "p99": 2269.984006881714 + }, + "roundtrip": { + "p50": 796.671986579895, + "p90": 828.4800052642822, + "p95": 867.3920035362244, + "p99": 3065.5360221862793 + }, + "isolatedSum": { + "p50": 837.0560109615326, + "p90": 866.3359880447388, + "p95": 881.1519742012024, + "p99": 5089.375972747803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 286720, + "combineLogicalBytes": 286720, + "fanoutMean": 1.25, + "recvTokensMax": 124, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 572.2560286521912, + "p90": 591.5520191192627, + "p95": 604.7040224075317, + "p99": 2710.207939147949 + }, + "combine": { + "p50": 264.51200246810913, + "p90": 275.4240036010742, + "p95": 278.4000039100647, + "p99": 2157.8879356384277 + }, + "roundtrip": { + "p50": 792.9919958114624, + "p90": 824.3520259857178, + "p95": 844.4799780845642, + "p99": 3029.184103012085 + }, + "isolatedSum": { + "p50": 836.7680311203003, + "p90": 866.9760227203369, + "p95": 883.1040263175964, + "p99": 4868.095874786377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 1.21875, + "recvTokensMax": 249, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 573.4720230102539, + "p90": 591.1679863929749, + "p95": 599.9040007591248, + "p99": 3139.807939529419 + }, + "combine": { + "p50": 258.4959864616394, + "p90": 274.399995803833, + "p95": 282.368004322052, + "p99": 2431.936025619507 + }, + 
"roundtrip": { + "p50": 797.0560193061829, + "p90": 829.9520015716553, + "p95": 867.3279881477356, + "p99": 3530.1120281219482 + }, + "isolatedSum": { + "p50": 831.9680094718933, + "p90": 865.5679821968079, + "p95": 882.2720050811768, + "p99": 5571.743965148926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1161216, + "combineLogicalBytes": 1161216, + "fanoutMean": 1.265625, + "recvTokensMax": 494, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 571.7120170593262, + "p90": 593.0560231208801, + "p95": 600.1279950141907, + "p99": 2594.559907913208 + }, + "combine": { + "p50": 253.82399559020996, + "p90": 265.6320035457611, + "p95": 270.7520127296448, + "p99": 2281.888008117676 + }, + "roundtrip": { + "p50": 795.1359748840332, + "p90": 827.7119994163513, + "p95": 842.4000144004822, + "p99": 3020.512104034424 + }, + "isolatedSum": { + "p50": 825.5360126495361, + "p90": 858.6880266666412, + "p95": 870.8800077438354, + "p99": 4876.447916030884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2279424, + "combineLogicalBytes": 2279424, + "fanoutMean": 1.2421875, + "recvTokensMax": 992, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 571.23202085495, + "p90": 588.8640284538269, + "p95": 593.3120250701904, + "p99": 2636.1279487609863 + }, + "combine": { + "p50": 257.53599405288696, + "p90": 270.08000016212463, + "p95": 276.2880027294159, + "p99": 2349.407911300659 + }, + "roundtrip": { + "p50": 797.2480058670044, + "p90": 829.15198802948, + "p95": 845.632016658783, + "p99": 3040.3199195861816 + }, + "isolatedSum": { + "p50": 828.7680149078369, + "p90": 858.9440286159515, + "p95": 869.6000277996063, + "p99": 4985.5358600616455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4587520, + "combineLogicalBytes": 4587520, + 
"fanoutMean": 1.25, + "recvTokensMax": 1981, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 577.3119926452637, + "p90": 596.1920022964478, + "p95": 606.4959764480591, + "p99": 2593.280076980591 + }, + "combine": { + "p50": 256.6719949245453, + "p90": 267.96799898147583, + "p95": 275.32801032066345, + "p99": 2374.880075454712 + }, + "roundtrip": { + "p50": 806.2080144882202, + "p90": 819.6799755096436, + "p95": 840.0639891624451, + "p99": 3019.808053970337 + }, + "isolatedSum": { + "p50": 833.983987569809, + "p90": 864.1600012779236, + "p95": 881.8239867687225, + "p99": 4968.160152435303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 3956, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db29d420", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|decode|normal|none|none|0|tuned||e9a6e5febe08793", + "colorKey": "gb300_bdcb6417", + "comparisonKey": "3aae71c50510111d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:49:34.050417+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + 
"dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e9a6e5febe08793", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86328125, + "eplbImbalanceAfter": 1.0003348214285714, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 542.3359870910645, + "p90": 561.9519948959351, + "p95": 579.7759890556335, + "p99": 2978.4960746765137 + }, + "combine": { + "p50": 246.33599817752838, + "p90": 259.39199328422546, + "p95": 300.9600043296814, + "p99": 2633.4400177001953 + }, + "roundtrip": { + "p50": 764.415979385376, + "p90": 794.3040132522583, + "p95": 936.959981918335, + "p99": 3405.280113220215 + }, + "isolatedSum": { + "p50": 788.6719852685928, + "p90": 821.3439881801605, + "p95": 880.7359933853149, + "p99": 5611.936092376709 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 547.3920106887817, + "p90": 566.6239857673645, + 
"p95": 611.0399961471558, + "p99": 3100.3201007843018 + }, + "combine": { + "p50": 247.77600169181824, + "p90": 259.42400097846985, + "p95": 263.90400528907776, + "p99": 2532.032012939453 + }, + "roundtrip": { + "p50": 760.703980922699, + "p90": 799.0720272064209, + "p95": 864.031970500946, + "p99": 3369.3439960479736 + }, + "isolatedSum": { + "p50": 795.1680123806, + "p90": 826.0479867458344, + "p95": 874.9440014362335, + "p99": 5632.352113723755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 372736, + "combineLogicalBytes": 372736, + "fanoutMean": 3.25, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 569.0879821777344, + "p90": 640.2559876441956, + "p95": 681.4720034599304, + "p99": 2573.568105697632 + }, + "combine": { + "p50": 251.3279914855957, + "p90": 268.2879865169525, + "p95": 278.6239981651306, + "p99": 2556.4799308776855 + }, + "roundtrip": { + "p50": 788.0319952964783, + "p90": 873.4400272369385, + "p95": 923.1359958648682, + "p99": 3329.087972640991 + }, + "isolatedSum": { + "p50": 820.4159736633301, + "p90": 908.5439741611481, + "p95": 960.096001625061, + "p99": 5130.048036575317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 788480, + "combineLogicalBytes": 788480, + "fanoutMean": 3.4375, + "recvTokensMax": 39, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 558.9119791984558, + "p90": 577.567994594574, + "p95": 587.3600244522095, + "p99": 2819.7760581970215 + }, + "combine": { + "p50": 248.7040013074875, + "p90": 261.6319954395294, + "p95": 266.7520046234131, + "p99": 2478.1439304351807 + }, + "roundtrip": { + "p50": 774.6239900588989, + "p90": 807.6480031013489, + "p95": 851.5840172767639, + "p99": 3292.191982269287 + }, + "isolatedSum": { + "p50": 807.6159805059433, + "p90": 
839.1999900341034, + "p95": 854.1120290756226, + "p99": 5297.919988632202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 562.2079968452454, + "p90": 580.9599757194519, + "p95": 592.0959711074829, + "p99": 2759.5839500427246 + }, + "combine": { + "p50": 248.44799935817719, + "p90": 262.0159983634949, + "p95": 265.8880054950714, + "p99": 2647.36008644104 + }, + "roundtrip": { + "p50": 780.2240252494812, + "p90": 809.7599744796753, + "p95": 891.8399810791016, + "p99": 3427.6158809661865 + }, + "isolatedSum": { + "p50": 810.6559962034225, + "p90": 842.9759740829468, + "p95": 857.9839766025543, + "p99": 5406.944036483765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3196928, + "combineLogicalBytes": 3196928, + "fanoutMean": 3.484375, + "recvTokensMax": 136, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 567.6479935646057, + "p90": 584.5440030097961, + "p95": 601.4400124549866, + "p99": 2846.5919494628906 + }, + "combine": { + "p50": 251.64800882339478, + "p90": 264.19198513031006, + "p95": 267.67998933792114, + "p99": 2356.5120697021484 + }, + "roundtrip": { + "p50": 777.4080038070679, + "p90": 815.1040077209473, + "p95": 866.2400245666504, + "p99": 3241.408109664917 + }, + "isolatedSum": { + "p50": 819.2960023880005, + "p90": 848.7359881401062, + "p95": 869.1200017929077, + "p99": 5203.104019165039 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6408192, + "combineLogicalBytes": 6408192, + "fanoutMean": 3.4921875, + "recvTokensMax": 270, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + 
"dispatch": { + "p50": 563.1999969482422, + "p90": 582.0159912109375, + "p95": 594.3359732627869, + "p99": 2902.751922607422 + }, + "combine": { + "p50": 254.7839879989624, + "p90": 266.7520046234131, + "p95": 271.13598585128784, + "p99": 2581.0561180114746 + }, + "roundtrip": { + "p50": 788.4479761123657, + "p90": 817.0880079269409, + "p95": 865.0239706039429, + "p99": 3354.5279502868652 + }, + "isolatedSum": { + "p50": 817.9839849472046, + "p90": 848.7679958343506, + "p95": 865.4719591140747, + "p99": 5483.8080406188965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12773376, + "combineLogicalBytes": 12773376, + "fanoutMean": 3.48046875, + "recvTokensMax": 523, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 565.9840106964111, + "p90": 597.1840023994446, + "p95": 700.2879977226257, + "p99": 2863.8720512390137 + }, + "combine": { + "p50": 259.3599855899811, + "p90": 278.3359885215759, + "p95": 309.28000807762146, + "p99": 2662.5919342041016 + }, + "roundtrip": { + "p50": 784.928023815155, + "p90": 862.0160222053528, + "p95": 904.9919843673706, + "p99": 3286.400079727173 + }, + "isolatedSum": { + "p50": 825.3439962863922, + "p90": 875.5199909210205, + "p95": 1009.5680058002472, + "p99": 5526.463985443115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25661440, + "combineLogicalBytes": 25661440, + "fanoutMean": 3.49609375, + "recvTokensMax": 1036, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-720ef234", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|decode|normal|none|none|0|tuned||e596902aaaeb56c", + "colorKey": "gb300_927737aa", + "comparisonKey": "83e8a80ad791f36f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:46:36.913764+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e596902aaaeb56c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 546.5919971466064, + "p90": 591.808021068573, + "p95": 630.3359866142273, + "p99": 2869.983911514282 + }, + "combine": { + "p50": 243.45600605010986, + "p90": 265.7279968261719, + "p95": 285.7919931411743, + "p99": 2542.1760082244873 + }, + "roundtrip": { + "p50": 
752.5759935379028, + "p90": 842.2080278396606, + "p95": 891.2960290908813, + "p99": 3401.18408203125 + }, + "isolatedSum": { + "p50": 790.0480031967163, + "p90": 857.5360178947449, + "p95": 916.1279797554016, + "p99": 5412.1599197387695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 3, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 534.9760055541992, + "p90": 568.0639743804932, + "p95": 587.5200033187866, + "p99": 2657.248020172119 + }, + "combine": { + "p50": 239.74399268627167, + "p90": 253.4399926662445, + "p95": 258.0159902572632, + "p99": 2246.367931365967 + }, + "roundtrip": { + "p50": 745.5360293388367, + "p90": 786.7199778556824, + "p95": 811.8720054626465, + "p99": 3637.824058532715 + }, + "isolatedSum": { + "p50": 774.7199982404709, + "p90": 821.5039670467377, + "p95": 845.5359935760498, + "p99": 4903.615951538086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 387072, + "combineLogicalBytes": 387072, + "fanoutMean": 3.375, + "recvTokensMax": 28, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 559.7119927406311, + "p90": 650.2400040626526, + "p95": 677.9839992523193, + "p99": 2780.4160118103027 + }, + "combine": { + "p50": 244.09599602222443, + "p90": 266.33599400520325, + "p95": 281.0240089893341, + "p99": 305.9200048446655 + }, + "roundtrip": { + "p50": 785.2159738540649, + "p90": 875.6800293922424, + "p95": 904.5119881629944, + "p99": 3123.2640743255615 + }, + "isolatedSum": { + "p50": 803.8079887628555, + "p90": 916.5759980678558, + "p95": 959.0080082416534, + "p99": 3086.3360166549683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 731136, + "combineLogicalBytes": 731136, + "fanoutMean": 3.1875, + "recvTokensMax": 59, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 565.7920241355896, + "p90": 657.9200029373169, + "p95": 673.5680103302002, + "p99": 2781.599998474121 + }, + "combine": { + "p50": 249.02400374412537, + "p90": 274.1760015487671, + "p95": 296.1280047893524, + "p99": 2110.879898071289 + }, + "roundtrip": { + "p50": 778.6239981651306, + "p90": 875.0720024108887, + "p95": 917.087972164154, + "p99": 3647.6800441741943 + }, + "isolatedSum": { + "p50": 814.816027879715, + "p90": 932.096004486084, + "p95": 969.6960151195526, + "p99": 4892.47989654541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1519616, + "combineLogicalBytes": 1519616, + "fanoutMean": 3.3125, + "recvTokensMax": 112, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 556.5119981765747, + "p90": 634.0799927711487, + "p95": 667.6480174064636, + "p99": 2845.4079627990723 + }, + "combine": { + "p50": 246.3040053844452, + "p90": 263.0400061607361, + "p95": 280.5440127849579, + "p99": 2430.0479888916016 + }, + "roundtrip": { + "p50": 773.9840149879456, + "p90": 884.4159841537476, + "p95": 911.9679927825928, + "p99": 3233.920097351074 + }, + "isolatedSum": { + "p50": 802.8160035610199, + "p90": 897.1199989318848, + "p95": 948.1920301914215, + "p99": 5275.455951690674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3082240, + "combineLogicalBytes": 3082240, + "fanoutMean": 3.359375, + "recvTokensMax": 229, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 552.0640015602112, + "p90": 575.9680271148682, + "p95": 582.2719931602478, + "p99": 2724.7679233551025 + }, + "combine": { + "p50": 245.79200148582458, + "p90": 257.4720084667206, + "p95": 262.11199164390564, + "p99": 
381.02400302886963 + }, + "roundtrip": { + "p50": 762.2399926185608, + "p90": 803.6800026893616, + "p95": 827.9359936714172, + "p99": 3139.4240856170654 + }, + "isolatedSum": { + "p50": 797.8560030460358, + "p90": 833.4400355815887, + "p95": 844.3839848041534, + "p99": 3105.791926383972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6121472, + "combineLogicalBytes": 6121472, + "fanoutMean": 3.3359375, + "recvTokensMax": 481, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 559.7760081291199, + "p90": 632.7360272407532, + "p95": 666.7519807815552, + "p99": 2247.9679584503174 + }, + "combine": { + "p50": 249.15200471878052, + "p90": 274.0800082683563, + "p95": 295.9040105342865, + "p99": 2156.1279296875 + }, + "roundtrip": { + "p50": 769.7280049324036, + "p90": 850.5920171737671, + "p95": 899.8720049858093, + "p99": 3040.0960445404053 + }, + "isolatedSum": { + "p50": 808.9280128479004, + "p90": 906.8160355091095, + "p95": 962.6559913158417, + "p99": 4404.095888137817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12271616, + "combineLogicalBytes": 12271616, + "fanoutMean": 3.34375, + "recvTokensMax": 966, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 571.008026599884, + "p90": 651.0400176048279, + "p95": 672.3520159721375, + "p99": 3246.112108230591 + }, + "combine": { + "p50": 254.72000241279602, + "p90": 282.52801299095154, + "p95": 298.72000217437744, + "p99": 1550.4640340805054 + }, + "roundtrip": { + "p50": 764.0640139579773, + "p90": 790.9119725227356, + "p95": 801.6639947891235, + "p99": 3098.560094833374 + }, + "isolatedSum": { + "p50": 825.72802901268, + "p90": 933.5680305957794, + "p95": 971.0720181465149, + "p99": 4796.576142311096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + 
"combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 1910, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-25d06433", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|decode|normal|none|none|0|tuned||194008255dcd869", + "colorKey": "gb300_a565a324", + "comparisonKey": "af258f820b96cc08", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:46:53.032006+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "194008255dcd869", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.865234375, + "eplbImbalanceAfter": 1.0003580729166668, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 549.6960282325745, + "p90": 567.0400261878967, + "p95": 574.8479962348938, + "p99": 2907.072067260742 + }, + "combine": { + "p50": 249.08800423145294, + "p90": 262.30400800704956, + "p95": 266.36800169944763, + "p99": 2477.2799015045166 + }, + "roundtrip": { + "p50": 763.264000415802, + "p90": 783.4879755973816, + "p95": 843.936026096344, + "p99": 3383.455991744995 + }, + "isolatedSum": { + "p50": 798.7840324640274, + "p90": 829.3440341949463, + "p95": 841.2159979343414, + "p99": 5384.351968765259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 157696, + "combineLogicalBytes": 157696, + "fanoutMean": 2.75, + "recvTokensMax": 15, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 547.3920106887817, + "p90": 561.4719986915588, + "p95": 568.5759782791138, + "p99": 2896.8958854675293 + }, + "combine": { + "p50": 248.1279969215393, + "p90": 262.08001375198364, + "p95": 269.6000039577484, + "p99": 2700.000047683716 + }, + "roundtrip": { + "p50": 763.4559869766235, + "p90": 789.3120050430298, + "p95": 866.0799860954285, + "p99": 3294.1761016845703 + }, + "isolatedSum": { + "p50": 795.520007610321, + "p90": 823.5520124435425, + "p95": 838.1759822368622, + "p99": 5596.895933151245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 3, + "recvTokensMax": 27, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, 
+ "dispatch": { + "p50": 567.2000050544739, + "p90": 588.5440111160278, + "p95": 605.184018611908, + "p99": 2903.359889984131 + }, + "combine": { + "p50": 250.8159875869751, + "p90": 262.04800605773926, + "p95": 265.0560140609741, + "p99": 2315.5200481414795 + }, + "roundtrip": { + "p50": 781.279981136322, + "p90": 820.1919794082642, + "p95": 851.8400192260742, + "p99": 3355.936050415039 + }, + "isolatedSum": { + "p50": 818.015992641449, + "p90": 850.5920171737671, + "p95": 870.2400326728821, + "p99": 5218.87993812561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 774144, + "combineLogicalBytes": 774144, + "fanoutMean": 3.375, + "recvTokensMax": 43, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 562.6559853553772, + "p90": 580.6080102920532, + "p95": 589.8560285568237, + "p99": 2689.023971557617 + }, + "combine": { + "p50": 250.14400482177734, + "p90": 262.1760070323944, + "p95": 267.1999931335449, + "p99": 2720.7679748535156 + }, + "roundtrip": { + "p50": 775.1039862632751, + "p90": 805.6960105895996, + "p95": 857.5360178947449, + "p99": 3466.048002243042 + }, + "isolatedSum": { + "p50": 812.7999901771545, + "p90": 842.7840173244476, + "p95": 857.0560216903687, + "p99": 5409.791946411133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1605632, + "combineLogicalBytes": 1605632, + "fanoutMean": 3.5, + "recvTokensMax": 76, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 568.9600110054016, + "p90": 588.4479880332947, + "p95": 594.8799848556519, + "p99": 2807.7120780944824 + }, + "combine": { + "p50": 252.22399830818176, + "p90": 264.92801308631897, + "p95": 270.9119915962219, + "p99": 2702.7199268341064 + }, + "roundtrip": { + "p50": 783.2319736480713, + "p90": 818.7519907951355, + "p95": 880.0960183143616, + "p99": 
3467.871904373169 + }, + "isolatedSum": { + "p50": 821.1840093135834, + "p90": 853.3760011196136, + "p95": 865.7919764518738, + "p99": 5510.432004928589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3268608, + "combineLogicalBytes": 3268608, + "fanoutMean": 3.5625, + "recvTokensMax": 148, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 564.7680163383484, + "p90": 580.128014087677, + "p95": 587.8720283508301, + "p99": 2866.368055343628 + }, + "combine": { + "p50": 251.64800882339478, + "p90": 264.6079957485199, + "p95": 273.3440101146698, + "p99": 2624.959945678711 + }, + "roundtrip": { + "p50": 779.8720002174377, + "p90": 812.1280074119568, + "p95": 845.2799916267395, + "p99": 3463.3920192718506 + }, + "isolatedSum": { + "p50": 816.4160251617432, + "p90": 844.7360098361969, + "p95": 861.2160384654999, + "p99": 5491.328001022339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6393856, + "combineLogicalBytes": 6393856, + "fanoutMean": 3.484375, + "recvTokensMax": 284, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 569.6319937705994, + "p90": 592.9920077323914, + "p95": 601.8880009651184, + "p99": 2774.6880054473877 + }, + "combine": { + "p50": 255.90398907661438, + "p90": 267.520010471344, + "p95": 271.807998418808, + "p99": 2381.4079761505127 + }, + "roundtrip": { + "p50": 787.3280048370361, + "p90": 818.5920119285583, + "p95": 832.7999711036682, + "p99": 3267.712116241455 + }, + "isolatedSum": { + "p50": 825.5359828472137, + "p90": 860.5120182037354, + "p95": 873.6959993839264, + "p99": 5156.0959815979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13045760, + "combineLogicalBytes": 13045760, + "fanoutMean": 3.5546875, + "recvTokensMax": 538, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 568.0959820747375, + "p90": 587.7439975738525, + "p95": 593.0240154266357, + "p99": 2667.423963546753 + }, + "combine": { + "p50": 257.7599883079529, + "p90": 269.4399952888489, + "p95": 276.06400847435, + "p99": 2617.5999641418457 + }, + "roundtrip": { + "p50": 785.8560085296631, + "p90": 807.1680068969727, + "p95": 818.1120157241821, + "p99": 3403.424024581909 + }, + "isolatedSum": { + "p50": 825.8559703826904, + "p90": 857.1839928627014, + "p95": 869.0880239009857, + "p99": 5285.023927688599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26263552, + "combineLogicalBytes": 26263552, + "fanoutMean": 3.578125, + "recvTokensMax": 1030, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d2e406f6", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|decode|normal|none|none|0|tuned||6f1e4acdb9439aa", + "colorKey": "gb300_0c94bea1", + "comparisonKey": "eef1aa73733622bf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:48:03.655612+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "6f1e4acdb9439aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 545.1520085334778, + "p90": 564.7040009498596, + "p95": 590.0480151176453, + "p99": 2994.2400455474854 + }, + "combine": { + "p50": 259.48798656463623, + "p90": 271.90399169921875, + "p95": 306.94401264190674, + "p99": 2805.3441047668457 + }, + "roundtrip": { + "p50": 763.5200023651123, + "p90": 793.1200265884399, + "p95": 882.7840089797974, + "p99": 3445.0879096984863 + }, + "isolatedSum": { + "p50": 804.639995098114, + "p90": 836.6079926490784, + "p95": 896.992027759552, + "p99": 5799.584150314331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 143360, + "combineLogicalBytes": 143360, + "fanoutMean": 2.5, + "recvTokensMax": 22, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 543.4240102767944, + "p90": 563.5520219802856, + "p95": 573.9200115203857, + "p99": 2858.367919921875 + }, + "combine": { + "p50": 256.51198625564575, + "p90": 268.67198944091797, + "p95": 
280.89600801467896, + "p99": 2747.904062271118 + }, + "roundtrip": { + "p50": 766.7199969291687, + "p90": 800.6079792976379, + "p95": 851.360023021698, + "p99": 3470.367908477783 + }, + "isolatedSum": { + "p50": 799.9359965324402, + "p90": 832.2240114212036, + "p95": 854.8160195350647, + "p99": 5606.271982192993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 2.75, + "recvTokensMax": 42, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 556.22398853302, + "p90": 573.311984539032, + "p95": 589.9839997291565, + "p99": 3041.088104248047 + }, + "combine": { + "p50": 255.61600923538208, + "p90": 267.5839960575104, + "p95": 272.0319926738739, + "p99": 2288.6080741882324 + }, + "roundtrip": { + "p50": 777.184009552002, + "p90": 810.6560111045837, + "p95": 878.7840008735657, + "p99": 3290.2400493621826 + }, + "isolatedSum": { + "p50": 811.8399977684021, + "p90": 840.8959805965424, + "p95": 862.0159924030304, + "p99": 5329.696178436279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 2.625, + "recvTokensMax": 90, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 556.768000125885, + "p90": 576.1920213699341, + "p95": 584.9599838256836, + "p99": 2711.0400199890137 + }, + "combine": { + "p50": 246.848002076149, + "p90": 259.99999046325684, + "p95": 263.7439966201782, + "p99": 2671.7119216918945 + }, + "roundtrip": { + "p50": 779.807984828949, + "p90": 815.9999847412109, + "p95": 877.4080276489258, + "p99": 3363.840103149414 + }, + "isolatedSum": { + "p50": 803.616002202034, + "p90": 836.1920118331909, + "p95": 848.7039804458618, + "p99": 5382.751941680908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + 
"combineLogicalBytes": 1189888, + "fanoutMean": 2.59375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 557.6639771461487, + "p90": 576.4480233192444, + "p95": 589.024007320404, + "p99": 2890.0160789489746 + }, + "combine": { + "p50": 248.03200364112854, + "p90": 261.24799251556396, + "p95": 265.855997800827, + "p99": 2351.936101913452 + }, + "roundtrip": { + "p50": 781.1840176582336, + "p90": 810.4000091552734, + "p95": 859.6159815788269, + "p99": 3337.376117706299 + }, + "isolatedSum": { + "p50": 805.6959807872772, + "p90": 837.6960158348083, + "p95": 854.8800051212311, + "p99": 5241.952180862427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 2.625, + "recvTokensMax": 370, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 557.2479963302612, + "p90": 574.6560096740723, + "p95": 586.1759781837463, + "p99": 2939.552068710327 + }, + "combine": { + "p50": 246.62399291992188, + "p90": 259.90399718284607, + "p95": 263.90400528907776, + "p99": 2480.959892272949 + }, + "roundtrip": { + "p50": 776.7040133476257, + "p90": 809.6320033073425, + "p95": 831.7440152168274, + "p99": 3168.895959854126 + }, + "isolatedSum": { + "p50": 803.8719892501831, + "p90": 834.5600068569183, + "p95": 850.0799834728241, + "p99": 5420.511960983276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4716544, + "combineLogicalBytes": 4716544, + "fanoutMean": 2.5703125, + "recvTokensMax": 749, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 561.6959929466248, + "p90": 580.3200006484985, + "p95": 593.6959981918335, + "p99": 2756.4799785614014 + }, + "combine": { + "p50": 
249.1839975118637, + "p90": 260.0640058517456, + "p95": 264.41600918769836, + "p99": 2340.831995010376 + }, + "roundtrip": { + "p50": 779.5199751853943, + "p90": 808.9920282363892, + "p95": 857.9840064048767, + "p99": 3402.6238918304443 + }, + "isolatedSum": { + "p50": 810.8799904584885, + "p90": 840.3840065002441, + "p95": 858.1120073795319, + "p99": 5097.311973571777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9275392, + "combineLogicalBytes": 9275392, + "fanoutMean": 2.52734375, + "recvTokensMax": 1509, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 562.6879930496216, + "p90": 577.3760080337524, + "p95": 590.719997882843, + "p99": 2847.007989883423 + }, + "combine": { + "p50": 249.53599274158478, + "p90": 261.50399446487427, + "p95": 266.62400364875793, + "p99": 2606.6880226135254 + }, + "roundtrip": { + "p50": 771.4560031890869, + "p90": 790.880024433136, + "p95": 798.6559867858887, + "p99": 3168.031930923462 + }, + "isolatedSum": { + "p50": 812.2239857912064, + "p90": 838.8800024986267, + "p95": 857.344001531601, + "p99": 5453.696012496948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 3014, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-33db8acc", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_6f30342d", + "comparisonKey": "debb82bd86c1902e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:48:19.730620+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", 
+ "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 550.0800013542175, + "p90": 567.6159858703613, + "p95": 576.5119791030884, + "p99": 2920.095920562744 + }, + "combine": { + "p50": 249.59999322891235, + "p90": 262.11199164390564, + "p95": 266.11199975013733, + "p99": 301.60000920295715 + }, + "roundtrip": { + "p50": 763.7439966201782, + "p90": 789.5039916038513, + "p95": 841.376006603241, + "p99": 
3265.439987182617 + }, + "isolatedSum": { + "p50": 799.6799945831299, + "p90": 829.727977514267, + "p95": 842.6239788532257, + "p99": 3221.6959297657013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 545.1520085334778, + "p90": 557.4719905853271, + "p95": 567.4880146980286, + "p99": 2754.240036010742 + }, + "combine": { + "p50": 250.2720057964325, + "p90": 262.65600323677063, + "p95": 267.4559950828552, + "p99": 313.3760094642639 + }, + "roundtrip": { + "p50": 762.8160119056702, + "p90": 793.2800054550171, + "p95": 835.5839848518372, + "p99": 3336.735963821411 + }, + "isolatedSum": { + "p50": 795.4240143299103, + "p90": 820.1279938220978, + "p95": 834.9440097808838, + "p99": 3067.616045475006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 24, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 560.4479908943176, + "p90": 575.5839943885803, + "p95": 582.5279951095581, + "p99": 2945.4081058502197 + }, + "combine": { + "p50": 250.2399981021881, + "p90": 262.2080147266388, + "p95": 266.04801416397095, + "p99": 323.0080008506775 + }, + "roundtrip": { + "p50": 779.1360020637512, + "p90": 811.9999766349792, + "p95": 851.9039750099182, + "p99": 3192.0320987701416 + }, + "isolatedSum": { + "p50": 810.6879889965057, + "p90": 837.7920091152191, + "p95": 848.576009273529, + "p99": 3268.416106700897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 40, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 560.9279870986938, + "p90": 575.1360058784485, + "p95": 583.1360220909119, + "p99": 2870.8479404449463 + }, + "combine": { + "p50": 248.9279955625534, + "p90": 260.3519856929779, + "p95": 263.5839879512787, + "p99": 273.9520072937012 + }, + "roundtrip": { + "p50": 774.8159766197205, + "p90": 799.9039888381958, + "p95": 834.7839713096619, + "p99": 3318.7520503997803 + }, + "isolatedSum": { + "p50": 809.8559826612473, + "p90": 835.4879915714264, + "p95": 846.7200100421906, + "p99": 3144.7999477386475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 71, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 562.8799796104431, + "p90": 585.5039954185486, + "p95": 598.6559987068176, + "p99": 2803.8079738616943 + }, + "combine": { + "p50": 251.8720030784607, + "p90": 265.28000831604004, + "p95": 269.76001262664795, + "p99": 294.048011302948 + }, + "roundtrip": { + "p50": 778.5919904708862, + "p90": 814.624011516571, + "p95": 839.8399949073792, + "p99": 3316.60795211792 + }, + "isolatedSum": { + "p50": 814.7519826889038, + "p90": 850.7840037345886, + "p95": 868.4160113334656, + "p99": 3097.8559851646423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 138, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 561.2480044364929, + "p90": 578.5279870033264, + "p95": 586.687982082367, + "p99": 2930.527925491333 + }, + "combine": { + "p50": 250.94398856163025, + "p90": 262.81601190567017, + "p95": 266.7199969291687, + "p99": 287.3600125312805 + }, + "roundtrip": { + "p50": 773.1199860572815, + "p90": 
799.3599772453308, + "p95": 830.7520151138306, + "p99": 3550.3039360046387 + }, + "isolatedSum": { + "p50": 812.1919929981232, + "p90": 841.3439989089966, + "p95": 853.4079790115356, + "p99": 3217.8879380226135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 264, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 562.2720122337341, + "p90": 579.2639851570129, + "p95": 587.1679782867432, + "p99": 2706.8800926208496 + }, + "combine": { + "p50": 254.14401292800903, + "p90": 264.44798707962036, + "p95": 268.22400093078613, + "p99": 285.2799892425537 + }, + "roundtrip": { + "p50": 779.0079712867737, + "p90": 809.1520071029663, + "p95": 825.1199722290039, + "p99": 2984.2240810394287 + }, + "isolatedSum": { + "p50": 816.4160251617432, + "p90": 843.7119722366333, + "p95": 855.3919792175293, + "p99": 2992.1600818634033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 541, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 565.4399991035461, + "p90": 580.6080102920532, + "p95": 633.247971534729, + "p99": 2773.6001014709473 + }, + "combine": { + "p50": 257.05599784851074, + "p90": 268.5439884662628, + "p95": 272.44800329208374, + "p99": 864.8959994316101 + }, + "roundtrip": { + "p50": 778.9760231971741, + "p90": 800.4480004310608, + "p95": 808.9920282363892, + "p99": 3620.959997177124 + }, + "isolatedSum": { + "p50": 822.4959969520569, + "p90": 849.151998758316, + "p95": 905.6959748268127, + "p99": 3638.4961009025574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 
1038, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a67a360", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|decode|normal|none|none|0|tuned||c9a80cc61d8211b", + "colorKey": "gb300_b3935729", + "comparisonKey": "43d2e787c937a609", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:45:53.648076+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "c9a80cc61d8211b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.943359375, + "eplbImbalanceAfter": 1.0002061631944446, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + 
"url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 4, + "dispatch": { + "p50": 537.663996219635, + "p90": 556.5440058708191, + "p95": 563.9359951019287, + "p99": 2874.6559619903564 + }, + "combine": { + "p50": 240.25599658489227, + "p90": 252.83199548721313, + "p95": 256.8640112876892, + "p99": 2554.719924926758 + }, + "roundtrip": { + "p50": 747.4240064620972, + "p90": 776.9280076026917, + "p95": 787.1040105819702, + "p99": 3236.959934234619 + }, + "isolatedSum": { + "p50": 777.9199928045273, + "p90": 809.3760013580322, + "p95": 820.8000063896179, + "p99": 5429.375886917114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 215040, + "fanoutMean": 3.75, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 8, + "dispatch": { + "p50": 539.5519733428955, + "p90": 558.5920214653015, + "p95": 564.2880201339722, + "p99": 3298.719882965088 + }, + "combine": { + "p50": 242.08000302314758, + "p90": 254.72000241279602, + "p95": 258.4959864616394, + "p99": 2464.3518924713135 + }, + "roundtrip": { + "p50": 746.8479871749878, + "p90": 778.656005859375, + "p95": 795.6799864768982, + "p99": 3430.079936981201 + }, + "isolatedSum": { + "p50": 781.6319763660431, + "p90": 813.3120238780975, + "p95": 822.7840065956116, + "p99": 5763.071775436401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 401408, + "combineLogicalBytes": 401408, + "fanoutMean": 3.5, + "recvTokensMax": 24, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 16, + "dispatch": { + "p50": 553.9199709892273, + "p90": 573.0559825897217, + "p95": 579.2319774627686, + "p99": 3051.8081188201904 + }, + "combine": { + 
"p50": 241.69600009918213, + "p90": 254.17599081993103, + "p95": 258.91199707984924, + "p99": 2508.863925933838 + }, + "roundtrip": { + "p50": 762.0480060577393, + "p90": 791.8400168418884, + "p95": 806.4000010490417, + "p99": 3124.255895614624 + }, + "isolatedSum": { + "p50": 795.6159710884094, + "p90": 827.2319734096527, + "p95": 838.1439745426178, + "p99": 5560.672044754028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 817152, + "combineLogicalBytes": 817152, + "fanoutMean": 3.5625, + "recvTokensMax": 40, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 32, + "dispatch": { + "p50": 555.1679730415344, + "p90": 573.8880038261414, + "p95": 581.0239911079407, + "p99": 3002.8159618377686 + }, + "combine": { + "p50": 244.1280037164688, + "p90": 255.61600923538208, + "p95": 258.59200954437256, + "p99": 274.81600642204285 + }, + "roundtrip": { + "p50": 762.3040080070496, + "p90": 790.880024433136, + "p95": 831.1039805412292, + "p99": 3481.6319942474365 + }, + "isolatedSum": { + "p50": 799.2959767580032, + "p90": 829.5040130615234, + "p95": 839.6160006523132, + "p99": 3277.6319682598114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1634304, + "combineLogicalBytes": 1634304, + "fanoutMean": 3.5625, + "recvTokensMax": 71, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 64, + "dispatch": { + "p50": 554.9119710922241, + "p90": 574.4320154190063, + "p95": 579.2959928512573, + "p99": 2827.008008956909 + }, + "combine": { + "p50": 243.55199933052063, + "p90": 255.5840015411377, + "p95": 259.8080039024353, + "p99": 292.5119996070862 + }, + "roundtrip": { + "p50": 762.0480060577393, + "p90": 790.5600070953369, + "p95": 805.5999875068665, + "p99": 3195.5840587615967 + }, + "isolatedSum": { + "p50": 798.4639704227448, + "p90": 830.016016960144, + "p95": 839.1039967536926, + "p99": 
3119.5200085639954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3239936, + "combineLogicalBytes": 3239936, + "fanoutMean": 3.53125, + "recvTokensMax": 138, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 128, + "dispatch": { + "p50": 557.0240020751953, + "p90": 572.9280114173889, + "p95": 579.2959928512573, + "p99": 3062.2079372406006 + }, + "combine": { + "p50": 244.73600089550018, + "p90": 257.24801421165466, + "p95": 261.53600215911865, + "p99": 345.8560109138489 + }, + "roundtrip": { + "p50": 761.5360021591187, + "p90": 788.1280183792114, + "p95": 800.0640273094177, + "p99": 3147.455930709839 + }, + "isolatedSum": { + "p50": 801.7600029706955, + "p90": 830.1760256290436, + "p95": 840.831995010376, + "p99": 3408.0639481544495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 264, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 256, + "dispatch": { + "p50": 556.0960173606873, + "p90": 574.0479826927185, + "p95": 580.8640122413635, + "p99": 2278.6879539489746 + }, + "combine": { + "p50": 247.45599925518036, + "p90": 260.6079876422882, + "p95": 267.2959864139557, + "p99": 1202.3040056228638 + }, + "roundtrip": { + "p50": 762.6240253448486, + "p90": 791.8720245361328, + "p95": 802.8799891471863, + "p99": 3153.0559062957764 + }, + "isolatedSum": { + "p50": 803.5520166158676, + "p90": 834.6559703350067, + "p95": 848.1599986553192, + "p99": 3480.9919595718384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12988416, + "combineLogicalBytes": 12988416, + "fanoutMean": 3.5390625, + "recvTokensMax": 541, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 555.3920269012451, + "p90": 
569.3439841270447, + "p95": 574.2719769477844, + "p99": 1351.9680500030518 + }, + "combine": { + "p50": 248.89600276947021, + "p90": 260.5440020561218, + "p95": 265.28000831604004, + "p99": 2520.3840732574463 + }, + "roundtrip": { + "p50": 760.3840231895447, + "p90": 777.567982673645, + "p95": 785.6000065803528, + "p99": 3173.5360622406006 + }, + "isolatedSum": { + "p50": 804.2880296707153, + "p90": 829.8879861831665, + "p95": 839.5519852638245, + "p99": 3872.352123260498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26134528, + "combineLogicalBytes": 26134528, + "fanoutMean": 3.560546875, + "recvTokensMax": 1038, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8b8de4d1", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||74444524b5db510", + "colorKey": "gb300_c2190482", + "comparisonKey": "fe9f182e779daa85", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:52.726220+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": 
"backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "74444524b5db510", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 720.4800248146057, + "p90": 1744.928002357483, + "p95": 3654.9439430236816, + "p99": 3853.9199829101562 + }, + "combine": { + "p50": 344.89598870277405, + "p90": 398.0160057544708, + "p95": 2096.9278812408447, + "p99": 3444.0319538116455 + }, + "roundtrip": { + "p50": 1035.8400344848633, + "p90": 2152.5440216064453, + "p95": 3991.83988571167, + "p99": 4393.53609085083 + }, + "isolatedSum": { + "p50": 1065.3760135173798, + "p90": 2142.9440081119537, + "p95": 5751.871824264526, + "p99": 7297.951936721802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 640.8320069313049, + "p90": 1969.472050666809, + "p95": 3417.0238971710205, + "p99": 4044.9280738830566 + }, + "combine": { + "p50": 306.68801069259644, + "p90": 358.11200737953186, + "p95": 1811.2000226974487, + "p99": 3673.1839179992676 + }, + "roundtrip": { + "p50": 909.9199771881104, + "p90": 1181.3440322875977, + "p95": 
3818.2079792022705, + "p99": 4611.487865447998 + }, + "isolatedSum": { + "p50": 947.5200176239014, + "p90": 2327.584058046341, + "p95": 5228.223919868469, + "p99": 7718.111991882324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 716.2240147590637, + "p90": 848.7679958343506, + "p95": 3465.3120040893555, + "p99": 4086.81583404541 + }, + "combine": { + "p50": 324.12800192832947, + "p90": 363.072007894516, + "p95": 1912.7999544143677, + "p99": 3313.7919902801514 + }, + "roundtrip": { + "p50": 1015.3919458389282, + "p90": 1205.4719924926758, + "p95": 3892.767906188965, + "p99": 6006.720066070557 + }, + "isolatedSum": { + "p50": 1040.3520166873932, + "p90": 1211.8400037288666, + "p95": 5378.111958503723, + "p99": 7400.6078243255615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 700.4160284996033, + "p90": 760.09601354599, + "p95": 2341.344118118286, + "p99": 3818.6240196228027 + }, + "combine": { + "p50": 331.13598823547363, + "p90": 358.0479919910431, + "p95": 467.99999475479126, + "p99": 3358.1759929656982 + }, + "roundtrip": { + "p50": 1024.7039794921875, + "p90": 1906.432032585144, + "p95": 3994.9119091033936, + "p99": 4387.743949890137 + }, + "isolatedSum": { + "p50": 1031.552016735077, + "p90": 1118.144005537033, + "p95": 2809.3441128730774, + "p99": 7176.800012588501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 737.1839880943298, + "p90": 840.5119776725769, + "p95": 3137.53604888916, + "p99": 3924.5760440826416 + }, + "combine": { + "p50": 331.29599690437317, + "p90": 357.695996761322, + "p95": 1732.0959568023682, + "p99": 3481.2800884246826 + }, + "roundtrip": { + "p50": 1019.6479558944702, + "p90": 1162.816047668457, + "p95": 4008.512020111084, + "p99": 5127.808094024658 + }, + "isolatedSum": { + "p50": 1068.479984998703, + "p90": 1198.207974433899, + "p95": 4869.632005691528, + "p99": 7405.856132507324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 727.9040217399597, + "p90": 838.208019733429, + "p95": 3374.7520446777344, + "p99": 3933.3438873291016 + }, + "combine": { + "p50": 333.0239951610565, + "p90": 360.48001050949097, + "p95": 1836.575984954834, + "p99": 3582.655906677246 + }, + "roundtrip": { + "p50": 1034.9119901657104, + "p90": 1143.455982208252, + "p95": 3722.4318981170654, + "p99": 4452.7997970581055 + }, + "isolatedSum": { + "p50": 1060.9280169010162, + "p90": 1198.68803024292, + "p95": 5211.328029632568, + "p99": 7515.999794006348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 715.2320146560669, + "p90": 780.9600234031677, + "p95": 2378.335952758789, + "p99": 3955.008029937744 + }, + "combine": { + "p50": 324.73599910736084, + "p90": 359.6799969673157, + "p95": 1320.032000541687, + "p99": 3224.575996398926 + }, + 
"roundtrip": { + "p50": 998.5600113868713, + "p90": 1711.2319469451904, + "p95": 2835.968017578125, + "p99": 4369.53592300415 + }, + "isolatedSum": { + "p50": 1039.9680137634277, + "p90": 1140.6400203704834, + "p95": 3698.367953300476, + "p99": 7179.58402633667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 722.7839827537537, + "p90": 769.2480087280273, + "p95": 2304.095983505249, + "p99": 3863.5199069976807 + }, + "combine": { + "p50": 328.7679851055145, + "p90": 355.45599460601807, + "p95": 1869.8879480361938, + "p99": 3460.416078567505 + }, + "roundtrip": { + "p50": 1021.8240022659302, + "p90": 1085.15202999115, + "p95": 3810.8479976654053, + "p99": 4518.943786621094 + }, + "isolatedSum": { + "p50": 1051.5519678592682, + "p90": 1124.7040033340454, + "p95": 4173.983931541443, + "p99": 7323.935985565186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-38190bd8", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||f1c99f5cf8ca9ed", + "colorKey": "gb300_1cd48f0a", + "comparisonKey": "0bb94d01ffce354e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:43:20.726474+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f1c99f5cf8ca9ed", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 719.2959785461426, + "p90": 754.0799975395203, + "p95": 2214.5919799804688, + "p99": 2522.4640369415283 + }, + "combine": { + "p50": 335.83998680114746, + "p90": 360.6080114841461, + "p95": 1831.7439556121826, + "p99": 2334.944009780884 + }, + "roundtrip": { + "p50": 1031.4240455627441, + "p90": 1167.904019355774, + "p95": 2603.327989578247, + "p99": 4060.3199005126953 + }, + "isolatedSum": { + "p50": 1055.13596534729, + "p90": 1114.6880090236664, + "p95": 4046.3359355926514, + "p99": 4857.408046722412 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 716.9280052185059, + "p90": 755.1040053367615, + "p95": 2227.61607170105, + "p99": 3451.296091079712 + }, + "combine": { + "p50": 336.60799264907837, + "p90": 360.79999804496765, + "p95": 1744.320034980774, + "p99": 2138.3678913116455 + }, + "roundtrip": { + "p50": 1028.0640125274658, + "p90": 1175.1680374145508, + "p95": 2686.7520809173584, + "p99": 3314.5599365234375 + }, + "isolatedSum": { + "p50": 1053.5359978675842, + "p90": 1115.9040033817291, + "p95": 3971.9361066818237, + "p99": 5589.663982391357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 736.9599938392639, + "p90": 795.2640056610107, + "p95": 2224.0641117095947, + "p99": 2497.8559017181396 + }, + "combine": { + "p50": 334.81600880622864, + "p90": 357.66398906707764, + "p95": 1701.3440132141113, + "p99": 2139.4879817962646 + }, + "roundtrip": { + "p50": 1046.8480587005615, + "p90": 1234.7520589828491, + "p95": 2656.8639278411865, + "p99": 3642.047882080078 + }, + "isolatedSum": { + "p50": 1071.7760026454926, + "p90": 1152.9279947280884, + "p95": 3925.408124923706, + "p99": 4637.343883514404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 748.7360239028931, + "p90": 787.5199913978577, + "p95": 2205.951929092407, + 
"p99": 2637.120008468628 + }, + "combine": { + "p50": 343.26401352882385, + "p90": 369.4399893283844, + "p95": 1642.5280570983887, + "p99": 2035.9039306640625 + }, + "roundtrip": { + "p50": 1059.9679946899414, + "p90": 1999.9680519104004, + "p95": 2699.4879245758057, + "p99": 3060.9281063079834 + }, + "isolatedSum": { + "p50": 1092.000037431717, + "p90": 1156.959980726242, + "p95": 3848.479986190796, + "p99": 4673.02393913269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 727.4240255355835, + "p90": 1538.8480424880981, + "p95": 2295.9039211273193, + "p99": 3473.2799530029297 + }, + "combine": { + "p50": 334.5920145511627, + "p90": 378.04800271987915, + "p95": 1660.7040166854858, + "p99": 2200.4799842834473 + }, + "roundtrip": { + "p50": 1042.080044746399, + "p90": 1236.2240552902222, + "p95": 2723.6480712890625, + "p99": 3767.6799297332764 + }, + "isolatedSum": { + "p50": 1062.0160400867462, + "p90": 1916.8960452079773, + "p95": 3956.607937812805, + "p99": 5673.759937286377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 736.0640168190002, + "p90": 792.9919958114624, + "p95": 2314.527988433838, + "p99": 3319.2319869995117 + }, + "combine": { + "p50": 335.9679877758026, + "p90": 359.45600271224976, + "p95": 1558.784008026123, + "p99": 2159.5840454101562 + }, + "roundtrip": { + "p50": 1047.3599433898926, + "p90": 1167.1359539031982, + "p95": 2630.784034729004, + "p99": 3833.8239192962646 + }, + "isolatedSum": { + "p50": 1072.0320045948029, + "p90": 
1152.4479985237122, + "p95": 3873.311996459961, + "p99": 5478.816032409668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 740.6719923019409, + "p90": 804.6720027923584, + "p95": 2205.79195022583, + "p99": 2722.6879596710205 + }, + "combine": { + "p50": 337.72799372673035, + "p90": 365.63199758529663, + "p95": 1689.247965812683, + "p99": 2035.2959632873535 + }, + "roundtrip": { + "p50": 1051.4880418777466, + "p90": 1206.3039541244507, + "p95": 2612.4799251556396, + "p99": 3007.0080757141113 + }, + "isolatedSum": { + "p50": 1078.3999860286713, + "p90": 1170.304000377655, + "p95": 3895.039916038513, + "p99": 4757.983922958374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 733.1839799880981, + "p90": 797.3440289497375, + "p95": 2230.3359508514404, + "p99": 3112.351894378662 + }, + "combine": { + "p50": 333.5680067539215, + "p90": 356.6719889640808, + "p95": 1623.2000589370728, + "p99": 2103.327989578247 + }, + "roundtrip": { + "p50": 1034.6239805221558, + "p90": 1174.5280027389526, + "p95": 2653.856039047241, + "p99": 3951.4880180358887 + }, + "isolatedSum": { + "p50": 1066.7519867420197, + "p90": 1154.0160179138184, + "p95": 3853.536009788513, + "p99": 5215.679883956909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b9d3b67a", + "identity": 
"gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||f0bc700e9998f70", + "colorKey": "gb300_20aa4dc5", + "comparisonKey": "2ea915a6d8e0cb9e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:44:06.398565+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0bc700e9998f70", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 648.8959789276123, + "p90": 704.3520212173462, + "p95": 2281.4719676971436, + "p99": 3964.8640155792236 + }, + "combine": { + "p50": 300.5119860172272, + "p90": 324.8000144958496, + "p95": 343.4560000896454, + "p99": 2241.1839962005615 + }, + "roundtrip": { + "p50": 961.2159729003906, + "p90": 2603.6159992218018, + "p95": 4701.504230499268, + "p99": 35518.4326171875 + }, + "isolatedSum": { + "p50": 949.4079649448395, + "p90": 1029.1520357131958, + "p95": 2624.927967786789, + "p99": 6206.048011779785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 674.6559739112854, + "p90": 721.343994140625, + "p95": 2244.54402923584, + "p99": 4154.367923736572 + }, + "combine": { + "p50": 316.4159953594208, + "p90": 351.00799798965454, + "p95": 1822.111964225769, + "p99": 2970.560073852539 + }, + "roundtrip": { + "p50": 960.1280093193054, + "p90": 2519.8400020599365, + "p95": 2881.8559646606445, + "p99": 4674.880027770996 + }, + "isolatedSum": { + "p50": 991.0719692707062, + "p90": 1072.3519921302795, + "p95": 4066.655993461609, + "p99": 7124.927997589111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 668.7999963760376, + "p90": 719.1680073738098, + "p95": 1670.9760427474976, + "p99": 2592.384099960327 + }, + "combine": { + "p50": 303.8080036640167, + "p90": 324.70399141311646, + "p95": 1388.8640403747559, + "p99": 2444.2241191864014 + }, + "roundtrip": 
{ + "p50": 925.9200096130371, + "p90": 1013.375997543335, + "p95": 2482.8479290008545, + "p99": 3119.296073913574 + }, + "isolatedSum": { + "p50": 972.6080000400543, + "p90": 1043.8719987869263, + "p95": 3059.8400831222534, + "p99": 5036.6082191467285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 682.9119920730591, + "p90": 719.7120189666748, + "p95": 2242.5920963287354, + "p99": 2919.1040992736816 + }, + "combine": { + "p50": 305.9200048446655, + "p90": 331.29599690437317, + "p95": 464.80000019073486, + "p99": 2209.536075592041 + }, + "roundtrip": { + "p50": 946.4960098266602, + "p90": 1097.2479581832886, + "p95": 2669.9841022491455, + "p99": 4524.608135223389 + }, + "isolatedSum": { + "p50": 988.8319969177246, + "p90": 1051.008015871048, + "p95": 2707.39209651947, + "p99": 5128.640174865723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ca9929ea", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||0456df9778e5c0f", + "colorKey": "gb300_30494704", + "comparisonKey": "19d21f7151fe6735", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:48:29.666998+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + 
"transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0456df9778e5c0f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 739.6799921989441, + "p90": 1957.2160243988037, + "p95": 2284.1920852661133, + "p99": 3888.0960941314697 + }, + "combine": { + "p50": 349.2799997329712, + "p90": 390.46400785446167, + "p95": 1827.4879455566406, + "p99": 2221.440076828003 + }, + "roundtrip": { + "p50": 1050.7839918136597, + "p90": 2524.4479179382324, + "p95": 2751.744031906128, + "p99": 4391.488075256348 + }, + "isolatedSum": { + "p50": 1088.9599919319153, + "p90": 2347.6800322532654, + "p95": 4111.680030822754, + "p99": 6109.536170959473 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 9, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 716.3199782371521, + "p90": 1992.7680492401123, + "p95": 2330.336093902588, + "p99": 3717.0560359954834 + }, + "combine": { + "p50": 337.2800052165985, + "p90": 379.7760009765625, + "p95": 1726.7839908599854, + "p99": 2138.495922088623 + }, + "roundtrip": { + "p50": 1007.1040391921997, + "p90": 2123.039960861206, + "p95": 2694.272041320801, + "p99": 4349.408149719238 + }, + "isolatedSum": { + "p50": 1053.5999834537506, + "p90": 2372.544050216675, + "p95": 4057.1200847625732, + "p99": 5855.551958084106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 18, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 716.8639898300171, + "p90": 1909.824013710022, + "p95": 2261.823892593384, + "p99": 4010.7197761535645 + }, + "combine": { + "p50": 314.8159980773926, + "p90": 353.3119857311249, + "p95": 1613.055944442749, + "p99": 2169.7280406951904 + }, + "roundtrip": { + "p50": 1010.3679895401001, + "p90": 1912.3519659042358, + "p95": 2648.9920616149902, + "p99": 4559.296131134033 + }, + "isolatedSum": { + "p50": 1031.6799879074097, + "p90": 2263.135999441147, + "p95": 3874.879837036133, + "p99": 6180.447816848755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 36, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 755.0079822540283, + "p90": 799.3599772453308, + "p95": 2081.376075744629, + "p99": 
2588.5119438171387 + }, + "combine": { + "p50": 337.119996547699, + "p90": 361.56800389289856, + "p95": 1685.2480173110962, + "p99": 2117.1839237213135 + }, + "roundtrip": { + "p50": 1050.976037979126, + "p90": 1220.1600074768066, + "p95": 2575.968027114868, + "p99": 4147.776126861572 + }, + "isolatedSum": { + "p50": 1092.1279788017273, + "p90": 1160.9279811382294, + "p95": 3766.624093055725, + "p99": 4705.695867538452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 72, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 759.6480250358582, + "p90": 853.056013584137, + "p95": 2154.207944869995, + "p99": 3089.1520977020264 + }, + "combine": { + "p50": 332.67199993133545, + "p90": 357.34400153160095, + "p95": 1621.7600107192993, + "p99": 2133.824110031128 + }, + "roundtrip": { + "p50": 1066.2720203399658, + "p90": 1230.687975883484, + "p95": 2628.25608253479, + "p99": 4016.575813293457 + }, + "isolatedSum": { + "p50": 1092.3200249671936, + "p90": 1210.400015115738, + "p95": 3775.9679555892944, + "p99": 5222.976207733154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 144, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 740.6079769134521, + "p90": 804.2240142822266, + "p95": 2197.7601051330566, + "p99": 3174.4320392608643 + }, + "combine": { + "p50": 333.15199613571167, + "p90": 355.16801476478577, + "p95": 1349.8879671096802, + "p99": 2097.69606590271 + }, + "roundtrip": { + "p50": 1035.264015197754, + "p90": 1163.167953491211, + "p95": 2601.4719009399414, + "p99": 4094.560146331787 + }, + "isolatedSum": { + "p50": 1073.7599730491638, + "p90": 1159.3920290470123, + "p95": 
3547.648072242737, + "p99": 5272.128105163574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 288, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 723.6160039901733, + "p90": 764.9279832839966, + "p95": 2121.2480068206787, + "p99": 3638.0159854888916 + }, + "combine": { + "p50": 324.5440125465393, + "p90": 352.1279990673065, + "p95": 1719.648003578186, + "p99": 2324.160099029541 + }, + "roundtrip": { + "p50": 1019.6800231933594, + "p90": 1173.6639738082886, + "p95": 2600.735902786255, + "p99": 4339.168071746826 + }, + "isolatedSum": { + "p50": 1048.1600165367126, + "p90": 1117.055982351303, + "p95": 3840.8960103988647, + "p99": 5962.176084518433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 576, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 731.6799759864807, + "p90": 803.2640218734741, + "p95": 2103.6159992218018, + "p99": 3700.8960247039795 + }, + "combine": { + "p50": 326.9760012626648, + "p90": 353.2800078392029, + "p95": 1713.919997215271, + "p99": 2293.503999710083 + }, + "roundtrip": { + "p50": 1031.391978263855, + "p90": 1129.6000480651855, + "p95": 2531.2321186065674, + "p99": 4381.56795501709 + }, + "isolatedSum": { + "p50": 1058.6559772491455, + "p90": 1156.544029712677, + "p95": 3817.5359964370728, + "p99": 5994.4000244140625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19892699", + "identity": 
"gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||15404c7c0ec01b5", + "colorKey": "gb300_9700a008", + "comparisonKey": "95b752ffb404abc6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:46:42.510954+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "15404c7c0ec01b5", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 664.8960113525391, + "p90": 717.3759937286377, + "p95": 2400.4480838775635, + "p99": 3812.544107437134 + }, + "combine": { + "p50": 309.02400612831116, + "p90": 325.6959915161133, + "p95": 1793.3119535446167, + "p99": 2273.7600803375244 + }, + "roundtrip": { + "p50": 949.2800235748291, + "p90": 2292.799949645996, + "p95": 2771.9039916992188, + "p99": 3844.320058822632 + }, + "isolatedSum": { + "p50": 973.9200174808502, + "p90": 1043.071985244751, + "p95": 4193.76003742218, + "p99": 6086.304187774658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 660.6720089912415, + "p90": 774.1439938545227, + "p95": 2357.5680255889893, + "p99": 3837.3119831085205 + }, + "combine": { + "p50": 309.53601002693176, + "p90": 329.18399572372437, + "p95": 1937.600016593933, + "p99": 2476.639986038208 + }, + "roundtrip": { + "p50": 945.1519846916199, + "p90": 2228.3520698547363, + "p95": 2789.3118858337402, + "p99": 4236.959934234619 + }, + "isolatedSum": { + "p50": 970.2080190181732, + "p90": 1103.327989578247, + "p95": 4295.168042182922, + "p99": 6313.9519691467285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 678.7199974060059, + "p90": 710.9119892120361, + "p95": 2240.096092224121, + "p99": 2628.351926803589 + }, + "combine": { + "p50": 308.351993560791, + "p90": 326.1120021343231, + "p95": 492.5119876861572, + "p99": 2133.888006210327 + }, + 
"roundtrip": { + "p50": 970.0800180435181, + "p90": 1112.5119924545288, + "p95": 2626.8160343170166, + "p99": 4088.064193725586 + }, + "isolatedSum": { + "p50": 987.0719909667969, + "p90": 1037.0239913463593, + "p95": 2732.6080799102783, + "p99": 4762.239933013916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 50, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 679.967999458313, + "p90": 721.7599749565125, + "p95": 2267.6799297332764, + "p99": 3621.0880279541016 + }, + "combine": { + "p50": 308.9599907398224, + "p90": 330.30399680137634, + "p95": 1892.2879695892334, + "p99": 2316.3840770721436 + }, + "roundtrip": { + "p50": 973.3440279960632, + "p90": 1155.3599834442139, + "p95": 2706.144094467163, + "p99": 3201.472043991089 + }, + "isolatedSum": { + "p50": 988.9279901981354, + "p90": 1052.0639717578888, + "p95": 4159.96789932251, + "p99": 5937.472105026245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 682.1439862251282, + "p90": 757.2159767150879, + "p95": 2317.984104156494, + "p99": 2750.9119510650635 + }, + "combine": { + "p50": 307.5200021266937, + "p90": 329.8879861831665, + "p95": 1732.1280241012573, + "p99": 2272.991895675659 + }, + "roundtrip": { + "p50": 969.9519872665405, + "p90": 1050.6880283355713, + "p95": 2685.472011566162, + "p99": 3972.543954849243 + }, + "isolatedSum": { + "p50": 989.6639883518219, + "p90": 1087.1039628982544, + "p95": 4050.1121282577515, + "p99": 5023.903846740723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, 
+ "fanoutMean": 5.3125, + "recvTokensMax": 224, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 681.1839938163757, + "p90": 714.5280241966248, + "p95": 2328.927993774414, + "p99": 2902.5280475616455 + }, + "combine": { + "p50": 307.5520098209381, + "p90": 329.27998900413513, + "p95": 1809.2479705810547, + "p99": 2529.184103012085 + }, + "roundtrip": { + "p50": 961.8560075759888, + "p90": 1053.8560152053833, + "p95": 2625.920057296753, + "p99": 3996.351957321167 + }, + "isolatedSum": { + "p50": 988.7360036373138, + "p90": 1043.80801320076, + "p95": 4138.175964355469, + "p99": 5431.7121505737305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 681.056022644043, + "p90": 713.5360240936279, + "p95": 2098.112106323242, + "p99": 2567.6798820495605 + }, + "combine": { + "p50": 310.016006231308, + "p90": 331.4880132675171, + "p95": 1490.1759624481201, + "p99": 2226.304054260254 + }, + "roundtrip": { + "p50": 972.8000164031982, + "p90": 1018.8159942626953, + "p95": 2611.072063446045, + "p99": 3098.0160236358643 + }, + "isolatedSum": { + "p50": 991.072028875351, + "p90": 1045.024037361145, + "p95": 3588.2880687713623, + "p99": 4793.983936309814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 925, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 676.2239933013916, + "p90": 713.3439779281616, + "p95": 2224.4160175323486, + "p99": 3738.52801322937 + }, + "combine": { + "p50": 303.99999022483826, 
+ "p90": 325.47199726104736, + "p95": 1720.255970954895, + "p99": 2427.3600578308105 + }, + "roundtrip": { + "p50": 961.9519710540771, + "p90": 1006.6879987716675, + "p95": 2664.1600131988525, + "p99": 3836.384057998657 + }, + "isolatedSum": { + "p50": 980.2239835262299, + "p90": 1038.815975189209, + "p95": 3944.6719884872437, + "p99": 6165.888071060181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-002526ff", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||5793a02d08aaa9c", + "colorKey": "gb300_c510a7e6", + "comparisonKey": "ac618100ea2e3b6e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:54:52.934745+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": 
false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5793a02d08aaa9c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 698.9120244979858, + "p90": 2235.0080013275146, + "p95": 2485.3439331054688, + "p99": 4117.152214050293 + }, + "combine": { + "p50": 330.55999875068665, + "p90": 362.7519905567169, + "p95": 1838.528037071228, + "p99": 2418.5919761657715 + }, + "roundtrip": { + "p50": 988.6720180511475, + "p90": 2287.584066390991, + "p95": 2846.3358879089355, + "p99": 6051.648139953613 + }, + "isolatedSum": { + "p50": 1029.4720232486725, + "p90": 2597.7599918842316, + "p95": 4323.871970176697, + "p99": 6535.744190216064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 653.6319851875305, + "p90": 1405.5039882659912, + "p95": 2522.6240158081055, + "p99": 4179.359912872314 + }, + "combine": { + "p50": 307.3279857635498, + "p90": 353.0240058898926, + "p95": 1849.2799997329712, + "p99": 3457.119941711426 + }, + "roundtrip": { + "p50": 881.1839818954468, + "p90": 1208.0960273742676, + "p95": 2854.4960021972656, + "p99": 4475.327968597412 + }, + 
"isolatedSum": { + "p50": 960.9599709510803, + "p90": 1758.5279941558838, + "p95": 4371.904015541077, + "p99": 7636.47985458374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 22, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 691.6800141334534, + "p90": 781.2479734420776, + "p95": 2343.1999683380127, + "p99": 4050.528049468994 + }, + "combine": { + "p50": 312.8640055656433, + "p90": 356.7360043525696, + "p95": 1750.7519721984863, + "p99": 5197.919845581055 + }, + "roundtrip": { + "p50": 967.2639966011047, + "p90": 2077.9199600219727, + "p95": 2830.6241035461426, + "p99": 4401.919841766357 + }, + "isolatedSum": { + "p50": 1004.5440196990967, + "p90": 1137.9839777946472, + "p95": 4093.951940536499, + "p99": 9248.447895050049 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 707.6799869537354, + "p90": 770.7520127296448, + "p95": 2345.855951309204, + "p99": 4274.303913116455 + }, + "combine": { + "p50": 319.2960023880005, + "p90": 353.08799147605896, + "p95": 1621.8880414962769, + "p99": 3683.39204788208 + }, + "roundtrip": { + "p50": 1004.7680139541626, + "p90": 1324.2239952087402, + "p95": 2794.015884399414, + "p99": 4575.808048248291 + }, + "isolatedSum": { + "p50": 1026.9759893417358, + "p90": 1123.8400042057037, + "p95": 3967.743992805481, + "p99": 7957.695960998535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 79, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 700.3840208053589, + "p90": 770.1119780540466, + "p95": 2387.455940246582, + "p99": 4305.344104766846 + }, + "combine": { + "p50": 316.6719973087311, + "p90": 347.7120101451874, + "p95": 1723.3279943466187, + "p99": 3504.8320293426514 + }, + "roundtrip": { + "p50": 1001.5039443969727, + "p90": 1370.9440231323242, + "p95": 2734.0800762176514, + "p99": 4960.959911346436 + }, + "isolatedSum": { + "p50": 1017.05601811409, + "p90": 1117.823988199234, + "p95": 4110.783934593201, + "p99": 7810.176134109497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 134, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 669.5359945297241, + "p90": 783.456027507782, + "p95": 2219.072103500366, + "p99": 3808.1600666046143 + }, + "combine": { + "p50": 302.7839958667755, + "p90": 352.6400029659271, + "p95": 1502.6559829711914, + "p99": 2327.7440071105957 + }, + "roundtrip": { + "p50": 945.9840059280396, + "p90": 1104.7999858856201, + "p95": 2748.800039291382, + "p99": 4320.41597366333 + }, + "isolatedSum": { + "p50": 972.3199903964996, + "p90": 1136.096030473709, + "p95": 3721.7280864715576, + "p99": 6135.90407371521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 268, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 696.1600184440613, + "p90": 771.0080146789551, + "p95": 2140.415906906128, + "p99": 4225.632190704346 + }, + "combine": { + "p50": 318.5279965400696, + "p90": 350.0800132751465, + "p95": 443.1680142879486, + "p99": 3163.5520458221436 + }, + "roundtrip": { + "p50": 979.6479940414429, + "p90": 
1140.0959491729736, + "p95": 2778.592109680176, + "p99": 4458.591938018799 + }, + "isolatedSum": { + "p50": 1014.6880149841309, + "p90": 1121.0880279541016, + "p95": 2583.5839211940765, + "p99": 7389.184236526489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 533, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 671.7119812965393, + "p90": 771.8080282211304, + "p95": 2186.431884765625, + "p99": 3849.152088165283 + }, + "combine": { + "p50": 301.4400005340576, + "p90": 348.83201122283936, + "p95": 1535.5520248413086, + "p99": 2400.9599685668945 + }, + "roundtrip": { + "p50": 939.5840167999268, + "p90": 1077.5359869003296, + "p95": 2617.6319122314453, + "p99": 4417.888164520264 + }, + "isolatedSum": { + "p50": 973.1519818305969, + "p90": 1120.6400394439697, + "p95": 3721.9839096069336, + "p99": 6250.112056732178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 1027, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2a0fd65e", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||a572344820478f0", + "colorKey": "gb300_6c27634c", + "comparisonKey": "b411d98e4e3677dc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:47:37.028085+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + 
"worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a572344820478f0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 677.8560280799866, + "p90": 748.3199834823608, + "p95": 2489.9520874023438, + "p99": 4585.3118896484375 + }, + "combine": { + "p50": 315.0399923324585, + "p90": 353.11999917030334, + "p95": 1784.1600179672241, + "p99": 3225.503921508789 + }, + "roundtrip": { + "p50": 933.791995048523, + "p90": 1180.9920072555542, + "p95": 2653.887987136841, + "p99": 4576.992034912109 + }, + "isolatedSum": { + "p50": 992.8960204124451, + "p90": 1101.4399826526642, + "p95": 4274.112105369568, + "p99": 7810.815811157227 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 15, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 615.8720254898071, + "p90": 776.639997959137, + "p95": 2679.03995513916, + "p99": 4346.62389755249 + }, + "combine": { + "p50": 290.0800108909607, + "p90": 357.5359880924225, + "p95": 1968.000054359436, + "p99": 3556.1599731445312 + }, + "roundtrip": { + "p50": 867.3279881477356, + "p90": 1181.3119649887085, + "p95": 2722.8479385375977, + "p99": 4799.647808074951 + }, + "isolatedSum": { + "p50": 905.9520363807678, + "p90": 1134.1759860515594, + "p95": 4647.040009498596, + "p99": 7902.7838706970215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 24, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 662.2400283813477, + "p90": 748.6720085144043, + "p95": 2327.8720378875732, + "p99": 4353.919982910156 + }, + "combine": { + "p50": 292.38399863243103, + "p90": 349.40800070762634, + "p95": 1803.007960319519, + "p99": 3654.46400642395 + }, + "roundtrip": { + "p50": 906.0800075531006, + "p90": 1231.8400144577026, + "p95": 2691.296100616455, + "p99": 4459.04016494751 + }, + "isolatedSum": { + "p50": 954.6240270137787, + "p90": 1098.0800092220306, + "p95": 4130.879998207092, + "p99": 8008.383989334106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 43, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 635.7759833335876, + "p90": 740.8639788627625, + "p95": 2172.5120544433594, + "p99": 4060.800075531006 + }, + 
"combine": { + "p50": 292.1279966831207, + "p90": 345.2160060405731, + "p95": 1787.8719568252563, + "p99": 3344.1920280456543 + }, + "roundtrip": { + "p50": 911.296010017395, + "p90": 1091.9359922409058, + "p95": 2711.8079662323, + "p99": 4646.399974822998 + }, + "isolatedSum": { + "p50": 927.9039800167084, + "p90": 1086.0799849033356, + "p95": 3960.3840112686157, + "p99": 7404.99210357666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 73, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 673.8240122795105, + "p90": 742.3359751701355, + "p95": 2339.4880294799805, + "p99": 3979.583978652954 + }, + "combine": { + "p50": 307.9040050506592, + "p90": 337.5680148601532, + "p95": 422.14399576187134, + "p99": 3390.6240463256836 + }, + "roundtrip": { + "p50": 950.6239891052246, + "p90": 1054.3359518051147, + "p95": 2461.6639614105225, + "p99": 4510.8160972595215 + }, + "isolatedSum": { + "p50": 981.7280173301697, + "p90": 1079.9039900302887, + "p95": 2761.632025241852, + "p99": 7370.208024978638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 142, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 624.351978302002, + "p90": 749.7919797897339, + "p95": 2265.631914138794, + "p99": 4086.048126220703 + }, + "combine": { + "p50": 283.7440073490143, + "p90": 342.46399998664856, + "p95": 1945.4079866409302, + "p99": 3706.33602142334 + }, + "roundtrip": { + "p50": 916.9279932975769, + "p90": 1176.576018333435, + "p95": 2655.168056488037, + "p99": 4550.784111022949 + }, + "isolatedSum": { + "p50": 908.0959856510162, + "p90": 1092.2559797763824, + "p95": 4211.039900779724, + 
"p99": 7792.384147644043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 274, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 676.4799952507019, + "p90": 755.2319765090942, + "p95": 2069.2479610443115, + "p99": 3537.4720096588135 + }, + "combine": { + "p50": 303.51999402046204, + "p90": 345.69600224494934, + "p95": 1605.9520244598389, + "p99": 2750.976085662842 + }, + "roundtrip": { + "p50": 921.7280149459839, + "p90": 1250.175952911377, + "p95": 2669.919967651367, + "p99": 4222.176074981689 + }, + "isolatedSum": { + "p50": 979.9999892711639, + "p90": 1100.9279787540436, + "p95": 3675.1999855041504, + "p99": 6288.448095321655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 526, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 683.67999792099, + "p90": 755.0079822540283, + "p95": 2057.408094406128, + "p99": 3766.3679122924805 + }, + "combine": { + "p50": 308.47999453544617, + "p90": 348.28799962997437, + "p95": 1787.8719568252563, + "p99": 3322.7200508117676 + }, + "roundtrip": { + "p50": 950.2720236778259, + "p90": 1089.8239612579346, + "p95": 2789.2799377441406, + "p99": 4373.88801574707 + }, + "isolatedSum": { + "p50": 992.1599924564362, + "p90": 1103.2959818840027, + "p95": 3845.2800512313843, + "p99": 7089.087963104248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 1042, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bfe6d07f", + "identity": 
"gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_06081769", + "comparisonKey": "95d5a8845b23c1a0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:42:58.523132+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": 
[ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 720.0000286102295, + "p90": 2108.2239151000977, + "p95": 3332.2880268096924, + "p99": 3720.7679748535156 + }, + "combine": { + "p50": 340.4160141944885, + "p90": 418.2080030441284, + "p95": 1905.56800365448, + "p99": 3254.528045654297 + }, + "roundtrip": { + "p50": 1030.7519435882568, + "p90": 1365.0239706039429, + "p95": 2910.0160598754883, + "p99": 4252.384185791016 + }, + "isolatedSum": { + "p50": 1060.416042804718, + "p90": 2526.431918144226, + "p95": 5237.856030464172, + "p99": 6975.2960205078125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 712.8959894180298, + "p90": 1903.9039611816406, + "p95": 2697.9520320892334, + "p99": 3818.78399848938 + }, + "combine": { + "p50": 339.32799100875854, + "p90": 405.2479863166809, + "p95": 1996.7679977416992, + "p99": 3333.888053894043 + }, + "roundtrip": { + "p50": 1002.2399425506592, + "p90": 2219.1359996795654, + "p95": 2960.7040882110596, + "p99": 4112.703800201416 + }, + "isolatedSum": { + "p50": 1052.2239804267883, + "p90": 2309.1519474983215, + "p95": 4694.720029830933, + "p99": 7152.672052383423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 722.2399711608887, + "p90": 875.872015953064, + "p95": 3045.8240509033203, + "p99": 4228.288173675537 + }, + "combine": { + "p50": 329.6639919281006, + "p90": 359.391987323761, + "p95": 1748.5120296478271, + "p99": 3323.7760066986084 + }, + "roundtrip": { + "p50": 1023.5519409179688, + "p90": 
1303.5199642181396, + "p95": 2668.8320636749268, + "p99": 4188.320159912109 + }, + "isolatedSum": { + "p50": 1051.9039630889893, + "p90": 1235.264003276825, + "p95": 4794.3360805511475, + "p99": 7552.0641803741455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 745.4720139503479, + "p90": 872.223973274231, + "p95": 2211.168050765991, + "p99": 3620.800018310547 + }, + "combine": { + "p50": 337.6320004463196, + "p90": 390.78399538993835, + "p95": 1825.5360126495361, + "p99": 3279.6480655670166 + }, + "roundtrip": { + "p50": 1033.8239669799805, + "p90": 1246.9120025634766, + "p95": 2775.2320766448975, + "p99": 4073.823928833008 + }, + "isolatedSum": { + "p50": 1083.1040143966675, + "p90": 1263.0079686641693, + "p95": 4036.7040634155273, + "p99": 6900.4480838775635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 730.1440238952637, + "p90": 865.2160167694092, + "p95": 2251.9359588623047, + "p99": 3804.095983505249 + }, + "combine": { + "p50": 333.6319923400879, + "p90": 390.6559944152832, + "p95": 1331.4239978790283, + "p99": 3230.0479412078857 + }, + "roundtrip": { + "p50": 1044.6399450302124, + "p90": 1189.695954322815, + "p95": 2925.920009613037, + "p99": 4541.152000427246 + }, + "isolatedSum": { + "p50": 1063.7760162353516, + "p90": 1255.8720111846924, + "p95": 3583.359956741333, + "p99": 7034.143924713135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 
624, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 723.9680290222168, + "p90": 780.3840041160583, + "p95": 2213.792085647583, + "p99": 3638.688087463379 + }, + "combine": { + "p50": 329.5679986476898, + "p90": 350.6560027599335, + "p95": 1544.160008430481, + "p99": 3258.143901824951 + }, + "roundtrip": { + "p50": 1030.8159589767456, + "p90": 1571.7439651489258, + "p95": 2912.5759601593018, + "p99": 4163.104057312012 + }, + "isolatedSum": { + "p50": 1053.5360276699066, + "p90": 1131.0400068759918, + "p95": 3757.952094078064, + "p99": 6896.83198928833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 710.3360295295715, + "p90": 855.8080196380615, + "p95": 1655.6799411773682, + "p99": 3526.495933532715 + }, + "combine": { + "p50": 325.82399249076843, + "p90": 387.7120018005371, + "p95": 775.9039998054504, + "p99": 3199.359893798828 + }, + "roundtrip": { + "p50": 1024.7360467910767, + "p90": 1200.2559900283813, + "p95": 3003.040075302124, + "p99": 4378.687858581543 + }, + "isolatedSum": { + "p50": 1036.16002202034, + "p90": 1243.5200214385986, + "p95": 2431.5839409828186, + "p99": 6725.855827331543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 726.9759774208069, + "p90": 796.832025051117, + "p95": 2274.143934249878, + "p99": 3632.159948348999 + }, + "combine": { + "p50": 331.5519988536835, + "p90": 359.74401235580444, + "p95": 
1729.3440103530884, + "p99": 3288.6719703674316 + }, + "roundtrip": { + "p50": 1026.3359546661377, + "p90": 1133.7920427322388, + "p95": 2767.9359912872314, + "p99": 4044.3520545959473 + }, + "isolatedSum": { + "p50": 1058.5279762744904, + "p90": 1156.5760374069214, + "p95": 4003.4879446029663, + "p99": 6920.831918716431 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0220b1d0", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||47fc79fe5fdca4c", + "colorKey": "gb300_00154133", + "comparisonKey": "03f03e5d76773553", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:45:29.352990+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", 
+ "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "47fc79fe5fdca4c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 712.0959758758545, + "p90": 800.2240061759949, + "p95": 2256.8318843841553, + "p99": 3773.184061050415 + }, + "combine": { + "p50": 335.32801270484924, + "p90": 358.14398527145386, + "p95": 539.135992527008, + "p99": 2184.959888458252 + }, + "roundtrip": { + "p50": 1012.1920108795166, + "p90": 1310.0800514221191, + "p95": 2699.0718841552734, + "p99": 4276.576042175293 + }, + "isolatedSum": { + "p50": 1047.4239885807037, + "p90": 1158.3679914474487, + "p95": 2795.9678769111633, + "p99": 5958.143949508667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 59, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 675.5200028419495, + "p90": 762.0800137519836, + "p95": 2280.927896499634, + "p99": 4093.344211578369 + }, + "combine": { + "p50": 321.6640055179596, + "p90": 357.7280044555664, + "p95": 458.2720100879669, + "p99": 2088.383913040161 + }, + "roundtrip": { + "p50": 953.0879855155945, + "p90": 1138.0159854888916, + "p95": 2591.775894165039, + "p99": 3614.784002304077 + }, + "isolatedSum": { + "p50": 997.1840083599091, + "p90": 1119.80801820755, + "p95": 2739.1999065876007, + 
"p99": 6181.72812461853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 121, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 730.1440238952637, + "p90": 1788.3520126342773, + "p95": 2328.6399841308594, + "p99": 4066.431999206543 + }, + "combine": { + "p50": 332.89599418640137, + "p90": 639.7119760513306, + "p95": 1766.7200565338135, + "p99": 2413.439989089966 + }, + "roundtrip": { + "p50": 1033.6639881134033, + "p90": 1305.9519529342651, + "p95": 2597.7280139923096, + "p99": 4387.807846069336 + }, + "isolatedSum": { + "p50": 1063.040018081665, + "p90": 2428.063988685608, + "p95": 4095.360040664673, + "p99": 6479.871988296509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 244, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 728.3840179443359, + "p90": 773.7280130386353, + "p95": 2086.6239070892334, + "p99": 3792.479991912842 + }, + "combine": { + "p50": 335.5199992656708, + "p90": 432.3520064353943, + "p95": 1866.6239976882935, + "p99": 3224.5121002197266 + }, + "roundtrip": { + "p50": 1033.728003501892, + "p90": 1766.9440507888794, + "p95": 2574.3041038513184, + "p99": 4285.215854644775 + }, + "isolatedSum": { + "p50": 1063.9040172100067, + "p90": 1206.0800194740295, + "p95": 3953.247904777527, + "p99": 7016.992092132568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 478, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 736.6080284118652, + "p90": 
1856.7039966583252, + "p95": 2290.2400493621826, + "p99": 3847.968101501465 + }, + "combine": { + "p50": 335.4560136795044, + "p90": 378.84798645973206, + "p95": 1800.3840446472168, + "p99": 2983.9680194854736 + }, + "roundtrip": { + "p50": 1032.9279899597168, + "p90": 1385.0879669189453, + "p95": 2630.9759616851807, + "p99": 4271.520137786865 + }, + "isolatedSum": { + "p50": 1072.0640420913696, + "p90": 2235.5519831180573, + "p95": 4090.6240940093994, + "p99": 6831.9361209869385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 953, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 703.0400037765503, + "p90": 817.7599906921387, + "p95": 2339.871883392334, + "p99": 3945.6961154937744 + }, + "combine": { + "p50": 326.4639973640442, + "p90": 354.8800051212311, + "p95": 1546.7519760131836, + "p99": 2115.648031234741 + }, + "roundtrip": { + "p50": 986.7200255393982, + "p90": 1166.208028793335, + "p95": 2553.4400939941406, + "p99": 3132.2879791259766 + }, + "isolatedSum": { + "p50": 1029.5040011405945, + "p90": 1172.6399958133698, + "p95": 3886.6238594055176, + "p99": 6061.344146728516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 1908, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 713.4079933166504, + "p90": 844.5760011672974, + "p95": 2171.168088912964, + "p99": 3691.3599967956543 + }, + "combine": { + "p50": 320.47998905181885, + "p90": 353.43998670578003, + "p95": 1626.6560554504395, + "p99": 3241.9519424438477 + }, + "roundtrip": { + "p50": 1015.8400535583496, + "p90": 1085.4400396347046, + "p95": 2313.215970993042, + "p99": 4045.407772064209 + }, + 
"isolatedSum": { + "p50": 1033.8879823684692, + "p90": 1198.0159878730774, + "p95": 3797.8241443634033, + "p99": 6933.311939239502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 3804, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 751.2959837913513, + "p90": 859.4560027122498, + "p95": 2180.3839206695557, + "p99": 3225.2159118652344 + }, + "combine": { + "p50": 349.4400084018707, + "p90": 364.47998881340027, + "p95": 623.3279705047607, + "p99": 1902.143955230713 + }, + "roundtrip": { + "p50": 1021.9520330429077, + "p90": 1142.3360109329224, + "p95": 2400.0959396362305, + "p99": 3965.280055999756 + }, + "isolatedSum": { + "p50": 1100.735992193222, + "p90": 1223.93599152565, + "p95": 2803.7118911743164, + "p99": 5127.359867095947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ae899527", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||39778bd75f046da", + "colorKey": "gb300_311629eb", + "comparisonKey": "008ab5cea08022d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:54:01.454498+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · 
zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "39778bd75f046da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 665.5359864234924, + "p90": 1970.911979675293, + "p95": 2391.3280963897705, + "p99": 4255.519866943359 + }, + "combine": { + "p50": 315.90399146080017, + "p90": 380.7680010795593, + "p95": 1895.359992980957, + "p99": 2565.471887588501 + }, + "roundtrip": { + "p50": 956.4160108566284, + "p90": 2447.711944580078, + "p95": 2809.664011001587, + "p99": 4520.1921463012695 + }, + "isolatedSum": { + "p50": 981.4399778842926, + "p90": 2351.6799807548523, + "p95": 4286.6880893707275, + "p99": 6820.99175453186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 
559104, + "fanoutMean": 4.875, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 639.4559741020203, + "p90": 837.0559811592102, + "p95": 2422.9440689086914, + "p99": 4084.1917991638184 + }, + "combine": { + "p50": 302.8160035610199, + "p90": 335.10398864746094, + "p95": 1934.8479509353638, + "p99": 2368.4799671173096 + }, + "roundtrip": { + "p50": 918.7520146369934, + "p90": 2504.2879581451416, + "p95": 2821.5999603271484, + "p99": 4366.879940032959 + }, + "isolatedSum": { + "p50": 942.2719776630402, + "p90": 1172.1599698066711, + "p95": 4357.792019844055, + "p99": 6452.671766281128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 683.9039921760559, + "p90": 727.8079986572266, + "p95": 2147.7439403533936, + "p99": 3740.6399250030518 + }, + "combine": { + "p50": 306.7840039730072, + "p90": 331.0079872608185, + "p95": 1248.5120296478271, + "p99": 2071.5200901031494 + }, + "roundtrip": { + "p50": 979.744017124176, + "p90": 1177.3439645767212, + "p95": 2708.159923553467, + "p99": 4141.088008880615 + }, + "isolatedSum": { + "p50": 990.6879961490631, + "p90": 1058.815985918045, + "p95": 3396.2559700012207, + "p99": 5812.160015106201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 40, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 679.1679859161377, + "p90": 730.1759719848633, + "p95": 2288.127899169922, + "p99": 2613.663911819458 + }, + "combine": { + "p50": 308.351993560791, + "p90": 
328.44799757003784, + "p95": 786.7199778556824, + "p99": 2099.3919372558594 + }, + "roundtrip": { + "p50": 971.5200066566467, + "p90": 1088.0639553070068, + "p95": 2663.935899734497, + "p99": 4545.472145080566 + }, + "isolatedSum": { + "p50": 987.5199794769287, + "p90": 1058.6239695549011, + "p95": 3074.8478770256042, + "p99": 4713.055849075317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 71, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 694.7839856147766, + "p90": 785.2159738540649, + "p95": 2263.6799812316895, + "p99": 2579.1358947753906 + }, + "combine": { + "p50": 314.2400085926056, + "p90": 333.5359990596771, + "p95": 1703.6800384521484, + "p99": 2269.536018371582 + }, + "roundtrip": { + "p50": 974.3040204048157, + "p90": 1073.7919807434082, + "p95": 2679.231882095337, + "p99": 4277.376174926758 + }, + "isolatedSum": { + "p50": 1009.0239942073822, + "p90": 1118.751972913742, + "p95": 3967.360019683838, + "p99": 4848.671913146973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 143, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 656.9600105285645, + "p90": 725.2159714698792, + "p95": 2253.983974456787, + "p99": 2616.863965988159 + }, + "combine": { + "p50": 302.3360073566437, + "p90": 327.90398597717285, + "p95": 1796.0959672927856, + "p99": 2276.639938354492 + }, + "roundtrip": { + "p50": 932.3520064353943, + "p90": 1079.1360139846802, + "p95": 2665.40789604187, + "p99": 4395.5841064453125 + }, + "isolatedSum": { + "p50": 959.2960178852081, + "p90": 1053.119957447052, + "p95": 4050.0799417495728, + "p99": 4893.503904342651 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 266, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 678.6239743232727, + "p90": 714.7520184516907, + "p95": 2164.28804397583, + "p99": 2703.9999961853027 + }, + "combine": { + "p50": 306.65600299835205, + "p90": 329.75998520851135, + "p95": 876.0640025138855, + "p99": 2239.6159172058105 + }, + "roundtrip": { + "p50": 965.0880098342896, + "p90": 1041.5359735488892, + "p95": 2559.743881225586, + "p99": 2966.14408493042 + }, + "isolatedSum": { + "p50": 985.2799773216248, + "p90": 1044.512003660202, + "p95": 3040.3520464897156, + "p99": 4943.615913391113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 534, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 682.4319958686829, + "p90": 735.8080148696899, + "p95": 2229.2799949645996, + "p99": 2564.6719932556152 + }, + "combine": { + "p50": 308.0959916114807, + "p90": 337.0240032672882, + "p95": 1277.9200077056885, + "p99": 2266.239881515503 + }, + "roundtrip": { + "p50": 964.7679924964905, + "p90": 1043.1040525436401, + "p95": 2528.2559394836426, + "p99": 2971.6479778289795 + }, + "isolatedSum": { + "p50": 990.5279874801636, + "p90": 1072.8320181369781, + "p95": 3507.200002670288, + "p99": 4830.911874771118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1044, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-277e7a85", + "identity": 
"gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||a3b13bb200bb717", + "colorKey": "gb300_6400c8a6", + "comparisonKey": "7981a8dee891d81d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:50:15.426088+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a3b13bb200bb717", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" 
+ }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 705.024003982544, + "p90": 799.5839715003967, + "p95": 2382.080078125, + "p99": 3475.1999378204346 + }, + "combine": { + "p50": 325.72799921035767, + "p90": 353.5360097885132, + "p95": 2015.1360034942627, + "p99": 2351.583957672119 + }, + "roundtrip": { + "p50": 997.439980506897, + "p90": 1300.0320196151733, + "p95": 2755.392074584961, + "p99": 3329.3120861053467 + }, + "isolatedSum": { + "p50": 1030.7520031929016, + "p90": 1153.11998128891, + "p95": 4397.216081619263, + "p99": 5826.783895492554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 717.9200053215027, + "p90": 1216.7999744415283, + "p95": 2284.4159603118896, + "p99": 3110.4960441589355 + }, + "combine": { + "p50": 326.2079954147339, + "p90": 346.72001004219055, + "p95": 1749.0559816360474, + "p99": 2162.0800495147705 + }, + "roundtrip": { + "p50": 1009.9200010299683, + "p90": 1246.399998664856, + "p95": 2720.031976699829, + "p99": 3696.320056915283 + }, + "isolatedSum": { + "p50": 1044.1280007362366, + "p90": 1563.5199844837189, + "p95": 4033.471941947937, + "p99": 5272.576093673706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 40, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 710.7200026512146, + "p90": 744.6720004081726, + "p95": 2239.327907562256, + "p99": 2573.823928833008 + }, + "combine": { + "p50": 319.0400004386902, + "p90": 343.1999981403351, + "p95": 1728.543996810913, + "p99": 2099.936008453369 + }, + "roundtrip": { + "p50": 1012.7040147781372, + 
"p90": 1139.19997215271, + "p95": 2538.1760597229004, + "p99": 3006.688117980957 + }, + "isolatedSum": { + "p50": 1029.7600030899048, + "p90": 1087.8719985485077, + "p95": 3967.871904373169, + "p99": 4673.759937286377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 81, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 728.9919853210449, + "p90": 771.4880108833313, + "p95": 2202.8160095214844, + "p99": 2551.743984222412 + }, + "combine": { + "p50": 330.30399680137634, + "p90": 358.815997838974, + "p95": 1020.4160213470459, + "p99": 2114.5920753479004 + }, + "roundtrip": { + "p50": 1032.480001449585, + "p90": 1180.4800033569336, + "p95": 2553.119897842407, + "p99": 2883.0718994140625 + }, + "isolatedSum": { + "p50": 1059.2959821224213, + "p90": 1130.3040087223053, + "p95": 3223.2320308685303, + "p99": 4666.3360595703125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 165, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 725.4080176353455, + "p90": 773.3439803123474, + "p95": 2163.935899734497, + "p99": 2610.27193069458 + }, + "combine": { + "p50": 325.5999982357025, + "p90": 354.65601086616516, + "p95": 1400.3520011901855, + "p99": 2161.4720821380615 + }, + "roundtrip": { + "p50": 1021.7280387878418, + "p90": 1124.6399879455566, + "p95": 2595.5519676208496, + "p99": 3009.6640586853027 + }, + "isolatedSum": { + "p50": 1051.008015871048, + "p90": 1127.9999911785126, + "p95": 3564.2879009246826, + "p99": 4771.744012832642 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + 
"recvTokensMax": 339, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 735.040009021759, + "p90": 776.2240171432495, + "p95": 2189.2800331115723, + "p99": 2543.839931488037 + }, + "combine": { + "p50": 329.53599095344543, + "p90": 351.00799798965454, + "p95": 1633.8560581207275, + "p99": 2094.2718982696533 + }, + "roundtrip": { + "p50": 1040.38405418396, + "p90": 1165.3759479522705, + "p95": 2684.8959922790527, + "p99": 2990.8480644226074 + }, + "isolatedSum": { + "p50": 1064.5759999752045, + "p90": 1127.232015132904, + "p95": 3823.1360912323, + "p99": 4638.11182975769 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 676, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 723.5199809074402, + "p90": 767.1679854393005, + "p95": 2251.9679069519043, + "p99": 2607.3598861694336 + }, + "combine": { + "p50": 327.10400223731995, + "p90": 356.00000619888306, + "p95": 1392.6080465316772, + "p99": 2198.496103286743 + }, + "roundtrip": { + "p50": 1019.6479558944702, + "p90": 1089.9840593338013, + "p95": 2555.5520057678223, + "p99": 3019.9038982391357 + }, + "isolatedSum": { + "p50": 1050.6239831447601, + "p90": 1123.1679916381836, + "p95": 3644.5759534835815, + "p99": 4805.855989456177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 1328, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 726.4639735221863, + "p90": 759.1999769210815, + "p95": 2278.3679962158203, + "p99": 2577.3439407348633 + }, + "combine": { + "p50": 328.0960023403168, + "p90": 
350.7840037345886, + "p95": 1659.168004989624, + "p99": 2095.3280925750732 + }, + "roundtrip": { + "p50": 1032.0639610290527, + "p90": 1083.3920240402222, + "p95": 2544.991970062256, + "p99": 3079.008102416992 + }, + "isolatedSum": { + "p50": 1054.559975862503, + "p90": 1109.9839806556702, + "p95": 3937.5360012054443, + "p99": 4672.6720333099365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3ad6201b", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||ab982093c4eac2b", + "colorKey": "gb300_bf4b6268", + "comparisonKey": "c1f1afb6e341ecb7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:50:37.208962+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": 
false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ab982093c4eac2b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 725.4080176353455, + "p90": 2060.1279735565186, + "p95": 2424.2560863494873, + "p99": 5051.775932312012 + }, + "combine": { + "p50": 343.80799531936646, + "p90": 400.06399154663086, + "p95": 1705.3439617156982, + "p99": 3479.167938232422 + }, + "roundtrip": { + "p50": 1034.0479612350464, + "p90": 1274.0800380706787, + "p95": 2700.7040977478027, + "p99": 6142.399787902832 + }, + "isolatedSum": { + "p50": 1069.216012954712, + "p90": 2460.1919651031494, + "p95": 4129.600048065186, + "p99": 8530.943870544434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 663.5199785232544, + "p90": 833.1199884414673, + "p95": 2335.2959156036377, + "p99": 3838.7200832366943 + }, + "combine": { + "p50": 313.50401043891907, + "p90": 390.5920088291168, + "p95": 1978.943943977356, + "p99": 3592.2560691833496 + }, + "roundtrip": { + "p50": 928.3199906349182, + "p90": 1221.0559844970703, + "p95": 2798.271894454956, + "p99": 4276.063919067383 + }, + "isolatedSum": { + "p50": 
977.0239889621735, + "p90": 1223.711997270584, + "p95": 4314.239859580994, + "p99": 7430.976152420044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 22, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 674.3999719619751, + "p90": 742.3040270805359, + "p95": 2288.288116455078, + "p99": 3361.6321086883545 + }, + "combine": { + "p50": 309.56798791885376, + "p90": 338.6879861354828, + "p95": 385.3119909763336, + "p99": 2373.8560676574707 + }, + "roundtrip": { + "p50": 957.3760032653809, + "p90": 1051.0400533676147, + "p95": 2537.087917327881, + "p99": 4178.400039672852 + }, + "isolatedSum": { + "p50": 983.9679598808289, + "p90": 1080.9920132160187, + "p95": 2673.6001074314117, + "p99": 5735.488176345825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 39, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 675.104022026062, + "p90": 844.7679877281189, + "p95": 2215.5840396881104, + "p99": 3628.9920806884766 + }, + "combine": { + "p50": 310.4639947414398, + "p90": 371.2959885597229, + "p95": 1541.2479639053345, + "p99": 3249.8879432678223 + }, + "roundtrip": { + "p50": 956.3519954681396, + "p90": 1153.5999774932861, + "p95": 2711.168050765991, + "p99": 4426.559925079346 + }, + "isolatedSum": { + "p50": 985.5680167675018, + "p90": 1216.0639762878418, + "p95": 3756.832003593445, + "p99": 6878.880023956299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 73, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 706.6879868507385, + "p90": 788.864016532898, + "p95": 2161.0240936279297, + "p99": 3298.111915588379 + }, + "combine": { + "p50": 308.22399258613586, + "p90": 359.71200466156006, + "p95": 1678.4000396728516, + "p99": 2358.62398147583 + }, + "roundtrip": { + "p50": 957.2479724884033, + "p90": 1146.0479497909546, + "p95": 2725.055932998657, + "p99": 4143.9361572265625 + }, + "isolatedSum": { + "p50": 1014.9119794368744, + "p90": 1148.576021194458, + "p95": 3839.4241333007812, + "p99": 5656.735897064209 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 138, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 673.1839776039124, + "p90": 772.159993648529, + "p95": 2018.5599327087402, + "p99": 2620.7680702209473 + }, + "combine": { + "p50": 305.4400086402893, + "p90": 350.3359854221344, + "p95": 1689.4079446792603, + "p99": 2915.5519008636475 + }, + "roundtrip": { + "p50": 955.6800127029419, + "p90": 1089.568018913269, + "p95": 2646.9759941101074, + "p99": 4093.5678482055664 + }, + "isolatedSum": { + "p50": 978.6239862442017, + "p90": 1122.4959790706635, + "p95": 3707.9678773880005, + "p99": 5536.319971084595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 273, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 676.5440106391907, + "p90": 738.5280132293701, + "p95": 2129.2800903320312, + "p99": 2821.216106414795 + }, + "combine": { + "p50": 310.91201305389404, + "p90": 339.9359881877899, + "p95": 1787.0080471038818, + "p99": 3045.696020126343 + }, + "roundtrip": { + "p50": 951.9680142402649, + "p90": 
1072.5120306015015, + "p95": 2779.9038887023926, + "p99": 4172.99222946167 + }, + "isolatedSum": { + "p50": 987.4560236930847, + "p90": 1078.46400141716, + "p95": 3916.288137435913, + "p99": 5866.912126541138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 532, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 691.8399930000305, + "p90": 750.0799894332886, + "p95": 2375.4239082336426, + "p99": 3531.4559936523438 + }, + "combine": { + "p50": 314.62401151657104, + "p90": 346.52799367904663, + "p95": 468.35198998451233, + "p99": 2961.280107498169 + }, + "roundtrip": { + "p50": 947.8399753570557, + "p90": 1089.2800092697144, + "p95": 2811.5200996398926, + "p99": 4082.9758644104004 + }, + "isolatedSum": { + "p50": 1006.4640045166016, + "p90": 1096.6079831123352, + "p95": 2843.775898218155, + "p99": 6492.736101150513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 1041, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f1010d11", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||1093cd76c9cd2db", + "colorKey": "gb300_0fa732b5", + "comparisonKey": "64fa72ebadda304a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:52:03.659378+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + 
"worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "1093cd76c9cd2db", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 676.6719818115234, + "p90": 2021.5680599212646, + "p95": 2516.9599056243896, + "p99": 3941.9519901275635 + }, + "combine": { + "p50": 304.064005613327, + "p90": 342.75200963020325, + "p95": 1820.9919929504395, + "p99": 3569.2479610443115 + }, + "roundtrip": { + "p50": 955.9999704360962, + "p90": 2272.8641033172607, + "p95": 2749.5999336242676, + "p99": 4519.999980926514 + }, + "isolatedSum": { + "p50": 980.7359874248505, + "p90": 2364.320069551468, + "p95": 4337.951898574829, + "p99": 7511.199951171875 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 672.927975654602, + "p90": 2093.8880443573, + "p95": 2401.632070541382, + "p99": 3940.2239322662354 + }, + "combine": { + "p50": 300.83200335502625, + "p90": 316.9279992580414, + "p95": 1662.1119976043701, + "p99": 3731.071949005127 + }, + "roundtrip": { + "p50": 942.0480132102966, + "p90": 1113.2160425186157, + "p95": 2688.607931137085, + "p99": 4128.064155578613 + }, + "isolatedSum": { + "p50": 973.7599790096283, + "p90": 2410.816043615341, + "p95": 4063.744068145752, + "p99": 7671.295881271362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 696.8640089035034, + "p90": 1924.2240190505981, + "p95": 2325.0880241394043, + "p99": 3880.703926086426 + }, + "combine": { + "p50": 301.9520044326782, + "p90": 324.67201352119446, + "p95": 1631.1039924621582, + "p99": 2160.032033920288 + }, + "roundtrip": { + "p50": 973.0560183525085, + "p90": 1163.9360189437866, + "p95": 2704.67209815979, + "p99": 4416.416168212891 + }, + "isolatedSum": { + "p50": 998.8160133361816, + "p90": 2248.8960325717926, + "p95": 3956.1920166015625, + "p99": 6040.735960006714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 698.1760263442993, + "p90": 732.6400279998779, + "p95": 2252.8960704803467, + "p99": 2761.3439559936523 + }, 
+ "combine": { + "p50": 309.53601002693176, + "p90": 339.4559919834137, + "p95": 1806.0799837112427, + "p99": 2417.59991645813 + }, + "roundtrip": { + "p50": 981.8559885025024, + "p90": 1227.552056312561, + "p95": 2579.5838832855225, + "p99": 4091.104030609131 + }, + "isolatedSum": { + "p50": 1007.7120363712311, + "p90": 1072.0960199832916, + "p95": 4058.9760541915894, + "p99": 5178.943872451782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 698.6560225486755, + "p90": 839.0719890594482, + "p95": 2258.847951889038, + "p99": 3834.239959716797 + }, + "combine": { + "p50": 304.1279911994934, + "p90": 328.70399951934814, + "p95": 1483.5840463638306, + "p99": 2367.039918899536 + }, + "roundtrip": { + "p50": 981.0240268707275, + "p90": 1101.8240451812744, + "p95": 2618.2401180267334, + "p99": 3105.9200763702393 + }, + "isolatedSum": { + "p50": 1002.784013748169, + "p90": 1167.7759885787964, + "p95": 3742.4319982528687, + "p99": 6201.279878616333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 694.9440240859985, + "p90": 734.5280051231384, + "p95": 2230.2401065826416, + "p99": 3608.63995552063 + }, + "combine": { + "p50": 305.4719865322113, + "p90": 325.3439962863922, + "p95": 364.51199650764465, + "p99": 2186.4640712738037 + }, + "roundtrip": { + "p50": 978.7840247154236, + "p90": 1194.6239471435547, + "p95": 2682.176113128662, + "p99": 4207.071781158447 + }, + "isolatedSum": { + "p50": 1000.4160106182098, + "p90": 1059.8720014095306, + "p95": 
2594.7521030902863, + "p99": 5795.104026794434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 694.5919990539551, + "p90": 745.5679774284363, + "p95": 2052.4160861968994, + "p99": 2583.6799144744873 + }, + "combine": { + "p50": 306.2720000743866, + "p90": 329.0880024433136, + "p95": 352.4799942970276, + "p99": 2116.703987121582 + }, + "roundtrip": { + "p50": 979.3279767036438, + "p90": 1047.808051109314, + "p95": 2482.111930847168, + "p99": 3031.7440032958984 + }, + "isolatedSum": { + "p50": 1000.8639991283417, + "p90": 1074.6559798717499, + "p95": 2404.896080493927, + "p99": 4700.383901596069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 699.8400092124939, + "p90": 740.9279942512512, + "p95": 2240.959882736206, + "p99": 4025.4077911376953 + }, + "combine": { + "p50": 310.5599880218506, + "p90": 343.84000301361084, + "p95": 1773.568034172058, + "p99": 2235.5198860168457 + }, + "roundtrip": { + "p50": 966.8480157852173, + "p90": 1168.3199405670166, + "p95": 2565.5040740966797, + "p99": 4481.472015380859 + }, + "isolatedSum": { + "p50": 1010.3999972343445, + "p90": 1084.767997264862, + "p95": 4014.527916908264, + "p99": 6260.927677154541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-063afcd6", + "identity": 
"gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_9985c0a9", + "comparisonKey": "a5d23c82289fa35e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:52:25.668761+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 695.5199837684631, + "p90": 1527.0719528198242, + "p95": 3261.4080905914307, + "p99": 4033.34379196167 + }, + "combine": { + "p50": 327.84000039100647, + "p90": 378.9120018482208, + "p95": 1995.7120418548584, + "p99": 3578.3040523529053 + }, + "roundtrip": { + "p50": 985.5359792709351, + "p90": 1360.3520393371582, + "p95": 3355.32808303833, + "p99": 4610.911846160889 + }, + "isolatedSum": { + "p50": 1023.3599841594696, + "p90": 1905.983954668045, + "p95": 5257.120132446289, + "p99": 7611.647844314575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 687.3599886894226, + "p90": 984.8639965057373, + "p95": 2480.6079864501953, + "p99": 3976.3519763946533 + }, + "combine": { + "p50": 312.73600459098816, + "p90": 334.9120020866394, + "p95": 1747.6160526275635, + "p99": 3540.4160022735596 + }, + "roundtrip": { + "p50": 963.3920192718506, + "p90": 1103.3920049667358, + "p95": 2812.351942062378, + "p99": 4497.407913208008 + }, + "isolatedSum": { + "p50": 1000.0959932804108, + "p90": 1319.7759985923767, + "p95": 4228.224039077759, + "p99": 7516.767978668213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 722.8800058364868, + "p90": 775.2000093460083, + "p95": 2196.160078048706, + "p99": 3974.2720127105713 + }, + "combine": { + "p50": 317.6960051059723, + "p90": 349.5360016822815, + "p95": 454.1440010070801, + 
"p99": 3333.7600231170654 + }, + "roundtrip": { + "p50": 936.0640048980713, + "p90": 1064.352035522461, + "p95": 2751.0080337524414, + "p99": 4579.360008239746 + }, + "isolatedSum": { + "p50": 1040.576010942459, + "p90": 1124.7360110282898, + "p95": 2650.304079055786, + "p99": 7308.032035827637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 715.1359915733337, + "p90": 776.2879729270935, + "p95": 2182.528018951416, + "p99": 3826.6561031341553 + }, + "combine": { + "p50": 319.68000531196594, + "p90": 354.4960021972656, + "p95": 1465.791940689087, + "p99": 3523.4880447387695 + }, + "roundtrip": { + "p50": 991.0399913787842, + "p90": 1087.6480340957642, + "p95": 2878.5600662231445, + "p99": 4408.383846282959 + }, + "isolatedSum": { + "p50": 1034.8159968852997, + "p90": 1130.7839751243591, + "p95": 3648.319959640503, + "p99": 7350.144147872925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 690.7839775085449, + "p90": 750.1440048217773, + "p95": 2160.991907119751, + "p99": 3825.472116470337 + }, + "combine": { + "p50": 312.6719892024994, + "p90": 344.60800886154175, + "p95": 382.9439878463745, + "p99": 2567.7120685577393 + }, + "roundtrip": { + "p50": 977.6960015296936, + "p90": 1068.2239532470703, + "p95": 2687.839984893799, + "p99": 4366.464138031006 + }, + "isolatedSum": { + "p50": 1003.4559667110443, + "p90": 1094.752013683319, + "p95": 2543.9358949661255, + "p99": 6393.184185028076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + 
"combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 702.2079825401306, + "p90": 754.4320225715637, + "p95": 2144.927978515625, + "p99": 3811.232089996338 + }, + "combine": { + "p50": 313.34400177001953, + "p90": 333.5359990596771, + "p95": 1681.823968887329, + "p99": 3564.7358894348145 + }, + "roundtrip": { + "p50": 967.1040177345276, + "p90": 1063.87197971344, + "p95": 2773.1199264526367, + "p99": 4400.447845458984 + }, + "isolatedSum": { + "p50": 1015.5519843101501, + "p90": 1087.9680216312408, + "p95": 3826.751947402954, + "p99": 7375.967979431152 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 708.9920043945312, + "p90": 769.2480087280273, + "p95": 2085.184097290039, + "p99": 3810.175895690918 + }, + "combine": { + "p50": 322.2079873085022, + "p90": 347.4240005016327, + "p95": 1690.176010131836, + "p99": 3416.032075881958 + }, + "roundtrip": { + "p50": 1001.7919540405273, + "p90": 1084.5760107040405, + "p95": 2646.944046020508, + "p99": 4383.808135986328 + }, + "isolatedSum": { + "p50": 1031.1999917030334, + "p90": 1116.67200922966, + "p95": 3775.360107421875, + "p99": 7226.207971572876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 684.9279999732971, + "p90": 760.640025138855, + "p95": 2326.6561031341553, + "p99": 4019.8721885681152 + }, + 
"combine": { + "p50": 304.6720027923584, + "p90": 342.3680067062378, + "p95": 490.27198553085327, + "p99": 3515.0399208068848 + }, + "roundtrip": { + "p50": 945.1839923858643, + "p90": 1034.1440439224243, + "p95": 2694.943904876709, + "p99": 4448.671817779541 + }, + "isolatedSum": { + "p50": 989.6000027656555, + "p90": 1103.0080318450928, + "p95": 2816.9280886650085, + "p99": 7534.912109375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b9ffa222", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||431e44245dd1524", + "colorKey": "gb300_85029fa5", + "comparisonKey": "fa6214a0e8c51f7a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:49:23.035891+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "431e44245dd1524", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 735.8719706535339, + "p90": 2038.6879444122314, + "p95": 2457.6001167297363, + "p99": 3858.4320545196533 + }, + "combine": { + "p50": 352.57598757743835, + "p90": 411.0719859600067, + "p95": 1832.703948020935, + "p99": 3036.2560749053955 + }, + "roundtrip": { + "p50": 1053.760051727295, + "p90": 1458.6559534072876, + "p95": 2916.5759086608887, + "p99": 4374.495983123779 + }, + "isolatedSum": { + "p50": 1088.4479582309723, + "p90": 2449.759930372238, + "p95": 4290.304064750671, + "p99": 6894.688129425049 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 732.7359914779663, + "p90": 1502.9759407043457, + "p95": 2795.84002494812, + "p99": 5310.207843780518 + }, + "combine": { + "p50": 328.575998544693, + "p90": 359.74401235580444, + "p95": 1675.2959489822388, + "p99": 3554.3038845062256 + }, + "roundtrip": { + "p50": 1022.6880311965942, + "p90": 1206.6559791564941, + "p95": 2765.9521102905273, + "p99": 5584.0959548950195 + }, + "isolatedSum": 
{ + "p50": 1061.3119900226593, + "p90": 1862.7199530601501, + "p95": 4471.135973930359, + "p99": 8864.511728286743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 721.5679883956909, + "p90": 783.9040160179138, + "p95": 2279.5519828796387, + "p99": 3991.935968399048 + }, + "combine": { + "p50": 327.93599367141724, + "p90": 363.072007894516, + "p95": 473.91998767852783, + "p99": 3159.5840454101562 + }, + "roundtrip": { + "p50": 1034.0800285339355, + "p90": 1175.5520105361938, + "p95": 2607.3920726776123, + "p99": 4242.015838623047 + }, + "isolatedSum": { + "p50": 1049.5039820671082, + "p90": 1146.9760239124298, + "p95": 2753.4719705581665, + "p99": 7151.520013809204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 736.1279726028442, + "p90": 814.4320249557495, + "p95": 2303.1039237976074, + "p99": 4988.255977630615 + }, + "combine": { + "p50": 321.4400112628937, + "p90": 357.37600922584534, + "p95": 1915.7439470291138, + "p99": 3373.055934906006 + }, + "roundtrip": { + "p50": 1007.9360008239746, + "p90": 1381.4719915390015, + "p95": 2795.3920364379883, + "p99": 4173.855781555176 + }, + "isolatedSum": { + "p50": 1057.567983865738, + "p90": 1171.8080341815948, + "p95": 4218.847870826721, + "p99": 8361.311912536621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 720.0319766998291, + "p90": 754.9120187759399, + "p95": 2072.3841190338135, + "p99": 3580.2879333496094 + }, + "combine": { + "p50": 320.3519880771637, + "p90": 344.7360098361969, + "p95": 1135.3280544281006, + "p99": 3342.4320220947266 + }, + "roundtrip": { + "p50": 1018.3680057525635, + "p90": 1151.039958000183, + "p95": 2663.968086242676, + "p99": 4401.855945587158 + }, + "isolatedSum": { + "p50": 1040.3839647769928, + "p90": 1099.6480286121368, + "p95": 3207.712173461914, + "p99": 6922.719955444336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 719.7120189666748, + "p90": 812.4160170555115, + "p95": 2071.6159343719482, + "p99": 3969.9840545654297 + }, + "combine": { + "p50": 321.75999879837036, + "p90": 357.9519987106323, + "p95": 1513.9199495315552, + "p99": 3255.232095718384 + }, + "roundtrip": { + "p50": 1037.2480154037476, + "p90": 1173.5999584197998, + "p95": 2647.200107574463, + "p99": 4641.503810882568 + }, + "isolatedSum": { + "p50": 1041.4720177650452, + "p90": 1170.3680157661438, + "p95": 3585.5358839035034, + "p99": 7225.2161502838135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 690.1440024375916, + "p90": 784.3199968338013, + "p95": 2161.3121032714844, + "p99": 3703.3278942108154 + }, + "combine": { + "p50": 313.02401423454285, + "p90": 336.06401085853577, + "p95": 1617.8879737854004, + "p99": 3320.2879428863525 + }, + "roundtrip": { + "p50": 
996.5440034866333, + "p90": 1888.6079788208008, + "p95": 2963.200092315674, + "p99": 29710.176467895508 + }, + "isolatedSum": { + "p50": 1003.1680166721344, + "p90": 1120.384007692337, + "p95": 3779.2000770568848, + "p99": 7023.615837097168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 706.7840099334717, + "p90": 750.2719759941101, + "p95": 2236.7680072784424, + "p99": 3724.639892578125 + }, + "combine": { + "p50": 318.1439936161041, + "p90": 343.1360125541687, + "p95": 462.5599980354309, + "p99": 3344.480037689209 + }, + "roundtrip": { + "p50": 1005.728006362915, + "p90": 1129.0559768676758, + "p95": 2566.59197807312, + "p99": 4393.087863922119 + }, + "isolatedSum": { + "p50": 1024.9280035495758, + "p90": 1093.4079885482788, + "p95": 2699.3280053138733, + "p99": 7069.119930267334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b75c67a6", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|4|prefill|normal|none|none|0|tuned||ea1d5fe0776b7aa", + "colorKey": "gb300_41ecd4d6", + "comparisonKey": "54e631fbadaf8a48", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:40:44.493883+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + 
"transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ea1d5fe0776b7aa", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 571.071982383728, + "p90": 582.8160047531128, + "p95": 586.4959955215454, + "p99": 2311.8720054626465 + }, + "combine": { + "p50": 257.7599883079529, + "p90": 270.4319953918457, + "p95": 275.4879891872406, + "p99": 2371.9680309295654 + }, + "roundtrip": { + "p50": 787.6480221748352, + "p90": 799.839973449707, + "p95": 812.9600286483765, + "p99": 3382.848024368286 + }, + "isolatedSum": { + "p50": 828.8319706916809, + "p90": 853.2480001449585, + "p95": 861.983984708786, + "p99": 4683.840036392212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
26765312, + "combineLogicalBytes": 26765312, + "fanoutMean": 3.646484375, + "recvTokensMax": 1118, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 577.9200196266174, + "p90": 596.1599946022034, + "p95": 630.8799982070923, + "p99": 2468.7039852142334 + }, + "combine": { + "p50": 353.8239896297455, + "p90": 364.44801092147827, + "p95": 387.1999979019165, + "p99": 2143.2321071624756 + }, + "roundtrip": { + "p50": 879.967987537384, + "p90": 899.8399972915649, + "p95": 904.8320055007935, + "p99": 2915.231943130493 + }, + "isolatedSum": { + "p50": 931.7440092563629, + "p90": 960.6080055236816, + "p95": 1018.0799961090088, + "p99": 4611.936092376709 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53258240, + "combineLogicalBytes": 53258240, + "fanoutMean": 3.6279296875, + "recvTokensMax": 2159, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 615.1679754257202, + "p90": 631.55198097229, + "p95": 636.7679834365845, + "p99": 2575.648069381714 + }, + "combine": { + "p50": 635.807991027832, + "p90": 647.2640037536621, + "p95": 663.7120246887207, + "p99": 1660.2239608764648 + }, + "roundtrip": { + "p50": 1171.0079908370972, + "p90": 1190.9120082855225, + "p95": 1224.4479656219482, + "p99": 2918.303966522217 + }, + "isolatedSum": { + "p50": 1250.9759664535522, + "p90": 1278.8159847259521, + "p95": 1300.4800081253052, + "p99": 4235.872030258179 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106244096, + "combineLogicalBytes": 106244096, + "fanoutMean": 3.61865234375, + "recvTokensMax": 4221, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 757.2479844093323, + "p90": 774.3359804153442, + "p95": 782.4959754943848, + "p99": 
2220.992088317871 + }, + "combine": { + "p50": 1171.2000370025635, + "p90": 1181.9519996643066, + "p95": 1189.344048500061, + "p99": 1752.1599531173706 + }, + "roundtrip": { + "p50": 1849.503993988037, + "p90": 1866.9439554214478, + "p95": 2023.0400562286377, + "p99": 2950.5279064178467 + }, + "isolatedSum": { + "p50": 1928.4480214118958, + "p90": 1956.2879800796509, + "p95": 1971.8400239944458, + "p99": 3973.1520414352417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 8273, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 932.9919815063477, + "p90": 953.279972076416, + "p95": 961.4400267601013, + "p99": 2136.960029602051 + }, + "combine": { + "p50": 2209.023952484131, + "p90": 2217.5040245056152, + "p95": 2221.8880653381348, + "p99": 2264.0960216522217 + }, + "roundtrip": { + "p50": 3052.639961242676, + "p90": 3071.039915084839, + "p95": 3080.512046813965, + "p99": 3601.6321182250977 + }, + "isolatedSum": { + "p50": 3142.0159339904785, + "p90": 3170.7839965820312, + "p95": 3183.328092098236, + "p99": 4401.0560512542725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 425191424, + "combineLogicalBytes": 425191424, + "fanoutMean": 3.6204833984375, + "recvTokensMax": 16469, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1313.7919902801514, + "p90": 1330.4959535598755, + "p95": 1340.8960103988647, + "p99": 1987.2000217437744 + }, + "combine": { + "p50": 4293.69592666626, + "p90": 4306.464195251465, + "p95": 4310.815811157227, + "p99": 4323.488235473633 + }, + "roundtrip": { + "p50": 5524.223804473877, + "p90": 5544.032096862793, + "p95": 5549.215793609619, + "p99": 5633.279800415039 + }, + "isolatedSum": { + "p50": 
5607.487916946411, + "p90": 5636.96014881134, + "p95": 5651.711821556091, + "p99": 6310.688257217407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 849278976, + "combineLogicalBytes": 849278976, + "fanoutMean": 3.61578369140625, + "recvTokensMax": 32881, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-765ad6b9", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|4|prefill|normal|none|none|0|tuned||1104ab83732593b", + "colorKey": "gb300_68d1366e", + "comparisonKey": "acd949dc9c464571", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:26.712461+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "1104ab83732593b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, 
+ "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 554.3360114097595, + "p90": 564.6719932556152, + "p95": 568.4159994125366, + "p99": 2559.8719120025635 + }, + "combine": { + "p50": 253.1520128250122, + "p90": 265.6640112400055, + "p95": 268.2560086250305, + "p99": 2446.2718963623047 + }, + "roundtrip": { + "p50": 765.1839852333069, + "p90": 776.416003704071, + "p95": 785.9200239181519, + "p99": 3311.7120265960693 + }, + "isolatedSum": { + "p50": 807.4880242347717, + "p90": 830.3360044956207, + "p95": 836.6720080375671, + "p99": 5006.143808364868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 4, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 557.9839944839478, + "p90": 572.4480152130127, + "p95": 582.6240181922913, + "p99": 2446.9120502471924 + }, + "combine": { + "p50": 351.4559864997864, + "p90": 360.48001050949097, + "p95": 368.5440123081207, + "p99": 2397.696018218994 + }, + "roundtrip": { + "p50": 863.0080223083496, + "p90": 875.711977481842, + "p95": 886.4960074424744, + "p99": 3185.120105743408 + }, + "isolatedSum": { + "p50": 909.4399809837341, + "p90": 932.9280257225037, + "p95": 951.168030500412, + "p99": 4844.6080684661865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 4, + "recvTokensMax": 2048, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 601.855993270874, + "p90": 616.2559986114502, + "p95": 623.9039897918701, + "p99": 2755.8720111846924 + }, + "combine": { + "p50": 635.5839967727661, + "p90": 646.2079882621765, + "p95": 655.1679968833923, + "p99": 1743.2960271835327 + }, + "roundtrip": { + "p50": 1160.9280109405518, + "p90": 1173.4720468521118, + "p95": 1185.4079961776733, + "p99": 2700.4799842834473 + }, + "isolatedSum": { + "p50": 1237.4399900436401, + "p90": 1262.4639868736267, + "p95": 1279.0719866752625, + "p99": 4499.168038368225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 4, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 746.0479736328125, + "p90": 829.4079899787903, + "p95": 877.5680065155029, + "p99": 2555.999994277954 + }, + "combine": { + "p50": 1166.0799980163574, + "p90": 1192.3199892044067, + "p95": 1226.1439561843872, + "p99": 1603.0399799346924 + }, + "roundtrip": { + "p50": 1835.9040021896362, + "p90": 1852.4160385131836, + "p95": 1955.3600549697876, + "p99": 2671.488046646118 + }, + "isolatedSum": { + "p50": 1912.12797164917, + "p90": 2021.727979183197, + "p95": 2103.71196269989, + "p99": 4159.0399742126465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 4, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 920.0000166893005, + "p90": 934.5279932022095, + "p95": 942.5600171089172, + "p99": 2376.5439987182617 + }, + "combine": { + "p50": 2208.6079120635986, + "p90": 2217.535972595215, + "p95": 2220.927953720093, + "p99": 2230.4320335388184 + 
}, + "roundtrip": { + "p50": 3037.3120307922363, + "p90": 3051.9039630889893, + "p95": 3063.3280277252197, + "p99": 3574.336051940918 + }, + "isolatedSum": { + "p50": 3128.607928752899, + "p90": 3152.0639657974243, + "p95": 3163.48797082901, + "p99": 4606.97603225708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 4, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1297.376036643982, + "p90": 1316.0320520401, + "p95": 1409.0559482574463, + "p99": 2108.128070831299 + }, + "combine": { + "p50": 4284.607887268066, + "p90": 4298.1438636779785, + "p95": 4302.464008331299, + "p99": 4309.9517822265625 + }, + "roundtrip": { + "p50": 5504.89616394043, + "p90": 5542.816162109375, + "p95": 5588.799953460693, + "p99": 5654.751777648926 + }, + "isolatedSum": { + "p50": 5581.983923912048, + "p90": 5614.175915718079, + "p95": 5711.519956588745, + "p99": 6418.079853057861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 4, + "recvTokensMax": 32768, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-613fec85", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|4|prefill|normal|none|none|0|tuned||e15d35cfeaea91f", + "colorKey": "gb300_85b0db41", + "comparisonKey": "a7ad3639d8728101", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:41:52.136382+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "e15d35cfeaea91f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 680.1919937133789, + "p90": 712.6719951629639, + "p95": 726.1120080947876, + "p99": 2456.2559127807617 + }, + "combine": { + "p50": 310.2079927921295, + "p90": 329.02398705482483, + "p95": 339.07198905944824, + "p99": 2113.663911819458 + }, + "roundtrip": { + "p50": 954.5599818229675, + "p90": 985.7280254364014, + "p95": 1013.856053352356, + "p99": 2987.3600006103516 + }, + "isolatedSum": { + "p50": 990.3999865055084, + "p90": 1041.6959822177887, + "p95": 1065.1839971542358, + "p99": 
4569.91982460022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 697.5039839744568, + "p90": 746.9120025634766, + "p95": 766.8160200119019, + "p99": 2443.6800479888916 + }, + "combine": { + "p50": 646.6240286827087, + "p90": 658.9760184288025, + "p95": 1074.079990386963, + "p99": 1788.4800434112549 + }, + "roundtrip": { + "p50": 1284.991979598999, + "p90": 1327.0399570465088, + "p95": 1440.1919841766357, + "p99": 2692.863941192627 + }, + "isolatedSum": { + "p50": 1344.1280126571655, + "p90": 1405.888020992279, + "p95": 1840.8960103988647, + "p99": 4232.1600914001465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 1, + "recvTokensMax": 4096, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 991.6800260543823, + "p90": 1026.144027709961, + "p95": 1195.3279972076416, + "p99": 2087.4240398406982 + }, + "combine": { + "p50": 2211.7760181427, + "p90": 2221.407890319824, + "p95": 2225.152015686035, + "p99": 2234.879970550537 + }, + "roundtrip": { + "p50": 3132.3840618133545, + "p90": 3167.167901992798, + "p95": 3185.472011566162, + "p99": 3549.1199493408203 + }, + "isolatedSum": { + "p50": 3203.4560441970825, + "p90": 3247.551918029785, + "p95": 3420.4800128936768, + "p99": 4322.304010391235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 1, + "recvTokensMax": 16384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2ec1cfef", + "identity": 
"gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|4|prefill|normal|none|none|0|tuned||33484f7e5b87248", + "colorKey": "gb300_2cbcb2a0", + "comparisonKey": "fc73b792e3880be1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:45:37.800349+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "33484f7e5b87248", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 557.0240020751953, + "p90": 658.30397605896, + "p95": 681.7600131034851, + "p99": 2973.2160568237305 + }, + "combine": { + "p50": 252.3519992828369, + "p90": 276.67200565338135, + "p95": 292.83198714256287, + "p99": 327.5200128555298 + }, + "roundtrip": { + "p50": 757.5039863586426, + "p90": 847.7759957313538, + "p95": 887.4239921569824, + "p99": 3544.1598892211914 + }, + "isolatedSum": { + "p50": 809.3760013580322, + "p90": 934.9759817123413, + "p95": 974.592000246048, + "p99": 3300.7360696792603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11010048, + "combineLogicalBytes": 11010048, + "fanoutMean": 1.5, + "recvTokensMax": 1152, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 549.3760108947754, + "p90": 567.1039819717407, + "p95": 576.960027217865, + "p99": 2357.9840660095215 + }, + "combine": { + "p50": 353.85599732398987, + "p90": 361.56800389289856, + "p95": 365.34398794174194, + "p99": 2196.415901184082 + }, + "roundtrip": { + "p50": 855.6479811668396, + "p90": 871.4240193367004, + "p95": 881.2479972839355, + "p99": 3190.5601024627686 + }, + "isolatedSum": { + "p50": 903.2320082187653, + "p90": 928.6719858646393, + "p95": 942.3040151596069, + "p99": 4554.3999671936035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22020096, + "combineLogicalBytes": 22020096, + "fanoutMean": 1.5, + "recvTokensMax": 2304, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 599.3599891662598, + "p90": 657.4720144271851, + "p95": 705.6639790534973, + "p99": 2724.1599559783936 + }, + "combine": { + "p50": 643.8400149345398, + "p90": 670.1120138168335, + "p95": 709.5680236816406, + "p99": 
2124.8319149017334 + }, + "roundtrip": { + "p50": 1177.7280569076538, + "p90": 1275.2959728240967, + "p95": 1309.8560571670532, + "p99": 2956.768035888672 + }, + "isolatedSum": { + "p50": 1243.2000041007996, + "p90": 1327.5840282440186, + "p95": 1415.232002735138, + "p99": 4848.991870880127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44040192, + "combineLogicalBytes": 44040192, + "fanoutMean": 1.5, + "recvTokensMax": 4608, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 745.248019695282, + "p90": 821.183979511261, + "p95": 843.8720107078552, + "p99": 2300.4798889160156 + }, + "combine": { + "p50": 1181.1200380325317, + "p90": 1191.3280487060547, + "p95": 1226.304054260254, + "p99": 1702.8160095214844 + }, + "roundtrip": { + "p50": 1852.8000116348267, + "p90": 1872.607946395874, + "p95": 2030.56001663208, + "p99": 2765.7599449157715 + }, + "isolatedSum": { + "p50": 1926.3680577278137, + "p90": 2012.5120282173157, + "p95": 2070.176064968109, + "p99": 4003.2958984375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 88080384, + "combineLogicalBytes": 88080384, + "fanoutMean": 1.5, + "recvTokensMax": 9216, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 939.2639994621277, + "p90": 958.9120149612427, + "p95": 973.3440279960632, + "p99": 2377.023935317993 + }, + "combine": { + "p50": 2241.408109664917, + "p90": 2256.704092025757, + "p95": 2270.2720165252686, + "p99": 2308.864116668701 + }, + "roundtrip": { + "p50": 3103.1999588012695, + "p90": 3193.023920059204, + "p95": 3247.3599910736084, + "p99": 4304.992198944092 + }, + "isolatedSum": { + "p50": 3180.6721091270447, + "p90": 3215.6161069869995, + "p95": 3243.616044521332, + "p99": 4685.888051986694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 176160768, 
+ "combineLogicalBytes": 176160768, + "fanoutMean": 1.5, + "recvTokensMax": 18432, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1377.1840333938599, + "p90": 1401.7280340194702, + "p95": 1631.168007850647, + "p99": 2257.567882537842 + }, + "combine": { + "p50": 4354.6881675720215, + "p90": 4368.031978607178, + "p95": 4372.128009796143, + "p99": 4383.423805236816 + }, + "roundtrip": { + "p50": 5655.9038162231445, + "p90": 5686.880111694336, + "p95": 5723.999977111816, + "p99": 5813.663959503174 + }, + "isolatedSum": { + "p50": 5731.872200965881, + "p90": 5769.760012626648, + "p95": 6003.29601764679, + "p99": 6640.991687774658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352321536, + "combineLogicalBytes": 352321536, + "fanoutMean": 1.5, + "recvTokensMax": 36864, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ef5c7537", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|4|prefill|normal|none|none|0|tuned||b8e52e92c6d3379", + "colorKey": "gb300_03c0b464", + "comparisonKey": "08e9a1c86ba0489f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:44:12.303148+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", 
+ "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "b8e52e92c6d3379", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 563.6159777641296, + "p90": 578.7839889526367, + "p95": 606.8800091743469, + "p99": 2870.62406539917 + }, + "combine": { + "p50": 255.64798712730408, + "p90": 266.4960026741028, + "p95": 268.5120105743408, + "p99": 294.0160036087036 + }, + "roundtrip": { + "p50": 782.8800082206726, + "p90": 793.1200265884399, + "p95": 797.5360155105591, + "p99": 3248.095989227295 + }, + "isolatedSum": { + "p50": 819.2639648914337, + "p90": 845.2799916267395, + "p95": 875.3920197486877, + "p99": 3164.6400690078735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26492928, + "combineLogicalBytes": 26492928, + "fanoutMean": 3.609375, + "recvTokensMax": 1373, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 568.1279897689819, + "p90": 
584.2559933662415, + "p95": 590.3679728507996, + "p99": 2698.0481147766113 + }, + "combine": { + "p50": 353.7920117378235, + "p90": 364.44801092147827, + "p95": 371.4880049228668, + "p99": 2335.263967514038 + }, + "roundtrip": { + "p50": 871.9040155410767, + "p90": 885.6639862060547, + "p95": 899.5199799537659, + "p99": 3038.8801097869873 + }, + "isolatedSum": { + "p50": 921.9200015068054, + "p90": 948.7040042877197, + "p95": 961.8559777736664, + "p99": 5033.312082290649 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53100544, + "combineLogicalBytes": 53100544, + "fanoutMean": 3.6171875, + "recvTokensMax": 2764, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 613.8560175895691, + "p90": 628.1599998474121, + "p95": 631.2320232391357, + "p99": 2635.2319717407227 + }, + "combine": { + "p50": 647.0720171928406, + "p90": 657.535970211029, + "p95": 663.2320284843445, + "p99": 1773.0560302734375 + }, + "roundtrip": { + "p50": 1209.439992904663, + "p90": 1221.2480306625366, + "p95": 1231.0400009155273, + "p99": 2665.40789604187 + }, + "isolatedSum": { + "p50": 1260.9280347824097, + "p90": 1285.6959700584412, + "p95": 1294.4640517234802, + "p99": 4408.28800201416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106373120, + "combineLogicalBytes": 106373120, + "fanoutMean": 3.623046875, + "recvTokensMax": 5507, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 770.3359723091125, + "p90": 783.7759852409363, + "p95": 790.336012840271, + "p99": 2348.896026611328 + }, + "combine": { + "p50": 1186.7519617080688, + "p90": 1201.5680074691772, + "p95": 1206.6880464553833, + "p99": 1591.1999940872192 + }, + "roundtrip": { + "p50": 1912.608027458191, + "p90": 1930.7520389556885, + "p95": 2131.103992462158, + "p99": 2871.392011642456 + }, + 
"isolatedSum": { + "p50": 1957.0879340171814, + "p90": 1985.3439927101135, + "p95": 1997.0240592956543, + "p99": 3940.0960206985474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212402176, + "combineLogicalBytes": 212402176, + "fanoutMean": 3.6171875, + "recvTokensMax": 11137, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 976.2560129165649, + "p90": 992.3520088195801, + "p95": 1050.2079725265503, + "p99": 2271.199941635132 + }, + "combine": { + "p50": 2259.2639923095703, + "p90": 2270.688056945801, + "p95": 2275.3920555114746, + "p99": 2284.480094909668 + }, + "roundtrip": { + "p50": 3204.479932785034, + "p90": 3223.2320308685303, + "p95": 3309.1518878936768, + "p99": 3677.504062652588 + }, + "isolatedSum": { + "p50": 3235.5200052261353, + "p90": 3263.040065765381, + "p95": 3325.600028038025, + "p99": 4555.6800365448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423958528, + "combineLogicalBytes": 423958528, + "fanoutMean": 3.6099853515625, + "recvTokensMax": 22293, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1629.472017288208, + "p90": 1649.2480039596558, + "p95": 1659.2960357666016, + "p99": 1872.83194065094 + }, + "combine": { + "p50": 4457.024097442627, + "p90": 4468.800067901611, + "p95": 4472.896099090576, + "p99": 4478.335857391357 + }, + "roundtrip": { + "p50": 5987.071990966797, + "p90": 6004.127979278564, + "p95": 6010.752201080322, + "p99": 6039.103984832764 + }, + "isolatedSum": { + "p50": 6086.496114730835, + "p90": 6118.048071861267, + "p95": 6132.192134857178, + "p99": 6351.167798042297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847988736, + "combineLogicalBytes": 847988736, + "fanoutMean": 3.61029052734375, + "recvTokensMax": 44503, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fac189c4", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|4|prefill|normal|none|none|0|tuned||5f9878f45872329", + "colorKey": "gb300_07142b8a", + "comparisonKey": "96a4ea20e62153bb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:50:45.047796+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "5f9878f45872329", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.358123779296875, + "eplbImbalanceAfter": 1.000026818477746, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + 
"url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 556.1280250549316, + "p90": 569.4079995155334, + "p95": 579.1360139846802, + "p99": 2815.840005874634 + }, + "combine": { + "p50": 254.5279860496521, + "p90": 265.855997800827, + "p95": 269.4079875946045, + "p99": 2375.4560947418213 + }, + "roundtrip": { + "p50": 769.9199914932251, + "p90": 782.3039889335632, + "p95": 792.3200130462646, + "p99": 3183.3600997924805 + }, + "isolatedSum": { + "p50": 810.6560111045837, + "p90": 835.2639973163605, + "p95": 848.5440015792847, + "p99": 5191.296100616455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26664960, + "combineLogicalBytes": 26664960, + "fanoutMean": 3.6328125, + "recvTokensMax": 1074, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 559.1359734535217, + "p90": 571.9040036201477, + "p95": 586.9119763374329, + "p99": 2718.91188621521 + }, + "combine": { + "p50": 350.71998834609985, + "p90": 359.0719997882843, + "p95": 373.9840090274811, + "p99": 2147.520065307617 + }, + "roundtrip": { + "p50": 865.343987941742, + "p90": 884.3839764595032, + "p95": 924.3519902229309, + "p99": 3057.8560829162598 + }, + "isolatedSum": { + "p50": 909.8559617996216, + "p90": 930.976003408432, + "p95": 960.8959853649139, + "p99": 4866.431951522827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53143552, + "combineLogicalBytes": 53143552, + "fanoutMean": 3.6201171875, + "recvTokensMax": 2164, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 603.7120223045349, + "p90": 619.1359758377075, + "p95": 631.55198097229, + "p99": 
2728.543996810913 + }, + "combine": { + "p50": 636.7999911308289, + "p90": 648.6719846725464, + "p95": 1164.415955543518, + "p99": 2014.2719745635986 + }, + "roundtrip": { + "p50": 1164.5760536193848, + "p90": 1177.9839992523193, + "p95": 1232.6719760894775, + "p99": 2797.856092453003 + }, + "isolatedSum": { + "p50": 1240.5120134353638, + "p90": 1267.807960510254, + "p95": 1795.967936515808, + "p99": 4742.815971374512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 106258432, + "combineLogicalBytes": 106258432, + "fanoutMean": 3.619140625, + "recvTokensMax": 4228, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 743.5200214385986, + "p90": 757.6640248298645, + "p95": 767.5520181655884, + "p99": 2395.519971847534 + }, + "combine": { + "p50": 1167.9359674453735, + "p90": 1178.5919666290283, + "p95": 1188.5759830474854, + "p99": 1693.0559873580933 + }, + "roundtrip": { + "p50": 1838.4640216827393, + "p90": 1855.2320003509521, + "p95": 2002.8159618377686, + "p99": 2671.0400581359863 + }, + "isolatedSum": { + "p50": 1911.4559888839722, + "p90": 1936.2559914588928, + "p95": 1956.1280012130737, + "p99": 4088.5759592056274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 212645888, + "combineLogicalBytes": 212645888, + "fanoutMean": 3.621337890625, + "recvTokensMax": 8397, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 917.4399971961975, + "p90": 933.9200258255005, + "p95": 953.0879855155945, + "p99": 2319.711923599243 + }, + "combine": { + "p50": 2208.479881286621, + "p90": 2218.656063079834, + "p95": 2221.3759422302246, + "p99": 2236.543893814087 + }, + "roundtrip": { + "p50": 3039.3919944763184, + "p90": 3056.3199520111084, + "p95": 3071.1679458618164, + "p99": 3487.071990966797 + }, + "isolatedSum": { + "p50": 
3125.9198784828186, + "p90": 3152.5760889053345, + "p95": 3174.463927745819, + "p99": 4556.25581741333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 424775680, + "combineLogicalBytes": 424775680, + "fanoutMean": 3.616943359375, + "recvTokensMax": 16520, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1297.9199886322021, + "p90": 1317.2800540924072, + "p95": 1396.9919681549072, + "p99": 2035.9361171722412 + }, + "combine": { + "p50": 4288.415908813477, + "p90": 4300.127983093262, + "p95": 4304.512023925781, + "p99": 4314.303874969482 + }, + "roundtrip": { + "p50": 5508.16011428833, + "p90": 5529.2158126831055, + "p95": 5536.416053771973, + "p99": 5699.007987976074 + }, + "isolatedSum": { + "p50": 5586.335897445679, + "p90": 5617.408037185669, + "p95": 5701.5039920806885, + "p99": 6350.239992141724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 848547840, + "combineLogicalBytes": 848547840, + "fanoutMean": 3.6126708984375, + "recvTokensMax": 32772, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-32ad3703", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|4|prefill|normal|none|none|0|tuned||ed21345b2de53e0", + "colorKey": "gb300_99da9098", + "comparisonKey": "e3358a20d18aa956", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:44:55.207737+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · uniform+eplb", + 
"model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "ed21345b2de53e0", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.003448486328125, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 558.463990688324, + "p90": 570.527970790863, + "p95": 580.7679891586304, + "p99": 2382.3680877685547 + }, + "combine": { + "p50": 255.23200631141663, + "p90": 267.4880027770996, + "p95": 270.3999876976013, + "p99": 2431.328058242798 + }, + "roundtrip": { + "p50": 772.8000283241272, + "p90": 784.4799757003784, + "p95": 798.0480194091797, + "p99": 3272.768020629883 + }, + "isolatedSum": { + "p50": 813.6959969997406, + "p90": 838.0159735679626, + "p95": 851.1679768562317, + "p99": 4813.6961460113525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 
3.580078125, + "recvTokensMax": 1057, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 563.2320046424866, + "p90": 581.9839835166931, + "p95": 587.9999995231628, + "p99": 2682.976007461548 + }, + "combine": { + "p50": 351.4240086078644, + "p90": 359.6160113811493, + "p95": 365.9200072288513, + "p99": 2297.856092453003 + }, + "roundtrip": { + "p50": 869.4080114364624, + "p90": 886.1439824104309, + "p95": 906.6240191459656, + "p99": 3075.648069381714 + }, + "isolatedSum": { + "p50": 914.656013250351, + "p90": 941.5999948978424, + "p95": 953.9200067520142, + "p99": 4980.832099914551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52570112, + "combineLogicalBytes": 52570112, + "fanoutMean": 3.5810546875, + "recvTokensMax": 2088, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 610.368013381958, + "p90": 623.9680051803589, + "p95": 628.4480094909668, + "p99": 2558.9759349823 + }, + "combine": { + "p50": 637.8880143165588, + "p90": 646.2399959564209, + "p95": 779.1039943695068, + "p99": 1777.5039672851562 + }, + "roundtrip": { + "p50": 1170.3039407730103, + "p90": 1184.8959922790527, + "p95": 1229.8879623413086, + "p99": 2768.9919471740723 + }, + "isolatedSum": { + "p50": 1248.2560276985168, + "p90": 1270.2080011367798, + "p95": 1407.5520038604736, + "p99": 4336.479902267456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105613312, + "combineLogicalBytes": 105613312, + "fanoutMean": 3.59716796875, + "recvTokensMax": 4157, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 752.0959973335266, + "p90": 763.808012008667, + "p95": 771.776020526886, + "p99": 2470.112085342407 + }, + "combine": { + "p50": 1169.376015663147, 
+ "p90": 1180.7359457015991, + "p95": 1186.3360404968262, + "p99": 1393.02396774292 + }, + "roundtrip": { + "p50": 1846.0479974746704, + "p90": 1862.9120588302612, + "p95": 2031.9359302520752, + "p99": 2834.847927093506 + }, + "isolatedSum": { + "p50": 1921.4720129966736, + "p90": 1944.543957710266, + "p95": 1958.1120610237122, + "p99": 3863.136053085327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211843072, + "combineLogicalBytes": 211843072, + "fanoutMean": 3.607666015625, + "recvTokensMax": 8291, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 925.599992275238, + "p90": 942.0160055160522, + "p95": 955.456018447876, + "p99": 2308.896064758301 + }, + "combine": { + "p50": 2208.2879543304443, + "p90": 2216.1600589752197, + "p95": 2218.9760208129883, + "p99": 2250.7200241088867 + }, + "roundtrip": { + "p50": 3050.5599975585938, + "p90": 3069.216012954712, + "p95": 3113.9841079711914, + "p99": 3548.703908920288 + }, + "isolatedSum": { + "p50": 3133.8879466056824, + "p90": 3158.176064491272, + "p95": 3174.4320392608643, + "p99": 4559.6160888671875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423198720, + "combineLogicalBytes": 423198720, + "fanoutMean": 3.603515625, + "recvTokensMax": 16542, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1312.4480247497559, + "p90": 1332.41605758667, + "p95": 1536.1599922180176, + "p99": 2080.7039737701416 + }, + "combine": { + "p50": 4294.559955596924, + "p90": 4306.560039520264, + "p95": 4309.311866760254, + "p99": 4316.383838653564 + }, + "roundtrip": { + "p50": 5522.1757888793945, + "p90": 5539.872169494629, + "p95": 5546.36812210083, + "p99": 5630.847930908203 + }, + "isolatedSum": { + "p50": 5607.00798034668, + "p90": 5638.976097106934, + "p95": 5845.4718589782715, + 
"p99": 6397.087812423706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 846024704, + "combineLogicalBytes": 846024704, + "fanoutMean": 3.6019287109375, + "recvTokensMax": 32852, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-06280b43", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_8e905a35", + "comparisonKey": "bc4cc4eb71bc560a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:42:39.348720+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 553.7919998168945, + "p90": 569.1199898719788, + "p95": 578.5279870033264, + "p99": 2550.175905227661 + }, + "combine": { + "p50": 255.96800446510315, + "p90": 266.07999205589294, + "p95": 269.1519856452942, + "p99": 2455.552101135254 + }, + "roundtrip": { + "p50": 763.5520100593567, + "p90": 779.9680233001709, + "p95": 802.9760122299194, + "p99": 3134.5279216766357 + }, + "isolatedSum": { + "p50": 809.7600042819977, + "p90": 835.1999819278717, + "p95": 847.6799726486206, + "p99": 5005.728006362915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 3014, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 589.024007320404, + "p90": 610.2079749107361, + "p95": 622.4319934844971, + "p99": 2498.6560344696045 + }, + "combine": { + "p50": 394.23999190330505, + "p90": 407.29600191116333, + "p95": 415.0719940662384, + "p99": 2004.4798851013184 + }, + "roundtrip": { + "p50": 950.7840275764465, + "p90": 966.3040041923523, + "p95": 1006.0800313949585, + "p99": 3010.688066482544 + }, + "isolatedSum": { + "p50": 983.2639992237091, + "p90": 1017.5039768218994, + "p95": 1037.5039875507355, + "p99": 4503.135919570923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 6044, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 687.5200271606445, + "p90": 709.4079852104187, + "p95": 737.2480034828186, + "p99": 2314.847946166992 + }, + "combine": { + "p50": 736.8000149726868, + "p90": 748.6720085144043, + "p95": 1015.8400535583496, + "p99": 1691.8400526046753 + }, + "roundtrip": { + "p50": 1380.8319568634033, + "p90": 1397.0880508422852, + "p95": 1713.4720087051392, + "p99": 2562.527894973755 + }, + "isolatedSum": { + "p50": 1424.3200421333313, + "p90": 1458.079993724823, + "p95": 1753.0880570411682, + "p99": 4006.6879987716675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 12111, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 930.9759736061096, + "p90": 963.808000087738, + "p95": 1294.9440479278564, + "p99": 2168.639898300171 + }, + "combine": { + "p50": 1362.6240491867065, + "p90": 1371.7759847640991, + "p95": 1376.3519525527954, + "p99": 1460.5439901351929 + }, + "roundtrip": { + "p50": 2250.4639625549316, + "p90": 2269.023895263672, + "p95": 2406.65602684021, + "p99": 2904.3519496917725 + }, + "isolatedSum": { + "p50": 2293.600022792816, + "p90": 2335.583984851837, + "p95": 2671.296000480652, + "p99": 3629.1838884353638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 24247, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 1319.7760581970215, + "p90": 1370.5600500106812, + "p95": 1496.3840246200562, + "p99": 1875.4240274429321 + }, + "combine": { + "p50": 2649.9838829040527, + "p90": 2659.2319011688232, + "p95": 2661.439895629883, + "p99": 2665.760040283203 + }, + "roundtrip": { + 
"p50": 3930.4959774017334, + "p90": 3948.352098464966, + "p95": 3956.511974334717, + "p99": 4086.559772491455 + }, + "isolatedSum": { + "p50": 3969.759941101074, + "p90": 4029.7919511795044, + "p95": 4157.823920249939, + "p99": 4541.184067726135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 48503, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 2148.9920616149902, + "p90": 2169.4719791412354, + "p95": 2178.0478954315186, + "p99": 2363.9678955078125 + }, + "combine": { + "p50": 5245.120048522949, + "p90": 5261.055946350098, + "p95": 5266.272068023682, + "p99": 5275.968074798584 + }, + "roundtrip": { + "p50": 7354.30383682251, + "p90": 7375.1678466796875, + "p95": 7380.832195281982, + "p99": 7392.8961753845215 + }, + "isolatedSum": { + "p50": 7394.112110137939, + "p90": 7430.527925491333, + "p95": 7444.3199634552, + "p99": 7639.9359703063965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 97022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cd6574f5", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|4|prefill|normal|none|none|0|tuned||25840dd8241ba10", + "colorKey": "gb300_7b7dff47", + "comparisonKey": "62f1755fa54643e3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:43:29.002637+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "25840dd8241ba10", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 564.8000240325928, + "p90": 579.8720121383667, + "p95": 593.3759808540344, + "p99": 2686.4640712738037 + }, + "combine": { + "p50": 255.5840015411377, + "p90": 268.5120105743408, + "p95": 272.5119888782501, + "p99": 2264.3840312957764 + }, + "roundtrip": { + "p50": 802.5280237197876, + "p90": 817.8880214691162, + "p95": 832.0320248603821, + "p99": 3283.9999198913574 + }, + "isolatedSum": { + "p50": 820.3840255737305, + "p90": 848.3840227127075, + "p95": 865.8879697322845, + "p99": 4950.84810256958 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 9261056, + "combineLogicalBytes": 9261056, + "fanoutMean": 1.26171875, + "recvTokensMax": 3956, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 629.6319961547852, + "p90": 646.4319825172424, + "p95": 659.9040031433105, + "p99": 2702.944040298462 + }, + "combine": { + "p50": 421.2479889392853, + "p90": 435.07200479507446, + "p95": 441.9200122356415, + "p99": 2003.8719177246094 + }, + "roundtrip": { + "p50": 1025.1200199127197, + "p90": 1043.936014175415, + "p95": 1057.2799444198608, + "p99": 2926.5921115875244 + }, + "isolatedSum": { + "p50": 1050.8799850940704, + "p90": 1081.503987312317, + "p95": 1101.824015378952, + "p99": 4706.815958023071 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18335744, + "combineLogicalBytes": 18335744, + "fanoutMean": 1.2490234375, + "recvTokensMax": 7923, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 757.4399709701538, + "p90": 779.2320251464844, + "p95": 808.351993560791, + "p99": 2517.280101776123 + }, + "combine": { + "p50": 796.5120077133179, + "p90": 805.1199913024902, + "p95": 848.25599193573, + "p99": 1472.7040529251099 + }, + "roundtrip": { + "p50": 1507.904052734375, + "p90": 1594.5279598236084, + "p95": 1795.423984527588, + "p99": 2659.4879627227783 + }, + "isolatedSum": { + "p50": 1553.9519786834717, + "p90": 1584.3520164489746, + "p95": 1656.607985496521, + "p99": 3989.984154701233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36900864, + "combineLogicalBytes": 36900864, + "fanoutMean": 1.2568359375, + "recvTokensMax": 15826, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 1050.8160591125488, + "p90": 
1145.6960439682007, + "p95": 1375.167965888977, + "p99": 2232.095956802368 + }, + "combine": { + "p50": 1476.9920110702515, + "p90": 1487.7439737319946, + "p95": 1493.5040473937988, + "p99": 2176.3200759887695 + }, + "roundtrip": { + "p50": 2478.048086166382, + "p90": 2552.6719093322754, + "p95": 2695.136070251465, + "p99": 3923.5520362854004 + }, + "isolatedSum": { + "p50": 2527.8080701828003, + "p90": 2633.4400177001953, + "p95": 2868.672013282776, + "p99": 4408.416032791138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73730048, + "combineLogicalBytes": 73730048, + "fanoutMean": 1.255615234375, + "recvTokensMax": 31657, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 1554.527997970581, + "p90": 1586.4959955215454, + "p95": 1657.5360298156738, + "p99": 2041.759967803955 + }, + "combine": { + "p50": 2926.3360500335693, + "p90": 2936.2239837646484, + "p95": 2939.1040802001953, + "p99": 2945.823907852173 + }, + "roundtrip": { + "p50": 4429.1839599609375, + "p90": 4451.295852661133, + "p95": 4463.3917808532715, + "p99": 4538.656234741211 + }, + "isolatedSum": { + "p50": 4480.86404800415, + "p90": 4522.719979286194, + "p95": 4596.640110015869, + "p99": 4987.583875656128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147746816, + "combineLogicalBytes": 147746816, + "fanoutMean": 1.258056640625, + "recvTokensMax": 63316, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 2606.46390914917, + "p90": 2650.3360271453857, + "p95": 2695.1680183410645, + "p99": 3251.231908798218 + }, + "combine": { + "p50": 5738.560199737549, + "p90": 5752.416133880615, + "p95": 5757.9522132873535, + "p99": 5770.336151123047 + }, + "roundtrip": { + "p50": 8272.480010986328, + "p90": 8295.87173461914, + "p95": 8309.599876403809, + "p99": 
8325.85620880127 + }, + "isolatedSum": { + "p50": 8345.024108886719, + "p90": 8402.752161026001, + "p95": 8453.120231628418, + "p99": 9021.568059921265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295278592, + "combineLogicalBytes": 295278592, + "fanoutMean": 1.25714111328125, + "recvTokensMax": 126650, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8c2ef2df", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|4|prefill|normal|none|none|0|tuned||cabb28c468fd7cf", + "colorKey": "gb300_bdcb6417", + "comparisonKey": "54ba406d07f28b43", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:50:00.837947+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "cabb28c468fd7cf", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 3.86505126953125, + "eplbImbalanceAfter": 1.0000149681454613, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 586.4319801330566, + "p90": 690.8800005912781, + "p95": 714.1759991645813, + "p99": 2611.7119789123535 + }, + "combine": { + "p50": 263.96799087524414, + "p90": 300.57600140571594, + "p95": 320.607990026474, + "p99": 3610.5918884277344 + }, + "roundtrip": { + "p50": 807.3279857635498, + "p90": 919.3599820137024, + "p95": 988.8319969177246, + "p99": 4626.944065093994 + }, + "isolatedSum": { + "p50": 850.3999710083008, + "p90": 991.456001996994, + "p95": 1034.7839891910553, + "p99": 6222.303867340088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25862144, + "combineLogicalBytes": 25862144, + "fanoutMean": 3.5234375, + "recvTokensMax": 1044, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 581.2159776687622, + "p90": 599.7440218925476, + "p95": 607.7119708061218, + "p99": 2767.263889312744 + }, + "combine": { + "p50": 353.7279963493347, + "p90": 363.0079925060272, + "p95": 368.8639998435974, + "p99": 2326.688051223755 + }, + "roundtrip": { + "p50": 892.4800157546997, + "p90": 912.0640158653259, + "p95": 987.2959852218628, + "p99": 3311.0079765319824 + }, + "isolatedSum": { + "p50": 934.9439740180969, + "p90": 962.7520143985748, + "p95": 976.5759706497192, + "p99": 5093.951940536499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
51509248, + "combineLogicalBytes": 51509248, + "fanoutMean": 3.5087890625, + "recvTokensMax": 2086, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 614.7519946098328, + "p90": 629.2799711227417, + "p95": 640.064001083374, + "p99": 2800.800085067749 + }, + "combine": { + "p50": 639.519989490509, + "p90": 652.8639793395996, + "p95": 912.5120043754578, + "p99": 2239.936113357544 + }, + "roundtrip": { + "p50": 1183.8079690933228, + "p90": 1200.4159688949585, + "p95": 1930.2719831466675, + "p99": 3430.5601119995117 + }, + "isolatedSum": { + "p50": 1254.2719841003418, + "p90": 1282.1439504623413, + "p95": 1552.5760054588318, + "p99": 5040.736198425293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 102688768, + "combineLogicalBytes": 102688768, + "fanoutMean": 3.49755859375, + "recvTokensMax": 4145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 755.5840015411377, + "p90": 774.3359804153442, + "p95": 794.3360209465027, + "p99": 2527.071952819824 + }, + "combine": { + "p50": 1170.9120273590088, + "p90": 1183.4559440612793, + "p95": 1250.3679990768433, + "p99": 1826.8799781799316 + }, + "roundtrip": { + "p50": 1851.9680500030518, + "p90": 1878.9440393447876, + "p95": 2481.5680980682373, + "p99": 3259.8719596862793 + }, + "isolatedSum": { + "p50": 1926.4960289001465, + "p90": 1957.7919244766235, + "p95": 2044.704020023346, + "p99": 4353.951930999756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 205520896, + "combineLogicalBytes": 205520896, + "fanoutMean": 3.5, + "recvTokensMax": 8244, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 939.0079975128174, + "p90": 1043.7439680099487, + "p95": 1076.4800310134888, + 
"p99": 2970.8480834960938 + }, + "combine": { + "p50": 2212.064027786255, + "p90": 2237.1199131011963, + "p95": 2254.8160552978516, + "p99": 2284.991979598999 + }, + "roundtrip": { + "p50": 3063.136100769043, + "p90": 3145.8239555358887, + "p95": 3227.168083190918, + "p99": 3799.2959022521973 + }, + "isolatedSum": { + "p50": 3151.0720252990723, + "p90": 3280.863881111145, + "p95": 3331.2960863113403, + "p99": 5255.840063095093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 412016640, + "combineLogicalBytes": 412016640, + "fanoutMean": 3.50830078125, + "recvTokensMax": 16435, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1313.9519691467285, + "p90": 1333.7600231170654, + "p95": 1556.3520193099976, + "p99": 2543.328046798706 + }, + "combine": { + "p50": 4294.015884399414, + "p90": 4309.696197509766, + "p95": 4314.527988433838, + "p99": 4331.5839767456055 + }, + "roundtrip": { + "p50": 5535.071849822998, + "p90": 5622.3039627075195, + "p95": 5662.015914916992, + "p99": 6036.128044128418 + }, + "isolatedSum": { + "p50": 5607.967853546143, + "p90": 5643.456220626831, + "p95": 5870.880007743835, + "p99": 6874.9120235443115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 824119296, + "combineLogicalBytes": 824119296, + "fanoutMean": 3.5086669921875, + "recvTokensMax": 32861, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ab0185a8", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|4|prefill|normal|none|none|0|tuned||370c8dd16f08e2c", + "colorKey": "gb300_927737aa", + "comparisonKey": "0d8daf3b7dfe1150", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:47:20.835517+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "370c8dd16f08e2c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 570.0799822807312, + "p90": 583.3280086517334, + "p95": 588.2560014724731, + "p99": 2232.9280376434326 + }, + "combine": { + "p50": 260.2880001068115, + "p90": 271.2000012397766, + "p95": 277.3439884185791, + "p99": 2398.848056793213 + }, + "roundtrip": { + "p50": 788.4479761123657, + "p90": 800.9920120239258, + "p95": 811.6480112075806, 
+ "p99": 3049.7920513153076 + }, + "isolatedSum": { + "p50": 830.3679823875427, + "p90": 854.52800989151, + "p95": 865.5999898910522, + "p99": 4631.7760944366455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 24715264, + "combineLogicalBytes": 24715264, + "fanoutMean": 3.3671875, + "recvTokensMax": 1910, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 575.2320289611816, + "p90": 593.9199924468994, + "p95": 603.1039953231812, + "p99": 2556.960105895996 + }, + "combine": { + "p50": 362.2719943523407, + "p90": 375.7759928703308, + "p95": 381.632000207901, + "p99": 2324.7361183166504 + }, + "roundtrip": { + "p50": 902.176022529602, + "p90": 920.0639724731445, + "p95": 933.8560104370117, + "p99": 3141.279935836792 + }, + "isolatedSum": { + "p50": 937.5040233135223, + "p90": 969.6959853172302, + "p95": 984.7359955310822, + "p99": 4881.6962242126465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49057792, + "combineLogicalBytes": 49057792, + "fanoutMean": 3.341796875, + "recvTokensMax": 3871, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 639.7759914398193, + "p90": 654.528021812439, + "p95": 660.7679724693298, + "p99": 2450.84810256958 + }, + "combine": { + "p50": 671.6160178184509, + "p90": 686.8159770965576, + "p95": 693.3119893074036, + "p99": 1541.2160158157349 + }, + "roundtrip": { + "p50": 1269.152045249939, + "p90": 1282.0160388946533, + "p95": 1291.9360399246216, + "p99": 2569.7920322418213 + }, + "isolatedSum": { + "p50": 1311.3920092582703, + "p90": 1341.3439989089966, + "p95": 1354.0799617767334, + "p99": 3992.064118385315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 98344960, + "combineLogicalBytes": 98344960, + "fanoutMean": 3.349609375, + "recvTokensMax": 7763, + "stragglerRank": 0, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 819.1999793052673, + "p90": 836.2879753112793, + "p95": 881.1519742012024, + "p99": 2272.480010986328 + }, + "combine": { + "p50": 1239.4239902496338, + "p90": 1255.679965019226, + "p95": 1259.9999904632568, + "p99": 1441.3119554519653 + }, + "roundtrip": { + "p50": 2030.2400588989258, + "p90": 2046.112060546875, + "p95": 2120.896100997925, + "p99": 2776.0000228881836 + }, + "isolatedSum": { + "p50": 2058.623969554901, + "p90": 2091.9679403305054, + "p95": 2141.1519646644592, + "p99": 3713.7919664382935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 196704256, + "combineLogicalBytes": 196704256, + "fanoutMean": 3.349853515625, + "recvTokensMax": 15514, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 1087.1679782867432, + "p90": 1106.943964958191, + "p95": 1145.5039978027344, + "p99": 2140.223979949951 + }, + "combine": { + "p50": 2368.0639266967773, + "p90": 2384.000062942505, + "p95": 2387.8719806671143, + "p99": 2397.183895111084 + }, + "roundtrip": { + "p50": 3427.743911743164, + "p90": 3447.808027267456, + "p95": 3469.856023788452, + "p99": 3760.063886642456 + }, + "isolatedSum": { + "p50": 3455.2319049835205, + "p90": 3490.944027900696, + "p95": 3533.3759784698486, + "p99": 4537.407875061035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 393351168, + "combineLogicalBytes": 393351168, + "fanoutMean": 3.349365234375, + "recvTokensMax": 31012, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1650.496006011963, + "p90": 1675.968050956726, + "p95": 1764.2879486083984, + "p99": 2191.551923751831 + }, + "combine": { + "p50": 4645.343780517578, + "p90": 4655.519962310791, + "p95": 
4659.359931945801, + "p99": 4665.503978729248 + }, + "roundtrip": { + "p50": 6236.480236053467, + "p90": 6253.376007080078, + "p95": 6261.727809906006, + "p99": 6416.639804840088 + }, + "isolatedSum": { + "p50": 6295.839786529541, + "p90": 6331.488013267517, + "p95": 6423.647880554199, + "p99": 6857.055902481079 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 785469440, + "combineLogicalBytes": 785469440, + "fanoutMean": 3.3441162109375, + "recvTokensMax": 61879, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4a81c6bb", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|4|prefill|normal|none|none|0|tuned||624fdceae193d94", + "colorKey": "gb300_a565a324", + "comparisonKey": "4fe9cc3e981bba3a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:47:47.439924+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "624fdceae193d94", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.888397216796875, + "eplbImbalanceAfter": 1.00013427734375, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 572.0959901809692, + "p90": 650.7200002670288, + "p95": 692.8640007972717, + "p99": 2612.191915512085 + }, + "combine": { + "p50": 258.2719922065735, + "p90": 275.4240036010742, + "p95": 293.5679852962494, + "p99": 320.0640082359314 + }, + "roundtrip": { + "p50": 784.0960025787354, + "p90": 860.863983631134, + "p95": 901.1840224266052, + "p99": 2969.95210647583 + }, + "isolatedSum": { + "p50": 830.3679823875427, + "p90": 926.144003868103, + "p95": 986.4319860935211, + "p99": 2932.2559237480164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26277888, + "combineLogicalBytes": 26277888, + "fanoutMean": 3.580078125, + "recvTokensMax": 1038, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 572.0000267028809, + "p90": 590.5600190162659, + "p95": 690.559983253479, + "p99": 2514.0159130096436 + }, + "combine": { + "p50": 353.983998298645, + "p90": 362.5600039958954, + "p95": 368.6079978942871, + "p99": 2320.159912109375 + }, + "roundtrip": { + "p50": 879.4879913330078, + "p90": 897.5679874420166, + "p95": 908.735990524292, + "p99": 3274.6880054473877 + }, + "isolatedSum": { + "p50": 
925.9840250015259, + "p90": 953.1200230121613, + "p95": 1059.167981147766, + "p99": 4834.175825119019 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52441088, + "combineLogicalBytes": 52441088, + "fanoutMean": 3.572265625, + "recvTokensMax": 2073, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 614.2399907112122, + "p90": 723.9999771118164, + "p95": 744.0320253372192, + "p99": 2650.7840156555176 + }, + "combine": { + "p50": 639.9040222167969, + "p90": 673.695981502533, + "p95": 694.8480010032654, + "p99": 1384.8960399627686 + }, + "roundtrip": { + "p50": 1181.6320419311523, + "p90": 1279.1680097579956, + "p95": 1321.3119506835938, + "p99": 3063.6799335479736 + }, + "isolatedSum": { + "p50": 1254.144012928009, + "p90": 1397.6959586143494, + "p95": 1438.8800263404846, + "p99": 4035.680055618286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105670656, + "combineLogicalBytes": 105670656, + "fanoutMean": 3.59912109375, + "recvTokensMax": 4116, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 755.8720111846924, + "p90": 826.7840147018433, + "p95": 871.2000250816345, + "p99": 2425.312042236328 + }, + "combine": { + "p50": 1171.7760562896729, + "p90": 1201.6639709472656, + "p95": 1232.3839664459229, + "p99": 1851.3280153274536 + }, + "roundtrip": { + "p50": 1863.4560108184814, + "p90": 1957.8239917755127, + "p95": 1997.8560209274292, + "p99": 2795.583963394165 + }, + "isolatedSum": { + "p50": 1927.6480674743652, + "p90": 2028.447985649109, + "p95": 2103.5839915275574, + "p99": 4276.640057563782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211527680, + "combineLogicalBytes": 211527680, + "fanoutMean": 3.602294921875, + "recvTokensMax": 8243, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 928.4480214118958, + "p90": 942.4639940261841, + "p95": 948.6079812049866, + "p99": 2252.768039703369 + }, + "combine": { + "p50": 2208.9600563049316, + "p90": 2219.1998958587646, + "p95": 2222.1760749816895, + "p99": 2293.6959266662598 + }, + "roundtrip": { + "p50": 3062.8159046173096, + "p90": 3100.895881652832, + "p95": 3150.2718925476074, + "p99": 3727.1039485931396 + }, + "isolatedSum": { + "p50": 3137.4080777168274, + "p90": 3161.6638898849487, + "p95": 3170.784056186676, + "p99": 4546.463966369629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 423284736, + "combineLogicalBytes": 423284736, + "fanoutMean": 3.604248046875, + "recvTokensMax": 16574, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1311.2319707870483, + "p90": 1402.0800590515137, + "p95": 1459.3600034713745, + "p99": 2199.007987976074 + }, + "combine": { + "p50": 4286.848068237305, + "p90": 4298.719882965088, + "p95": 4304.255962371826, + "p99": 4322.495937347412 + }, + "roundtrip": { + "p50": 5514.431953430176, + "p90": 5536.223888397217, + "p95": 5553.98416519165, + "p99": 5655.935764312744 + }, + "isolatedSum": { + "p50": 5598.080039024353, + "p90": 5700.799942016602, + "p95": 5763.615965843201, + "p99": 6521.503925323486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 847745024, + "combineLogicalBytes": 847745024, + "fanoutMean": 3.6092529296875, + "recvTokensMax": 32806, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db06e7ed", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|4|prefill|normal|none|none|0|tuned||611e3dfc517a533", + "colorKey": "gb300_0c94bea1", + "comparisonKey": "fe775a823c7b19b1", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:48:51.262868+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "611e3dfc517a533", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 580.6080102920532, + "p90": 607.9360246658325, + "p95": 614.8160099983215, + "p99": 2352.0638942718506 + }, + "combine": { + "p50": 
259.71201062202454, + "p90": 275.0079929828644, + "p95": 281.18398785591125, + "p99": 2276.7040729522705 + }, + "roundtrip": { + "p50": 804.4800162315369, + "p90": 833.728015422821, + "p95": 843.9679741859436, + "p99": 3219.8400497436523 + }, + "isolatedSum": { + "p50": 840.3200209140778, + "p90": 882.9440176486969, + "p95": 895.9999978542328, + "p99": 4628.767967224121 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18550784, + "combineLogicalBytes": 18550784, + "fanoutMean": 2.52734375, + "recvTokensMax": 3014, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 649.6319770812988, + "p90": 694.6880221366882, + "p95": 711.135983467102, + "p99": 2444.063901901245 + }, + "combine": { + "p50": 402.3680090904236, + "p90": 416.03198647499084, + "p95": 424.22398924827576, + "p99": 1984.5759868621826 + }, + "roundtrip": { + "p50": 1010.7840299606323, + "p90": 1053.760051727295, + "p95": 1071.0400342941284, + "p99": 2775.4878997802734 + }, + "isolatedSum": { + "p50": 1051.9999861717224, + "p90": 1110.720008611679, + "p95": 1135.3599727153778, + "p99": 4428.639888763428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 36642816, + "combineLogicalBytes": 36642816, + "fanoutMean": 2.49609375, + "recvTokensMax": 6044, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 717.6960110664368, + "p90": 780.0319790840149, + "p95": 801.5040159225464, + "p99": 2497.82395362854 + }, + "combine": { + "p50": 743.9360022544861, + "p90": 755.4240226745605, + "p95": 778.7200212478638, + "p99": 1518.847942352295 + }, + "roundtrip": { + "p50": 1393.1200504302979, + "p90": 1441.5680170059204, + "p95": 1464.6079540252686, + "p99": 2655.1361083984375 + }, + "isolatedSum": { + "p50": 1461.6320133209229, + "p90": 1535.4560017585754, + "p95": 1580.2240371704102, 
+ "p99": 4016.671895980835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 73715712, + "combineLogicalBytes": 73715712, + "fanoutMean": 2.5107421875, + "recvTokensMax": 12111, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 950.3359794616699, + "p90": 1004.6720504760742, + "p95": 1056.9920539855957, + "p99": 2107.2959899902344 + }, + "combine": { + "p50": 1369.8240518569946, + "p90": 1378.8479566574097, + "p95": 1382.815957069397, + "p99": 1505.2160024642944 + }, + "roundtrip": { + "p50": 2258.8798999786377, + "p90": 2291.327953338623, + "p95": 2305.759906768799, + "p99": 3074.687957763672 + }, + "isolatedSum": { + "p50": 2320.1600313186646, + "p90": 2383.520007133484, + "p95": 2439.8080110549927, + "p99": 3612.511992454529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 147775488, + "combineLogicalBytes": 147775488, + "fanoutMean": 2.5166015625, + "recvTokensMax": 24247, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 1366.1119937896729, + "p90": 1414.1440391540527, + "p95": 1460.1919651031494, + "p99": 2026.655912399292 + }, + "combine": { + "p50": 2657.75990486145, + "p90": 2668.8320636749268, + "p95": 2671.999931335449, + "p99": 2680.000066757202 + }, + "roundtrip": { + "p50": 3940.448045730591, + "p90": 3974.7838973999023, + "p95": 3985.7919216156006, + "p99": 4062.528133392334 + }, + "isolatedSum": { + "p50": 4023.871898651123, + "p90": 4082.9761028289795, + "p95": 4132.191896438599, + "p99": 4706.655979156494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 295723008, + "combineLogicalBytes": 295723008, + "fanoutMean": 2.51806640625, + "recvTokensMax": 48503, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + 
"dispatch": { + "p50": 2168.031930923462, + "p90": 2217.1521186828613, + "p95": 2245.2480792999268, + "p99": 2438.5600090026855 + }, + "combine": { + "p50": 5252.575874328613, + "p90": 5269.343852996826, + "p95": 5274.496078491211, + "p99": 5284.607887268066 + }, + "roundtrip": { + "p50": 7374.656200408936, + "p90": 7416.192054748535, + "p95": 7428.703784942627, + "p99": 7454.495906829834 + }, + "isolatedSum": { + "p50": 7420.607805252075, + "p90": 7486.4959716796875, + "p95": 7519.744157791138, + "p99": 7723.167896270752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 590614528, + "combineLogicalBytes": 590614528, + "fanoutMean": 2.5145263671875, + "recvTokensMax": 97022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e7d6dfa9", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_6f30342d", + "comparisonKey": "39fbcab99784926a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:49:17.428714+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 563.0720257759094, + "p90": 575.2320289611816, + "p95": 579.7119736671448, + "p99": 2507.711887359619 + }, + "combine": { + "p50": 257.9199969768524, + "p90": 269.53598856925964, + "p95": 273.72801303863525, + "p99": 2664.992094039917 + }, + "roundtrip": { + "p50": 777.8880000114441, + "p90": 790.9119725227356, + "p95": 800.9600043296814, + "p99": 3080.735921859741 + }, + "isolatedSum": { + "p50": 820.9920227527618, + "p90": 844.7680175304413, + "p95": 853.43998670578, + "p99": 5172.703981399536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 1060, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 567.1039819717407, + "p90": 583.296000957489, + "p95": 591.5520191192627, + "p99": 2540.287971496582 + }, + "combine": { + "p50": 354.2720079421997, + "p90": 
363.20000886917114, + "p95": 370.1759874820709, + "p99": 2309.6959590911865 + }, + "roundtrip": { + "p50": 873.4719753265381, + "p90": 889.5040154457092, + "p95": 907.4879884719849, + "p99": 3056.4799308776855 + }, + "isolatedSum": { + "p50": 921.3759899139404, + "p90": 946.4960098266602, + "p95": 961.7280066013336, + "p99": 4849.983930587769 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 2083, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 612.3520135879517, + "p90": 625.4400014877319, + "p95": 633.184015750885, + "p99": 2773.2160091400146 + }, + "combine": { + "p50": 636.1280083656311, + "p90": 645.4079747200012, + "p95": 654.911994934082, + "p99": 1573.855996131897 + }, + "roundtrip": { + "p50": 1169.3120002746582, + "p90": 1184.928059577942, + "p95": 1220.031976699829, + "p99": 2875.7760524749756 + }, + "isolatedSum": { + "p50": 1248.4800219535828, + "p90": 1270.8479762077332, + "p95": 1288.096010684967, + "p99": 4347.072005271912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 4144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 753.4719705581665, + "p90": 768.127977848053, + "p95": 774.4960188865662, + "p99": 2491.8720722198486 + }, + "combine": { + "p50": 1172.1919775009155, + "p90": 1182.8800439834595, + "p95": 1192.9279565811157, + "p99": 1688.8959407806396 + }, + "roundtrip": { + "p50": 1845.9199666976929, + "p90": 1863.7759685516357, + "p95": 2072.511911392212, + "p99": 2859.839916229248 + }, + "isolatedSum": { + "p50": 1925.663948059082, + "p90": 1951.0080218315125, + "p95": 1967.4239754676819, + "p99": 
4180.768013000488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 8249, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 925.8559942245483, + "p90": 946.3679790496826, + "p95": 1061.568021774292, + "p99": 2263.10396194458 + }, + "combine": { + "p50": 2209.2480659484863, + "p90": 2217.952013015747, + "p95": 2220.4480171203613, + "p99": 2227.4560928344727 + }, + "roundtrip": { + "p50": 3047.0080375671387, + "p90": 3068.5439109802246, + "p95": 3085.439920425415, + "p99": 3581.2480449676514 + }, + "isolatedSum": { + "p50": 3135.1040601730347, + "p90": 3164.3199920654297, + "p95": 3282.0160388946533, + "p99": 4490.560054779053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 16444, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1307.7759742736816, + "p90": 1324.4800567626953, + "p95": 1391.1360502243042, + "p99": 2184.0319633483887 + }, + "combine": { + "p50": 4287.680149078369, + "p90": 4298.111915588379, + "p95": 4302.3681640625, + "p99": 4311.071872711182 + }, + "roundtrip": { + "p50": 5515.520095825195, + "p90": 5532.991886138916, + "p95": 5541.376113891602, + "p99": 5679.776191711426 + }, + "isolatedSum": { + "p50": 5595.456123352051, + "p90": 5622.591972351074, + "p95": 5693.504214286804, + "p99": 6495.10383605957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 32843, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9f092945", + "identity": 
"gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|4|prefill|normal|none|none|0|tuned||9f4d5a652cae831", + "colorKey": "gb300_b3935729", + "comparisonKey": "491d21fc6a1154e4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:46:20.182980+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-4x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 4, + "epSize": 4, + "label": "GB300 EP4 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 4, + "scaleUpDomain": 4 + }, + "routingConsistent": true, + "traceSignature": "9f4d5a652cae831", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.96087646484375, + "eplbImbalanceAfter": 1.0000905354817708, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 512, + "dispatch": { + "p50": 558.784008026123, + "p90": 571.9360113143921, + "p95": 575.5839943885803, + "p99": 2298.8479137420654 + }, + "combine": { + "p50": 253.63200902938843, + "p90": 265.76000452041626, + "p95": 269.6639895439148, + "p99": 2386.0480785369873 + }, + "roundtrip": { + "p50": 769.6319818496704, + "p90": 780.6079983711243, + "p95": 796.1919903755188, + "p99": 3269.11997795105 + }, + "isolatedSum": { + "p50": 812.4160170555115, + "p90": 837.6960158348083, + "p95": 845.2479839324951, + "p99": 4684.895992279053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 26363904, + "combineLogicalBytes": 26363904, + "fanoutMean": 3.591796875, + "recvTokensMax": 1060, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 1024, + "dispatch": { + "p50": 565.1519894599915, + "p90": 583.1999778747559, + "p95": 593.4079885482788, + "p99": 2643.296003341675 + }, + "combine": { + "p50": 355.0719916820526, + "p90": 365.88799953460693, + "p95": 373.1839954853058, + "p99": 2296.639919281006 + }, + "roundtrip": { + "p50": 872.0960021018982, + "p90": 891.4560079574585, + "p95": 907.1679711341858, + "p99": 3037.152051925659 + }, + "isolatedSum": { + "p50": 920.2239811420441, + "p90": 949.0879774093628, + "p95": 966.5919840335846, + "p99": 4939.935922622681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 52455424, + "combineLogicalBytes": 52455424, + "fanoutMean": 3.5732421875, + "recvTokensMax": 2083, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 2048, + "dispatch": { + "p50": 608.6080074310303, + "p90": 623.0720281600952, + "p95": 634.0159773826599, + "p99": 2708.479881286621 + }, + "combine": { + "p50": 635.7120275497437, + "p90": 646.9119787216187, + "p95": 684.6399903297424, + "p99": 
1857.2479486465454 + }, + "roundtrip": { + "p50": 1166.6560173034668, + "p90": 1182.2400093078613, + "p95": 1195.4560279846191, + "p99": 2858.4959506988525 + }, + "isolatedSum": { + "p50": 1244.320034980774, + "p90": 1269.9840068817139, + "p95": 1318.6559677124023, + "p99": 4565.7278299331665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 105240576, + "combineLogicalBytes": 105240576, + "fanoutMean": 3.58447265625, + "recvTokensMax": 4144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 4096, + "dispatch": { + "p50": 751.2000203132629, + "p90": 765.504002571106, + "p95": 775.7440209388733, + "p99": 2411.58390045166 + }, + "combine": { + "p50": 1173.0560064315796, + "p90": 1182.4640035629272, + "p95": 1281.4719676971436, + "p99": 1659.4560146331787 + }, + "roundtrip": { + "p50": 1842.0480489730835, + "p90": 1860.416054725647, + "p95": 2084.4480991363525, + "p99": 2818.2079792022705 + }, + "isolatedSum": { + "p50": 1924.2560267448425, + "p90": 1947.9680061340332, + "p95": 2057.215988636017, + "p99": 4071.039915084839 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 211140608, + "combineLogicalBytes": 211140608, + "fanoutMean": 3.595703125, + "recvTokensMax": 8249, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 8192, + "dispatch": { + "p50": 924.6079921722412, + "p90": 941.1519765853882, + "p95": 961.8239998817444, + "p99": 2236.255884170532 + }, + "combine": { + "p50": 2209.536075592041, + "p90": 2219.072103500366, + "p95": 2221.5681076049805, + "p99": 2230.976104736328 + }, + "roundtrip": { + "p50": 3047.0399856567383, + "p90": 3068.1281089782715, + "p95": 3121.0238933563232, + "p99": 3585.3118896484375 + }, + "isolatedSum": { + "p50": 3134.144067764282, + "p90": 3160.2240800857544, + "p95": 3183.392107486725, + "p99": 4467.23198890686 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 422180864, + "combineLogicalBytes": 422180864, + "fanoutMean": 3.5948486328125, + "recvTokensMax": 16444, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 16384, + "dispatch": { + "p50": 1306.7519664764404, + "p90": 1326.367974281311, + "p95": 1445.855975151062, + "p99": 2133.888006210327 + }, + "combine": { + "p50": 4287.360191345215, + "p90": 4299.776077270508, + "p95": 4303.679943084717, + "p99": 4315.1679039001465 + }, + "roundtrip": { + "p50": 5515.103816986084, + "p90": 5535.647869110107, + "p95": 5541.247844696045, + "p99": 5723.455905914307 + }, + "isolatedSum": { + "p50": 5594.112157821655, + "p90": 5626.144051551819, + "p95": 5749.535918235779, + "p99": 6449.055910110474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845064192, + "combineLogicalBytes": 845064192, + "fanoutMean": 3.59783935546875, + "recvTokensMax": 32843, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-13594be7", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b9896b0a7ca9901", + "colorKey": "gb300_c2190482", + "comparisonKey": "71a72903867751cc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:56:24.645700+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + 
"unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "adversarial", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b9896b0a7ca9901", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 585.8240127563477, + "p90": 606.7839860916138, + "p95": 748.6400008201599, + "p99": 3785.9840393066406 + }, + "combine": { + "p50": 265.9839987754822, + "p90": 284.41599011421204, + "p95": 1287.168025970459, + "p99": 3825.2480030059814 + }, + "roundtrip": { + "p50": 823.4559893608093, + "p90": 914.3040180206299, + "p95": 2729.3760776519775, + "p99": 5111.648082733154 + }, + "isolatedSum": { + "p50": 851.8080115318298, + "p90": 891.1999762058258, + "p95": 2035.808026790619, + "p99": 7611.232042312622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + 
"p50": 590.4319882392883, + "p90": 608.8640093803406, + "p95": 740.7360076904297, + "p99": 4244.895935058594 + }, + "combine": { + "p50": 364.0640079975128, + "p90": 376.800000667572, + "p95": 1890.4000520706177, + "p99": 3548.799991607666 + }, + "roundtrip": { + "p50": 908.4159731864929, + "p90": 956.063985824585, + "p95": 2653.9199352264404, + "p99": 4602.848052978516 + }, + "isolatedSum": { + "p50": 954.4959962368011, + "p90": 985.6640100479126, + "p95": 2631.1360597610474, + "p99": 7793.69592666626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 636.5759968757629, + "p90": 695.6800222396851, + "p95": 2122.2081184387207, + "p99": 4153.632164001465 + }, + "combine": { + "p50": 654.9760103225708, + "p90": 716.0959839820862, + "p95": 1723.1040000915527, + "p99": 2421.7278957366943 + }, + "roundtrip": { + "p50": 1213.6319875717163, + "p90": 1296.1920499801636, + "p95": 2638.592004776001, + "p99": 3976.128101348877 + }, + "isolatedSum": { + "p50": 1291.5520071983337, + "p90": 1411.7760062217712, + "p95": 3845.3121185302734, + "p99": 6575.360059738159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 4198, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 779.8399925231934, + "p90": 803.5200238227844, + "p95": 2042.4320697784424, + "p99": 3678.816080093384 + }, + "combine": { + "p50": 1184.607982635498, + "p90": 1294.975996017456, + "p95": 1853.279948234558, + "p99": 2191.135883331299 + }, + "roundtrip": { + "p50": 1889.5039558410645, + "p90": 2064.0320777893066, + "p95": 
2833.024024963379, + "p99": 3872.9920387268066 + }, + "isolatedSum": { + "p50": 1964.4479751586914, + "p90": 2098.4960198402405, + "p95": 3895.7120180130005, + "p99": 5869.951963424683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8294, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 953.5040259361267, + "p90": 1008.895993232727, + "p95": 2146.7840671539307, + "p99": 3360.5120182037354 + }, + "combine": { + "p50": 2226.464033126831, + "p90": 2244.3840503692627, + "p95": 2268.6400413513184, + "p99": 2465.2159214019775 + }, + "roundtrip": { + "p50": 3099.7440814971924, + "p90": 3289.4721031188965, + "p95": 3646.6879844665527, + "p99": 4117.440223693848 + }, + "isolatedSum": { + "p50": 3179.9680590629578, + "p90": 3253.2800436019897, + "p95": 4415.424108505249, + "p99": 5825.727939605713 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1333.888053894043, + "p90": 1420.7680225372314, + "p95": 1994.52805519104, + "p99": 3060.544013977051 + }, + "combine": { + "p50": 4303.103923797607, + "p90": 4318.175792694092, + "p95": 4324.960231781006, + "p99": 4340.799808502197 + }, + "roundtrip": { + "p50": 5554.368019104004, + "p90": 5607.456207275391, + "p95": 5807.90376663208, + "p99": 6143.616199493408 + }, + "isolatedSum": { + "p50": 5636.99197769165, + "p90": 5738.943815231323, + "p95": 6319.488286972046, + "p99": 7401.343822479248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + 
"recvTokensMax": 32967, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-915b2a75", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||03799dfc4e73d7f", + "colorKey": "gb300_1cd48f0a", + "comparisonKey": "dffc3c3c1f4665a7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:43:51.476461+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "03799dfc4e73d7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 718.9120054244995, + "p90": 764.352023601532, + "p95": 2110.1438999176025, + "p99": 3472.320079803467 + }, + "combine": { + "p50": 341.98400378227234, + "p90": 378.2399892807007, + "p95": 1822.3680257797241, + "p99": 3380.3200721740723 + }, + "roundtrip": { + "p50": 1012.4160051345825, + "p90": 1697.7920532226562, + "p95": 2792.191982269287, + "p99": 4288.383960723877 + }, + "isolatedSum": { + "p50": 1060.8960092067719, + "p90": 1142.5920128822327, + "p95": 3932.5119256973267, + "p99": 6852.640151977539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 692.7040219306946, + "p90": 763.6479735374451, + "p95": 2364.8319244384766, + "p99": 3569.535970687866 + }, + "combine": { + "p50": 379.5520067214966, + "p90": 399.23200011253357, + "p95": 1748.8640546798706, + "p99": 3117.7918910980225 + }, + "roundtrip": { + "p50": 1005.728006362915, + "p90": 1112.1280193328857, + "p95": 2829.440116882324, + "p99": 4144.800186157227 + }, + "isolatedSum": { + "p50": 1072.2560286521912, + "p90": 1162.8799736499786, + "p95": 4113.695979118347, + "p99": 6687.327861785889 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 699.4879841804504, + "p90": 739.0080094337463, + "p95": 2042.2720909118652, + "p99": 
3419.4560050964355 + }, + "combine": { + "p50": 662.8479957580566, + "p90": 732.2559952735901, + "p95": 1549.7920513153076, + "p99": 2272.160053253174 + }, + "roundtrip": { + "p50": 1300.4800081253052, + "p90": 1966.2079811096191, + "p95": 2599.1039276123047, + "p99": 3820.41597366333 + }, + "isolatedSum": { + "p50": 1362.335979938507, + "p90": 1471.2640047073364, + "p95": 3592.064142227173, + "p99": 5691.616058349609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 837.6960158348083, + "p90": 894.2400217056274, + "p95": 2155.519962310791, + "p99": 3079.5199871063232 + }, + "combine": { + "p50": 1192.6079988479614, + "p90": 1215.6480550765991, + "p95": 1399.6800184249878, + "p99": 1959.007978439331 + }, + "roundtrip": { + "p50": 1968.0320024490356, + "p90": 2352.3199558258057, + "p95": 2710.911989212036, + "p99": 3346.6238975524902 + }, + "isolatedSum": { + "p50": 2030.3040146827698, + "p90": 2109.8880767822266, + "p95": 3555.199980735779, + "p99": 5038.527965545654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1025.056004524231, + "p90": 1779.0720462799072, + "p95": 2400.12788772583, + "p99": 29507.71141052246 + }, + "combine": { + "p50": 2232.127904891968, + "p90": 2247.9679584503174, + "p95": 2259.200096130371, + "p99": 2362.4000549316406 + }, + "roundtrip": { + "p50": 3169.856071472168, + "p90": 3223.77610206604, + "p95": 3511.4240646362305, + "p99": 4212.031841278076 + }, + "isolatedSum": { + "p50": 3257.1839094161987, + "p90": 
4027.0400047302246, + "p95": 4659.327983856201, + "p99": 31870.1114654541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1367.5199747085571, + "p90": 1625.3119707107544, + "p95": 1944.4160461425781, + "p99": 2614.2399311065674 + }, + "combine": { + "p50": 4311.168193817139, + "p90": 4329.408168792725, + "p95": 4337.440013885498, + "p99": 4367.839813232422 + }, + "roundtrip": { + "p50": 5613.183975219727, + "p90": 5648.863792419434, + "p95": 5668.12801361084, + "p99": 5807.551860809326 + }, + "isolatedSum": { + "p50": 5678.688168525696, + "p90": 5954.720139503479, + "p95": 6281.856060028076, + "p99": 6982.079744338989 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-03f3ab10", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7f1ea4cf569d12c", + "colorKey": "gb300_20aa4dc5", + "comparisonKey": "731592475f16c454", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:44:23.144604+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7f1ea4cf569d12c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 777.728021144867, + "p90": 844.543993473053, + "p95": 2025.8240699768066, + "p99": 3450.9119987487793 + }, + "combine": { + "p50": 356.76801204681396, + "p90": 379.10398840904236, + "p95": 1702.9759883880615, + "p99": 3264.76788520813 + }, + "roundtrip": { + "p50": 1093.3760404586792, + "p90": 1193.951964378357, + "p95": 2594.752073287964, + "p99": 4002.079963684082 + }, + "isolatedSum": { + "p50": 1134.496033191681, + "p90": 1223.6479818820953, + "p95": 3728.800058364868, + "p99": 6715.679883956909 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 3, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 772.0000147819519, + "p90": 852.0320057868958, + "p95": 1946.7519521713257, + "p99": 3663.2959842681885 + }, + "combine": { + "p50": 667.9679751396179, + "p90": 920.1920032501221, + "p95": 1578.112006187439, + "p99": 2714.240074157715 + }, + "roundtrip": { + "p50": 1376.3519525527954, + "p90": 2008.5439682006836, + "p95": 2627.455949783325, + "p99": 3909.856081008911 + }, + "isolatedSum": { + "p50": 1439.9679899215698, + "p90": 1772.2240090370178, + "p95": 3524.8639583587646, + "p99": 6377.536058425903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 4096, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1066.912055015564, + "p90": 1153.7280082702637, + "p95": 1854.688048362732, + "p99": 3216.7038917541504 + }, + "combine": { + "p50": 2237.823963165283, + "p90": 2254.4639110565186, + "p95": 2274.2719650268555, + "p99": 2336.6079330444336 + }, + "roundtrip": { + "p50": 3223.1359481811523, + "p90": 3289.9839878082275, + "p95": 3499.1040229797363, + "p99": 4128.640174865723 + }, + "isolatedSum": { + "p50": 3304.736018180847, + "p90": 3408.191919326782, + "p95": 4128.960013389587, + "p99": 5553.311824798584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 16384, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c9723eca", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||7ac30b0a39b1405", + "colorKey": "gb300_30494704", + "comparisonKey": "608cb04475b690bf", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T09:49:00.837664+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7ac30b0a39b1405", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 721.9840288162231, + "p90": 796.288013458252, + "p95": 2117.1200275421143, + "p99": 7665.952205657959 + }, + "combine": { + "p50": 
338.0480110645294, + "p90": 370.2400028705597, + "p95": 1696.8319416046143, + "p99": 3250.432014465332 + }, + "roundtrip": { + "p50": 1014.8160457611084, + "p90": 1106.8480014801025, + "p95": 2647.871971130371, + "p99": 4446.49600982666 + }, + "isolatedSum": { + "p50": 1060.0320398807526, + "p90": 1166.5280163288116, + "p95": 3813.9519691467285, + "p99": 10916.384220123291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 630.5919885635376, + "p90": 773.2800245285034, + "p95": 2206.0799598693848, + "p99": 3658.0801010131836 + }, + "combine": { + "p50": 376.19200348854065, + "p90": 397.63200283050537, + "p95": 1713.4720087051392, + "p99": 3240.1280403137207 + }, + "roundtrip": { + "p50": 942.8160190582275, + "p90": 1121.1199760437012, + "p95": 2647.0398902893066, + "p99": 4284.224033355713 + }, + "isolatedSum": { + "p50": 1006.7839920520782, + "p90": 1170.9120273590088, + "p95": 3919.551968574524, + "p99": 6898.208141326904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 2304, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 725.8880138397217, + "p90": 782.5599908828735, + "p95": 2041.50390625, + "p99": 3497.8559017181396 + }, + "combine": { + "p50": 671.392023563385, + "p90": 1041.375994682312, + "p95": 1441.823959350586, + "p99": 2464.992046356201 + }, + "roundtrip": { + "p50": 1327.936053276062, + "p90": 1822.1440315246582, + "p95": 2561.5999698638916, + "p99": 3822.688102722168 + }, + "isolatedSum": { + "p50": 1397.2800374031067, + "p90": 1823.9359855651855, + "p95": 3483.327865600586, + "p99": 
5962.847948074341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 4608, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 836.2560272216797, + "p90": 935.9359741210938, + "p95": 1991.1359548568726, + "p99": 3318.5598850250244 + }, + "combine": { + "p50": 1202.1440267562866, + "p90": 1234.5279455184937, + "p95": 1548.9599704742432, + "p99": 2113.3759021759033 + }, + "roundtrip": { + "p50": 1952.448010444641, + "p90": 2384.0320110321045, + "p95": 2818.687915802002, + "p99": 3622.5600242614746 + }, + "isolatedSum": { + "p50": 2038.4000539779663, + "p90": 2170.4639196395874, + "p95": 3540.0959253311157, + "p99": 5431.935787200928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 9216, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1093.664050102234, + "p90": 1199.6159553527832, + "p95": 1877.3759603500366, + "p99": 3031.8400859832764 + }, + "combine": { + "p50": 2263.4880542755127, + "p90": 2278.3679962158203, + "p95": 2285.0239276885986, + "p99": 2405.280113220215 + }, + "roundtrip": { + "p50": 3264.672040939331, + "p90": 3351.775884628296, + "p95": 3782.111883163452, + "p99": 17784.51156616211 + }, + "isolatedSum": { + "p50": 3357.1521043777466, + "p90": 3477.9839515686035, + "p95": 4162.399888038635, + "p99": 5437.120199203491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 18432, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1441.6639804840088, + "p90": 1603.9680242538452, + "p95": 2060.512065887451, + "p99": 2994.368076324463 + }, + "combine": { + "p50": 4381.408214569092, + "p90": 4402.495861053467, + "p95": 4409.023761749268, + "p99": 4433.407783508301 + }, + "roundtrip": { + "p50": 5734.2400550842285, + "p90": 5808.864116668701, + "p95": 5845.856189727783, + "p99": 6237.088203430176 + }, + "isolatedSum": { + "p50": 5823.072195053101, + "p90": 6006.463885307312, + "p95": 6469.535827636719, + "p99": 7427.775859832764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 36864, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2fa4bf75", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||e3707ddc343088b", + "colorKey": "gb300_9700a008", + "comparisonKey": "d669035586df9197", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:47:15.672545+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": 
null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e3707ddc343088b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 582.5279951095581, + "p90": 615.7439947128296, + "p95": 2329.888105392456, + "p99": 4024.5437622070312 + }, + "combine": { + "p50": 268.2879865169525, + "p90": 282.3359966278076, + "p95": 291.00799560546875, + "p99": 3845.5679416656494 + }, + "roundtrip": { + "p50": 819.7119832038879, + "p90": 878.5600066184998, + "p95": 3134.8159313201904, + "p99": 4619.487762451172 + }, + "isolatedSum": { + "p50": 850.8159816265106, + "p90": 898.0799913406372, + "p95": 2620.896100997925, + "p99": 7870.111703872681 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 612.8960251808167, + "p90": 698.3680129051208, + "p95": 2621.5360164642334, + "p99": 4129.119873046875 + }, + "combine": { + "p50": 388.7679874897003, + "p90": 437.44000792503357, + "p95": 2184.3841075897217, + "p99": 3293.344020843506 + }, + "roundtrip": { + "p50": 
952.0000219345093, + "p90": 1138.5279893875122, + "p95": 3835.9360694885254, + "p99": 5467.135906219482 + }, + "isolatedSum": { + "p50": 1001.664012670517, + "p90": 1135.8080208301544, + "p95": 4805.920124053955, + "p99": 7422.463893890381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 3755, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 684.0959787368774, + "p90": 779.4880270957947, + "p95": 2347.7120399475098, + "p99": 3928.0319213867188 + }, + "combine": { + "p50": 704.4159770011902, + "p90": 816.5119886398315, + "p95": 1874.559998512268, + "p99": 2791.071891784668 + }, + "roundtrip": { + "p50": 1311.8400573730469, + "p90": 2209.791898727417, + "p95": 3364.959955215454, + "p99": 3886.1119747161865 + }, + "isolatedSum": { + "p50": 1388.5119557380676, + "p90": 1596.0000157356262, + "p95": 4222.272038459778, + "p99": 6719.103813171387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 7556, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 898.144006729126, + "p90": 974.7200012207031, + "p95": 2134.239912033081, + "p99": 3318.8159465789795 + }, + "combine": { + "p50": 1283.0719947814941, + "p90": 1350.4960536956787, + "p95": 1800.7359504699707, + "p99": 2212.928056716919 + }, + "roundtrip": { + "p50": 2110.208034515381, + "p90": 2824.831962585449, + "p95": 3186.8159770965576, + "p99": 3682.5919151306152 + }, + "isolatedSum": { + "p50": 2181.21600151062, + "p90": 2325.216054916382, + "p95": 3934.9758625030518, + "p99": 5531.744003295898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 
620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 15163, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1158.400058746338, + "p90": 1486.7520332336426, + "p95": 2504.7359466552734, + "p99": 3234.71999168396 + }, + "combine": { + "p50": 2435.1680278778076, + "p90": 2447.968006134033, + "p95": 2462.399959564209, + "p99": 2493.056058883667 + }, + "roundtrip": { + "p50": 3523.9040851593018, + "p90": 3734.368085861206, + "p95": 4083.104133605957, + "p99": 4472.576141357422 + }, + "isolatedSum": { + "p50": 3593.5680866241455, + "p90": 3934.720039367676, + "p95": 4967.135906219482, + "p99": 5727.776050567627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 30215, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1835.7759714126587, + "p90": 2127.19988822937, + "p95": 2513.792037963867, + "p99": 2858.720064163208 + }, + "combine": { + "p50": 4762.784004211426, + "p90": 4780.831813812256, + "p95": 4788.832187652588, + "p99": 4806.687831878662 + }, + "roundtrip": { + "p50": 6441.311836242676, + "p90": 6519.423961639404, + "p95": 6595.00789642334, + "p99": 6734.272003173828 + }, + "isolatedSum": { + "p50": 6598.5599756240845, + "p90": 6908.031702041626, + "p95": 7302.624225616455, + "p99": 7665.40789604187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 60512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7bb11914", + "identity": 
"gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||6248b19ef786add", + "colorKey": "gb300_c510a7e6", + "comparisonKey": "3bb8ab3d6cd13bbb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:55:24.556682+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6248b19ef786add", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": 
"2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 688.0319714546204, + "p90": 740.2560114860535, + "p95": 2095.6480503082275, + "p99": 3490.1440143585205 + }, + "combine": { + "p50": 325.8880078792572, + "p90": 354.65601086616516, + "p95": 1711.5199565887451, + "p99": 3130.176067352295 + }, + "roundtrip": { + "p50": 969.2479968070984, + "p90": 1037.1840000152588, + "p95": 2600.6720066070557, + "p99": 4201.087951660156 + }, + "isolatedSum": { + "p50": 1013.9199793338776, + "p90": 1094.9120223522186, + "p95": 3807.1680068969727, + "p99": 6620.320081710815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 1080, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 678.3360242843628, + "p90": 728.4799814224243, + "p95": 1990.8479452133179, + "p99": 2853.7919521331787 + }, + "combine": { + "p50": 376.0319948196411, + "p90": 402.6559889316559, + "p95": 1690.9760236740112, + "p99": 2882.944107055664 + }, + "roundtrip": { + "p50": 986.624002456665, + "p90": 1071.071982383728, + "p95": 2598.0799198150635, + "p99": 4203.167915344238 + }, + "isolatedSum": { + "p50": 1054.368019104004, + "p90": 1131.1359703540802, + "p95": 3681.823968887329, + "p99": 5736.736059188843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 2102, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 719.9680209159851, + "p90": 766.1759853363037, + "p95": 2115.231990814209, + "p99": 15837.696075439453 + }, + "combine": { + "p50": 665.2160286903381, + "p90": 
937.3120069503784, + "p95": 1361.4399433135986, + "p99": 2076.159954071045 + }, + "roundtrip": { + "p50": 1311.8400573730469, + "p90": 1732.7040433883667, + "p95": 2684.448003768921, + "p99": 3592.031955718994 + }, + "isolatedSum": { + "p50": 1385.1840496063232, + "p90": 1703.4879922866821, + "p95": 3476.6719341278076, + "p99": 17913.856029510498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 4207, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 841.2799835205078, + "p90": 925.7280230522156, + "p95": 2089.792013168335, + "p99": 3239.039897918701 + }, + "combine": { + "p50": 1193.4080123901367, + "p90": 1251.5840530395508, + "p95": 1490.3680086135864, + "p99": 2063.040018081665 + }, + "roundtrip": { + "p50": 1957.3760032653809, + "p90": 2445.8560943603516, + "p95": 2752.5439262390137, + "p99": 3551.7759323120117 + }, + "isolatedSum": { + "p50": 2034.6879959106445, + "p90": 2177.3120760917664, + "p95": 3580.1600217819214, + "p99": 5302.079916000366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8365, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 993.5680031776428, + "p90": 1094.5600271224976, + "p95": 1977.471947669983, + "p99": 3180.000066757202 + }, + "combine": { + "p50": 2229.7921180725098, + "p90": 2248.800039291382, + "p95": 2274.2719650268555, + "p99": 2477.3759841918945 + }, + "roundtrip": { + "p50": 3147.1359729766846, + "p90": 3275.264024734497, + "p95": 3632.4799060821533, + "p99": 4345.536231994629 + }, + "isolatedSum": { + "p50": 3223.3601212501526, + "p90": 3343.3600664138794, + "p95": 4251.743912696838, + 
"p99": 5657.376050949097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 16483, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1393.9839601516724, + "p90": 1590.432047843933, + "p95": 2122.015953063965, + "p99": 2794.3038940429688 + }, + "combine": { + "p50": 4312.128067016602, + "p90": 4327.55184173584, + "p95": 4332.608222961426, + "p99": 4359.16805267334 + }, + "roundtrip": { + "p50": 5610.400199890137, + "p90": 5640.704154968262, + "p95": 5651.616096496582, + "p99": 5816.991806030273 + }, + "isolatedSum": { + "p50": 5706.112027168274, + "p90": 5917.983889579773, + "p95": 6454.624176025391, + "p99": 7153.471946716309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 32777, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4d5d04d2", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||291e5ce62735286", + "colorKey": "gb300_6c27634c", + "comparisonKey": "9a3b1bcdaf1fb087", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:48:06.994338+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, 
+ "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "291e5ce62735286", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 757.2479844093323, + "p90": 799.1679906845093, + "p95": 2293.1840419769287, + "p99": 3628.3841133117676 + }, + "combine": { + "p50": 343.84000301361084, + "p90": 373.53599071502686, + "p95": 1698.0160474777222, + "p99": 3632.159948348999 + }, + "roundtrip": { + "p50": 1006.2400102615356, + "p90": 1255.679965019226, + "p95": 3973.2799530029297, + "p99": 4433.951854705811 + }, + "isolatedSum": { + "p50": 1101.0879874229431, + "p90": 1172.7039813995361, + "p95": 3991.200089454651, + "p99": 7260.544061660767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 1064, + "stragglerRank": 3, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 640.8960223197937, + "p90": 683.67999792099, + "p95": 2008.7358951568604, + "p99": 3973.439931869507 + }, + "combine": { + "p50": 368.8639998435974, + "p90": 414.88000750541687, + "p95": 1809.440016746521, + "p99": 3178.5600185394287 + }, + "roundtrip": { + "p50": 944.096028804779, + "p90": 1002.9759407043457, + "p95": 2871.7119693756104, + "p99": 4461.27986907959 + }, + "isolatedSum": { + "p50": 1009.7600221633911, + "p90": 1098.5600054264069, + "p95": 3818.1759119033813, + "p99": 7151.999950408936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 2081, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 667.6160097122192, + "p90": 735.2960109710693, + "p95": 2398.8161087036133, + "p99": 3875.6160736083984 + }, + "combine": { + "p50": 658.5279703140259, + "p90": 829.7920227050781, + "p95": 1915.93599319458, + "p99": 2621.2480068206787 + }, + "roundtrip": { + "p50": 1260.7040405273438, + "p90": 1933.4399700164795, + "p95": 3322.0479488372803, + "p99": 3989.311933517456 + }, + "isolatedSum": { + "p50": 1326.1439800262451, + "p90": 1565.0880336761475, + "p95": 4314.752101898193, + "p99": 6496.864080429077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 4153, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 804.2240142822266, + "p90": 864.9920225143433, + "p95": 2057.5358867645264, + "p99": 3595.8080291748047 + }, + "combine": { + "p50": 1188.3200407028198, + "p90": 1239.1040325164795, + "p95": 
1539.7440195083618, + "p99": 2063.1680488586426 + }, + "roundtrip": { + "p50": 1931.1360120773315, + "p90": 2314.3680095672607, + "p95": 3125.1840591430664, + "p99": 3731.487989425659 + }, + "isolatedSum": { + "p50": 1992.5440549850464, + "p90": 2104.0960550308228, + "p95": 3597.279906272888, + "p99": 5658.976078033447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8313, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 980.9600114822388, + "p90": 1108.2559823989868, + "p95": 2170.111894607544, + "p99": 3200.7040977478027 + }, + "combine": { + "p50": 2232.959985733032, + "p90": 2258.2080364227295, + "p95": 2274.0800380706787, + "p99": 2407.2959423065186 + }, + "roundtrip": { + "p50": 3136.4800930023193, + "p90": 3332.832098007202, + "p95": 3890.84792137146, + "p99": 4243.904113769531 + }, + "isolatedSum": { + "p50": 3213.919997215271, + "p90": 3366.4640188217163, + "p95": 4444.191932678223, + "p99": 5608.000040054321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 16581, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1361.1520528793335, + "p90": 1489.408016204834, + "p95": 2591.8400287628174, + "p99": 3125.0879764556885 + }, + "combine": { + "p50": 4304.06379699707, + "p90": 4321.631908416748, + "p95": 4329.216003417969, + "p99": 4353.536128997803 + }, + "roundtrip": { + "p50": 5594.175815582275, + "p90": 5677.055835723877, + "p95": 5781.184196472168, + "p99": 6166.463851928711 + }, + "isolatedSum": { + "p50": 5665.215849876404, + "p90": 5811.039924621582, + "p95": 6921.056032180786, + "p99": 7478.624105453491 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 32887, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4358bbfe", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_06081769", + "comparisonKey": "908f01a78fca7f39", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:57:08.161336+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "striped", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 606.3359975814819, + "p90": 630.4640173912048, + "p95": 1781.1199426651, + "p99": 3837.34393119812 + }, + "combine": { + "p50": 296.8960106372833, + "p90": 314.4319951534271, + "p95": 934.2399835586548, + "p99": 3597.951889038086 + }, + "roundtrip": { + "p50": 854.6879887580872, + "p90": 922.111988067627, + "p95": 2638.688087463379, + "p99": 4658.4320068359375 + }, + "isolatedSum": { + "p50": 903.2320082187653, + "p90": 944.896012544632, + "p95": 2715.359926223755, + "p99": 7435.295820236206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 722.271978855133, + "p90": 818.015992641449, + "p95": 2143.296003341675, + "p99": 3748.8958835601807 + }, + "combine": { + "p50": 496.7679977416992, + "p90": 556.4799904823303, + "p95": 1709.5359563827515, + "p99": 2775.808095932007 + }, + "roundtrip": { + "p50": 1147.3280191421509, + "p90": 1850.3680229187012, + "p95": 2829.7600746154785, + "p99": 4270.5278396606445 + }, + "isolatedSum": { + "p50": 1219.0399765968323, + "p90": 1374.4959831237793, + "p95": 3852.8319597244263, + "p99": 6524.7039794921875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 897.4400162696838, + "p90": 1110.7200384140015, + "p95": 1951.2959718704224, + "p99": 3466.14408493042 + }, + "combine": { + "p50": 918.6239838600159, + "p90": 1024.8960256576538, + "p95": 1324.3520259857178, + "p99": 2116.5759563446045 + }, + "roundtrip": { + "p50": 1754.2719841003418, + "p90": 2118.4639930725098, + "p95": 2722.048044204712, + "p99": 3540.127992630005 + }, + "isolatedSum": { + "p50": 1816.0640001296997, + "p90": 2135.6160640716553, + "p95": 3275.64799785614, + "p99": 5582.720041275024 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1344.9599742889404, + "p90": 1532.7680110931396, + "p95": 1864.8960590362549, + "p99": 2510.0159645080566 + }, + "combine": { + "p50": 1726.5280485153198, + "p90": 1735.103964805603, + "p95": 1739.7119998931885, + "p99": 1754.5599937438965 + }, + "roundtrip": { + "p50": 3006.3040256500244, + "p90": 3078.3679485321045, + "p95": 3359.391927719116, + "p99": 3723.3920097351074 + }, + "isolatedSum": { + "p50": 3071.4880228042603, + "p90": 3267.8719758987427, + "p95": 3604.6080589294434, + "p99": 4264.575958251953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 40211, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 2065.824031829834, + "p90": 2133.984088897705, + "p95": 2297.5680828094482, + "p99": 2467.7441120147705 + }, + "combine": { + "p50": 3444.672107696533, + "p90": 3458.944082260132, + "p95": 3462.752103805542, + "p99": 3472.7039337158203 + }, + 
"roundtrip": { + "p50": 5413.280010223389, + "p90": 5438.496112823486, + "p95": 5454.14400100708, + "p99": 5540.927886962891 + }, + "isolatedSum": { + "p50": 5510.496139526367, + "p90": 5592.928171157837, + "p95": 5760.32018661499, + "p99": 5940.448045730591 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3762.3679637908936, + "p90": 3792.639970779419, + "p95": 3805.4399490356445, + "p99": 3916.9280529022217 + }, + "combine": { + "p50": 7217.376232147217, + "p90": 7235.551834106445, + "p95": 7240.70405960083, + "p99": 7255.392074584961 + }, + "roundtrip": { + "p50": 10764.73617553711, + "p90": 10822.431564331055, + "p95": 10870.688438415527, + "p99": 28595.64781188965 + }, + "isolatedSum": { + "p50": 10979.74419593811, + "p90": 11028.191804885864, + "p95": 11046.144008636475, + "p99": 11172.320127487183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-70580722", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||8183e404f63b100", + "colorKey": "gb300_00154133", + "comparisonKey": "c9e88399055262df", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:46:20.541108+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", 
+ "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8183e404f63b100", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 785.6640219688416, + "p90": 1279.48796749115, + "p95": 2238.5599613189697, + "p99": 3364.6719455718994 + }, + "combine": { + "p50": 350.94401240348816, + "p90": 385.3119909763336, + "p95": 1739.7760152816772, + "p99": 2992.6719665527344 + }, + "roundtrip": { + "p50": 1070.6559419631958, + "p90": 2079.9360275268555, + "p95": 2737.0240688323975, + "p99": 4129.727840423584 + }, + "isolatedSum": { + "p50": 1136.6080343723297, + "p90": 1664.7999584674835, + "p95": 3978.335976600647, + "p99": 
6357.343912124634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 809.1520071029663, + "p90": 953.3119797706604, + "p95": 2099.7440814971924, + "p99": 3479.5520305633545 + }, + "combine": { + "p50": 582.8800201416016, + "p90": 678.1759858131409, + "p95": 1344.5119857788086, + "p99": 2281.08811378479 + }, + "roundtrip": { + "p50": 1330.4320573806763, + "p90": 1867.2319650650024, + "p95": 3130.431890487671, + "p99": 4078.432083129883 + }, + "isolatedSum": { + "p50": 1392.0320272445679, + "p90": 1631.4879655838013, + "p95": 3444.256067276001, + "p99": 5760.6401443481445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 15151, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 1124.9920129776, + "p90": 1520.1599597930908, + "p95": 2300.4798889160156, + "p99": 3303.0080795288086 + }, + "combine": { + "p50": 1089.4399881362915, + "p90": 1105.3760051727295, + "p95": 1195.3920125961304, + "p99": 1533.7920188903809 + }, + "roundtrip": { + "p50": 2155.3919315338135, + "p90": 2395.967960357666, + "p95": 2811.840057373047, + "p99": 3479.7439575195312 + }, + "isolatedSum": { + "p50": 2214.4320011138916, + "p90": 2625.5359649658203, + "p95": 3495.871901512146, + "p99": 4836.800098419189 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 30290, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 
1786.080002784729, + "p90": 1875.5520582199097, + "p95": 2042.464017868042, + "p99": 2547.7120876312256 + }, + "combine": { + "p50": 2105.247974395752, + "p90": 2119.4241046905518, + "p95": 2126.5599727630615, + "p99": 2159.615993499756 + }, + "roundtrip": { + "p50": 3812.0639324188232, + "p90": 3859.8079681396484, + "p95": 3909.3120098114014, + "p99": 4342.7839279174805 + }, + "isolatedSum": { + "p50": 3891.327977180481, + "p90": 3994.9761629104614, + "p95": 4169.0239906311035, + "p99": 4707.328081130981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 60548, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 2787.935972213745, + "p90": 2839.200019836426, + "p95": 2947.295904159546, + "p99": 3365.9839630126953 + }, + "combine": { + "p50": 4229.087829589844, + "p90": 4247.424125671387, + "p95": 4254.720211029053, + "p99": 4274.432182312012 + }, + "roundtrip": { + "p50": 6936.416149139404, + "p90": 6967.648029327393, + "p95": 6993.760108947754, + "p99": 7110.464096069336 + }, + "isolatedSum": { + "p50": 7017.023801803589, + "p90": 7086.6241455078125, + "p95": 7202.016115188599, + "p99": 7640.416145324707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 121046, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 5198.463916778564, + "p90": 5232.03182220459, + "p95": 5245.952129364014, + "p99": 5291.520118713379 + }, + "combine": { + "p50": 8418.368339538574, + "p90": 8444.416046142578, + "p95": 8453.984260559082, + "p99": 8475.90446472168 + }, + "roundtrip": { + "p50": 13577.535629272461, + "p90": 13636.384010314941, + "p95": 
13649.151802062988, + "p99": 13690.336227416992 + }, + "isolatedSum": { + "p50": 13616.832256317139, + "p90": 13676.447868347168, + "p95": 13699.936389923096, + "p99": 13767.424583435059 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 242154, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5e16a19d", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||0e6b07a25691d72", + "colorKey": "gb300_311629eb", + "comparisonKey": "717dd91cb6d08c07", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:54:31.252163+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "0e6b07a25691d72", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 580.8960199356079, + "p90": 649.3440270423889, + "p95": 1912.287950515747, + "p99": 3942.336082458496 + }, + "combine": { + "p50": 266.2079930305481, + "p90": 316.3839876651764, + "p95": 422.2719967365265, + "p99": 2679.487943649292 + }, + "roundtrip": { + "p50": 812.1920228004456, + "p90": 897.9520201683044, + "p95": 2758.6240768432617, + "p99": 4857.920169830322 + }, + "isolatedSum": { + "p50": 847.104012966156, + "p90": 965.7280147075653, + "p95": 2334.5599472522736, + "p99": 6621.824026107788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 1049, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 579.8400044441223, + "p90": 608.735978603363, + "p95": 1750.3999471664429, + "p99": 3137.183904647827 + }, + "combine": { + "p50": 363.5520040988922, + "p90": 375.5840063095093, + "p95": 416.9920086860657, + "p99": 2639.71209526062 + }, + "roundtrip": { + "p50": 896.2879776954651, + "p90": 940.3200149536133, + "p95": 2690.687894821167, + "p99": 4141.88814163208 + }, + "isolatedSum": { + "p50": 943.3920085430145, + "p90": 984.3199849128723, + "p95": 2167.3919558525085, + "p99": 5776.895999908447 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 2084, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 628.928005695343, + "p90": 646.2720036506653, + "p95": 1445.024013519287, + "p99": 3028.0001163482666 + }, + "combine": { + "p50": 651.4559984207153, + "p90": 1054.6560287475586, + "p95": 1712.3199701309204, + "p99": 2532.927989959717 + }, + "roundtrip": { + "p50": 1200.6720304489136, + "p90": 1360.9600067138672, + "p95": 2614.9439811706543, + "p99": 3635.0719928741455 + }, + "isolatedSum": { + "p50": 1280.3840041160583, + "p90": 1700.9280323982239, + "p95": 3157.3439836502075, + "p99": 5560.928106307983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 4126, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 769.3120241165161, + "p90": 795.7760095596313, + "p95": 1991.711974143982, + "p99": 3303.040027618408 + }, + "combine": { + "p50": 1182.6560497283936, + "p90": 1255.7120323181152, + "p95": 1572.8000402450562, + "p99": 1935.5520009994507 + }, + "roundtrip": { + "p50": 1876.255989074707, + "p90": 2079.1358947753906, + "p95": 2738.368034362793, + "p99": 3608.7679862976074 + }, + "isolatedSum": { + "p50": 1951.9680738449097, + "p90": 2051.4880418777466, + "p95": 3564.512014389038, + "p99": 5238.592028617859 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8234, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 941.5040016174316, + "p90": 
1053.8239479064941, + "p95": 1995.8080053329468, + "p99": 3156.2559604644775 + }, + "combine": { + "p50": 2223.072052001953, + "p90": 2233.4721088409424, + "p95": 2240.447998046875, + "p99": 2284.38401222229 + }, + "roundtrip": { + "p50": 3093.1520462036133, + "p90": 3180.799961090088, + "p95": 3562.9758834838867, + "p99": 4160.639762878418 + }, + "isolatedSum": { + "p50": 3164.5760536193848, + "p90": 3287.2960567474365, + "p95": 4236.256003379822, + "p99": 5440.639972686768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 16480, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1330.3359746932983, + "p90": 1567.1679973602295, + "p95": 2010.3681087493896, + "p99": 2703.903913497925 + }, + "combine": { + "p50": 4301.983833312988, + "p90": 4315.5198097229, + "p95": 4321.375846862793, + "p99": 4353.055953979492 + }, + "roundtrip": { + "p50": 5546.592235565186, + "p90": 5599.808216094971, + "p95": 5752.096176147461, + "p99": 6131.616115570068 + }, + "isolatedSum": { + "p50": 5632.319808006287, + "p90": 5882.68780708313, + "p95": 6331.743955612183, + "p99": 7056.959867477417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 32889, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a39aa855", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||a39eeb7c2dc6ca7", + "colorKey": "gb300_6400c8a6", + "comparisonKey": "ed877dcc4329d2a4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:51:12.203818+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + 
"backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a39eeb7c2dc6ca7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 658.240020275116, + "p90": 730.0159931182861, + "p95": 2110.4960441589355, + "p99": 3752.671957015991 + }, + "combine": { + "p50": 298.911988735199, + "p90": 340.831995010376, + "p95": 1926.3999462127686, + "p99": 3415.3599739074707 + }, + "roundtrip": { + "p50": 914.7520065307617, + "p90": 
1019.3920135498047, + "p95": 2538.7840270996094, + "p99": 4404.672145843506 + }, + "isolatedSum": { + "p50": 957.1520090103149, + "p90": 1070.847988128662, + "p95": 4036.895990371704, + "p99": 7168.031930923462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 660.6400012969971, + "p90": 785.6000065803528, + "p95": 2175.168037414551, + "p99": 2909.152030944824 + }, + "combine": { + "p50": 407.8400135040283, + "p90": 427.35999822616577, + "p95": 1724.128007888794, + "p99": 2649.4081020355225 + }, + "roundtrip": { + "p50": 1015.328049659729, + "p90": 1139.0399932861328, + "p95": 2483.135938644409, + "p99": 3967.8399562835693 + }, + "isolatedSum": { + "p50": 1068.4800148010254, + "p90": 1212.9600048065186, + "p95": 3899.2960453033447, + "p99": 5558.560132980347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 5302, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 768.1599855422974, + "p90": 869.1200017929077, + "p95": 2037.4720096588135, + "p99": 3353.856086730957 + }, + "combine": { + "p50": 750.5919933319092, + "p90": 1075.6160020828247, + "p95": 1417.9840087890625, + "p99": 2048.448085784912 + }, + "roundtrip": { + "p50": 1470.8160161972046, + "p90": 1900.1599550247192, + "p95": 2505.6960582733154, + "p99": 3211.199998855591 + }, + "isolatedSum": { + "p50": 1518.7519788742065, + "p90": 1944.7360038757324, + "p95": 3455.456018447876, + "p99": 5402.304172515869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 
4.80810546875, + "recvTokensMax": 10587, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1029.5039415359497, + "p90": 1184.1599941253662, + "p95": 1809.183955192566, + "p99": 2971.07195854187 + }, + "combine": { + "p50": 1376.7039775848389, + "p90": 1388.1280422210693, + "p95": 1420.5119609832764, + "p99": 1673.5999584197998 + }, + "roundtrip": { + "p50": 2347.520112991333, + "p90": 2565.567970275879, + "p95": 2846.4319705963135, + "p99": 3250.52809715271 + }, + "isolatedSum": { + "p50": 2406.2079191207886, + "p90": 2572.2880363464355, + "p95": 3229.6959161758423, + "p99": 4644.67191696167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 21014, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1414.7839546203613, + "p90": 1543.2319641113281, + "p95": 1885.2479457855225, + "p99": 2651.8399715423584 + }, + "combine": { + "p50": 2643.0399417877197, + "p90": 2653.696060180664, + "p95": 2657.0239067077637, + "p99": 2663.6478900909424 + }, + "roundtrip": { + "p50": 3961.9839191436768, + "p90": 4018.688201904297, + "p95": 4200.160026550293, + "p99": 28981.855392456055 + }, + "isolatedSum": { + "p50": 4057.823896408081, + "p90": 4196.928024291992, + "p95": 4542.271852493286, + "p99": 5315.487861633301 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 41814, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2367.392063140869, + "p90": 2420.991897583008, + "p95": 2455.0399780273438, + "p99": 2614.11190032959 + }, + 
"combine": { + "p50": 5440.415859222412, + "p90": 5456.831932067871, + "p95": 5461.8239402771, + "p99": 5474.751949310303 + }, + "roundtrip": { + "p50": 7596.799850463867, + "p90": 7621.632099151611, + "p95": 7631.231784820557, + "p99": 7659.359931945801 + }, + "isolatedSum": { + "p50": 7807.807922363281, + "p90": 7877.823829650879, + "p95": 7916.863918304443, + "p99": 8088.863849639893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 83417, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63710612", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||3eb2f0d7bdba0fe", + "colorKey": "gb300_bf4b6268", + "comparisonKey": "564d77251f1c4ba3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:51:42.052256+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": 
"best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3eb2f0d7bdba0fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 583.2639932632446, + "p90": 727.616012096405, + "p95": 2072.319984436035, + "p99": 3682.9121112823486 + }, + "combine": { + "p50": 265.21599292755127, + "p90": 304.32000756263733, + "p95": 330.59200644493103, + "p99": 3544.1598892211914 + }, + "roundtrip": { + "p50": 800.0320196151733, + "p90": 933.568000793457, + "p95": 2694.240093231201, + "p99": 4407.839775085449 + }, + "isolatedSum": { + "p50": 848.4799861907959, + "p90": 1031.9360196590424, + "p95": 2402.911990880966, + "p99": 7227.07200050354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 1067, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 592.7039980888367, + "p90": 653.3439755439758, + "p95": 1411.8720293045044, + "p99": 3704.927921295166 + }, + "combine": { + "p50": 366.5600121021271, + "p90": 406.3040018081665, + "p95": 1756.8000555038452, + "p99": 3137.5679969787598 + }, + "roundtrip": { + "p50": 909.0560078620911, + "p90": 1015.1040554046631, + "p95": 
2483.680009841919, + "p99": 4205.215930938721 + }, + "isolatedSum": { + "p50": 959.2640101909637, + "p90": 1059.6479773521423, + "p95": 3168.6720848083496, + "p99": 6842.495918273926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 2097, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 635.1040005683899, + "p90": 676.8640279769897, + "p95": 792.8000092506409, + "p99": 2897.696018218994 + }, + "combine": { + "p50": 657.5040221214294, + "p90": 702.0480036735535, + "p95": 1354.3039560317993, + "p99": 2174.880027770996 + }, + "roundtrip": { + "p50": 1210.6560468673706, + "p90": 1335.263967514038, + "p95": 2480.799913406372, + "p99": 3804.863929748535 + }, + "isolatedSum": { + "p50": 1292.6080226898193, + "p90": 1378.9120316505432, + "p95": 2147.10396528244, + "p99": 5072.57604598999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 4163, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 779.2959809303284, + "p90": 814.8800134658813, + "p95": 1981.8880558013916, + "p99": 3318.78399848938 + }, + "combine": { + "p50": 1182.8479766845703, + "p90": 1253.7280321121216, + "p95": 1481.2480211257935, + "p99": 1793.7599420547485 + }, + "roundtrip": { + "p50": 1890.2080059051514, + "p90": 2246.783971786499, + "p95": 2799.13592338562, + "p99": 3521.8560695648193 + }, + "isolatedSum": { + "p50": 1962.1439576148987, + "p90": 2068.608045578003, + "p95": 3463.136076927185, + "p99": 5112.543940544128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 
8305, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 948.6719965934753, + "p90": 1079.2319774627686, + "p95": 1988.6399507522583, + "p99": 3262.432098388672 + }, + "combine": { + "p50": 2226.720094680786, + "p90": 2251.904010772705, + "p95": 2269.63210105896, + "p99": 2386.8160247802734 + }, + "roundtrip": { + "p50": 3093.8880443573, + "p90": 3228.2559871673584, + "p95": 3586.911916732788, + "p99": 4230.976104736328 + }, + "isolatedSum": { + "p50": 3175.3920912742615, + "p90": 3331.1359882354736, + "p95": 4258.272051811218, + "p99": 5649.248123168945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 16529, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1340.831995010376, + "p90": 1523.9360332489014, + "p95": 2121.407985687256, + "p99": 2932.2240352630615 + }, + "combine": { + "p50": 4314.335823059082, + "p90": 4340.6081199646, + "p95": 4349.440097808838, + "p99": 4367.775917053223 + }, + "roundtrip": { + "p50": 5585.599899291992, + "p90": 5662.240028381348, + "p95": 5691.487789154053, + "p99": 6096.799850463867 + }, + "isolatedSum": { + "p50": 5655.167818069458, + "p90": 5864.544153213501, + "p95": 6470.848083496094, + "p99": 7299.999952316284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 32880, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-334dae78", + "identity": "gb300|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||bfb01c61bdf926e", + "colorKey": "gb300_0fa732b5", + "comparisonKey": 
"396fbe03f3212318", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:53:10.013859+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfb01c61bdf926e", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 691.2959814071655, + "p90": 743.3279752731323, + "p95": 2166.559934616089, + "p99": 
3695.744037628174 + }, + "combine": { + "p50": 327.0399868488312, + "p90": 353.66401076316833, + "p95": 1746.1119890213013, + "p99": 3376.2879371643066 + }, + "roundtrip": { + "p50": 987.4879717826843, + "p90": 1242.9120540618896, + "p95": 2959.4879150390625, + "p99": 4420.767784118652 + }, + "isolatedSum": { + "p50": 1018.3359682559967, + "p90": 1096.9919860363007, + "p95": 3912.67192363739, + "p99": 7072.0319747924805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 716.159999370575, + "p90": 797.4079847335815, + "p95": 2205.728054046631, + "p99": 3853.1200885772705 + }, + "combine": { + "p50": 493.50398778915405, + "p90": 515.392005443573, + "p95": 1771.5200185775757, + "p99": 2720.223903656006 + }, + "roundtrip": { + "p50": 1143.4240341186523, + "p90": 1320.41597366333, + "p95": 3118.5600757598877, + "p99": 4015.872001647949 + }, + "isolatedSum": { + "p50": 1209.663987159729, + "p90": 1312.7999901771545, + "p95": 3977.2480726242065, + "p99": 6573.343992233276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 933.4719777107239, + "p90": 1072.0640420913696, + "p95": 2118.016004562378, + "p99": 3303.8079738616943 + }, + "combine": { + "p50": 911.903977394104, + "p90": 1045.024037361145, + "p95": 1509.3120336532593, + "p99": 1998.8160133361816 + }, + "roundtrip": { + "p50": 1782.7199697494507, + "p90": 2731.1360836029053, + "p95": 3565.632104873657, + "p99": 26826.751708984375 + }, + "isolatedSum": { + "p50": 1845.3759551048279, + 
"p90": 2117.0880794525146, + "p95": 3627.328038215637, + "p99": 5302.623987197876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1347.9679822921753, + "p90": 1555.616021156311, + "p95": 2189.5039081573486, + "p99": 2669.1839694976807 + }, + "combine": { + "p50": 1723.9680290222168, + "p90": 1735.0399494171143, + "p95": 1740.1599884033203, + "p99": 1788.4479761123657 + }, + "roundtrip": { + "p50": 3013.823986053467, + "p90": 3155.6479930877686, + "p95": 3312.351942062378, + "p99": 3504.192113876343 + }, + "isolatedSum": { + "p50": 3071.936011314392, + "p90": 3290.6559705734253, + "p95": 3929.663896560669, + "p99": 4457.631945610046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 40211, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 2063.391923904419, + "p90": 2212.127923965454, + "p95": 2381.887912750244, + "p99": 2515.7759189605713 + }, + "combine": { + "p50": 3442.4960613250732, + "p90": 3456.2559127807617, + "p95": 3462.30411529541, + "p99": 3470.400094985962 + }, + "roundtrip": { + "p50": 5422.304153442383, + "p90": 5471.519947052002, + "p95": 5513.792037963867, + "p99": 5673.791885375977 + }, + "isolatedSum": { + "p50": 5505.887985229492, + "p90": 5668.383836746216, + "p95": 5844.192028045654, + "p99": 5986.176013946533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3769.5679664611816, + "p90": 3811.392068862915, + "p95": 3834.8801136016846, + "p99": 3974.560022354126 + }, + "combine": { + "p50": 7215.231895446777, + "p90": 7231.872081756592, + "p95": 7237.215995788574, + "p99": 7249.407768249512 + }, + "roundtrip": { + "p50": 10753.69644165039, + "p90": 10785.216331481934, + "p95": 10795.392036437988, + "p99": 10851.584434509277 + }, + "isolatedSum": { + "p50": 10984.799861907959, + "p90": 11043.264150619507, + "p95": 11072.096109390259, + "p99": 11223.967790603638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49c72ba7", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_9985c0a9", + "comparisonKey": "5b78a7de0a9f3c41", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:53:39.914685+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + "createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 731.8400144577026, + "p90": 783.7439775466919, + "p95": 1981.600046157837, + "p99": 2339.967966079712 + }, + "combine": { + "p50": 336.8000090122223, + "p90": 377.47201323509216, + "p95": 1778.656005859375, + "p99": 2075.0720500946045 + }, + "roundtrip": { + "p50": 1024.5120525360107, + "p90": 1094.7200059890747, + "p95": 2577.5680541992188, + "p99": 2954.848051071167 + }, + "isolatedSum": { + "p50": 1068.640023469925, + "p90": 1161.215990781784, + "p95": 3760.256052017212, + "p99": 4415.040016174316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 704.2880058288574, + "p90": 772.3199725151062, + "p95": 2131.455898284912, + "p99": 2749.9840259552 + }, + 
"combine": { + "p50": 382.01600313186646, + "p90": 407.55200386047363, + "p95": 1730.4960489273071, + "p99": 2022.047996520996 + }, + "roundtrip": { + "p50": 1012.6399993896484, + "p90": 1109.1200113296509, + "p95": 2471.2319374084473, + "p99": 2842.3678874969482 + }, + "isolatedSum": { + "p50": 1086.3040089607239, + "p90": 1179.8719763755798, + "p95": 3861.9519472122192, + "p99": 4772.032022476196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 703.4879922866821, + "p90": 749.2799758911133, + "p95": 2138.9760971069336, + "p99": 2562.7520084381104 + }, + "combine": { + "p50": 664.5119786262512, + "p90": 693.1520104408264, + "p95": 1359.71200466156, + "p99": 1804.2559623718262 + }, + "roundtrip": { + "p50": 1326.367974281311, + "p90": 1779.4560194015503, + "p95": 2317.0878887176514, + "p99": 3262.943983078003 + }, + "isolatedSum": { + "p50": 1367.9999709129333, + "p90": 1442.4319863319397, + "p95": 3498.6881017684937, + "p99": 4367.0079708099365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 856.8639755249023, + "p90": 907.3280096054077, + "p95": 1944.4160461425781, + "p99": 2431.2000274658203 + }, + "combine": { + "p50": 1198.3360052108765, + "p90": 1291.424036026001, + "p95": 1411.7120504379272, + "p99": 2098.6878871917725 + }, + "roundtrip": { + "p50": 1982.8159809112549, + "p90": 2183.072090148926, + "p95": 2782.7200889587402, + "p99": 3400.736093521118 + }, + "isolatedSum": { + "p50": 2055.199980735779, + "p90": 
2198.7520456314087, + "p95": 3356.1280965805054, + "p99": 4529.887914657593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1045.3439950942993, + "p90": 1112.6079559326172, + "p95": 1946.239948272705, + "p99": 2411.9999408721924 + }, + "combine": { + "p50": 2235.2960109710693, + "p90": 2248.800039291382, + "p95": 2254.4000148773193, + "p99": 2282.912015914917 + }, + "roundtrip": { + "p50": 3185.1840019226074, + "p90": 3225.3119945526123, + "p95": 3383.392095565796, + "p99": 3875.5199909210205 + }, + "isolatedSum": { + "p50": 3280.6400060653687, + "p90": 3361.407995223999, + "p95": 4200.639963150024, + "p99": 4694.911956787109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1389.0559673309326, + "p90": 1593.727946281433, + "p95": 1986.7520332336426, + "p99": 2400.223970413208 + }, + "combine": { + "p50": 4313.7922286987305, + "p90": 4331.136226654053, + "p95": 4338.848114013672, + "p99": 4360.928058624268 + }, + "roundtrip": { + "p50": 5629.663944244385, + "p90": 5682.112216949463, + "p95": 5712.831974029541, + "p99": 5867.167949676514 + }, + "isolatedSum": { + "p50": 5702.848196029663, + "p90": 5924.864172935486, + "p95": 6325.600147247314, + "p99": 6761.152029037476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
} + ] + }, + { + "id": "cx-4c8d2b3b", + "identity": "gb300|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||45b103b10fbcaef", + "colorKey": "gb300_85029fa5", + "comparisonKey": "1077e1f2b69726b7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:49:53.257292+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "gb300-8x", + "sku": "gb300", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "gb300-nvl72-mnnvl", + "transport": "mnnvl", + "worldSize": 8, + "epSize": 8, + "label": "GB300 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 152, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "45b103b10fbcaef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577797931", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577797931", + 
"createdAt": "2026-07-02T08:53:52Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 587.9039764404297, + "p90": 703.2319903373718, + "p95": 2241.2800788879395, + "p99": 4248.191833496094 + }, + "combine": { + "p50": 266.7520046234131, + "p90": 283.4559977054596, + "p95": 1927.6800155639648, + "p99": 4103.424072265625 + }, + "roundtrip": { + "p50": 815.6800270080566, + "p90": 859.5839738845825, + "p95": 2956.32004737854, + "p99": 4847.743988037109 + }, + "isolatedSum": { + "p50": 854.6559810638428, + "p90": 986.6879880428314, + "p95": 4168.960094451904, + "p99": 8351.615905761719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 588.0640149116516, + "p90": 702.9119729995728, + "p95": 2144.67191696167, + "p99": 4013.631820678711 + }, + "combine": { + "p50": 364.0640079975128, + "p90": 407.29600191116333, + "p95": 1817.7599906921387, + "p99": 3742.5920963287354 + }, + "roundtrip": { + "p50": 900.7359743118286, + "p90": 1037.0240211486816, + "p95": 2702.336072921753, + "p99": 4564.640045166016 + }, + "isolatedSum": { + "p50": 952.1280229091644, + "p90": 1110.207974910736, + "p95": 3962.4319076538086, + "p99": 7756.223917007446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 632.2559714317322, + "p90": 656.1920046806335, + "p95": 1919.0720319747925, + "p99": 4108.575820922852 + }, + "combine": { + "p50": 650.592029094696, + "p90": 
772.4480032920837, + "p95": 1954.6560049057007, + "p99": 2632.352113723755 + }, + "roundtrip": { + "p50": 1211.7760181427002, + "p90": 1852.8640270233154, + "p95": 2728.2559871673584, + "p99": 3956.0959339141846 + }, + "isolatedSum": { + "p50": 1282.8480005264282, + "p90": 1428.6400079727173, + "p95": 3873.728036880493, + "p99": 6740.927934646606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 779.7120213508606, + "p90": 860.9920144081116, + "p95": 2183.1040382385254, + "p99": 3679.1999340057373 + }, + "combine": { + "p50": 1187.648057937622, + "p90": 1337.0239734649658, + "p95": 1730.847954750061, + "p99": 2229.8240661621094 + }, + "roundtrip": { + "p50": 1896.5439796447754, + "p90": 2521.343946456909, + "p95": 2937.983989715576, + "p99": 3745.9518909454346 + }, + "isolatedSum": { + "p50": 1967.3600792884827, + "p90": 2198.0159878730774, + "p95": 3913.9519929885864, + "p99": 5909.024000167847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 947.5520253181458, + "p90": 1050.4640340805054, + "p95": 2097.759962081909, + "p99": 3315.0079250335693 + }, + "combine": { + "p50": 2222.111940383911, + "p90": 2235.167980194092, + "p95": 2257.5039863586426, + "p99": 2447.0720291137695 + }, + "roundtrip": { + "p50": 3094.912052154541, + "p90": 3205.631971359253, + "p95": 3490.015983581543, + "p99": 4248.672008514404 + }, + "isolatedSum": { + "p50": 3169.663965702057, + "p90": 3285.632014274597, + "p95": 4355.263948440552, + "p99": 
5762.079954147339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1345.5040454864502, + "p90": 1557.31201171875, + "p95": 2320.159912109375, + "p99": 3098.367929458618 + }, + "combine": { + "p50": 4307.295799255371, + "p90": 4329.08821105957, + "p95": 4335.552215576172, + "p99": 4362.97607421875 + }, + "roundtrip": { + "p50": 5555.679798126221, + "p90": 5607.903957366943, + "p95": 5769.887924194336, + "p99": 6142.591953277588 + }, + "isolatedSum": { + "p50": 5652.799844741821, + "p90": 5886.40022277832, + "p95": 6655.712127685547, + "p99": 7461.344003677368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3487a218", + "identity": "h100|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_3514e1c5", + "comparisonKey": "9ad201953364c1ec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:38.693426+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + 
"routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.23200243711472, + "p90": 102.75200009346008, + "p95": 104.032002389431, + "p99": 109.79200154542923 + }, + "combine": { + "p50": 65.69600105285645, + "p90": 73.08799773454666, + "p95": 73.47200065851212, + "p99": 74.97599720954895 + }, + "roundtrip": { + "p50": 134.91199910640717, + "p90": 141.08799397945404, + "p95": 142.84799993038177, + "p99": 148.12800288200378 + }, + "isolatedSum": { + "p50": 160.92800348997116, + "p90": 175.83999782800674, + "p95": 177.50400304794312, + "p99": 184.76799875497818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + 
"globalTokens": 16, + "dispatch": { + "p50": 68.67200136184692, + "p90": 100.12800246477127, + "p95": 101.95200145244598, + "p99": 105.66399991512299 + }, + "combine": { + "p50": 63.551999628543854, + "p90": 72.92799651622772, + "p95": 73.37599992752075, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 115.61600118875504, + "p90": 139.8400068283081, + "p95": 141.79199934005737, + "p99": 146.81600034236908 + }, + "isolatedSum": { + "p50": 132.22400099039078, + "p90": 173.055998980999, + "p95": 175.32800137996674, + "p99": 179.77599799633026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 68.28799843788147, + "p90": 101.98400169610977, + "p95": 103.13600301742554, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 63.90400230884552, + "p90": 72.25599884986877, + "p95": 73.11999797821045, + "p99": 75.83999633789062 + }, + "roundtrip": { + "p50": 117.5680011510849, + "p90": 143.77599954605103, + "p95": 147.07200229167938, + "p99": 153.28000485897064 + }, + "isolatedSum": { + "p50": 132.192000746727, + "p90": 174.24000054597855, + "p95": 176.256000995636, + "p99": 183.80799889564514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.82400268316269, + "p90": 101.95200145244598, + "p95": 105.31199723482132, + "p99": 134.07999277114868 + }, + "combine": { + "p50": 64.09599632024765, + "p90": 73.15199822187424, + "p95": 73.56800138950348, + "p99": 189.2479956150055 + }, + "roundtrip": { + "p50": 117.88800358772278, + "p90": 141.85599982738495, + "p95": 
144.31999623775482, + "p99": 147.96799421310425 + }, + "isolatedSum": { + "p50": 133.91999900341034, + "p90": 175.10399967432022, + "p95": 178.8799986243248, + "p99": 323.3279883861542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.15999811887741, + "p90": 98.62399846315384, + "p95": 101.1200025677681, + "p99": 107.00800269842148 + }, + "combine": { + "p50": 64.15999680757523, + "p90": 73.15199822187424, + "p95": 74.52800124883652, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 120.09599804878235, + "p90": 145.6640064716339, + "p95": 149.9200016260147, + "p99": 154.62400019168854 + }, + "isolatedSum": { + "p50": 136.31999492645264, + "p90": 171.77599668502808, + "p95": 175.64800381660461, + "p99": 186.75200641155243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.27999985218048, + "p90": 97.47199714183807, + "p95": 100.47999769449234, + "p99": 105.95200210809708 + }, + "combine": { + "p50": 65.72800129652023, + "p90": 79.68000322580338, + "p95": 80.76799660921097, + "p99": 195.96800208091736 + }, + "roundtrip": { + "p50": 118.07999759912491, + "p90": 149.9200016260147, + "p95": 151.45599842071533, + "p99": 258.33600759506226 + }, + "isolatedSum": { + "p50": 143.0080011487007, + "p90": 177.15200036764145, + "p95": 181.2479943037033, + "p99": 301.92000418901443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 86.87999844551086, + "p90": 104.99200224876404, + "p95": 106.46399855613708, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 73.85600358247757, + "p90": 87.80799806118011, + "p95": 88.22400122880936, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 134.14399325847626, + "p90": 159.39199924468994, + "p95": 161.28000617027283, + "p99": 166.72000288963318 + }, + "isolatedSum": { + "p50": 160.73600202798843, + "p90": 192.80000030994415, + "p95": 194.68799978494644, + "p99": 200.25599747896194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.2080020904541, + "p90": 112.15999722480774, + "p95": 114.46399986743927, + "p99": 118.84800344705582 + }, + "combine": { + "p50": 89.05600011348724, + "p90": 97.31200337409973, + "p95": 98.14400225877762, + "p99": 104.16000336408615 + }, + "roundtrip": { + "p50": 162.04799711704254, + "p90": 173.8239973783493, + "p95": 176.35199427604675, + "p99": 204.12799715995789 + }, + "isolatedSum": { + "p50": 183.26400220394135, + "p90": 209.47200059890747, + "p95": 212.6080021262169, + "p99": 223.00800681114197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a8dd94dd", + "identity": "h100|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_3514e1c5", + "comparisonKey": "3d37e058838d53fe", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T10:45:03.452363+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.19200229644775, + "p90": 103.96800190210342, + "p95": 105.18400371074677, + "p99": 111.10399663448334 + }, + "combine": { + "p50": 
71.26399874687195, + "p90": 73.37599992752075, + "p95": 73.82400333881378, + "p99": 79.55200225114822 + }, + "roundtrip": { + "p50": 138.91200721263885, + "p90": 148.8640010356903, + "p95": 150.2079963684082, + "p99": 155.07200360298157 + }, + "isolatedSum": { + "p50": 167.4560010433197, + "p90": 177.34400182962418, + "p95": 179.00800704956055, + "p99": 190.65599888563156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.34399902820587, + "p90": 101.59999877214432, + "p95": 103.61599922180176, + "p99": 107.51999914646149 + }, + "combine": { + "p50": 65.18399715423584, + "p90": 73.47200065851212, + "p95": 74.01599735021591, + "p99": 79.8719972372055 + }, + "roundtrip": { + "p50": 120.2239990234375, + "p90": 147.07200229167938, + "p95": 149.79200065135956, + "p99": 153.85599434375763 + }, + "isolatedSum": { + "p50": 134.5279961824417, + "p90": 175.07199943065643, + "p95": 177.63199657201767, + "p99": 187.391996383667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 69.023996591568, + "p90": 99.0080013871193, + "p95": 102.01600193977356, + "p99": 107.29599744081497 + }, + "combine": { + "p50": 65.60000032186508, + "p90": 78.84799689054489, + "p95": 79.23199981451035, + "p99": 79.93599772453308 + }, + "roundtrip": { + "p50": 120.7680031657219, + "p90": 149.3760049343109, + "p95": 150.68799257278442, + "p99": 156.5759927034378 + }, + "isolatedSum": { + "p50": 134.62399691343307, + "p90": 177.85599827766418, + "p95": 181.2480017542839, + "p99": 187.23199516534805 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.82400268316269, + "p90": 98.7199991941452, + "p95": 101.31199657917023, + "p99": 107.00800269842148 + }, + "combine": { + "p50": 65.98400324583054, + "p90": 78.94399762153625, + "p95": 80.6720033288002, + "p99": 81.63200318813324 + }, + "roundtrip": { + "p50": 119.71200257539749, + "p90": 151.36000514030457, + "p95": 152.41600573062897, + "p99": 155.32800555229187 + }, + "isolatedSum": { + "p50": 135.80800592899323, + "p90": 177.66399681568146, + "p95": 181.98399990797043, + "p99": 188.64000588655472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.23999971151352, + "p90": 99.48799759149551, + "p95": 102.62399911880493, + "p99": 106.11200332641602 + }, + "combine": { + "p50": 66.6240006685257, + "p90": 79.26400005817413, + "p95": 80.73599636554718, + "p99": 82.62400329113007 + }, + "roundtrip": { + "p50": 124.76799637079239, + "p90": 150.9760022163391, + "p95": 153.28000485897064, + "p99": 158.4639996290207 + }, + "isolatedSum": { + "p50": 144.86400038003922, + "p90": 178.75199764966965, + "p95": 183.3599954843521, + "p99": 188.73600661754608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 78.015998005867, + "p90": 103.55199873447418, + "p95": 
105.34399747848511, + "p99": 110.68800091743469 + }, + "combine": { + "p50": 73.02399724721909, + "p90": 81.4720019698143, + "p95": 82.04799890518188, + "p99": 87.55200356245041 + }, + "roundtrip": { + "p50": 128.9920061826706, + "p90": 157.60000050067902, + "p95": 160.73599457740784, + "p99": 442.2079920768738 + }, + "isolatedSum": { + "p50": 151.0399952530861, + "p90": 185.02400070428848, + "p95": 187.391996383667, + "p99": 198.2400044798851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 88.06400001049042, + "p90": 109.27999764680862, + "p95": 111.58400028944016, + "p99": 117.88800358772278 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 88.79999816417694, + "p95": 95.29600292444229, + "p99": 96.12800180912018 + }, + "roundtrip": { + "p50": 148.28799664974213, + "p90": 167.77600347995758, + "p95": 169.72799599170685, + "p99": 174.68799650669098 + }, + "isolatedSum": { + "p50": 169.72800344228745, + "p90": 198.07999581098557, + "p95": 206.88000321388245, + "p99": 214.01600539684296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.64800012111664, + "p90": 120.83200365304947, + "p95": 122.40000069141388, + "p99": 126.11199915409088 + }, + "combine": { + "p50": 96.22400254011154, + "p90": 105.50399869680405, + "p95": 105.92000186443329, + "p99": 111.68000102043152 + }, + "roundtrip": { + "p50": 172.35200107097626, + "p90": 190.23999571800232, + "p95": 193.2159960269928, + "p99": 198.97599518299103 + }, + "isolatedSum": { + 
"p50": 203.87200266122818, + "p90": 226.33600234985352, + "p95": 228.32000255584717, + "p99": 237.7920001745224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5e0e7110", + "identity": "h100|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_3514e1c5", + "comparisonKey": "bf9228e0ee3ffbe7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:31.202564+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.67199850082397, + "p90": 103.26399654150009, + "p95": 104.2879968881607, + "p99": 109.8560020327568 + }, + "combine": { + "p50": 78.62400263547897, + "p90": 81.18399977684021, + "p95": 81.44000172615051, + "p99": 86.87999844551086 + }, + "roundtrip": { + "p50": 145.05599439144135, + "p90": 151.2639969587326, + "p95": 153.888002038002, + "p99": 162.88000345230103 + }, + "isolatedSum": { + "p50": 175.29600113630295, + "p90": 184.4479963183403, + "p95": 185.72799861431122, + "p99": 196.73600047826767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.60000097751617, + "p90": 102.30399668216705, + "p95": 103.93600165843964, + "p99": 110.01600325107574 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 80.57600259780884, + "p95": 81.24800026416779, + "p99": 86.75199747085571 + }, + "roundtrip": { + "p50": 122.8799968957901, + "p90": 151.45599842071533, + "p95": 153.1520038843155, + "p99": 157.05600380897522 + }, + "isolatedSum": { + "p50": 140.51200449466705, + "p90": 182.8799992799759, + "p95": 185.18400192260742, + "p99": 196.76800072193146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.31999933719635, + "p90": 98.08000177145004, + "p95": 100.99200159311295, + "p99": 104.76800054311752 + }, + "combine": { + "p50": 71.23199850320816, + "p90": 78.8159966468811, + "p95": 78.97599786520004, + "p99": 86.59200370311737 + }, + "roundtrip": { + "p50": 124.35200065374374, + "p90": 150.36800503730774, + "p95": 152.16000378131866, + "p99": 159.58400070667267 + }, + "isolatedSum": { + "p50": 143.5519978404045, + "p90": 176.89599841833115, + "p95": 179.967999458313, + "p99": 191.3600042462349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.03200107812881, + "p90": 101.47199779748917, + "p95": 104.22399640083313, + "p99": 109.50399935245514 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 81.24800026416779, + "p95": 81.88799768686295, + "p99": 86.62399649620056 + }, + "roundtrip": { + "p50": 144.44799721240997, + "p90": 152.79999375343323, + "p95": 156.80000185966492, + "p99": 164.57599401474 + }, + "isolatedSum": { + "p50": 174.56000298261642, + "p90": 182.71999806165695, + "p95": 186.11199408769608, + "p99": 196.1279958486557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.8079993724823, + "p90": 100.35199671983719, + "p95": 103.39199751615524, + "p99": 114.43199962377548 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 81.34400099515915, + "p95": 82.36800134181976, + "p99": 95.04000097513199 + }, + 
"roundtrip": { + "p50": 150.9760022163391, + "p90": 159.04000401496887, + "p95": 161.3440066576004, + "p99": 259.5199942588806 + }, + "isolatedSum": { + "p50": 174.56000298261642, + "p90": 181.69599771499634, + "p95": 185.759998857975, + "p99": 209.47200059890747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.09599959850311, + "p90": 100.22400319576263, + "p95": 103.39199751615524, + "p99": 106.84800148010254 + }, + "combine": { + "p50": 74.27199929952621, + "p90": 87.10400015115738, + "p95": 87.64799684286118, + "p99": 95.20000219345093 + }, + "roundtrip": { + "p50": 135.55200397968292, + "p90": 156.47999942302704, + "p95": 159.71200168132782, + "p99": 164.12800550460815 + }, + "isolatedSum": { + "p50": 158.36799889802933, + "p90": 187.32800334692, + "p95": 191.03999435901642, + "p99": 202.04800367355347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.5199978351593, + "p90": 114.72000181674957, + "p95": 116.70400202274323, + "p99": 124.35200065374374 + }, + "combine": { + "p50": 87.16800063848495, + "p90": 95.8079993724823, + "p95": 96.12800180912018, + "p99": 103.90400141477585 + }, + "roundtrip": { + "p50": 154.84799444675446, + "p90": 172.89599776268005, + "p95": 176.67199671268463, + "p99": 180.2240014076233 + }, + "isolatedSum": { + "p50": 186.68799847364426, + "p90": 210.52800118923187, + "p95": 212.8320038318634, + "p99": 228.2560020685196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 
33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.78400164842606, + "p90": 126.0479986667633, + "p95": 127.3919939994812, + "p99": 131.26400113105774 + }, + "combine": { + "p50": 98.94400089979172, + "p90": 111.58400028944016, + "p95": 112.12799698114395, + "p99": 113.79200220108032 + }, + "roundtrip": { + "p50": 186.46399676799774, + "p90": 200.80000162124634, + "p95": 203.13599705696106, + "p99": 208.48000049591064 + }, + "isolatedSum": { + "p50": 209.72800254821777, + "p90": 237.63199895620346, + "p95": 239.51999098062515, + "p99": 245.05600333213806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3a560e7b", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_e08c0fcd", + "comparisonKey": "a9f5907ae155647c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:34:51.760058+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": 
"bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.02400118112564, + "p90": 104.89600151777267, + "p95": 108.12799632549286, + "p99": 113.21599781513214 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 82.49600231647491, + "p95": 82.94399827718735, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 149.98400211334229, + "p90": 154.7199934720993, + "p95": 156.89599514007568, + "p99": 162.7199947834015 + }, + "isolatedSum": { + "p50": 177.98399925231934, + "p90": 187.3920038342476, + "p95": 191.0719946026802, + "p99": 203.07199656963348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.66400146484375, + "p90": 102.78400033712387, + "p95": 104.3199971318245, + 
"p99": 109.11999642848969 + }, + "combine": { + "p50": 73.40800017118454, + "p90": 82.17599987983704, + "p95": 82.5280025601387, + "p99": 87.20000088214874 + }, + "roundtrip": { + "p50": 128.9599984884262, + "p90": 154.7199934720993, + "p95": 156.19200468063354, + "p99": 160.41600704193115 + }, + "isolatedSum": { + "p50": 143.0720016360283, + "p90": 184.9600002169609, + "p95": 186.8479996919632, + "p99": 196.31999731063843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.88000327348709, + "p90": 99.29600358009338, + "p95": 103.04000228643417, + "p99": 110.72000116109848 + }, + "combine": { + "p50": 73.85600358247757, + "p90": 81.727996468544, + "p95": 82.17599987983704, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 130.91200590133667, + "p90": 154.78399395942688, + "p95": 156.95999562740326, + "p99": 163.07200491428375 + }, + "isolatedSum": { + "p50": 144.73600685596466, + "p90": 181.0240000486374, + "p95": 185.2160021662712, + "p99": 200.6720006465912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.00799679756165, + "p90": 98.88000041246414, + "p95": 101.43999755382538, + "p99": 107.10400342941284 + }, + "combine": { + "p50": 74.0479975938797, + "p90": 82.40000158548355, + "p95": 82.78399705886841, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 131.071999669075, + "p90": 155.64799308776855, + "p95": 158.9760035276413, + "p99": 164.60800170898438 + }, + "isolatedSum": { + "p50": 145.05599439144135, + "p90": 181.2800019979477, + 
"p95": 184.2239946126938, + "p99": 195.8400011062622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.50399738550186, + "p90": 101.95200145244598, + "p95": 104.76800054311752, + "p99": 110.36799848079681 + }, + "combine": { + "p50": 80.83199709653854, + "p90": 88.48000317811966, + "p95": 89.88799899816513, + "p99": 97.31200337409973 + }, + "roundtrip": { + "p50": 128.31999361515045, + "p90": 158.4639996290207, + "p95": 161.0880047082901, + "p99": 164.76799547672272 + }, + "isolatedSum": { + "p50": 178.3359944820404, + "p90": 190.43200463056564, + "p95": 194.65599954128265, + "p99": 207.68000185489655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.94399958848953, + "p90": 105.05600273609161, + "p95": 109.72800105810165, + "p99": 181.5679967403412 + }, + "combine": { + "p50": 81.95199817419052, + "p90": 90.01599997282028, + "p95": 90.36800265312195, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 141.85599982738495, + "p90": 163.83999586105347, + "p95": 167.71200299263, + "p99": 173.47200214862823 + }, + "isolatedSum": { + "p50": 172.89599776268005, + "p90": 195.0720027089119, + "p95": 200.0960037112236, + "p99": 279.03999388217926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
96.67199850082397, + "p90": 115.77600240707397, + "p95": 120.28799951076508, + "p99": 124.9919980764389 + }, + "combine": { + "p50": 90.52799642086029, + "p90": 98.55999797582626, + "p95": 99.16800260543823, + "p99": 106.01600259542465 + }, + "roundtrip": { + "p50": 161.56800091266632, + "p90": 180.86400628089905, + "p95": 183.00800025463104, + "p99": 187.04000115394592 + }, + "isolatedSum": { + "p50": 187.19999492168427, + "p90": 214.33600038290024, + "p95": 219.4560021162033, + "p99": 231.00800067186356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.48800021409988, + "p90": 131.80799782276154, + "p95": 134.65599715709686, + "p99": 142.39999651908875 + }, + "combine": { + "p50": 106.9440022110939, + "p90": 114.75200206041336, + "p95": 115.42399972677231, + "p99": 122.72000312805176 + }, + "roundtrip": { + "p50": 196.03200256824493, + "p90": 217.50399470329285, + "p95": 220.41599452495575, + "p99": 222.91199862957 + }, + "isolatedSum": { + "p50": 222.4320024251938, + "p90": 246.5599998831749, + "p95": 250.07999688386917, + "p99": 265.1199996471405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dd449dbf", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_3514e1c5", + "comparisonKey": "30e8e42955ad99d0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:15.975537+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + 
"sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.12000125646591, + "p90": 101.34399682283401, + "p95": 102.9760017991066, + "p99": 108.64000022411346 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 81.15199953317642, + "p95": 81.60000294446945, + "p99": 83.61600339412689 + }, + "roundtrip": { + "p50": 127.96799838542938, + "p90": 
147.8080004453659, + "p95": 150.78400075435638, + "p99": 153.60000729560852 + }, + "isolatedSum": { + "p50": 165.15199840068817, + "p90": 182.49599635601044, + "p95": 184.57600474357605, + "p99": 192.25600361824036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.92000341415405, + "p90": 99.35999661684036, + "p95": 101.05600208044052, + "p99": 105.98400235176086 + }, + "combine": { + "p50": 72.22399860620499, + "p90": 78.62400263547897, + "p95": 79.58400249481201, + "p99": 81.60000294446945 + }, + "roundtrip": { + "p50": 128.86400520801544, + "p90": 147.5519984960556, + "p95": 149.88799393177032, + "p99": 153.56799960136414 + }, + "isolatedSum": { + "p50": 142.14400202035904, + "p90": 177.98399925231934, + "p95": 180.64000457525253, + "p99": 187.58400529623032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 68.9919963479042, + "p90": 97.82399982213974, + "p95": 100.832000374794, + "p99": 105.24799674749374 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 79.96799796819687, + "p95": 81.7599967122078, + "p99": 87.74399757385254 + }, + "roundtrip": { + "p50": 129.43999469280243, + "p90": 156.92800283432007, + "p95": 159.61599349975586, + "p99": 167.10400581359863 + }, + "isolatedSum": { + "p50": 141.95199310779572, + "p90": 177.7919977903366, + "p95": 182.5919970870018, + "p99": 192.99199432134628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.16799801588058, + "p90": 99.5199978351593, + "p95": 102.04800218343735, + "p99": 105.76000064611435 + }, + "combine": { + "p50": 73.15199822187424, + "p90": 81.50400221347809, + "p95": 81.98399841785431, + "p99": 88.16000074148178 + }, + "roundtrip": { + "p50": 128.00000607967377, + "p90": 155.35999834537506, + "p95": 158.33599865436554, + "p99": 161.21600568294525 + }, + "isolatedSum": { + "p50": 144.31999623775482, + "p90": 181.0240000486374, + "p95": 184.03200060129166, + "p99": 193.92000138759613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.91999793052673, + "p90": 98.4639972448349, + "p95": 100.63999891281128, + "p99": 106.04800283908844 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 87.8399983048439, + "p95": 88.67199718952179, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 126.20800733566284, + "p90": 157.31200575828552, + "p95": 159.743994474411, + "p99": 164.32000696659088 + }, + "isolatedSum": { + "p50": 155.64800053834915, + "p90": 186.3039955496788, + "p95": 189.31199610233307, + "p99": 196.9280019402504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.01599997282028, + "p90": 102.55999863147736, + "p95": 104.3199971318245, + "p99": 109.27999764680862 + }, + "combine": { + "p50": 81.216000020504, + "p90": 89.6959975361824, + "p95": 90.30400216579437, + 
"p99": 96.09600156545639 + }, + "roundtrip": { + "p50": 141.31200313568115, + "p90": 165.56799411773682, + "p95": 168.5120016336441, + "p99": 177.08800733089447 + }, + "isolatedSum": { + "p50": 171.23199999332428, + "p90": 192.25599616765976, + "p95": 194.62399929761887, + "p99": 205.37599921226501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.14400160312653, + "p90": 115.00799655914307, + "p95": 119.39200013875961, + "p99": 123.87199699878693 + }, + "combine": { + "p50": 89.91999924182892, + "p90": 97.15200215578079, + "p95": 103.42399775981903, + "p99": 105.21599650382996 + }, + "roundtrip": { + "p50": 161.98399662971497, + "p90": 181.5039962530136, + "p95": 185.2159947156906, + "p99": 187.1359944343567 + }, + "isolatedSum": { + "p50": 184.06400084495544, + "p90": 212.15999871492386, + "p95": 222.81599789857864, + "p99": 229.08799350261688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.72000116109848, + "p90": 130.0799995660782, + "p95": 133.760005235672, + "p99": 137.11999356746674 + }, + "combine": { + "p50": 106.39999806880951, + "p90": 115.1999980211258, + "p95": 120.28799951076508, + "p99": 121.95199728012085 + }, + "roundtrip": { + "p50": 194.7840005159378, + "p90": 213.95200490951538, + "p95": 216.95999801158905, + "p99": 221.343994140625 + }, + "isolatedSum": { + "p50": 217.119999229908, + "p90": 245.27999758720398, + "p95": 254.04800474643707, + "p99": 259.0719908475876 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e381b02e", + "identity": "h100|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_3514e1c5", + "comparisonKey": "f82ac5f1a2e408c2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:11.842905+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.28800302743912, + "p90": 103.4879982471466, + "p95": 104.86400127410889, + "p99": 110.20799726247787 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 81.37600123882294, + "p95": 81.91999793052673, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 143.19999516010284, + "p90": 148.03199470043182, + "p95": 150.43200552463531, + "p99": 153.28000485897064 + }, + "isolatedSum": { + "p50": 176.00000649690628, + "p90": 184.86399948596954, + "p95": 186.78399920463562, + "p99": 194.14399564266205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.26399874687195, + "p90": 103.96800190210342, + "p95": 105.82400113344193, + "p99": 464.4800126552582 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 81.18399977684021, + "p95": 81.50400221347809, + "p99": 82.20800012350082 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 149.3760049343109, + "p95": 152.19199657440186, + "p99": 156.3519984483719 + }, + "isolatedSum": { + "p50": 143.93600076436996, + "p90": 185.15200167894363, + "p95": 187.32800334692, + "p99": 546.688012778759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.10399752855301, + "p90": 98.62399846315384, + 
"p95": 102.01600193977356, + "p99": 107.26399719715118 + }, + "combine": { + "p50": 72.73600250482559, + "p90": 81.15199953317642, + "p95": 84.927998483181, + "p99": 87.61599659919739 + }, + "roundtrip": { + "p50": 129.4720023870468, + "p90": 159.42400693893433, + "p95": 160.76800227165222, + "p99": 164.0319973230362 + }, + "isolatedSum": { + "p50": 143.8400000333786, + "p90": 179.77599799633026, + "p95": 186.94400042295456, + "p99": 194.87999379634857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.62400132417679, + "p90": 99.80800002813339, + "p95": 102.55999863147736, + "p99": 105.43999820947647 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 81.4720019698143, + "p95": 82.07999914884567, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 129.4720023870468, + "p90": 157.60000050067902, + "p95": 160.16000509262085, + "p99": 163.4880006313324 + }, + "isolatedSum": { + "p50": 143.6159983277321, + "p90": 181.2800019979477, + "p95": 184.63999778032303, + "p99": 193.24799627065659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.38399630784988, + "p90": 101.47199779748917, + "p95": 103.58399897813797, + "p99": 106.62399977445602 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 88.3840024471283, + "p95": 88.99199962615967, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 151.48800611495972, + "p90": 159.7760021686554, + "p95": 161.3440066576004, + "p99": 166.4319932460785 + }, + "isolatedSum": { + "p50": 177.47199535369873, 
+ "p90": 189.85600024461746, + "p95": 192.57599860429764, + "p99": 196.83200120925903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.20800143480301, + "p90": 102.62399911880493, + "p95": 104.44799810647964, + "p99": 109.69600081443787 + }, + "combine": { + "p50": 81.24800026416779, + "p90": 89.6959975361824, + "p95": 95.07200121879578, + "p99": 96.3200032711029 + }, + "roundtrip": { + "p50": 143.36000382900238, + "p90": 167.9680049419403, + "p95": 169.40799355506897, + "p99": 172.7679967880249 + }, + "isolatedSum": { + "p50": 171.4560016989708, + "p90": 192.31999665498734, + "p95": 199.51999932527542, + "p99": 206.01600408554077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.4959968328476, + "p90": 114.46399986743927, + "p95": 116.64000153541565, + "p99": 121.72800302505493 + }, + "combine": { + "p50": 89.85599875450134, + "p90": 103.16800326108932, + "p95": 104.12800312042236, + "p99": 105.47199845314026 + }, + "roundtrip": { + "p50": 164.15999829769135, + "p90": 184.32000279426575, + "p95": 186.36800348758698, + "p99": 188.38399648666382 + }, + "isolatedSum": { + "p50": 184.35199558734894, + "p90": 217.6320031285286, + "p95": 220.768004655838, + "p99": 227.2000014781952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 114.17599767446518, + "p90": 129.72800433635712, + "p95": 131.9040060043335, + "p99": 136.1600011587143 + }, + "combine": { + "p50": 106.78400099277496, + "p90": 119.64800208806992, + "p95": 120.57600170373917, + "p99": 122.17599898576736 + }, + "roundtrip": { + "p50": 197.85599410533905, + "p90": 217.6000028848648, + "p95": 219.4560021162033, + "p99": 221.69600427150726 + }, + "isolatedSum": { + "p50": 220.95999866724014, + "p90": 249.37600642442703, + "p95": 252.48000770807266, + "p99": 258.33600014448166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b03072ef", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h100_6257df07", + "comparisonKey": "f8e3a2693df24c2a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:12.569615+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": 
{ + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.6079980134964, + "p90": 103.58399897813797, + "p95": 105.12000322341919, + "p99": 109.8560020327568 + }, + "combine": { + "p50": 80.03199845552444, + "p90": 81.69600367546082, + "p95": 82.20800012350082, + "p99": 87.71199733018875 + }, + "roundtrip": { + "p50": 145.05599439144135, + "p90": 153.24799716472626, + "p95": 155.45600652694702, + "p99": 158.65600109100342 + }, + "isolatedSum": { + "p50": 176.63999646902084, + "p90": 185.28000265359879, + "p95": 187.32800334692, + "p99": 197.56799936294556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.35999727249146, + "p90": 107.68000036478043, + "p95": 110.27199774980545, + "p99": 116.73600226640701 + }, + "combine": { + "p50": 88.67199718952179, + "p90": 90.30400216579437, + "p95": 
91.0400003194809, + "p99": 95.90400010347366 + }, + "roundtrip": { + "p50": 154.08000349998474, + "p90": 161.6320013999939, + "p95": 162.9759967327118, + "p99": 167.9680049419403 + }, + "isolatedSum": { + "p50": 192.03199446201324, + "p90": 197.9840025305748, + "p95": 201.31199806928635, + "p99": 212.64000236988068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.2160017490387, + "p90": 144.54400539398193, + "p95": 145.47200500965118, + "p99": 150.04800260066986 + }, + "combine": { + "p50": 121.11999839544296, + "p90": 122.75200337171555, + "p95": 123.16799908876419, + "p99": 127.9039978981018 + }, + "roundtrip": { + "p50": 223.07200729846954, + "p90": 228.32000255584717, + "p95": 229.69600558280945, + "p99": 231.87200725078583 + }, + "isolatedSum": { + "p50": 258.33600014448166, + "p90": 267.2960087656975, + "p95": 268.6400040984154, + "p99": 277.95200049877167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56af727c", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h100_03822fc9", + "comparisonKey": "e9632ccede4cd596", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:29.198981+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.13600170612335, + "p90": 101.79200023412704, + "p95": 104.73600029945374, + "p99": 113.8560026884079 + }, + "combine": { + "p50": 79.80799674987793, + "p90": 82.20800012350082, + "p95": 82.65600353479385, + "p99": 87.87199854850769 + }, + "roundtrip": { + "p50": 149.31200444698334, + "p90": 155.39200603961945, + "p95": 156.73600137233734, + "p99": 161.47199273109436 + }, + "isolatedSum": { + 
"p50": 174.94399845600128, + "p90": 184.00000035762787, + "p95": 187.3920038342476, + "p99": 201.7280012369156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.20800340175629, + "p90": 106.91200196743011, + "p95": 108.51199924945831, + "p99": 112.44799941778183 + }, + "combine": { + "p50": 87.36000210046768, + "p90": 90.2400016784668, + "p95": 90.65599739551544, + "p99": 95.64799815416336 + }, + "roundtrip": { + "p50": 157.4079990386963, + "p90": 163.83999586105347, + "p95": 165.8560037612915, + "p99": 174.3679940700531 + }, + "isolatedSum": { + "p50": 189.56800550222397, + "p90": 197.1520036458969, + "p95": 199.16799664497375, + "p99": 208.0959975719452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.66400516033173, + "p90": 142.20799505710602, + "p95": 144.1279947757721, + "p99": 148.44800531864166 + }, + "combine": { + "p50": 120.92799693346024, + "p90": 123.03999811410904, + "p95": 124.09599870443344, + "p99": 130.75199723243713 + }, + "roundtrip": { + "p50": 223.1680005788803, + "p90": 228.15999388694763, + "p95": 229.66399788856506, + "p99": 234.20800268650055 + }, + "isolatedSum": { + "p50": 258.59200209379196, + "p90": 265.24799317121506, + "p95": 268.22399348020554, + "p99": 279.2000025510788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
} + ] + }, + { + "id": "cx-4055ebae", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h100_00822b10", + "comparisonKey": "7d314b3c7f9cda1b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:46.015009+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.6639986038208, + "p90": 104.22399640083313, + "p95": 106.1440035700798, + "p99": 111.93600296974182 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 81.85599744319916, + "p95": 85.31200140714645, + "p99": 88.16000074148178 + }, + "roundtrip": { + "p50": 147.0080018043518, + "p90": 154.14400398731232, + "p95": 157.02399611473083, + "p99": 161.40800714492798 + }, + "isolatedSum": { + "p50": 177.37600207328796, + "p90": 186.0799938440323, + "p95": 191.45600497722626, + "p99": 200.0960037112236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.67199915647507, + "p90": 108.15999656915665, + "p95": 111.64800077676773, + "p99": 160.288006067276 + }, + "combine": { + "p50": 87.8399983048439, + "p90": 90.55999666452408, + "p95": 95.32800316810608, + "p99": 96.3200032711029 + }, + "roundtrip": { + "p50": 157.3439985513687, + "p90": 163.80800306797028, + "p95": 165.82399606704712, + "p99": 173.66400361061096 + }, + "isolatedSum": { + "p50": 188.51199746131897, + "p90": 198.71999323368073, + "p95": 206.9760039448738, + "p99": 256.6080093383789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.0720009803772, + "p90": 146.04799449443817, + "p95": 150.01599490642548, + "p99": 347.104012966156 + 
}, + "combine": { + "p50": 120.80000340938568, + "p90": 123.07199835777283, + "p95": 128.38399410247803, + "p99": 169.8240041732788 + }, + "roundtrip": { + "p50": 223.07200729846954, + "p90": 229.50400412082672, + "p95": 231.80800676345825, + "p99": 384.38400626182556 + }, + "isolatedSum": { + "p50": 259.8720043897629, + "p90": 269.119992852211, + "p95": 278.3999890089035, + "p99": 516.9280171394348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d34b7924", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_01822ca3", + "comparisonKey": "47b03954b11a4f1e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:02.802227+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.33600306510925, + "p90": 100.22400319576263, + "p95": 101.3759970664978, + "p99": 107.61599987745285 + }, + "combine": { + "p50": 79.03999835252762, + "p90": 80.32000064849854, + "p95": 81.15199953317642, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 148.0959951877594, + "p90": 153.18399667739868, + "p95": 155.45600652694702, + "p99": 158.78400206565857 + }, + "isolatedSum": { + "p50": 173.37600141763687, + "p90": 180.54400384426117, + "p95": 182.52799659967422, + "p99": 195.00800222158432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.8640006184578, + "p90": 105.69600015878677, + "p95": 107.2319969534874, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 87.61599659919739, + "p90": 90.20800143480301, + "p95": 90.94399958848953, + "p99": 97.31200337409973 + }, + "roundtrip": { + "p50": 156.92800283432007, + "p90": 160.5439931154251, + "p95": 
163.42400014400482, + "p99": 166.87999665737152 + }, + "isolatedSum": { + "p50": 188.47999721765518, + "p90": 195.90400159358978, + "p95": 198.17599654197693, + "p99": 208.8320031762123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 136.9280070066452, + "p90": 142.2400027513504, + "p95": 143.8400000333786, + "p99": 146.97599411010742 + }, + "combine": { + "p50": 120.64000219106674, + "p90": 122.94399738311768, + "p95": 123.55200201272964, + "p99": 128.76799702644348 + }, + "roundtrip": { + "p50": 224.2240011692047, + "p90": 229.24800217151642, + "p95": 232.09600150585175, + "p99": 248.79999458789825 + }, + "isolatedSum": { + "p50": 257.56800919771194, + "p90": 265.1840001344681, + "p95": 267.39200204610825, + "p99": 275.7439911365509 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a1d61c72", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_f3bcaf4f", + "comparisonKey": "fb42d0403130a11e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:12.680350+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + 
"epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.68799829483032, + "p90": 101.40799731016159, + "p95": 102.62399911880493, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 87.26400136947632, + "p95": 87.8399983048439, + "p99": 90.04800021648407 + }, + "roundtrip": { + "p50": 150.39999783039093, + "p90": 157.3439985513687, + "p95": 159.0079963207245, + "p99": 162.84799575805664 + }, + "isolatedSum": { + "p50": 174.97599869966507, + "p90": 188.6719986796379, + "p95": 190.46399742364883, + "p99": 198.0160027742386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + 
"combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.0479975938797, + "p90": 95.74399888515472, + "p95": 98.43199700117111, + "p99": 104.35199737548828 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 87.2960016131401, + "p95": 87.90399879217148, + "p99": 89.50400352478027 + }, + "roundtrip": { + "p50": 124.70400333404541, + "p90": 155.58399260044098, + "p95": 156.92800283432007, + "p99": 161.02400422096252 + }, + "isolatedSum": { + "p50": 147.39199727773666, + "p90": 183.04000049829483, + "p95": 186.3359957933426, + "p99": 193.85600090026855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 93.85599941015244, + "p90": 98.62399846315384, + "p95": 101.24800354242325, + "p99": 104.67199981212616 + }, + "combine": { + "p50": 80.1599994301796, + "p90": 87.99999952316284, + "p95": 88.3840024471283, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 150.30400454998016, + "p90": 157.27999806404114, + "p95": 158.75199437141418, + "p99": 163.83999586105347 + }, + "isolatedSum": { + "p50": 174.01599884033203, + "p90": 186.62399798631668, + "p95": 189.63200598955154, + "p99": 194.5279985666275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.24000233411789, + "p90": 98.52799773216248, + "p95": 101.1200025677681, + "p99": 106.33599758148193 + }, + "combine": { + "p50": 82.17599987983704, 
+ "p90": 88.03199976682663, + "p95": 88.35200220346451, + "p99": 89.82399851083755 + }, + "roundtrip": { + "p50": 150.7200002670288, + "p90": 156.25600516796112, + "p95": 158.36800634860992, + "p99": 164.48000073432922 + }, + "isolatedSum": { + "p50": 176.41600221395493, + "p90": 186.5599974989891, + "p95": 189.4720047712326, + "p99": 196.1599960923195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.36000341176987, + "p90": 103.04000228643417, + "p95": 105.43999820947647, + "p99": 129.12000715732574 + }, + "combine": { + "p50": 86.7839977145195, + "p90": 88.76799792051315, + "p95": 89.4400030374527, + "p99": 95.74399888515472 + }, + "roundtrip": { + "p50": 155.2640050649643, + "p90": 160.5439931154251, + "p95": 164.0319973230362, + "p99": 169.79199647903442 + }, + "isolatedSum": { + "p50": 182.14400112628937, + "p90": 191.80800020694733, + "p95": 194.88000124692917, + "p99": 224.86400604248047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 86.5280032157898, + "p90": 109.37599837779999, + "p95": 111.07199639081955, + "p99": 114.9120032787323 + }, + "combine": { + "p50": 82.17599987983704, + "p90": 96.12800180912018, + "p95": 96.63999825716019, + "p99": 99.58399832248688 + }, + "roundtrip": { + "p50": 151.5199989080429, + "p90": 163.16799819469452, + "p95": 166.17600619792938, + "p99": 170.56000232696533 + }, + "isolatedSum": { + "p50": 168.70400309562683, + "p90": 205.50400018692017, + "p95": 207.71199464797974, + "p99": 214.49600160121918 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 116.99199676513672, + "p90": 145.34400403499603, + "p95": 147.32800424098969, + "p99": 152.0320028066635 + }, + "combine": { + "p50": 103.42399775981903, + "p90": 111.455999314785, + "p95": 116.38399958610535, + "p99": 160.73599457740784 + }, + "roundtrip": { + "p50": 189.7599995136261, + "p90": 196.70400023460388, + "p95": 199.20000433921814, + "p99": 205.05599677562714 + }, + "isolatedSum": { + "p50": 220.41599452495575, + "p90": 256.80000334978104, + "p95": 263.71200382709503, + "p99": 312.76799738407135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 140.76800644397736, + "p90": 145.6959992647171, + "p95": 147.67999947071075, + "p99": 152.79999375343323 + }, + "combine": { + "p50": 128.9920061826706, + "p90": 137.31199502944946, + "p95": 137.53600418567657, + "p99": 139.20000195503235 + }, + "roundtrip": { + "p50": 232.86400735378265, + "p90": 247.45599925518036, + "p95": 250.68798661231995, + "p99": 260.0319981575012 + }, + "isolatedSum": { + "p50": 269.76001262664795, + "p90": 283.00799429416656, + "p95": 285.21600365638733, + "p99": 291.9999957084656 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-482bcd9b", + "identity": 
"h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_5dd7acd6", + "comparisonKey": "2582f35a92486f30", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:56.368735+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", 
+ "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 91.10400080680847, + "p90": 98.24000298976898, + "p95": 101.43999755382538, + "p99": 108.64000022411346 + }, + "combine": { + "p50": 65.72800129652023, + "p90": 73.08799773454666, + "p95": 73.53600114583969, + "p99": 74.40000027418137 + }, + "roundtrip": { + "p50": 134.39999520778656, + "p90": 142.04800128936768, + "p95": 144.44799721240997, + "p99": 154.27200496196747 + }, + "isolatedSum": { + "p50": 156.8320021033287, + "p90": 171.32800072431564, + "p95": 174.97599869966507, + "p99": 183.04000049829483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.120001912117, + "p90": 103.93600165843964, + "p95": 106.78400099277496, + "p99": 126.8479973077774 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 73.31199944019318, + "p95": 73.66400212049484, + "p99": 79.48800176382065 + }, + "roundtrip": { + "p50": 139.55199718475342, + "p90": 147.10399508476257, + "p95": 150.52799880504608, + "p99": 153.08800339698792 + }, + "isolatedSum": { + "p50": 168.73600333929062, + "p90": 177.2480010986328, + "p95": 180.4480031132698, + "p99": 206.33599907159805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.32799637317657, + "p90": 105.85600137710571, + "p95": 108.15999656915665, + "p99": 115.64800143241882 + }, + "combine": { + "p50": 80.32000064849854, + "p90": 82.0159986615181, + "p95": 82.84799754619598, + "p99": 88.83199840784073 + }, + 
"roundtrip": { + "p50": 151.39199793338776, + "p90": 158.91200304031372, + "p95": 161.31199896335602, + "p99": 169.44000124931335 + }, + "isolatedSum": { + "p50": 179.6479970216751, + "p90": 187.8720000386238, + "p95": 191.00799411535263, + "p99": 204.47999984025955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.4319976568222, + "p90": 107.26399719715118, + "p95": 109.02400314807892, + "p99": 114.33599889278412 + }, + "combine": { + "p50": 81.50400221347809, + "p90": 87.67999708652496, + "p95": 88.28800171613693, + "p99": 89.9839997291565 + }, + "roundtrip": { + "p50": 153.02400290966034, + "p90": 160.67199409008026, + "p95": 165.95199704170227, + "p99": 467.51999855041504 + }, + "isolatedSum": { + "p50": 183.9359998703003, + "p90": 194.94399428367615, + "p95": 197.31200486421585, + "p99": 204.3199986219406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db8ec206", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_6f8dc9ef", + "comparisonKey": "763a7b63f1b4a882", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:27.400696+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.47199714183807, + "p90": 106.23999685049057, + "p95": 107.87200182676315, + "p99": 113.50400000810623 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 74.68800246715546, + "p95": 80.6720033288002, + "p99": 353.983998298645 + }, + "roundtrip": { + "p50": 144.67200636863708, + "p90": 152.63999998569489, + "p95": 154.14400398731232, + "p99": 156.92800283432007 + }, + "isolatedSum": { + "p50": 169.53599452972412, + "p90": 180.92799931764603, + "p95": 188.54400515556335, + "p99": 
467.48799830675125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.77600264549255, + "p90": 102.30399668216705, + "p95": 104.38399761915207, + "p99": 107.39199817180634 + }, + "combine": { + "p50": 65.47199934720993, + "p90": 73.88799637556076, + "p95": 74.36800003051758, + "p99": 78.68800312280655 + }, + "roundtrip": { + "p50": 123.1359988451004, + "p90": 149.3760049343109, + "p95": 152.48000621795654, + "p99": 154.36799824237823 + }, + "isolatedSum": { + "p50": 137.24800199270248, + "p90": 176.1919930577278, + "p95": 178.75199764966965, + "p99": 186.08000129461288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.08799773454666, + "p90": 102.30399668216705, + "p95": 104.47999835014343, + "p99": 107.4879989027977 + }, + "combine": { + "p50": 65.92000275850296, + "p90": 78.3040001988411, + "p95": 78.68800312280655, + "p99": 79.1039988398552 + }, + "roundtrip": { + "p50": 123.4240010380745, + "p90": 151.45599842071533, + "p95": 153.53600680828094, + "p99": 167.67999529838562 + }, + "isolatedSum": { + "p50": 139.00800049304962, + "p90": 180.60799688100815, + "p95": 183.16800147294998, + "p99": 186.5919977426529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.39999675750732, + "p90": 104.70400005578995, + "p95": 
106.75200074911118, + "p99": 112.41599917411804 + }, + "combine": { + "p50": 66.0799965262413, + "p90": 78.78399640321732, + "p95": 79.64800298213959, + "p99": 80.92799782752991 + }, + "roundtrip": { + "p50": 121.98399752378464, + "p90": 154.59200739860535, + "p95": 156.2879979610443, + "p99": 159.2639982700348 + }, + "isolatedSum": { + "p50": 164.47999328374863, + "p90": 183.48799645900726, + "p95": 186.40000373125076, + "p99": 193.34399700164795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.08000177145004, + "p90": 104.06400263309479, + "p95": 106.23999685049057, + "p99": 112.03200370073318 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 81.85599744319916, + "p95": 82.17599987983704, + "p99": 86.14400029182434 + }, + "roundtrip": { + "p50": 148.0959951877594, + "p90": 154.11199629306793, + "p95": 156.12800419330597, + "p99": 160.5760008096695 + }, + "isolatedSum": { + "p50": 176.03199928998947, + "p90": 185.92000007629395, + "p95": 188.4159967303276, + "p99": 198.17600399255753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.28800106048584, + "p90": 102.04800218343735, + "p95": 104.47999835014343, + "p99": 110.59200018644333 + }, + "combine": { + "p50": 70.97599655389786, + "p90": 80.79999685287476, + "p95": 81.40800148248672, + "p99": 82.17599987983704 + }, + "roundtrip": { + "p50": 125.76000392436981, + "p90": 153.1520038843155, + "p95": 156.15999698638916, + "p99": 161.43999993801117 + }, + "isolatedSum": { + "p50": 155.2639976143837, + 
"p90": 182.8479990363121, + "p95": 185.88799983263016, + "p99": 192.76800006628036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 83.80799740552902, + "p90": 104.35199737548828, + "p95": 106.175996363163, + "p99": 110.46399921178818 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 79.6160027384758, + "p95": 86.65599673986435, + "p99": 87.52000331878662 + }, + "roundtrip": { + "p50": 133.37600231170654, + "p90": 158.9760035276413, + "p95": 163.2319986820221, + "p99": 167.4560010433197 + }, + "isolatedSum": { + "p50": 157.53600001335144, + "p90": 183.96800011396408, + "p95": 192.83199310302734, + "p99": 197.9840025305748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.92800045013428, + "p90": 116.70400202274323, + "p95": 118.78400295972824, + "p99": 124.22399967908859 + }, + "combine": { + "p50": 87.55200356245041, + "p90": 95.45599669218063, + "p95": 96.00000083446503, + "p99": 103.16800326108932 + }, + "roundtrip": { + "p50": 160.64000129699707, + "p90": 175.74399709701538, + "p95": 178.0479997396469, + "p99": 181.08800053596497 + }, + "isolatedSum": { + "p50": 184.4800040125847, + "p90": 212.15999871492386, + "p95": 214.78400379419327, + "p99": 227.39200294017792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-40dbb252", + "identity": 
"h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_332d5fd7", + "comparisonKey": "0a75d092fab001a7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:07.499051+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.10400211811066, + "p90": 104.99200224876404, + "p95": 107.4879989027977, + "p99": 119.29599940776825 + }, + "combine": { + "p50": 80.48000186681747, + "p90": 87.55200356245041, + "p95": 88.16000074148178, + "p99": 90.08000046014786 + }, + "roundtrip": { + "p50": 155.10399639606476, + "p90": 161.56800091266632, + "p95": 163.00800442695618, + "p99": 167.29600727558136 + }, + "isolatedSum": { + "p50": 179.58400398492813, + "p90": 192.54400581121445, + "p95": 195.64799964427948, + "p99": 209.3759998679161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.07200253009796, + "p90": 106.36799782514572, + "p95": 108.89600217342377, + "p99": 115.1999980211258 + }, + "combine": { + "p50": 89.47200328111649, + "p90": 96.0640013217926, + "p95": 96.57599776983261, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 164.0319973230362, + "p90": 170.27199268341064, + "p95": 171.7119961977005, + "p99": 176.7680048942566 + }, + "isolatedSum": { + "p50": 192.54400581121445, + "p90": 202.43199914693832, + "p95": 205.47199994325638, + "p99": 212.67199516296387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 134.5600038766861, + "p90": 143.48800480365753, + "p95": 146.84799313545227, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 128.89599800109863, + "p90": 130.78400492668152, + "p95": 131.32800161838531, + "p99": 
137.7280056476593 + }, + "roundtrip": { + "p50": 231.26399517059326, + "p90": 236.76800727844238, + "p95": 238.62400650978088, + "p99": 242.5599992275238 + }, + "isolatedSum": { + "p50": 263.45600187778473, + "p90": 274.27200973033905, + "p95": 278.1759947538376, + "p99": 291.74400866031647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2009aae", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h100_82f56d39", + "comparisonKey": "98f809d281bea1ee", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:23.893613+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", 
+ "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.08000177145004, + "p90": 101.72799974679947, + "p95": 104.06400263309479, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 82.14399963617325, + "p95": 87.3280018568039, + "p99": 89.12000060081482 + }, + "roundtrip": { + "p50": 152.76800096035004, + "p90": 158.81599485874176, + "p95": 160.41600704193115, + "p99": 165.66400229930878 + }, + "isolatedSum": { + "p50": 177.60000377893448, + "p90": 183.87199938297272, + "p95": 191.39200448989868, + "p99": 197.27999716997147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.0640019774437, + "p90": 105.18400371074677, + "p95": 107.39199817180634, + "p99": 116.41599982976913 + }, + "combine": { + "p50": 88.73599767684937, + "p90": 95.04000097513199, + "p95": 95.77599912881851, + "p99": 98.30400347709656 + }, + "roundtrip": { + "p50": 162.11199760437012, + "p90": 168.2240068912506, + "p95": 169.69600319862366, + "p99": 176.28799378871918 + }, + "isolatedSum": { + "p50": 188.79999965429306, + "p90": 200.22400468587875, + "p95": 
203.16799730062485, + "p99": 214.7200033068657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.60800325870514, + "p90": 137.95199990272522, + "p95": 140.00000059604645, + "p99": 143.8080072402954 + }, + "combine": { + "p50": 128.48000228405, + "p90": 130.20800054073334, + "p95": 130.5920034646988, + "p99": 136.4479959011078 + }, + "roundtrip": { + "p50": 229.69600558280945, + "p90": 236.67199909687042, + "p95": 239.1040027141571, + "p99": 244.4480061531067 + }, + "isolatedSum": { + "p50": 257.0880055427551, + "p90": 268.16000044345856, + "p95": 270.59200406074524, + "p99": 280.2560031414032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fa79f7eb", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_7ff56880", + "comparisonKey": "3a630dc32f44877b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:40.196315+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 
8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.43999689817429, + "p90": 103.64799946546555, + "p95": 107.39199817180634, + "p99": 168.99199783802032 + }, + "combine": { + "p50": 80.19199967384338, + "p90": 82.24000036716461, + "p95": 87.8399983048439, + "p99": 169.18399930000305 + }, + "roundtrip": { + "p50": 152.67199277877808, + "p90": 160.41600704193115, + "p95": 161.5999937057495, + "p99": 166.52800142765045 + }, + "isolatedSum": { + "p50": 177.63199657201767, + "p90": 185.88799983263016, + "p95": 195.23199647665024, + "p99": 338.1759971380234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.89600086212158, + "p90": 106.55999928712845, + "p95": 108.99200290441513, + "p99": 124.38400089740753 + }, + "combine": { + "p50": 89.12000060081482, + "p90": 90.4960036277771, + "p95": 95.36000341176987, + "p99": 97.21600264310837 + }, + "roundtrip": { + "p50": 166.46400094032288, + "p90": 170.30400037765503, + "p95": 172.95999825000763, + "p99": 179.71199750900269 + }, + "isolatedSum": { + "p50": 190.0160014629364, + "p90": 197.05600291490555, + "p95": 204.352006316185, + "p99": 221.6000035405159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.0799995660782, + "p90": 137.02400028705597, + "p95": 141.92000031471252, + "p99": 196.06399536132812 + }, + "combine": { + "p50": 124.09599870443344, + "p90": 131.16799294948578, + "p95": 132.32000172138214, + "p99": 136.86400651931763 + }, + "roundtrip": { + "p50": 227.84000635147095, + "p90": 235.3920042514801, + "p95": 237.2799962759018, + "p99": 246.33599817752838 + }, + "isolatedSum": { + "p50": 254.17599827051163, + "p90": 268.19199323654175, + "p95": 274.24000203609467, + "p99": 332.92800188064575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a943cb2e", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_80f56a13", + "comparisonKey": "5f1c41799ec04158", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T10:57:56.355103+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.120001912117, + "p90": 103.93600165843964, + "p95": 107.16799646615982, + "p99": 121.50400131940842 + }, + 
"combine": { + "p50": 80.60800284147263, + "p90": 86.87999844551086, + "p95": 87.90399879217148, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 152.99199521541595, + "p90": 160.09600460529327, + "p95": 161.40800714492798, + "p99": 167.29600727558136 + }, + "isolatedSum": { + "p50": 177.72800475358963, + "p90": 190.8160001039505, + "p95": 195.0719952583313, + "p99": 211.71200275421143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 103.13600301742554, + "p90": 108.44799876213074, + "p95": 110.04800349473953, + "p99": 116.38399958610535 + }, + "combine": { + "p50": 89.4400030374527, + "p90": 95.23200243711472, + "p95": 96.00000083446503, + "p99": 97.88800030946732 + }, + "roundtrip": { + "p50": 161.79199516773224, + "p90": 169.0559983253479, + "p95": 170.6240028142929, + "p99": 174.30399358272552 + }, + "isolatedSum": { + "p50": 192.57600605487823, + "p90": 203.68000119924545, + "p95": 206.04800432920456, + "p99": 214.27199989557266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.66400516033173, + "p90": 146.2080031633377, + "p95": 149.63200688362122, + "p99": 151.7760008573532 + }, + "combine": { + "p50": 128.22400033473969, + "p90": 130.65600395202637, + "p95": 131.16799294948578, + "p99": 133.82400572299957 + }, + "roundtrip": { + "p50": 230.04800081253052, + "p90": 237.2799962759018, + "p95": 239.1359955072403, + "p99": 240.89600145816803 + }, + "isolatedSum": { + "p50": 265.8880054950714, + "p90": 276.8640071153641, + "p95": 
280.799999833107, + "p99": 285.6000065803528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-733dd319", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_4d411159", + "comparisonKey": "1ae8c3ecd2fe9713", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:58.225278+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 92.86399930715561, + "p90": 102.52799838781357, + "p95": 105.15200346708298, + "p99": 116.73600226640701 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 98.11200201511383, + "p95": 103.29599678516388, + "p99": 106.33599758148193 + }, + "roundtrip": { + "p50": 142.39999651908875, + "p90": 152.8960019350052, + "p95": 155.16799688339233, + "p99": 158.4320068359375 + }, + "isolatedSum": { + "p50": 166.59200191497803, + "p90": 200.6400004029274, + "p95": 208.44800025224686, + "p99": 223.07199984788895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.36799937486649, + "p90": 101.88800096511841, + "p95": 103.00800204277039, + "p99": 110.04800349473953 + }, + "combine": { + "p50": 72.60800153017044, + "p90": 81.24800026416779, + "p95": 81.79199695587158, + "p99": 83.3280012011528 + }, + "roundtrip": { + "p50": 128.4479945898056, + "p90": 151.64799988269806, + "p95": 153.76000106334686, + "p99": 160.47999262809753 + }, + "isolatedSum": { + "p50": 142.97600090503693, + "p90": 183.1360012292862, + "p95": 184.79999899864197, + "p99": 193.37600469589233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.77599912881851, + "p90": 100.63999891281128, + "p95": 102.39999741315842, + "p99": 110.07999628782272 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 80.28800040483475, + "p95": 83.10399949550629, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 147.0080018043518, + "p90": 156.54399991035461, + "p95": 159.42400693893433, + "p99": 164.2560064792633 + }, + "isolatedSum": { + "p50": 169.72799599170685, + "p90": 180.92799931764603, + "p95": 185.5039969086647, + "p99": 197.88799434900284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.40000355243683, + "p90": 101.1200025677681, + "p95": 103.45599800348282, + "p99": 111.80800199508667 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 81.85599744319916, + "p95": 82.62400329113007, + "p99": 89.08800035715103 + }, + "roundtrip": { + "p50": 146.4959979057312, + "p90": 156.47999942302704, + "p95": 158.9439958333969, + "p99": 166.20799899101257 + }, + "isolatedSum": { + "p50": 168.38400065898895, + "p90": 182.97600001096725, + "p95": 186.08000129461288, + "p99": 200.8960023522377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 92.3520028591156, + "p90": 100.19200295209885, + "p95": 104.16000336408615, + "p99": 114.30399864912033 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 88.3840024471283, + "p95": 89.28000181913376, + "p99": 90.81599861383438 + }, + "roundtrip": { + "p50": 
148.00000190734863, + "p90": 160.73599457740784, + "p95": 162.78399527072906, + "p99": 167.77600347995758 + }, + "isolatedSum": { + "p50": 173.95200580358505, + "p90": 188.57600539922714, + "p95": 193.4400051832199, + "p99": 205.1199972629547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.2400016784668, + "p90": 104.54399883747101, + "p95": 108.22399705648422, + "p99": 113.3119985461235 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 90.2400016784668, + "p95": 94.14400160312653, + "p99": 97.6639986038208 + }, + "roundtrip": { + "p50": 145.47200500965118, + "p90": 167.52000153064728, + "p95": 170.20800709724426, + "p99": 174.55999553203583 + }, + "isolatedSum": { + "p50": 171.90400511026382, + "p90": 194.7840005159378, + "p95": 202.36799865961075, + "p99": 210.9759971499443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 109.24799740314484, + "p90": 120.7680031657219, + "p95": 122.43200093507767, + "p99": 128.80000472068787 + }, + "combine": { + "p50": 96.03200107812881, + "p90": 104.09600287675858, + "p95": 104.38399761915207, + "p99": 105.66399991512299 + }, + "roundtrip": { + "p50": 179.26399409770966, + "p90": 189.15200233459473, + "p95": 192.32000410556793, + "p99": 195.19999623298645 + }, + "isolatedSum": { + "p50": 205.27999848127365, + "p90": 224.86400604248047, + "p95": 226.81599855422974, + "p99": 234.46400463581085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + 
"fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.76799637079239, + "p90": 144.3839967250824, + "p95": 145.75999975204468, + "p99": 147.8399932384491 + }, + "combine": { + "p50": 121.11999839544296, + "p90": 128.7039965391159, + "p95": 128.92800569534302, + "p99": 136.1279934644699 + }, + "roundtrip": { + "p50": 221.24800086021423, + "p90": 238.94399404525757, + "p95": 242.17599630355835, + "p99": 245.66400051116943 + }, + "isolatedSum": { + "p50": 245.88799476623535, + "p90": 273.0879932641983, + "p95": 274.6880054473877, + "p99": 283.967986702919 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0dd11ec2", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h100_d237e055", + "comparisonKey": "1d9f1681fa6bf5ae", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:26.382225+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.91200351715088, + "p90": 75.96799731254578, + "p95": 79.29600030183792, + "p99": 86.2400010228157 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 73.02399724721909, + "p95": 73.47200065851212, + "p99": 77.88799703121185 + }, + "roundtrip": { + "p50": 127.03999876976013, + "p90": 130.78400492668152, + "p95": 132.6719969511032, + "p99": 141.37600362300873 + }, + "isolatedSum": { + "p50": 141.9840008020401, + "p90": 148.99199455976486, + "p95": 152.76800096035004, + "p99": 164.12799805402756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
71.03999704122543, + "p90": 78.5600021481514, + "p95": 79.96799796819687, + "p99": 87.23200112581253 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 73.11999797821045, + "p95": 73.63200187683105, + "p99": 75.42400062084198 + }, + "roundtrip": { + "p50": 126.5919953584671, + "p90": 131.29599392414093, + "p95": 132.86399841308594, + "p99": 139.96799290180206 + }, + "isolatedSum": { + "p50": 142.4959972500801, + "p90": 151.68000012636185, + "p95": 153.59999984502792, + "p99": 162.6560017466545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.43200051784515, + "p90": 80.54400235414505, + "p95": 84.19200032949448, + "p99": 93.85599941015244 + }, + "combine": { + "p50": 72.83200323581696, + "p90": 73.88799637556076, + "p95": 75.1039981842041, + "p99": 80.6720033288002 + }, + "roundtrip": { + "p50": 129.08799946308136, + "p90": 134.8479986190796, + "p95": 136.4160031080246, + "p99": 142.33599603176117 + }, + "isolatedSum": { + "p50": 147.2640037536621, + "p90": 154.4319987297058, + "p95": 159.29599851369858, + "p99": 174.52800273895264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.48800045251846, + "p90": 78.78399640321732, + "p95": 79.77599650621414, + "p99": 87.16800063848495 + }, + "combine": { + "p50": 72.92799651622772, + "p90": 73.91999661922455, + "p95": 75.23199915885925, + "p99": 80.79999685287476 + }, + "roundtrip": { + "p50": 127.13600695133209, + "p90": 132.25600123405457, + "p95": 134.783998131752, + "p99": 141.12000167369843 + }, + 
"isolatedSum": { + "p50": 144.41599696874619, + "p90": 152.70399302244186, + "p95": 155.0079956650734, + "p99": 167.9679974913597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.73599636554718, + "p90": 83.71199667453766, + "p95": 85.7279971241951, + "p99": 90.84799885749817 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 78.75200361013412, + "p95": 79.64800298213959, + "p99": 81.98399841785431 + }, + "roundtrip": { + "p50": 129.15199995040894, + "p90": 135.71199774742126, + "p95": 137.08800077438354, + "p99": 141.40799641609192 + }, + "isolatedSum": { + "p50": 154.4319987297058, + "p90": 162.46400028467178, + "p95": 165.3760001063347, + "p99": 172.83199727535248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.40800279378891, + "p90": 92.06400066614151, + "p95": 93.24800223112106, + "p99": 98.78399968147278 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 82.07999914884567, + "p95": 82.65600353479385, + "p99": 89.02399986982346 + }, + "roundtrip": { + "p50": 140.1599943637848, + "p90": 144.99199390411377, + "p95": 145.85599303245544, + "p99": 149.1200029850006 + }, + "isolatedSum": { + "p50": 170.3680008649826, + "p90": 174.14399981498718, + "p95": 175.90400576591492, + "p99": 187.80799955129623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.30400347709656, + "p90": 114.78400230407715, + "p95": 117.0559972524643, + "p99": 122.56000190973282 + }, + "combine": { + "p50": 90.08000046014786, + "p90": 97.79199957847595, + "p95": 98.43199700117111, + "p99": 103.04000228643417 + }, + "roundtrip": { + "p50": 162.4000072479248, + "p90": 166.49599373340607, + "p95": 168.03200542926788, + "p99": 172.19200730323792 + }, + "isolatedSum": { + "p50": 188.38400393724442, + "p90": 212.5760018825531, + "p95": 215.4879942536354, + "p99": 225.600004196167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.92800307273865, + "p90": 117.0559972524643, + "p95": 119.07199770212173, + "p99": 122.5920021533966 + }, + "combine": { + "p50": 107.07200318574905, + "p90": 113.43999952077866, + "p95": 114.01599645614624, + "p99": 115.42399972677231 + }, + "roundtrip": { + "p50": 197.6960003376007, + "p90": 202.87999510765076, + "p95": 203.99999618530273, + "p99": 206.52799308300018 + }, + "isolatedSum": { + "p50": 220.0000062584877, + "p90": 230.49599677324295, + "p95": 233.08799415826797, + "p99": 238.01600188016891 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9526e505", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h100_0bc8ea1c", + "comparisonKey": "e77c4d8e671d77e5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:35.962080+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 97.88800030946732, + "p90": 101.69599950313568, + "p95": 103.84000092744827, + "p99": 107.26399719715118 + }, + "combine": { + "p50": 
81.53600245714188, + "p90": 83.67999643087387, + "p95": 87.3280018568039, + "p99": 89.56799656152725 + }, + "roundtrip": { + "p50": 151.8400013446808, + "p90": 159.96800363063812, + "p95": 161.43999993801117, + "p99": 169.0559983253479 + }, + "isolatedSum": { + "p50": 179.4240027666092, + "p90": 185.37599593400955, + "p95": 191.16800278425217, + "p99": 196.83199375867844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 102.33599692583084, + "p90": 106.08000308275223, + "p95": 108.83200168609619, + "p99": 112.70400136709213 + }, + "combine": { + "p50": 89.34400230646133, + "p90": 91.00800007581711, + "p95": 91.61599725484848, + "p99": 95.87199985980988 + }, + "roundtrip": { + "p50": 160.96000373363495, + "p90": 167.77600347995758, + "p95": 169.63200271129608, + "p99": 179.77599799633026 + }, + "isolatedSum": { + "p50": 191.67999923229218, + "p90": 197.08800315856934, + "p95": 200.44799894094467, + "p99": 208.576001226902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 128.38399410247803, + "p90": 136.28800213336945, + "p95": 138.0160003900528, + "p99": 142.2719955444336 + }, + "combine": { + "p50": 119.07199770212173, + "p90": 122.94399738311768, + "p95": 123.36000055074692, + "p99": 125.66399574279785 + }, + "roundtrip": { + "p50": 216.73600375652313, + "p90": 221.37600183486938, + "p95": 222.9440063238144, + "p99": 228.4799963235855 + }, + "isolatedSum": { + "p50": 247.45599180459976, + "p90": 259.2319995164871, + "p95": 
261.3760009407997, + "p99": 267.93599128723145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7560716b", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h100_8485f4d7", + "comparisonKey": "64676a157499a6b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:19.297379+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, 
+ "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.6639986038208, + "p90": 102.36799716949463, + "p95": 104.12800312042236, + "p99": 109.69600081443787 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 87.0399996638298, + "p95": 87.5839963555336, + "p99": 88.51200342178345 + }, + "roundtrip": { + "p50": 153.34400534629822, + "p90": 160.64000129699707, + "p95": 162.1440052986145, + "p99": 164.67200219631195 + }, + "isolatedSum": { + "p50": 177.37600207328796, + "p90": 189.40799683332443, + "p95": 191.71199947595596, + "p99": 198.2080042362213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.75199943780899, + "p90": 104.86400127410889, + "p95": 107.07200318574905, + "p99": 114.94400352239609 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 90.01599997282028, + "p95": 95.16800194978714, + "p99": 96.19200229644775 + }, + "roundtrip": { + "p50": 160.288006067276, + "p90": 167.1680063009262, + "p95": 168.73599588871002, + "p99": 173.6000031232834 + }, + "isolatedSum": { + "p50": 186.52799725532532, + "p90": 194.88000124692917, + "p95": 202.2400051355362, + "p99": 211.13600581884384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.9599984884262, + "p90": 135.903999209404, + "p95": 137.15200126171112, + "p99": 140.1280015707016 + }, + "combine": { + "p50": 119.4240003824234, + "p90": 121.63200229406357, + "p95": 122.8799968957901, + "p99": 128.92800569534302 + }, + "roundtrip": { + "p50": 208.19200575351715, + "p90": 218.6560034751892, + "p95": 221.98399901390076, + "p99": 229.76000607013702 + }, + "isolatedSum": { + "p50": 248.3839988708496, + "p90": 257.53600150346756, + "p95": 260.0319981575012, + "p99": 269.0560072660446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ce45bc27", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_1687ef91", + "comparisonKey": "0140e2b0747f0227", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:42.800346+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": 
"none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.81600278615952, + "p90": 103.58399897813797, + "p95": 104.86400127410889, + "p99": 113.08799684047699 + }, + "combine": { + "p50": 70.75200229883194, + "p90": 80.9599980711937, + "p95": 81.66400343179703, + "p99": 82.30400085449219 + }, + "roundtrip": { + "p50": 125.2480000257492, + "p90": 147.32800424098969, + "p95": 150.4960060119629, + "p99": 154.11199629306793 + }, + "isolatedSum": { + "p50": 141.56800508499146, + "p90": 184.54399704933167, + "p95": 186.52800470590591, + "p99": 195.39199769496918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.15999811887741, + "p90": 99.07200187444687, + "p95": 100.09600222110748, + "p99": 103.84000092744827 + }, + "combine": { + "p50": 72.31999933719635, + 
"p90": 78.75200361013412, + "p95": 79.39200103282928, + "p99": 80.19199967384338 + }, + "roundtrip": { + "p50": 128.83199751377106, + "p90": 151.74399316310883, + "p95": 153.3759981393814, + "p99": 155.64799308776855 + }, + "isolatedSum": { + "p50": 144.47999745607376, + "p90": 177.824005484581, + "p95": 179.48800325393677, + "p99": 184.03200060129166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.64000177383423, + "p90": 101.50399804115295, + "p95": 103.39199751615524, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 72.35199958086014, + "p90": 81.34400099515915, + "p95": 82.56000280380249, + "p99": 87.80799806118011 + }, + "roundtrip": { + "p50": 129.7599971294403, + "p90": 159.64800119400024, + "p95": 161.56800091266632, + "p99": 164.89599645137787 + }, + "isolatedSum": { + "p50": 144.99200135469437, + "p90": 182.8479990363121, + "p95": 185.95200031995773, + "p99": 195.77600061893463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.45600086450577, + "p90": 99.45599734783173, + "p95": 102.4319976568222, + "p99": 106.4319983124733 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 81.7599967122078, + "p95": 82.33600109815598, + "p99": 87.67999708652496 + }, + "roundtrip": { + "p50": 130.17599284648895, + "p90": 154.81600165367126, + "p95": 157.75999426841736, + "p99": 163.96799683570862 + }, + "isolatedSum": { + "p50": 148.70399981737137, + "p90": 181.21599406003952, + "p95": 184.76799875497818, + "p99": 194.11199539899826 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.20000022649765, + "p90": 100.80000013113022, + "p95": 105.6319996714592, + "p99": 166.4319932460785 + }, + "combine": { + "p50": 74.07999783754349, + "p90": 87.77599781751633, + "p95": 88.70399743318558, + "p99": 90.08000046014786 + }, + "roundtrip": { + "p50": 130.048006772995, + "p90": 161.18399798870087, + "p95": 164.86400365829468, + "p99": 446.5920031070709 + }, + "isolatedSum": { + "p50": 157.27999806404114, + "p90": 188.57599794864655, + "p95": 194.33599710464478, + "p99": 256.51199370622635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.94399958848953, + "p90": 104.2879968881607, + "p95": 105.3759977221489, + "p99": 109.43999886512756 + }, + "combine": { + "p50": 80.99199831485748, + "p90": 89.9839997291565, + "p95": 90.7839983701706, + "p99": 95.93600034713745 + }, + "roundtrip": { + "p50": 140.83200693130493, + "p90": 167.58400201797485, + "p95": 169.53599452972412, + "p99": 171.2000072002411 + }, + "isolatedSum": { + "p50": 171.93599790334702, + "p90": 194.2719966173172, + "p95": 196.1599960923195, + "p99": 205.37599921226501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.551997423172, + "p90": 115.32799899578094, + "p95": 
117.47200042009354, + "p99": 152.22400426864624 + }, + "combine": { + "p50": 90.11200070381165, + "p90": 102.68799960613251, + "p95": 103.80800068378448, + "p99": 127.13600695133209 + }, + "roundtrip": { + "p50": 163.90399634838104, + "p90": 179.967999458313, + "p95": 184.9920004606247, + "p99": 445.6639885902405 + }, + "isolatedSum": { + "p50": 185.66399812698364, + "p90": 218.01599860191345, + "p95": 221.28000110387802, + "p99": 279.36001121997833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.2559979557991, + "p90": 132.83200562000275, + "p95": 136.1279934644699, + "p99": 204.3839991092682 + }, + "combine": { + "p50": 106.52799904346466, + "p90": 119.6800023317337, + "p95": 120.28799951076508, + "p99": 122.079998254776 + }, + "roundtrip": { + "p50": 198.2720047235489, + "p90": 216.8319970369339, + "p95": 218.9439982175827, + "p99": 226.3679951429367 + }, + "isolatedSum": { + "p50": 218.78399699926376, + "p90": 252.51200795173645, + "p95": 256.415992975235, + "p99": 326.4639973640442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-64374365", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_63732aec", + "comparisonKey": "66b64ed5b1251ceb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:30.762247+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", 
+ "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.30400282144547, + "p90": 102.52799838781357, + "p95": 105.24799674749374, + "p99": 109.18399691581726 + }, + "combine": { + "p50": 73.82400333881378, + "p90": 81.34400099515915, + "p95": 81.727996468544, + "p99": 82.2720006108284 + }, + "roundtrip": { + "p50": 146.94400131702423, + "p90": 154.23999726772308, + "p95": 155.58399260044098, + "p99": 
161.02400422096252 + }, + "isolatedSum": { + "p50": 168.12800616025925, + "p90": 183.87199938297272, + "p95": 186.97599321603775, + "p99": 191.45599752664566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.94399631023407, + "p90": 101.34399682283401, + "p95": 102.75200009346008, + "p99": 108.38399827480316 + }, + "combine": { + "p50": 71.84000313282013, + "p90": 80.48000186681747, + "p95": 81.08799904584885, + "p99": 81.79199695587158 + }, + "roundtrip": { + "p50": 127.3919939994812, + "p90": 147.93600142002106, + "p95": 149.6960073709488, + "p99": 154.78399395942688 + }, + "isolatedSum": { + "p50": 142.7839994430542, + "p90": 181.8239986896515, + "p95": 183.83999913930893, + "p99": 190.17599523067474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 94.87999975681305, + "p90": 100.92800110578537, + "p95": 102.36799716949463, + "p99": 106.84800148010254 + }, + "combine": { + "p50": 75.93599706888199, + "p90": 80.86399734020233, + "p95": 81.24800026416779, + "p99": 83.36000144481659 + }, + "roundtrip": { + "p50": 142.36800372600555, + "p90": 152.92799472808838, + "p95": 155.07200360298157, + "p99": 159.29600596427917 + }, + "isolatedSum": { + "p50": 170.81599682569504, + "p90": 181.7919984459877, + "p95": 183.61599743366241, + "p99": 190.20800292491913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.55999732017517, + "p90": 101.3759970664978, + "p95": 103.00800204277039, + "p99": 108.38399827480316 + }, + "combine": { + "p50": 76.1599987745285, + "p90": 81.18399977684021, + "p95": 81.53600245714188, + "p99": 82.5280025601387 + }, + "roundtrip": { + "p50": 147.2640037536621, + "p90": 153.31199765205383, + "p95": 154.94400262832642, + "p99": 158.62399339675903 + }, + "isolatedSum": { + "p50": 170.71999609470367, + "p90": 182.559996843338, + "p95": 184.54400449991226, + "p99": 190.91200083494186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 93.50399672985077, + "p90": 99.16800260543823, + "p95": 100.09600222110748, + "p99": 105.8880016207695 + }, + "combine": { + "p50": 80.60800284147263, + "p90": 81.85599744319916, + "p95": 82.56000280380249, + "p99": 88.48000317811966 + }, + "roundtrip": { + "p50": 148.99200201034546, + "p90": 155.45600652694702, + "p95": 157.98400342464447, + "p99": 163.39200735092163 + }, + "isolatedSum": { + "p50": 174.1119995713234, + "p90": 181.0240000486374, + "p95": 182.65600502490997, + "p99": 194.36800479888916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.0960002541542, + "p90": 100.35199671983719, + "p95": 102.01600193977356, + "p99": 106.08000308275223 + }, + "combine": { + "p50": 80.32000064849854, + "p90": 89.85599875450134, + "p95": 90.27200192213058, + "p99": 96.70399874448776 + }, + "roundtrip": { + "p50": 
142.04800128936768, + "p90": 160.70400178432465, + "p95": 162.7199947834015, + "p99": 167.00799763202667 + }, + "isolatedSum": { + "p50": 168.41600090265274, + "p90": 190.20799547433853, + "p95": 192.28800386190414, + "p99": 202.78400182724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.8880016207695, + "p90": 113.3119985461235, + "p95": 115.32799899578094, + "p99": 120.83200365304947 + }, + "combine": { + "p50": 95.07200121879578, + "p90": 98.1760025024414, + "p95": 99.67999905347824, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 174.6239960193634, + "p90": 183.84000658988953, + "p95": 186.27199530601501, + "p99": 189.60000574588776 + }, + "isolatedSum": { + "p50": 200.96000283956528, + "p90": 211.4880010485649, + "p95": 215.0079980492592, + "p99": 226.20800137519836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.85599720478058, + "p90": 131.71200454235077, + "p95": 132.9279989004135, + "p99": 136.25599443912506 + }, + "combine": { + "p50": 113.79200220108032, + "p90": 122.49600142240524, + "p95": 122.8799968957901, + "p99": 124.54400211572647 + }, + "roundtrip": { + "p50": 218.81599724292755, + "p90": 230.01599311828613, + "p95": 232.54400491714478, + "p99": 237.05600202083588 + }, + "isolatedSum": { + "p50": 239.6479994058609, + "p90": 254.208005964756, + "p95": 255.8079957962036, + "p99": 260.79999655485153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + 
"fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fd97267e", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_3c21fbbe", + "comparisonKey": "5c3ec411c06970e4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:14.625145+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + 
"run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 91.39200299978256, + "p90": 100.76799988746643, + "p95": 103.5199984908104, + "p99": 107.13600367307663 + }, + "combine": { + "p50": 66.97600334882736, + "p90": 73.72800260782242, + "p95": 74.0479975938797, + "p99": 78.5600021481514 + }, + "roundtrip": { + "p50": 139.16799426078796, + "p90": 150.33599734306335, + "p95": 152.22400426864624, + "p99": 154.33600544929504 + }, + "isolatedSum": { + "p50": 158.36800634860992, + "p90": 174.49600249528885, + "p95": 177.5679960846901, + "p99": 185.69600582122803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.50400024652481, + "p90": 93.88799965381622, + "p95": 98.08000177145004, + "p99": 106.30399733781815 + }, + "combine": { + "p50": 65.40799885988235, + "p90": 71.26399874687195, + "p95": 72.1919983625412, + "p99": 73.98399710655212 + }, + "roundtrip": { + "p50": 121.91999703645706, + "p90": 138.2720023393631, + "p95": 140.6719982624054, + "p99": 145.28000354766846 + }, + "isolatedSum": { + "p50": 134.91199910640717, + "p90": 165.15199840068817, + "p95": 170.27200013399124, + "p99": 180.28799444437027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.49600034952164, + "p90": 100.0640019774437, + "p95": 103.07200253009796, + "p99": 
108.8000014424324 + }, + "combine": { + "p50": 65.85600227117538, + "p90": 78.3040001988411, + "p95": 78.97599786520004, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 121.47200107574463, + "p90": 152.12799608707428, + "p95": 153.43999862670898, + "p99": 157.79200196266174 + }, + "isolatedSum": { + "p50": 136.35200262069702, + "p90": 178.3680021762848, + "p95": 182.048000395298, + "p99": 188.60799819231033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.96800059080124, + "p90": 103.93600165843964, + "p95": 105.27999699115753, + "p99": 108.19199681282043 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 79.58400249481201, + "p95": 80.73599636554718, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 141.63200557231903, + "p90": 149.79200065135956, + "p95": 151.8400013446808, + "p99": 155.7759940624237 + }, + "isolatedSum": { + "p50": 169.344000518322, + "p90": 183.52000415325165, + "p95": 186.0159933567047, + "p99": 190.11199474334717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 94.68799829483032, + "p90": 100.80000013113022, + "p95": 102.52799838781357, + "p99": 109.92000252008438 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 81.4720019698143, + "p95": 82.0159986615181, + "p99": 82.62400329113007 + }, + "roundtrip": { + "p50": 143.96800100803375, + "p90": 151.07199549674988, + "p95": 153.79199385643005, + "p99": 158.49600732326508 + }, + "isolatedSum": { + "p50": 169.24799978733063, + "p90": 182.27200210094452, + 
"p95": 184.54399704933167, + "p99": 192.54400581121445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.08799970149994, + "p90": 99.67999905347824, + "p95": 102.27199643850327, + "p99": 110.27199774980545 + }, + "combine": { + "p50": 74.17599856853485, + "p90": 87.16800063848495, + "p95": 87.8399983048439, + "p99": 89.79199826717377 + }, + "roundtrip": { + "p50": 138.46400380134583, + "p90": 159.45599973201752, + "p95": 160.70400178432465, + "p99": 162.9440039396286 + }, + "isolatedSum": { + "p50": 159.2639982700348, + "p90": 186.8479996919632, + "p95": 190.11199474334717, + "p99": 200.06399601697922 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.16800260543823, + "p90": 113.6000007390976, + "p95": 115.32799899578094, + "p99": 121.76000326871872 + }, + "combine": { + "p50": 83.83999764919281, + "p90": 96.00000083446503, + "p95": 96.79999947547913, + "p99": 99.20000284910202 + }, + "roundtrip": { + "p50": 162.59199380874634, + "p90": 173.69599640369415, + "p95": 175.64800381660461, + "p99": 179.1680008172989 + }, + "isolatedSum": { + "p50": 183.00800025463104, + "p90": 209.60000157356262, + "p95": 212.12799847126007, + "p99": 220.96000611782074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 120.60800194740295, + "p90": 142.04800128936768, + "p95": 144.1279947757721, + "p99": 146.2080031633377 + }, + "combine": { + "p50": 111.26399785280228, + "p90": 120.44800072908401, + "p95": 120.86399644613266, + "p99": 123.10399860143661 + }, + "roundtrip": { + "p50": 209.6640020608902, + "p90": 223.1999933719635, + "p95": 225.53600370883942, + "p99": 228.03199291229248 + }, + "isolatedSum": { + "p50": 231.87199980020523, + "p90": 262.4960020184517, + "p95": 264.99199122190475, + "p99": 269.3120017647743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1f1ec066", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_905c5730", + "comparisonKey": "90698aa63f79f256", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:43.549713+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, 
+ "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 67.87200272083282, + "p90": 74.11199808120728, + "p95": 76.38400048017502, + "p99": 84.19200032949448 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 72.80000299215317, + "p95": 73.27999919652939, + "p99": 75.13599842786789 + }, + "roundtrip": { + "p50": 123.6800029873848, + "p90": 128.25599312782288, + "p95": 130.36799430847168, + "p99": 137.37599551677704 + }, + "isolatedSum": { + "p50": 139.20000195503235, + "p90": 146.91200107336044, + "p95": 149.6639996767044, + "p99": 159.32799875736237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.06399673223495, + "p90": 74.23999905586243, + "p95": 76.73600316047668, + "p99": 82.24000036716461 + }, + "combine": { + "p50": 71.48800045251846, + "p90": 72.92799651622772, + "p95": 73.56800138950348, + "p99": 
79.71200346946716 + }, + "roundtrip": { + "p50": 125.47199428081512, + "p90": 130.048006772995, + "p95": 131.55199587345123, + "p99": 137.92000710964203 + }, + "isolatedSum": { + "p50": 139.55199718475342, + "p90": 147.16799557209015, + "p95": 150.30400454998016, + "p99": 161.95200383663177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.3919997215271, + "p90": 99.35999661684036, + "p95": 101.43999755382538, + "p99": 108.64000022411346 + }, + "combine": { + "p50": 73.31199944019318, + "p90": 81.44000172615051, + "p95": 87.39200234413147, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 129.4720023870468, + "p90": 156.8640023469925, + "p95": 160.25599837303162, + "p99": 163.80800306797028 + }, + "isolatedSum": { + "p50": 144.70399916172028, + "p90": 180.79999834299088, + "p95": 188.83199989795685, + "p99": 197.63199985027313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.99999690055847, + "p90": 98.52799773216248, + "p95": 103.35999727249146, + "p99": 228.5120040178299 + }, + "combine": { + "p50": 72.80000299215317, + "p90": 73.85600358247757, + "p95": 74.33599978685379, + "p99": 80.70400357246399 + }, + "roundtrip": { + "p50": 126.56000256538391, + "p90": 133.91999900341034, + "p95": 148.8959938287735, + "p99": 211.58400177955627 + }, + "isolatedSum": { + "p50": 144.79999989271164, + "p90": 172.38400131464005, + "p95": 177.69599705934525, + "p99": 309.2160075902939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + 
"combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.0719992518425, + "p90": 99.23200309276581, + "p95": 101.98400169610977, + "p99": 105.98400235176086 + }, + "combine": { + "p50": 73.91999661922455, + "p90": 88.86399865150452, + "p95": 89.59999680519104, + "p99": 90.62399715185165 + }, + "roundtrip": { + "p50": 127.80800461769104, + "p90": 159.58400070667267, + "p95": 160.8320027589798, + "p99": 165.56799411773682 + }, + "isolatedSum": { + "p50": 156.99199587106705, + "p90": 188.09600174427032, + "p95": 191.5839985013008, + "p99": 196.60799950361252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.75999802350998, + "p90": 104.99200224876404, + "p95": 106.6880002617836, + "p99": 110.17599701881409 + }, + "combine": { + "p50": 81.18399977684021, + "p90": 89.91999924182892, + "p95": 91.26400202512741, + "p99": 96.41599655151367 + }, + "roundtrip": { + "p50": 142.59199798107147, + "p90": 164.38399255275726, + "p95": 167.87199676036835, + "p99": 171.03999853134155 + }, + "isolatedSum": { + "p50": 170.9439978003502, + "p90": 194.91200149059296, + "p95": 197.952002286911, + "p99": 206.59199357032776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.25600278377533, + "p90": 116.7680025100708, + "p95": 119.90399658679962, + "p99": 126.81600451469421 + }, + 
"combine": { + "p50": 90.08000046014786, + "p90": 104.06400263309479, + "p95": 104.51199859380722, + "p99": 105.47199845314026 + }, + "roundtrip": { + "p50": 159.71200168132782, + "p90": 182.97599256038666, + "p95": 185.05600094795227, + "p99": 189.63199853897095 + }, + "isolatedSum": { + "p50": 186.3360032439232, + "p90": 220.8320051431656, + "p95": 224.41599518060684, + "p99": 232.28800296783447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.96800321340561, + "p90": 135.93600690364838, + "p95": 137.2479945421219, + "p99": 140.03199338912964 + }, + "combine": { + "p50": 106.59199953079224, + "p90": 120.03199756145477, + "p95": 120.83200365304947, + "p99": 122.46400117874146 + }, + "roundtrip": { + "p50": 196.03200256824493, + "p90": 216.86400473117828, + "p95": 219.10400688648224, + "p99": 223.1999933719635 + }, + "isolatedSum": { + "p50": 218.56000274419785, + "p90": 255.96800446510315, + "p95": 258.07999819517136, + "p99": 262.4959945678711 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8646da09", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_eaf2a101", + "comparisonKey": "ffb9ea15b5c42c2c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:47.782297+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": 
"tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.92800045013428, + "p90": 104.00000214576721, + "p95": 105.53599894046783, + "p99": 110.20799726247787 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 80.28800040483475, + "p95": 81.69600367546082, + "p99": 84.63999629020691 + }, + "roundtrip": { + "p50": 126.24000012874603, + "p90": 153.98399531841278, + "p95": 157.3760062456131, + "p99": 163.32800686359406 + 
}, + "isolatedSum": { + "p50": 168.5440018773079, + "p90": 184.28800255060196, + "p95": 187.23200261592865, + "p99": 194.84799355268478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.52000069618225, + "p90": 100.99200159311295, + "p95": 104.35199737548828, + "p99": 108.89600217342377 + }, + "combine": { + "p50": 72.64000177383423, + "p90": 81.88799768686295, + "p95": 85.31200140714645, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 129.66400384902954, + "p90": 156.80000185966492, + "p95": 159.32799875736237, + "p99": 162.1440052986145 + }, + "isolatedSum": { + "p50": 144.16000247001648, + "p90": 182.8799992799759, + "p95": 189.66399878263474, + "p99": 196.83200120925903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.75200229883194, + "p90": 101.18400305509567, + "p95": 104.89600151777267, + "p99": 115.167997777462 + }, + "combine": { + "p50": 72.12799787521362, + "p90": 79.74400371313095, + "p95": 83.67999643087387, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 127.42400169372559, + "p90": 157.6640009880066, + "p95": 159.04000401496887, + "p99": 165.12000560760498 + }, + "isolatedSum": { + "p50": 142.88000017404556, + "p90": 180.92800676822662, + "p95": 188.57599794864655, + "p99": 202.7519941329956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.47200065851212, + "p90": 99.96800124645233, + "p95": 102.11200267076492, + "p99": 111.16799712181091 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 81.98399841785431, + "p95": 87.07199990749359, + "p99": 88.76799792051315 + }, + "roundtrip": { + "p50": 129.43999469280243, + "p90": 158.52800011634827, + "p95": 160.5439931154251, + "p99": 164.22399878501892 + }, + "isolatedSum": { + "p50": 146.36800438165665, + "p90": 181.95199966430664, + "p95": 189.18400257825851, + "p99": 199.93599504232407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.53600245714188, + "p90": 99.71199929714203, + "p95": 101.9200012087822, + "p99": 106.78400099277496 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 87.42400258779526, + "p95": 87.96799927949905, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 129.50399518013, + "p90": 158.62399339675903, + "p95": 160.8320027589798, + "p99": 166.17600619792938 + }, + "isolatedSum": { + "p50": 155.4879993200302, + "p90": 187.1360018849373, + "p95": 189.88800048828125, + "p99": 196.8960016965866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.97599983215332, + "p90": 100.19200295209885, + "p95": 102.65599936246872, + "p99": 106.20799660682678 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 90.59199690818787, + "p95": 95.23200243711472, + "p99": 96.99200093746185 + }, + "roundtrip": { + "p50": 145.21600306034088, + "p90": 
164.19200599193573, + "p95": 167.55199432373047, + "p99": 174.75199699401855 + }, + "isolatedSum": { + "p50": 172.64000326395035, + "p90": 190.7839998602867, + "p95": 197.88800179958344, + "p99": 203.19999754428864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.05600208044052, + "p90": 118.40000003576279, + "p95": 120.19199877977371, + "p99": 123.58400225639343 + }, + "combine": { + "p50": 90.68799763917923, + "p90": 114.84800279140472, + "p95": 124.1919994354248, + "p99": 128.9599984884262 + }, + "roundtrip": { + "p50": 169.79199647903442, + "p90": 184.9599927663803, + "p95": 187.19999492168427, + "p99": 192.1280026435852 + }, + "isolatedSum": { + "p50": 191.74399971961975, + "p90": 233.2480028271675, + "p95": 244.38399821519852, + "p99": 252.54400074481964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.3360002040863, + "p90": 131.48799538612366, + "p95": 134.36800241470337, + "p99": 138.97599279880524 + }, + "combine": { + "p50": 115.48800021409988, + "p90": 128.86400520801544, + "p95": 130.78400492668152, + "p99": 138.46400380134583 + }, + "roundtrip": { + "p50": 216.0000056028366, + "p90": 234.1119945049286, + "p95": 236.35199666023254, + "p99": 239.29600417613983 + }, + "isolatedSum": { + "p50": 237.8240004181862, + "p90": 260.3520005941391, + "p95": 265.1520073413849, + "p99": 277.43999660015106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 
4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96c05c03", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_f649ca0d", + "comparisonKey": "1d713d9d08dfa1f6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:05.906002+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.55999732017517, + "p90": 102.4319976568222, + "p95": 104.54399883747101, + "p99": 108.09600353240967 + }, + "combine": { + "p50": 74.27199929952621, + "p90": 80.48000186681747, + "p95": 81.216000020504, + "p99": 82.11199939250946 + }, + "roundtrip": { + "p50": 143.0719941854477, + "p90": 151.36000514030457, + "p95": 153.6960005760193, + "p99": 158.07999670505524 + }, + "isolatedSum": { + "p50": 168.83199661970139, + "p90": 182.91199952363968, + "p95": 185.759998857975, + "p99": 190.20800292491913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.8480030298233, + "p90": 101.98400169610977, + "p95": 103.2319962978363, + "p99": 109.15199667215347 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 79.99999821186066, + "p95": 81.08799904584885, + "p99": 82.91199803352356 + }, + "roundtrip": { + "p50": 128.31999361515045, + "p90": 152.38399803638458, + "p95": 156.5759927034378, + "p99": 161.28000617027283 + }, + "isolatedSum": { + "p50": 142.91200041770935, + "p90": 181.98399990797043, + "p95": 184.31999534368515, + "p99": 192.06399470567703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 96.0640013217926, + "p90": 102.55999863147736, + 
"p95": 104.44799810647964, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 78.17599922418594, + "p90": 81.85599744319916, + "p95": 82.20800012350082, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 149.47199821472168, + "p90": 157.31200575828552, + "p95": 160.19199788570404, + "p99": 193.15199553966522 + }, + "isolatedSum": { + "p50": 174.24000054597855, + "p90": 184.4159960746765, + "p95": 186.65599822998047, + "p99": 195.0719952583313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.62399780750275, + "p90": 100.28800368309021, + "p95": 102.46399790048599, + "p99": 110.84800213575363 + }, + "combine": { + "p50": 79.19999957084656, + "p90": 86.94399893283844, + "p95": 87.61599659919739, + "p99": 88.83199840784073 + }, + "roundtrip": { + "p50": 149.85600113868713, + "p90": 157.151997089386, + "p95": 158.39999914169312, + "p99": 161.24799847602844 + }, + "isolatedSum": { + "p50": 173.8239973783493, + "p90": 187.23200261592865, + "p95": 190.07999449968338, + "p99": 199.68000054359436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.10400146245956, + "p90": 99.35999661684036, + "p95": 101.40799731016159, + "p99": 106.52799904346466 + }, + "combine": { + "p50": 81.4720019698143, + "p90": 87.80799806118011, + "p95": 88.54400366544724, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 150.91200172901154, + "p90": 158.07999670505524, + "p95": 159.5200002193451, + "p99": 164.76799547672272 + }, + "isolatedSum": { + "p50": 
176.57600343227386, + "p90": 187.16799467802048, + "p95": 189.95200097560883, + "p99": 196.70400023460388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.81599861383438, + "p90": 100.67199915647507, + "p95": 101.95200145244598, + "p99": 109.82400178909302 + }, + "combine": { + "p50": 80.57600259780884, + "p90": 89.28000181913376, + "p95": 89.72799777984619, + "p99": 95.20000219345093 + }, + "roundtrip": { + "p50": 143.327996134758, + "p90": 162.04799711704254, + "p95": 164.38399255275726, + "p99": 169.79199647903442 + }, + "isolatedSum": { + "p50": 171.39200121164322, + "p90": 189.95200097560883, + "p95": 191.67999923229218, + "p99": 205.02400398254395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 110.23999750614166, + "p90": 115.10399729013443, + "p95": 117.50400066375732, + "p99": 121.5360015630722 + }, + "combine": { + "p50": 95.29600292444229, + "p90": 103.32799702882767, + "p95": 104.12800312042236, + "p99": 113.11999708414078 + }, + "roundtrip": { + "p50": 175.26400089263916, + "p90": 181.5679967403412, + "p95": 184.38400328159332, + "p99": 187.32799589633942 + }, + "isolatedSum": { + "p50": 205.53600043058395, + "p90": 218.4319943189621, + "p95": 221.6320037841797, + "p99": 234.65599864721298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.82400441169739, + "p90": 133.85599851608276, + "p95": 135.6160044670105, + "p99": 140.25600254535675 + }, + "combine": { + "p50": 106.49599879980087, + "p90": 115.13599753379822, + "p95": 119.71200257539749, + "p99": 121.37600034475327 + }, + "roundtrip": { + "p50": 197.24799692630768, + "p90": 215.55200219154358, + "p95": 217.72800385951996, + "p99": 222.78399765491486 + }, + "isolatedSum": { + "p50": 232.32000321149826, + "p90": 248.99199604988098, + "p95": 255.328007042408, + "p99": 261.63200289011 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3404b479", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_68d64fee", + "comparisonKey": "4ecd8dbfdffaf08d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:15.470326+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.36800330877304, + "p90": 102.04800218343735, + "p95": 104.09600287675858, + "p99": 112.60800063610077 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 81.15199953317642, + "p95": 81.63200318813324, + "p99": 82.68799632787704 + }, + "roundtrip": { + "p50": 142.04800128936768, + "p90": 150.04800260066986, + "p95": 151.96800231933594, + "p99": 154.94400262832642 + }, + "isolatedSum": { + "p50": 168.60800236463547, + "p90": 183.20000171661377, + "p95": 185.72800606489182, + "p99": 195.2959969639778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.3919997215271, + "p90": 101.31199657917023, + "p95": 102.59199887514114, + "p99": 107.26399719715118 + }, + "combine": { + "p50": 71.9359964132309, + "p90": 80.92799782752991, 
+ "p95": 81.53600245714188, + "p99": 83.16799998283386 + }, + "roundtrip": { + "p50": 128.9599984884262, + "p90": 148.51200580596924, + "p95": 150.84800124168396, + "p99": 154.7199934720993 + }, + "isolatedSum": { + "p50": 143.327996134758, + "p90": 182.23999440670013, + "p95": 184.12800133228302, + "p99": 190.43199717998505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.95199686288834, + "p90": 100.00000149011612, + "p95": 103.35999727249146, + "p99": 152.319997549057 + }, + "combine": { + "p50": 71.68000191450119, + "p90": 79.52000200748444, + "p95": 79.77599650621414, + "p99": 87.00799942016602 + }, + "roundtrip": { + "p50": 127.93600559234619, + "p90": 152.99199521541595, + "p95": 157.02399611473083, + "p99": 215.13600647449493 + }, + "isolatedSum": { + "p50": 145.63199877738953, + "p90": 179.52000349760056, + "p95": 183.1359937787056, + "p99": 239.32799696922302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.38400048017502, + "p90": 100.44799745082855, + "p95": 101.79200023412704, + "p99": 107.13600367307663 + }, + "combine": { + "p50": 72.92799651622772, + "p90": 81.24800026416779, + "p95": 81.7599967122078, + "p99": 87.0399996638298 + }, + "roundtrip": { + "p50": 128.03199887275696, + "p90": 151.74399316310883, + "p95": 156.12800419330597, + "p99": 162.75200247764587 + }, + "isolatedSum": { + "p50": 149.31199699640274, + "p90": 181.69599771499634, + "p95": 183.55199694633484, + "p99": 194.17600333690643 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 92.96000003814697, + "p90": 99.23200309276581, + "p95": 101.15200281143188, + "p99": 105.85600137710571 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 82.14399963617325, + "p95": 84.25600081682205, + "p99": 88.128000497818 + }, + "roundtrip": { + "p50": 128.4479945898056, + "p90": 156.92800283432007, + "p95": 158.49600732326508, + "p99": 163.35999965667725 + }, + "isolatedSum": { + "p50": 166.6880026459694, + "p90": 181.37600272893906, + "p95": 185.40800362825394, + "p99": 193.9840018749237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.06400001049042, + "p90": 102.14400291442871, + "p95": 104.38399761915207, + "p99": 109.6000000834465 + }, + "combine": { + "p50": 79.93599772453308, + "p90": 89.88799899816513, + "p95": 90.4960036277771, + "p99": 95.29600292444229 + }, + "roundtrip": { + "p50": 143.26399564743042, + "p90": 161.21600568294525, + "p95": 164.60800170898438, + "p99": 166.75199568271637 + }, + "isolatedSum": { + "p50": 167.9999977350235, + "p90": 192.03200191259384, + "p95": 194.88000124692917, + "p99": 204.8960030078888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.21599650382996, + "p90": 119.29599940776825, + "p95": 120.4800009727478, + "p99": 
125.08800625801086 + }, + "combine": { + "p50": 89.72799777984619, + "p90": 96.83199971914291, + "p95": 103.35999727249146, + "p99": 104.3199971318245 + }, + "roundtrip": { + "p50": 168.2880073785782, + "p90": 184.06400084495544, + "p95": 186.94399297237396, + "p99": 193.12000274658203 + }, + "isolatedSum": { + "p50": 194.94399428367615, + "p90": 216.12799912691116, + "p95": 223.83999824523926, + "p99": 229.40800338983536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.98400628566742, + "p90": 144.86399292945862, + "p95": 150.56000649929047, + "p99": 286.655992269516 + }, + "combine": { + "p50": 114.01599645614624, + "p90": 122.56000190973282, + "p95": 127.6479959487915, + "p99": 130.048006772995 + }, + "roundtrip": { + "p50": 219.07199919223785, + "p90": 234.047994017601, + "p95": 236.1920028924942, + "p99": 239.00799453258514 + }, + "isolatedSum": { + "p50": 244.00000274181366, + "p90": 267.42399483919144, + "p95": 278.20800244808197, + "p99": 416.703999042511 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fa1a9267", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_3d319f20", + "comparisonKey": "c1d456a0ef949d75", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:33.548754+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": 
"normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.00000369548798, + "p90": 74.23999905586243, + "p95": 76.09599828720093, + "p99": 82.36800134181976 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 71.9040036201477, + "p95": 72.89600372314453, + "p99": 75.07199794054031 + }, + "roundtrip": { + "p50": 122.94399738311768, + "p90": 
128.06400656700134, + "p95": 129.56799566745758, + "p99": 133.98399949073792 + }, + "isolatedSum": { + "p50": 138.5280042886734, + "p90": 146.14400267601013, + "p95": 148.99200201034546, + "p99": 157.43999928236008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.38399916887283, + "p90": 76.06399804353714, + "p95": 77.02399790287018, + "p99": 81.50400221347809 + }, + "combine": { + "p50": 71.52000069618225, + "p90": 72.73600250482559, + "p95": 73.08799773454666, + "p99": 74.5600014925003 + }, + "roundtrip": { + "p50": 128.06400656700134, + "p90": 131.3920021057129, + "p95": 133.31200182437897, + "p99": 142.91200041770935 + }, + "isolatedSum": { + "p50": 139.90399986505508, + "p90": 148.80000054836273, + "p95": 150.11199563741684, + "p99": 156.0640037059784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.25599884986877, + "p90": 97.98400104045868, + "p95": 100.41599720716476, + "p99": 105.15200346708298 + }, + "combine": { + "p50": 72.1919983625412, + "p90": 81.05599880218506, + "p95": 82.68799632787704, + "p99": 87.00799942016602 + }, + "roundtrip": { + "p50": 130.048006772995, + "p90": 154.08000349998474, + "p95": 156.92800283432007, + "p99": 160.51200032234192 + }, + "isolatedSum": { + "p50": 144.44799721240997, + "p90": 179.03999984264374, + "p95": 183.1039935350418, + "p99": 192.160002887249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.99999690055847, + "p90": 100.44799745082855, + "p95": 102.75200009346008, + "p99": 108.47999900579453 + }, + "combine": { + "p50": 72.73600250482559, + "p90": 80.64000308513641, + "p95": 81.11999928951263, + "p99": 83.64800363779068 + }, + "roundtrip": { + "p50": 127.74400413036346, + "p90": 145.7280069589615, + "p95": 154.55999970436096, + "p99": 160.12799739837646 + }, + "isolatedSum": { + "p50": 144.73599940538406, + "p90": 181.08800053596497, + "p95": 183.87199938297272, + "p99": 192.1280026435852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.62400329113007, + "p90": 98.88000041246414, + "p95": 101.82400047779083, + "p99": 108.83200168609619 + }, + "combine": { + "p50": 73.79200309515, + "p90": 87.71199733018875, + "p95": 88.51200342178345, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 127.80800461769104, + "p90": 156.80000185966492, + "p95": 158.36800634860992, + "p99": 161.69600188732147 + }, + "isolatedSum": { + "p50": 156.41600638628006, + "p90": 186.5919977426529, + "p95": 190.33600389957428, + "p99": 198.94400238990784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.34400230646133, + "p90": 103.67999970912933, + "p95": 106.20799660682678, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 80.60800284147263, + "p90": 89.56799656152725, + "p95": 
90.30400216579437, + "p99": 96.16000205278397 + }, + "roundtrip": { + "p50": 141.79199934005737, + "p90": 161.3759994506836, + "p95": 164.41600024700165, + "p99": 170.3680008649826 + }, + "isolatedSum": { + "p50": 169.95200514793396, + "p90": 193.24799627065659, + "p95": 196.51199877262115, + "p99": 206.9760039448738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.29600292444229, + "p90": 115.55200070142746, + "p95": 119.1679984331131, + "p99": 123.71200323104858 + }, + "combine": { + "p50": 89.59999680519104, + "p90": 96.19200229644775, + "p95": 103.20000350475311, + "p99": 103.67999970912933 + }, + "roundtrip": { + "p50": 159.743994474411, + "p90": 179.32799458503723, + "p95": 180.9920072555542, + "p99": 185.98400056362152 + }, + "isolatedSum": { + "p50": 184.89599972963333, + "p90": 211.7440029978752, + "p95": 222.3680019378662, + "p99": 227.39200294017792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.7440015077591, + "p90": 133.760005235672, + "p95": 137.11999356746674, + "p99": 140.9280002117157 + }, + "combine": { + "p50": 106.11200332641602, + "p90": 117.34399944543839, + "p95": 120.03199756145477, + "p99": 121.18399888277054 + }, + "roundtrip": { + "p50": 197.28000462055206, + "p90": 216.22399985790253, + "p95": 217.8560048341751, + "p99": 220.47999501228333 + }, + "isolatedSum": { + "p50": 217.8560048341751, + "p90": 251.10400468111038, + "p95": 257.1519911289215, + "p99": 262.11199909448624 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d8c73495", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_2f577c82", + "comparisonKey": "9f81a1bf598376c2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:06.282576+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.06399673223495, + "p90": 74.07999783754349, + "p95": 76.35200023651123, + "p99": 82.71999657154083 + }, + "combine": { + "p50": 71.52000069618225, + "p90": 73.05599749088287, + "p95": 73.56800138950348, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 124.1919994354248, + "p90": 128.48000228405, + "p95": 131.04000687599182, + "p99": 134.94400680065155 + }, + "isolatedSum": { + "p50": 139.5839974284172, + "p90": 147.13599532842636, + "p95": 149.9200016260147, + "p99": 162.46400028467178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.31999868154526, + "p90": 74.62400197982788, + "p95": 76.76800340414047, + "p99": 83.10399949550629 + }, + "combine": { + "p50": 71.48800045251846, + "p90": 73.02399724721909, + "p95": 73.60000163316727, + "p99": 79.3600007891655 + }, + "roundtrip": { + "p50": 128.31999361515045, + "p90": 131.23199343681335, + "p95": 133.63200426101685, + "p99": 138.46400380134583 + }, + "isolatedSum": { + "p50": 139.80799913406372, + "p90": 147.64799922704697, + "p95": 150.36800503730774, + "p99": 162.46400028467178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 72.15999811887741, + "p90": 101.05600208044052, + "p95": 102.7199998497963, + "p99": 106.52799904346466 + }, + "combine": { + "p50": 72.57600128650665, + "p90": 80.86399734020233, + "p95": 81.40800148248672, + "p99": 82.8159973025322 + }, + "roundtrip": { + "p50": 130.52800297737122, + "p90": 158.33599865436554, + "p95": 160.96000373363495, + "p99": 163.26400637626648 + }, + "isolatedSum": { + "p50": 144.73599940538406, + "p90": 181.91999942064285, + "p95": 184.12800133228302, + "p99": 189.34399634599686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 68.4799998998642, + "p90": 75.48800110816956, + "p95": 77.05599814653397, + "p99": 82.56000280380249 + }, + "combine": { + "p50": 72.48000055551529, + "p90": 73.63200187683105, + "p95": 74.01599735021591, + "p99": 80.6720033288002 + }, + "roundtrip": { + "p50": 128.03199887275696, + "p90": 132.28799402713776, + "p95": 133.95200669765472, + "p99": 142.33599603176117 + }, + "isolatedSum": { + "p50": 140.9600004553795, + "p90": 149.1200029850006, + "p95": 151.07199549674988, + "p99": 163.2320061326027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.69600367546082, + "p90": 83.99999886751175, + "p95": 86.20800077915192, + "p99": 92.38400310277939 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 74.52800124883652, + "p95": 79.52000200748444, + "p99": 81.95199817419052 + }, + "roundtrip": { + "p50": 125.82400441169739, + "p90": 132.60799646377563, + "p95": 
134.2719942331314, + "p99": 138.36799561977386 + }, + "isolatedSum": { + "p50": 155.04000335931778, + "p90": 158.52800011634827, + "p95": 165.72800278663635, + "p99": 174.3360012769699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 87.96799927949905, + "p90": 91.13600105047226, + "p95": 93.37600320577621, + "p99": 99.67999905347824 + }, + "combine": { + "p50": 80.09599894285202, + "p90": 81.66400343179703, + "p95": 82.04799890518188, + "p99": 87.8399983048439 + }, + "roundtrip": { + "p50": 137.08800077438354, + "p90": 143.0400013923645, + "p95": 144.76799964904785, + "p99": 148.3519971370697 + }, + "isolatedSum": { + "p50": 168.06399822235107, + "p90": 172.8000044822693, + "p95": 175.4240021109581, + "p99": 187.51999735832214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.77599912881851, + "p90": 113.72800171375275, + "p95": 115.00799655914307, + "p99": 120.4800009727478 + }, + "combine": { + "p50": 89.91999924182892, + "p90": 98.62399846315384, + "p95": 104.00000214576721, + "p99": 105.56799918413162 + }, + "roundtrip": { + "p50": 160.25599837303162, + "p90": 177.50400304794312, + "p95": 179.45599555969238, + "p99": 182.65600502490997 + }, + "isolatedSum": { + "p50": 185.69599837064743, + "p90": 212.35200017690659, + "p95": 219.00799870491028, + "p99": 226.04800015687943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.8000020980835, + "p90": 129.98400628566742, + "p95": 131.00799918174744, + "p99": 135.83999872207642 + }, + "combine": { + "p50": 106.04800283908844, + "p90": 112.47999966144562, + "p95": 113.82400244474411, + "p99": 121.76000326871872 + }, + "roundtrip": { + "p50": 194.72000002861023, + "p90": 199.68000054359436, + "p95": 201.4079988002777, + "p99": 205.53599298000336 + }, + "isolatedSum": { + "p50": 218.84800493717194, + "p90": 242.46400594711304, + "p95": 244.83200162649155, + "p99": 257.60000199079514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2729d8d3", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_65927f79", + "comparisonKey": "62f403a93c0d27c2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:33.389451+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 76.80000364780426, + "p90": 83.36000144481659, + "p95": 84.3840017914772, + "p99": 88.67199718952179 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 80.70400357246399, + "p95": 81.31200075149536, + "p99": 82.11199939250946 + }, + "roundtrip": { + "p50": 115.39199948310852, + "p90": 130.87999820709229, + "p95": 132.192000746727, + "p99": 137.95199990272522 + }, + "isolatedSum": { + "p50": 149.21600371599197, + "p90": 164.06400501728058, + "p95": 165.69600254297256, + "p99": 170.78399658203125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 60.896001756191254, + "p90": 82.91199803352356, + "p95": 84.25600081682205, + "p99": 89.9519994854927 + }, + 
"combine": { + "p50": 72.22399860620499, + "p90": 80.03199845552444, + "p95": 80.83199709653854, + "p99": 81.50400221347809 + }, + "roundtrip": { + "p50": 116.5120005607605, + "p90": 132.1280002593994, + "p95": 133.40799510478973, + "p99": 138.68799805641174 + }, + "isolatedSum": { + "p50": 133.12000036239624, + "p90": 162.943996489048, + "p95": 165.0879979133606, + "p99": 171.4560016989708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 62.97600269317627, + "p90": 80.89599758386612, + "p95": 83.77599716186523, + "p99": 87.99999952316284 + }, + "combine": { + "p50": 72.86400347948074, + "p90": 80.79999685287476, + "p95": 87.52000331878662, + "p99": 88.0960002541542 + }, + "roundtrip": { + "p50": 116.73600226640701, + "p90": 141.92000031471252, + "p95": 143.2960033416748, + "p99": 147.07200229167938 + }, + "isolatedSum": { + "p50": 135.840006172657, + "p90": 161.69599443674088, + "p95": 171.29600048065186, + "p99": 176.09599977731705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 66.6240006685257, + "p90": 79.42400127649307, + "p95": 81.53600245714188, + "p99": 86.20800077915192 + }, + "combine": { + "p50": 73.11999797821045, + "p90": 81.08799904584885, + "p95": 81.69600367546082, + "p99": 86.87999844551086 + }, + "roundtrip": { + "p50": 115.90400338172913, + "p90": 140.1599943637848, + "p95": 141.184002161026, + "p99": 144.31999623775482 + }, + "isolatedSum": { + "p50": 139.74399864673615, + "p90": 160.51200032234192, + "p95": 163.2320061326027, + "p99": 
173.08799922466278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 65.63200056552887, + "p90": 80.19199967384338, + "p95": 81.85599744319916, + "p99": 87.67999708652496 + }, + "combine": { + "p50": 73.82400333881378, + "p90": 88.25600147247314, + "p95": 89.05600011348724, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 114.30399864912033, + "p90": 140.47999680042267, + "p95": 142.71999895572662, + "p99": 146.2399959564209 + }, + "isolatedSum": { + "p50": 139.45600390434265, + "p90": 168.44800114631653, + "p95": 170.9119975566864, + "p99": 178.55999618768692 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.45600086450577, + "p90": 86.68799698352814, + "p95": 89.72799777984619, + "p99": 91.839998960495 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 89.85599875450134, + "p95": 90.36800265312195, + "p99": 95.45599669218063 + }, + "roundtrip": { + "p50": 131.20000064373016, + "p90": 148.73600006103516, + "p95": 150.14399588108063, + "p99": 151.87199413776398 + }, + "isolatedSum": { + "p50": 156.54399991035461, + "p90": 176.54399573802948, + "p95": 180.09600043296814, + "p99": 187.29599565267563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.10400015115738, + "p90": 
99.04000163078308, + "p95": 101.95200145244598, + "p99": 104.70400005578995 + }, + "combine": { + "p50": 89.75999802350998, + "p90": 103.55199873447418, + "p95": 104.2879968881607, + "p99": 105.56799918413162 + }, + "roundtrip": { + "p50": 154.84799444675446, + "p90": 166.36799275875092, + "p95": 167.61599481105804, + "p99": 169.88800466060638 + }, + "isolatedSum": { + "p50": 176.86399817466736, + "p90": 202.59200036525726, + "p95": 206.2399983406067, + "p99": 210.27199923992157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.8320010304451, + "p90": 115.03999680280685, + "p95": 118.49600076675415, + "p99": 122.6240023970604 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 116.86400324106216, + "p95": 120.44800072908401, + "p99": 122.01599776744843 + }, + "roundtrip": { + "p50": 187.42400407791138, + "p90": 198.88000190258026, + "p95": 200.3519982099533, + "p99": 203.2960057258606 + }, + "isolatedSum": { + "p50": 211.10399812459946, + "p90": 231.90400004386902, + "p95": 238.94400149583817, + "p99": 244.64000016450882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d09b18ec", + "identity": "h100|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_45f24b88", + "comparisonKey": "47ff558b05920403", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:14.629121+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + 
"backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 42.75200143456459, + "p90": 50.04800111055374, + "p95": 50.52800104022026, + "p99": 51.19999870657921 + }, + "combine": { + "p50": 37.98399865627289, + "p90": 38.784001022577286, + "p95": 39.135999977588654, + "p99": 45.951999723911285 + }, + "roundtrip": { + "p50": 60.896001756191254, + "p90": 
62.431998550891876, + "p95": 67.9360032081604, + "p99": 70.94399631023407 + }, + "isolatedSum": { + "p50": 80.73600009083748, + "p90": 88.83200213313103, + "p95": 89.66400101780891, + "p99": 97.1519984304905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 41.34399816393852, + "p90": 49.56800118088722, + "p95": 50.175998359918594, + "p99": 51.35999992489815 + }, + "combine": { + "p50": 30.751999467611313, + "p90": 38.43199834227562, + "p95": 38.656000047922134, + "p99": 45.98399996757507 + }, + "roundtrip": { + "p50": 53.79199981689453, + "p90": 61.184000223875046, + "p95": 61.792001128196716, + "p99": 68.38399916887283 + }, + "isolatedSum": { + "p50": 72.09599763154984, + "p90": 87.99999952316284, + "p95": 88.83199840784073, + "p99": 97.34399989247322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 43.007999658584595, + "p90": 49.855999648571014, + "p95": 50.23999884724617, + "p99": 51.392000168561935 + }, + "combine": { + "p50": 37.21600025892258, + "p90": 38.72000053524971, + "p95": 39.23200070858002, + "p99": 46.04800045490265 + }, + "roundtrip": { + "p50": 59.167999774217606, + "p90": 64.09599632024765, + "p95": 67.84000247716904, + "p99": 69.11999732255936 + }, + "isolatedSum": { + "p50": 80.22399991750717, + "p90": 88.57600018382072, + "p95": 89.47199955582619, + "p99": 97.44000062346458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.847998440265656, + "p90": 49.695998430252075, + "p95": 50.40000006556511, + "p99": 57.18399956822395 + }, + "combine": { + "p50": 37.63199970126152, + "p90": 38.68800029158592, + "p95": 44.16000097990036, + "p99": 46.52800038456917 + }, + "roundtrip": { + "p50": 60.80000102519989, + "p90": 68.1919977068901, + "p95": 68.9919963479042, + "p99": 70.0799971818924 + }, + "isolatedSum": { + "p50": 80.47999814152718, + "p90": 88.383998721838, + "p95": 94.56000104546547, + "p99": 103.71199995279312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 44.16000097990036, + "p90": 50.65599828958511, + "p95": 51.45600065588951, + "p99": 57.440001517534256 + }, + "combine": { + "p50": 38.72000053524971, + "p90": 46.33599892258644, + "p95": 46.560000628232956, + "p99": 47.520000487565994 + }, + "roundtrip": { + "p50": 67.23199784755707, + "p90": 69.98399645090103, + "p95": 71.68000191450119, + "p99": 77.79199630022049 + }, + "isolatedSum": { + "p50": 82.88000151515007, + "p90": 96.99199721217155, + "p95": 98.01600128412247, + "p99": 104.96000200510025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 50.75199902057648, + "p90": 57.5999990105629, + "p95": 58.04799869656563, + "p99": 58.9120015501976 + }, + "combine": { + "p50": 46.560000628232956, + "p90": 53.119998425245285, + "p95": 53.63199859857559, + "p99": 55.55199831724167 + }, 
+ "roundtrip": { + "p50": 76.38400048017502, + "p90": 78.59200239181519, + "p95": 81.56800270080566, + "p99": 84.41600203514099 + }, + "isolatedSum": { + "p50": 97.31199964880943, + "p90": 110.71999743580818, + "p95": 111.67999729514122, + "p99": 114.46399986743927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 61.43999844789505, + "p90": 66.81600213050842, + "p95": 67.23199784755707, + "p99": 69.2799985408783 + }, + "combine": { + "p50": 62.144000083208084, + "p90": 63.840001821517944, + "p95": 64.41599875688553, + "p99": 67.84000247716904 + }, + "roundtrip": { + "p50": 101.56799852848053, + "p90": 107.64800012111664, + "p95": 108.67200046777725, + "p99": 110.944002866745 + }, + "isolatedSum": { + "p50": 123.58399853110313, + "p90": 130.65600395202637, + "p95": 131.6479966044426, + "p99": 137.12000101804733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.01599931716919, + "p90": 88.639996945858, + "p95": 91.839998960495, + "p99": 95.71199864149094 + }, + "combine": { + "p50": 89.12000060081482, + "p90": 90.7519981265068, + "p95": 91.45600348711014, + "p99": 96.3200032711029 + }, + "roundtrip": { + "p50": 153.08800339698792, + "p90": 158.27199816703796, + "p95": 159.13599729537964, + "p99": 160.8320027589798 + }, + "isolatedSum": { + "p50": 175.135999917984, + "p90": 179.3919950723648, + "p95": 183.29600244760513, + "p99": 192.03200191259384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 
77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d8d744d8", + "identity": "h100|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_1748c100", + "comparisonKey": "a9bcb4b19640853a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:34.506311+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 41.34399816393852, + "p90": 48.54400083422661, + "p95": 48.8319993019104, + "p99": 50.592001527547836 + }, + "combine": { + "p50": 36.57599911093712, + "p90": 37.50399872660637, + "p95": 44.096000492572784, + "p99": 44.863998889923096 + }, + "roundtrip": { + "p50": 58.88000130653381, + "p90": 65.76000154018402, + "p95": 66.65600091218948, + "p99": 75.32799988985062 + }, + "isolatedSum": { + "p50": 77.91999727487564, + "p90": 86.04799956083298, + "p95": 92.92799979448318, + "p99": 95.45600041747093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 41.120000183582306, + "p90": 48.448000103235245, + "p95": 48.70399832725525, + "p99": 49.984000623226166 + }, + "combine": { + "p50": 36.320000886917114, + "p90": 37.50399872660637, + "p95": 43.327998369932175, + "p99": 44.863998889923096 + }, + "roundtrip": { + "p50": 54.368000477552414, + "p90": 65.47199934720993, + "p95": 66.11199676990509, + "p99": 67.52000004053116 + }, + "isolatedSum": { + "p50": 77.44000107049942, + "p90": 85.95199882984161, + "p95": 92.03199669718742, + "p99": 94.84799951314926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.14400053024292, + "p90": 48.448000103235245, + "p95": 48.767998814582825, + "p99": 
50.624001771211624 + }, + "combine": { + "p50": 36.67199984192848, + "p90": 37.696000188589096, + "p95": 39.264000952243805, + "p99": 45.21600157022476 + }, + "roundtrip": { + "p50": 58.62399935722351, + "p90": 65.47199934720993, + "p95": 66.59200042486191, + "p99": 68.31999868154526 + }, + "isolatedSum": { + "p50": 78.8160003721714, + "p90": 86.14400029182434, + "p95": 88.03199976682663, + "p99": 95.84000334143639 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 47.10400104522705, + "p90": 48.70399832725525, + "p95": 49.02400076389313, + "p99": 49.75999891757965 + }, + "combine": { + "p50": 36.41600161790848, + "p90": 38.015998899936676, + "p95": 44.16000097990036, + "p99": 44.863998889923096 + }, + "roundtrip": { + "p50": 59.4559982419014, + "p90": 67.16799736022949, + "p95": 67.77600198984146, + "p99": 74.07999783754349 + }, + "isolatedSum": { + "p50": 83.52000266313553, + "p90": 86.71999722719193, + "p95": 93.18400174379349, + "p99": 94.62399780750275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 48.41599985957146, + "p90": 55.32800033688545, + "p95": 56.12799897789955, + "p99": 56.73599988222122 + }, + "combine": { + "p50": 44.28799822926521, + "p90": 44.89599913358688, + "p95": 45.184001326560974, + "p99": 49.60000142455101 + }, + "roundtrip": { + "p50": 66.84800237417221, + "p90": 73.47200065851212, + "p95": 74.49600100517273, + "p99": 75.42400062084198 + }, + "isolatedSum": { + "p50": 92.70399808883667, + "p90": 100.22399947047234, + "p95": 
101.31200030446053, + "p99": 106.33600130677223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 49.02400076389313, + "p90": 56.28800019621849, + "p95": 56.671999394893646, + "p99": 57.631999254226685 + }, + "combine": { + "p50": 45.632001012563705, + "p90": 52.70399898290634, + "p95": 53.02400141954422, + "p99": 53.727999329566956 + }, + "roundtrip": { + "p50": 82.33600109815598, + "p90": 84.03199911117554, + "p95": 84.3840017914772, + "p99": 85.21600067615509 + }, + "isolatedSum": { + "p50": 94.65600177645683, + "p90": 108.99199917912483, + "p95": 109.69600081443787, + "p99": 111.35999858379364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 64.57599997520447, + "p90": 65.85600227117538, + "p95": 68.15999746322632, + "p99": 73.02399724721909 + }, + "combine": { + "p50": 61.08799949288368, + "p90": 68.25599819421768, + "p95": 68.70400160551071, + "p99": 69.21599805355072 + }, + "roundtrip": { + "p50": 100.99200159311295, + "p90": 108.12799632549286, + "p95": 108.83200168609619, + "p99": 114.656001329422 + }, + "isolatedSum": { + "p50": 125.66399946808815, + "p90": 134.11200046539307, + "p95": 136.86399906873703, + "p99": 142.2399953007698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
85.9839990735054, + "p90": 88.99199962615967, + "p95": 92.06400066614151, + "p99": 94.55999732017517 + }, + "combine": { + "p50": 88.51200342178345, + "p90": 96.44799679517746, + "p95": 96.99200093746185, + "p99": 97.85600006580353 + }, + "roundtrip": { + "p50": 152.76800096035004, + "p90": 159.7760021686554, + "p95": 160.7999950647354, + "p99": 164.95999693870544 + }, + "isolatedSum": { + "p50": 174.49600249528885, + "p90": 185.43999642133713, + "p95": 189.05600160360336, + "p99": 192.4159973859787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4851c83f", + "identity": "h100|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_50016168", + "comparisonKey": "895755c4081b16a1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:56.104078+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 182.559996843338, + "p90": 191.13600254058838, + "p95": 194.04800236225128, + "p99": 202.68799364566803 + }, + "combine": { + "p50": 49.984000623226166, + "p90": 52.09600180387497, + "p95": 54.336000233888626, + "p99": 57.66399949789047 + }, + "roundtrip": { + "p50": 217.1200066804886, + "p90": 224.44799542427063, + "p95": 227.03999280929565, + "p99": 239.48800563812256 + }, + "isolatedSum": { + "p50": 232.54399746656418, + "p90": 243.23200434446335, + "p95": 248.3840025961399, + "p99": 260.3519931435585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 181.18399381637573, + "p90": 190.08000195026398, + "p95": 193.66399943828583, + "p99": 201.4079988002777 + }, + "combine": { + "p50": 51.072001457214355, + "p90": 54.655998945236206, + "p95": 56.76800012588501, + "p99": 61.11999973654747 + }, + "roundtrip": { + "p50": 218.27200055122375, + "p90": 225.72800517082214, + "p95": 
228.4799963235855, + "p99": 234.6239984035492 + }, + "isolatedSum": { + "p50": 232.2559952735901, + "p90": 244.73600089550018, + "p95": 250.43199956417084, + "p99": 262.5279985368252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 186.49600446224213, + "p90": 263.8719975948334, + "p95": 267.39200949668884, + "p99": 281.40801191329956 + }, + "combine": { + "p50": 52.25599929690361, + "p90": 64.25599753856659, + "p95": 66.0799965262413, + "p99": 69.76000219583511 + }, + "roundtrip": { + "p50": 222.97599911689758, + "p90": 305.82401156425476, + "p95": 308.9599907398224, + "p99": 313.8880133628845 + }, + "isolatedSum": { + "p50": 238.75200375914574, + "p90": 328.12799513339996, + "p95": 333.47200602293015, + "p99": 351.1680141091347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 182.01600015163422, + "p90": 189.53600525856018, + "p95": 194.5279985666275, + "p99": 259.71201062202454 + }, + "combine": { + "p50": 51.80799961090088, + "p90": 54.46400120854378, + "p95": 57.920001447200775, + "p99": 63.07200342416763 + }, + "roundtrip": { + "p50": 219.07199919223785, + "p90": 226.8799990415573, + "p95": 230.01599311828613, + "p99": 234.97599363327026 + }, + "isolatedSum": { + "p50": 233.8239997625351, + "p90": 244.00000646710396, + "p95": 252.44800001382828, + "p99": 322.78401404619217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 186.3040030002594, + "p90": 266.27200841903687, + "p95": 269.6000039577484, + "p99": 279.83999252319336 + }, + "combine": { + "p50": 53.599998354911804, + "p90": 65.85600227117538, + "p95": 67.00800359249115, + "p99": 71.58400118350983 + }, + "roundtrip": { + "p50": 224.09600019454956, + "p90": 304.639995098114, + "p95": 307.9040050506592, + "p99": 323.35999608039856 + }, + "isolatedSum": { + "p50": 239.9040013551712, + "p90": 332.12801069021225, + "p95": 336.60800755023956, + "p99": 351.4239937067032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 187.391996383667, + "p90": 262.1760070323944, + "p95": 265.6640112400055, + "p99": 279.83999252319336 + }, + "combine": { + "p50": 57.24800005555153, + "p90": 68.67200136184692, + "p95": 69.2799985408783, + "p99": 72.03199714422226 + }, + "roundtrip": { + "p50": 226.97600722312927, + "p90": 301.66399478912354, + "p95": 304.9600124359131, + "p99": 309.9519908428192 + }, + "isolatedSum": { + "p50": 244.63999643921852, + "p90": 330.84800839424133, + "p95": 334.9440097808838, + "p99": 351.8719896674156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 187.29600310325623, + "p90": 265.1839852333069, + "p95": 268.5439884662628, + "p99": 275.2639949321747 + }, + "combine": { + "p50": 64.54399973154068, + "p90": 76.83199644088745, + "p95": 78.52800190448761, + "p99": 82.20800012350082 + }, 
+ "roundtrip": { + "p50": 236.15999519824982, + "p90": 314.65598940849304, + "p95": 318.33600997924805, + "p99": 327.1999955177307 + }, + "isolatedSum": { + "p50": 251.8400028347969, + "p90": 342.01598167419434, + "p95": 347.0719903707504, + "p99": 357.4719950556755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 191.03999435901642, + "p90": 267.64801144599915, + "p95": 270.59200406074524, + "p99": 278.0799865722656 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 90.33600240945816, + "p95": 90.97599983215332, + "p99": 95.10400146245956 + }, + "roundtrip": { + "p50": 254.20799851417542, + "p90": 328.7679851055145, + "p95": 332.5760066509247, + "p99": 359.360009431839 + }, + "isolatedSum": { + "p50": 269.567996263504, + "p90": 357.9840138554573, + "p95": 361.56800389289856, + "p99": 373.1839880347252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-38d43500", + "identity": "h100|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_50016168", + "comparisonKey": "d356810710f0d474", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:19.493117+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 184.1599941253662, + "p90": 192.57600605487823, + "p95": 195.10400295257568, + "p99": 207.0399969816208 + }, + "combine": { + "p50": 52.352000027894974, + "p90": 55.456001311540604, + "p95": 57.72799998521805, + "p99": 64.4799992442131 + }, + "roundtrip": { + "p50": 221.98399901390076, + "p90": 228.57600450515747, + "p95": 231.9359928369522, + "p99": 240.79999327659607 + }, + "isolatedSum": { + "p50": 236.51199415326118, + "p90": 248.03200736641884, + "p95": 252.83200293779373, + "p99": 271.5199962258339 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 184.60799753665924, + "p90": 192.25600361824036, + "p95": 194.59199905395508, + "p99": 207.64799416065216 + }, + "combine": { + "p50": 53.63199859857559, + "p90": 56.19199946522713, + "p95": 58.14399942755699, + "p99": 60.70400029420853 + }, + "roundtrip": { + "p50": 222.88000583648682, + "p90": 230.3680032491684, + "p95": 233.024001121521, + "p99": 240.76800048351288 + }, + "isolatedSum": { + "p50": 238.23999613523483, + "p90": 248.44800308346748, + "p95": 252.73599848151207, + "p99": 268.3519944548607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 189.4720047712326, + "p90": 270.9439992904663, + "p95": 275.58401226997375, + "p99": 286.78399324417114 + }, + "combine": { + "p50": 55.80800026655197, + "p90": 66.68800115585327, + "p95": 67.48799979686737, + "p99": 71.74400240182877 + }, + "roundtrip": { + "p50": 228.7999987602234, + "p90": 308.80001187324524, + "p95": 314.11200761795044, + "p99": 329.75998520851135 + }, + "isolatedSum": { + "p50": 245.28000503778458, + "p90": 337.6320004463196, + "p95": 343.0720120668411, + "p99": 358.5279956459999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 185.72799861431122, + "p90": 193.24800372123718, + "p95": 196.28800451755524, 
+ "p99": 218.72000396251678 + }, + "combine": { + "p50": 56.0000017285347, + "p90": 59.26400050520897, + "p95": 61.184000223875046, + "p99": 65.15199691057205 + }, + "roundtrip": { + "p50": 226.84800624847412, + "p90": 233.72800648212433, + "p95": 237.34399676322937, + "p99": 263.16800713539124 + }, + "isolatedSum": { + "p50": 241.72800034284592, + "p90": 252.51200422644615, + "p95": 257.4720047414303, + "p99": 283.87200087308884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 189.53600525856018, + "p90": 268.48000288009644, + "p95": 272.5439965724945, + "p99": 282.9760015010834 + }, + "combine": { + "p50": 57.34400078654289, + "p90": 68.09599697589874, + "p95": 69.21599805355072, + "p99": 74.43200051784515 + }, + "roundtrip": { + "p50": 228.92799973487854, + "p90": 306.71998858451843, + "p95": 310.016006231308, + "p99": 318.84801387786865 + }, + "isolatedSum": { + "p50": 246.88000604510307, + "p90": 336.5759998559952, + "p95": 341.7599946260452, + "p99": 357.4080020189285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 190.36799669265747, + "p90": 269.53598856925964, + "p95": 275.4879891872406, + "p99": 323.199987411499 + }, + "combine": { + "p50": 61.69600039720535, + "p90": 72.86400347948074, + "p95": 73.44000041484833, + "p99": 76.28799974918365 + }, + "roundtrip": { + "p50": 232.06399381160736, + "p90": 241.91999435424805, + "p95": 291.51999950408936, + "p99": 298.8480031490326 + }, + "isolatedSum": { + "p50": 252.06399708986282, + "p90": 
342.3999920487404, + "p95": 348.9279896020889, + "p99": 399.4879871606827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 189.79200720787048, + "p90": 270.59200406074524, + "p95": 274.78399872779846, + "p99": 318.08000802993774 + }, + "combine": { + "p50": 71.29599899053574, + "p90": 82.36800134181976, + "p95": 83.23200047016144, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 245.2159970998764, + "p90": 327.2959887981415, + "p95": 330.9119939804077, + "p99": 339.9040102958679 + }, + "isolatedSum": { + "p50": 261.0880061984062, + "p90": 352.960005402565, + "p95": 358.0159991979599, + "p99": 404.9280062317848 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 198.94400238990784, + "p90": 271.232008934021, + "p95": 275.519996881485, + "p99": 282.49600529670715 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 95.10400146245956, + "p95": 96.25600278377533, + "p99": 100.76799988746643 + }, + "roundtrip": { + "p50": 271.32800221443176, + "p90": 335.07201075553894, + "p95": 338.78400921821594, + "p99": 357.695996761322 + }, + "isolatedSum": { + "p50": 285.3440046310425, + "p90": 366.33601039648056, + "p95": 371.7759996652603, + "p99": 383.2640051841736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ab5616f3", + 
"identity": "h100|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_50016168", + "comparisonKey": "70e99de9e82bcf0d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:45.263644+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + 
"rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 197.02400267124176, + "p90": 261.4080011844635, + "p95": 265.3439939022064, + "p99": 271.1679935455322 + }, + "combine": { + "p50": 57.24800005555153, + "p90": 59.67999994754791, + "p95": 61.28000095486641, + "p99": 65.05600363016129 + }, + "roundtrip": { + "p50": 236.38400435447693, + "p90": 242.33600497245789, + "p95": 244.80000138282776, + "p99": 334.879994392395 + }, + "isolatedSum": { + "p50": 254.2720027267933, + "p90": 321.0880011320114, + "p95": 326.62399485707283, + "p99": 336.2239971756935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 194.7840005159378, + "p90": 201.31200551986694, + "p95": 203.77600193023682, + "p99": 218.20800006389618 + }, + "combine": { + "p50": 58.52799862623215, + "p90": 60.83200126886368, + "p95": 62.81600147485733, + "p99": 67.03999638557434 + }, + "roundtrip": { + "p50": 236.2239956855774, + "p90": 242.0479953289032, + "p95": 244.00000274181366, + "p99": 247.6159930229187 + }, + "isolatedSum": { + "p50": 253.31199914216995, + "p90": 262.1440067887306, + "p95": 266.59200340509415, + "p99": 285.2479964494705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 198.04799556732178, + "p90": 269.4399952888489, + "p95": 274.6239900588989, + "p99": 285.37601232528687 + }, + "combine": { + "p50": 59.7120001912117, + "p90": 71.29599899053574, + "p95": 72.03199714422226, + "p99": 74.01599735021591 + }, + "roundtrip": { + "p50": 239.32799696922302, + "p90": 
310.7840120792389, + "p95": 314.07999992370605, + "p99": 322.4959969520569 + }, + "isolatedSum": { + "p50": 257.7599957585335, + "p90": 340.7359942793846, + "p95": 346.6559872031212, + "p99": 359.3920096755028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 198.30399751663208, + "p90": 261.50399446487427, + "p95": 264.76800441741943, + "p99": 270.04799246788025 + }, + "combine": { + "p50": 60.28800085186958, + "p90": 70.0799971818924, + "p95": 70.75200229883194, + "p99": 73.91999661922455 + }, + "roundtrip": { + "p50": 240.83200097084045, + "p90": 304.80000376701355, + "p95": 308.19201469421387, + "p99": 312.6719892024994 + }, + "isolatedSum": { + "p50": 258.59199836850166, + "p90": 331.58399164676666, + "p95": 335.5200067162514, + "p99": 343.9679890871048 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 198.30399751663208, + "p90": 267.520010471344, + "p95": 271.232008934021, + "p99": 281.3119888305664 + }, + "combine": { + "p50": 61.47199869155884, + "p90": 72.64000177383423, + "p95": 73.34399968385696, + "p99": 77.66400277614594 + }, + "roundtrip": { + "p50": 241.7919933795929, + "p90": 307.45598673820496, + "p95": 310.11199951171875, + "p99": 319.10398602485657 + }, + "isolatedSum": { + "p50": 259.7759962081909, + "p90": 340.1600122451782, + "p95": 344.57600861787796, + "p99": 358.97599160671234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 199.072003364563, + "p90": 263.839989900589, + "p95": 268.8640058040619, + "p99": 276.2880027294159 + }, + "combine": { + "p50": 66.91200286149979, + "p90": 78.23999971151352, + "p95": 78.91199737787247, + "p99": 82.56000280380249 + }, + "roundtrip": { + "p50": 247.23200500011444, + "p90": 312.6719892024994, + "p95": 316.0960078239441, + "p99": 320.576012134552 + }, + "isolatedSum": { + "p50": 265.9840062260628, + "p90": 342.0799896121025, + "p95": 347.77600318193436, + "p99": 358.8480055332184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.77599382400513, + "p90": 287.07200288772583, + "p95": 291.83998703956604, + "p99": 316.8320059776306 + }, + "combine": { + "p50": 76.9599974155426, + "p90": 88.54400366544724, + "p95": 89.50400352478027, + "p99": 93.63199770450592 + }, + "roundtrip": { + "p50": 259.99999046325684, + "p90": 349.37599301338196, + "p95": 351.8719971179962, + "p99": 359.51998829841614 + }, + "isolatedSum": { + "p50": 276.73599123954773, + "p90": 375.61600655317307, + "p95": 381.3439905643463, + "p99": 410.46400368213654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 212.3199999332428, + "p90": 272.19200134277344, + "p95": 276.5119969844818, + "p99": 284.09600257873535 + }, + "combine": { + "p50": 91.58399701118469, + "p90": 102.81600058078766, + "p95": 103.87200117111206, 
+ "p99": 107.4879989027977 + }, + "roundtrip": { + "p50": 288.5439991950989, + "p90": 344.06399726867676, + "p95": 348.06400537490845, + "p99": 356.00000619888306 + }, + "isolatedSum": { + "p50": 303.9039969444275, + "p90": 375.0080019235611, + "p95": 380.3839981555939, + "p99": 391.58400148153305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-55b8ba8a", + "identity": "h100|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_30e588e0", + "comparisonKey": "8ce53f90e15612eb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:58.992443+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.31200271844864, + "p90": 97.79199957847595, + "p95": 99.90400075912476, + "p99": 103.96800190210342 + }, + "combine": { + "p50": 67.52000004053116, + "p90": 69.24799829721451, + "p95": 70.01599669456482, + "p99": 73.15199822187424 + }, + "roundtrip": { + "p50": 183.1039935350418, + "p90": 194.7840005159378, + "p95": 196.6720074415207, + "p99": 202.2079974412918 + }, + "isolatedSum": { + "p50": 160.8320027589798, + "p90": 167.03999787569046, + "p95": 169.91999745368958, + "p99": 177.12000012397766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.75200229883194, + "p90": 96.19200229644775, + "p95": 99.61599856615067, + "p99": 104.63999956846237 + }, + "combine": { + "p50": 62.55999952554703, + "p90": 71.10399752855301, + "p95": 71.74400240182877, + "p99": 73.2479989528656 + }, + "roundtrip": { + "p50": 155.45600652694702, + "p90": 197.08800315856934, + "p95": 199.16799664497375, + "p99": 203.07199656963348 + }, + "isolatedSum": { + "p50": 133.31200182437897, + "p90": 167.29599982500076, + "p95": 171.36000096797943, + "p99": 177.88799852132797 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.35999947786331, + "p90": 100.38399696350098, + "p95": 103.42399775981903, + "p99": 108.64000022411346 + }, + "combine": { + "p50": 63.968002796173096, + "p90": 75.71200281381607, + "p95": 77.05599814653397, + "p99": 81.79199695587158 + }, + "roundtrip": { + "p50": 156.70399367809296, + "p90": 203.48800718784332, + "p95": 207.2640061378479, + "p99": 496.7679977416992 + }, + "isolatedSum": { + "p50": 135.3280022740364, + "p90": 176.09599977731705, + "p95": 180.479995906353, + "p99": 190.43199717998505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 93.66399794816971, + "p90": 97.28000313043594, + "p95": 99.61599856615067, + "p99": 102.88000106811523 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 74.14399832487106, + "p95": 75.58400183916092, + "p99": 81.02399855852127 + }, + "roundtrip": { + "p50": 156.89599514007568, + "p90": 198.7839937210083, + "p95": 200.3840059041977, + "p99": 205.21600544452667 + }, + "isolatedSum": { + "p50": 166.04799777269363, + "p90": 171.424001455307, + "p95": 175.20000040531158, + "p99": 183.9039996266365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 94.30400282144547, + "p90": 101.24800354242325, + "p95": 
103.55199873447418, + "p99": 111.23199760913849 + }, + "combine": { + "p50": 74.52800124883652, + "p90": 77.34400033950806, + "p95": 79.1039988398552, + "p99": 82.17599987983704 + }, + "roundtrip": { + "p50": 189.98399376869202, + "p90": 203.0400037765503, + "p95": 206.04799687862396, + "p99": 214.78399634361267 + }, + "isolatedSum": { + "p50": 168.83200407028198, + "p90": 178.5920038819313, + "p95": 182.65599757432938, + "p99": 193.40799748897552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 72.4480003118515, + "p90": 98.36799651384354, + "p95": 101.40799731016159, + "p99": 108.09600353240967 + }, + "combine": { + "p50": 71.29599899053574, + "p90": 81.02399855852127, + "p95": 82.20800012350082, + "p99": 87.26400136947632 + }, + "roundtrip": { + "p50": 163.90399634838104, + "p90": 207.23199844360352, + "p95": 209.56799387931824, + "p99": 213.76000344753265 + }, + "isolatedSum": { + "p50": 143.74399930238724, + "p90": 179.3919950723648, + "p95": 183.61599743366241, + "p99": 195.360004901886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.89599758386612, + "p90": 103.35999727249146, + "p95": 105.50399869680405, + "p99": 110.30399799346924 + }, + "combine": { + "p50": 81.05599880218506, + "p90": 92.76799857616425, + "p95": 94.01600062847137, + "p99": 97.95200079679489 + }, + "roundtrip": { + "p50": 173.72800409793854, + "p90": 223.00800681114197, + "p95": 226.01599991321564, + "p99": 237.0239943265915 + }, + "isolatedSum": { + "p50": 
161.95199638605118, + "p90": 196.1279958486557, + "p95": 199.51999932527542, + "p99": 208.25599879026413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.86399865150452, + "p90": 109.76000130176544, + "p95": 112.67200112342834, + "p99": 121.66400253772736 + }, + "combine": { + "p50": 98.49599748849869, + "p90": 109.69600081443787, + "p95": 110.68800091743469, + "p99": 115.10399729013443 + }, + "roundtrip": { + "p50": 216.51199460029602, + "p90": 240.76800048351288, + "p95": 244.3840056657791, + "p99": 251.48800015449524 + }, + "isolatedSum": { + "p50": 187.3599961400032, + "p90": 219.4560021162033, + "p95": 223.36000204086304, + "p99": 236.7679998278618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eed2a06a", + "identity": "h100|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_50016168", + "comparisonKey": "335db208fca9398e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:27.591193+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 188.60800564289093, + "p90": 252.99200415611267, + "p95": 256.6080093383789, + "p99": 263.93601298332214 + }, + "combine": { + "p50": 61.91999837756157, + "p90": 68.09599697589874, + "p95": 68.67200136184692, + "p99": 73.21599870920181 + }, + "roundtrip": { + "p50": 229.8240065574646, + "p90": 238.97600173950195, + "p95": 246.59200012683868, + "p99": 298.14401268959045 + }, + "isolatedSum": { + "p50": 250.5280040204525, + "p90": 321.0880011320114, + "p95": 325.28001070022583, + "p99": 337.15201169252396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 184.28799510002136, + "p90": 193.05600225925446, + "p95": 196.60800695419312, + "p99": 202.2400051355362 + }, + "combine": { + "p50": 61.88800185918808, + "p90": 64.15999680757523, + "p95": 66.20799750089645, + "p99": 69.023996591568 + }, + "roundtrip": { + "p50": 230.94399273395538, + "p90": 238.5919988155365, + "p95": 242.40000545978546, + "p99": 262.84798979759216 + }, + "isolatedSum": { + "p50": 246.17599695920944, + "p90": 257.2159990668297, + "p95": 262.81600445508957, + "p99": 271.2640017271042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 188.92799317836761, + "p90": 270.3680098056793, + "p95": 274.27199482917786, + "p99": 284.960001707077 + }, + "combine": { + "p50": 63.1679967045784, + "p90": 74.5600014925003, + "p95": 76.22399926185608, + "p99": 81.40800148248672 + }, + "roundtrip": { + "p50": 235.58400571346283, + "p90": 316.22400879859924, + "p95": 319.64799761772156, + "p99": 327.58399844169617 + }, + "isolatedSum": { + "p50": 252.09598988294601, + "p90": 344.9280112981796, + "p95": 350.49599409103394, + "p99": 366.36800318956375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 188.73600661754608, + "p90": 256.44800066947937, + "p95": 261.05600595474243, + "p99": 270.04799246788025 + }, + "combine": { + "p50": 64.09599632024765, + "p90": 73.72800260782242, + "p95": 74.5600014925003, + "p99": 80.22399991750717 + }, + "roundtrip": { 
+ "p50": 236.64000630378723, + "p90": 298.3039915561676, + "p95": 301.12001299858093, + "p99": 307.2960078716278 + }, + "isolatedSum": { + "p50": 252.83200293779373, + "p90": 330.1760032773018, + "p95": 335.61600744724274, + "p99": 350.2719923853874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 188.63999843597412, + "p90": 263.0400061607361, + "p95": 267.10399985313416, + "p99": 280.35199642181396 + }, + "combine": { + "p50": 65.63200056552887, + "p90": 77.08799839019775, + "p95": 79.19999957084656, + "p99": 82.5280025601387 + }, + "roundtrip": { + "p50": 238.27199637889862, + "p90": 306.91200494766235, + "p95": 309.56798791885376, + "p99": 314.36800956726074 + }, + "isolatedSum": { + "p50": 254.271999001503, + "p90": 340.12800455093384, + "p95": 346.3039994239807, + "p99": 362.87999898195267 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 189.15200233459473, + "p90": 256.415992975235, + "p95": 259.42400097846985, + "p99": 263.5839879512787 + }, + "combine": { + "p50": 71.48800045251846, + "p90": 81.37600123882294, + "p95": 82.04799890518188, + "p99": 85.37600189447403 + }, + "roundtrip": { + "p50": 244.4480061531067, + "p90": 311.19999289512634, + "p95": 313.60000371932983, + "p99": 318.59201192855835 + }, + "isolatedSum": { + "p50": 260.6400027871132, + "p90": 337.7919942140579, + "p95": 341.47199988365173, + "p99": 348.9599898457527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + 
"fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 192.7040070295334, + "p90": 271.5199887752533, + "p95": 274.4320034980774, + "p99": 289.4720137119293 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 92.6399976015091, + "p95": 94.59199756383896, + "p99": 97.6639986038208 + }, + "roundtrip": { + "p50": 257.5039863586426, + "p90": 339.04001116752625, + "p95": 342.303991317749, + "p99": 350.3679931163788 + }, + "isolatedSum": { + "p50": 273.72800558805466, + "p90": 364.1599863767624, + "p95": 369.02400106191635, + "p99": 387.1360123157501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 214.91199731826782, + "p90": 273.6000120639801, + "p95": 278.0480086803436, + "p99": 342.272013425827 + }, + "combine": { + "p50": 98.27200323343277, + "p90": 109.43999886512756, + "p95": 110.944002866745, + "p99": 114.78400230407715 + }, + "roundtrip": { + "p50": 300.25601387023926, + "p90": 350.3040075302124, + "p95": 356.1919927597046, + "p99": 372.6400136947632 + }, + "isolatedSum": { + "p50": 313.1840005517006, + "p90": 383.04001092910767, + "p95": 388.9920115470886, + "p99": 457.0560157299042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5ce9546f", + "identity": "h100|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_50016168", + "comparisonKey": 
"09f88ecb87657576", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:23.930432+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 193.05600225925446, + "p90": 200.73600113391876, + "p95": 203.71200144290924, + "p99": 
211.35999262332916 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 62.65600025653839, + "p95": 64.38399851322174, + "p99": 68.09599697589874 + }, + "roundtrip": { + "p50": 236.12800240516663, + "p90": 243.29599738121033, + "p95": 246.87999486923218, + "p99": 251.39200687408447 + }, + "isolatedSum": { + "p50": 253.7280023097992, + "p90": 263.39200139045715, + "p95": 268.095999956131, + "p99": 279.4559895992279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 192.76799261569977, + "p90": 201.12000405788422, + "p95": 204.19199764728546, + "p99": 209.85600352287292 + }, + "combine": { + "p50": 61.88800185918808, + "p90": 64.28799778223038, + "p95": 66.59200042486191, + "p99": 68.96000355482101 + }, + "roundtrip": { + "p50": 237.15199530124664, + "p90": 244.89599466323853, + "p95": 248.31999838352203, + "p99": 253.37600708007812 + }, + "isolatedSum": { + "p50": 254.65599447488785, + "p90": 265.4080018401146, + "p95": 270.78399807214737, + "p99": 278.81600707769394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 196.60800695419312, + "p90": 266.9120132923126, + "p95": 270.3680098056793, + "p99": 279.55201268196106 + }, + "combine": { + "p50": 62.144000083208084, + "p90": 73.98399710655212, + "p95": 74.52800124883652, + "p99": 77.88799703121185 + }, + "roundtrip": { + "p50": 241.95200204849243, + "p90": 314.7839903831482, + "p95": 317.3759877681732, + "p99": 322.6880133152008 + }, + "isolatedSum": { + "p50": 258.7520070374012, + "p90": 340.89601039886475, + 
"p95": 344.89601105451584, + "p99": 357.4400097131729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 193.05600225925446, + "p90": 200.9280025959015, + "p95": 205.24799823760986, + "p99": 211.61599457263947 + }, + "combine": { + "p50": 62.68800050020218, + "p90": 65.92000275850296, + "p95": 67.96800345182419, + "p99": 70.14399766921997 + }, + "roundtrip": { + "p50": 240.25599658489227, + "p90": 247.0719963312149, + "p95": 250.59199333190918, + "p99": 255.77598810195923 + }, + "isolatedSum": { + "p50": 255.74400275945663, + "p90": 266.84800535440445, + "p95": 273.21600168943405, + "p99": 281.75999224185944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 197.34400510787964, + "p90": 267.13600754737854, + "p95": 270.55999636650085, + "p99": 276.0320007801056 + }, + "combine": { + "p50": 65.40799885988235, + "p90": 68.4799998998642, + "p95": 70.72000205516815, + "p99": 77.2479996085167 + }, + "roundtrip": { + "p50": 242.40000545978546, + "p90": 248.89600276947021, + "p95": 252.57599353790283, + "p99": 260.99199056625366 + }, + "isolatedSum": { + "p50": 262.752003967762, + "p90": 335.61600744724274, + "p95": 341.279998421669, + "p99": 353.2800003886223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 
193.4400051832199, + "p90": 202.04800367355347, + "p95": 206.84799551963806, + "p99": 309.7600042819977 + }, + "combine": { + "p50": 71.10399752855301, + "p90": 73.53600114583969, + "p95": 75.96799731254578, + "p99": 78.40000092983246 + }, + "roundtrip": { + "p50": 248.51199984550476, + "p90": 255.67999482154846, + "p95": 258.2080066204071, + "p99": 266.9120132923126 + }, + "isolatedSum": { + "p50": 264.5440027117729, + "p90": 275.58400481939316, + "p95": 282.81599283218384, + "p99": 388.16000521183014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 198.4959989786148, + "p90": 271.07200026512146, + "p95": 274.81600642204285, + "p99": 281.8560004234314 + }, + "combine": { + "p50": 82.40000158548355, + "p90": 94.14400160312653, + "p95": 94.97600048780441, + "p99": 99.48799759149551 + }, + "roundtrip": { + "p50": 263.839989900589, + "p90": 333.98398756980896, + "p95": 337.8559947013855, + "p99": 342.6240086555481 + }, + "isolatedSum": { + "p50": 280.89600056409836, + "p90": 365.216001868248, + "p95": 369.79200690984726, + "p99": 381.3439980149269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 218.30399334430695, + "p90": 272.6080119609833, + "p95": 275.64799785614014, + "p99": 281.2800109386444 + }, + "combine": { + "p50": 99.61599856615067, + "p90": 110.46399921178818, + "p95": 111.39199882745743, + "p99": 114.68800157308578 + }, + "roundtrip": { + "p50": 305.2160143852234, + "p90": 354.5919954776764, + "p95": 357.5040102005005, + "p99": 
362.33600974082947 + }, + "isolatedSum": { + "p50": 317.9199919104576, + "p90": 383.07201117277145, + "p95": 387.03999668359756, + "p99": 395.9680125117302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eeb6d856", + "identity": "h100|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_621a873c", + "comparisonKey": "4cc93e1dd333747e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:42.683913+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 81.08799904584885, + "p90": 83.5840031504631, + "p95": 85.28000116348267, + "p99": 89.6959975361824 + }, + "combine": { + "p50": 73.08799773454666, + "p90": 74.68800246715546, + "p95": 75.6160020828247, + "p99": 78.75200361013412 + }, + "roundtrip": { + "p50": 173.95199835300446, + "p90": 179.9039989709854, + "p95": 182.14400112628937, + "p99": 194.72000002861023 + }, + "isolatedSum": { + "p50": 154.1759967803955, + "p90": 158.27200561761856, + "p95": 160.89600324630737, + "p99": 168.44800114631653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 79.13599908351898, + "p90": 85.91999858617783, + "p95": 92.41600334644318, + "p99": 132.47999548912048 + }, + "combine": { + "p50": 69.76000219583511, + "p90": 72.12799787521362, + "p95": 73.05599749088287, + "p99": 76.92799717187881 + }, + "roundtrip": { + "p50": 137.88799941539764, + "p90": 174.81599748134613, + "p95": 177.66399681568146, + "p99": 182.40000307559967 + }, + "isolatedSum": { + "p50": 148.8960012793541, + "p90": 158.04799646139145, + "p95": 165.47200083732605, + "p99": 209.4079926609993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 
13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.7600035071373, + "p90": 81.98399841785431, + "p95": 84.25600081682205, + "p99": 87.42400258779526 + }, + "combine": { + "p50": 72.1919983625412, + "p90": 74.20799881219864, + "p95": 75.9039968252182, + "p99": 80.03199845552444 + }, + "roundtrip": { + "p50": 173.2800006866455, + "p90": 179.45599555969238, + "p95": 182.6239973306656, + "p99": 193.82399320602417 + }, + "isolatedSum": { + "p50": 149.9520018696785, + "p90": 156.19199723005295, + "p95": 160.15999764204025, + "p99": 167.4560010433197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.99199831485748, + "p90": 84.54400300979614, + "p95": 88.79999816417694, + "p99": 102.88000106811523 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 75.9039968252182, + "p95": 78.75200361013412, + "p99": 85.82399785518646 + }, + "roundtrip": { + "p50": 170.81600427627563, + "p90": 179.32799458503723, + "p95": 181.60000443458557, + "p99": 186.39999628067017 + }, + "isolatedSum": { + "p50": 154.94399517774582, + "p90": 160.44799983501434, + "p95": 167.55200177431107, + "p99": 188.7039989233017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.73599636554718, + "p90": 87.2960016131401, + "p95": 92.38400310277939, + "p99": 121.95199728012085 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 77.82399654388428, + "p95": 80.73599636554718, + 
"p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 176.67199671268463, + "p90": 182.11199343204498, + "p95": 184.67199802398682, + "p99": 189.66400623321533 + }, + "isolatedSum": { + "p50": 155.74399381875992, + "p90": 165.11999815702438, + "p95": 173.11999946832657, + "p99": 208.8639959692955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.69600301980972, + "p90": 82.87999778985977, + "p95": 84.89599823951721, + "p99": 88.48000317811966 + }, + "combine": { + "p50": 79.64800298213959, + "p90": 81.85599744319916, + "p95": 82.97599852085114, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 179.19999361038208, + "p90": 186.46399676799774, + "p95": 189.53600525856018, + "p99": 197.6960003376007 + }, + "isolatedSum": { + "p50": 157.3440060019493, + "p90": 164.73599523305893, + "p95": 167.87199676036835, + "p99": 177.47200280427933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.64000308513641, + "p90": 85.56800335645676, + "p95": 88.41600269079208, + "p99": 94.68799829483032 + }, + "combine": { + "p50": 88.639996945858, + "p90": 91.2960022687912, + "p95": 92.57599711418152, + "p99": 97.120001912117 + }, + "roundtrip": { + "p50": 189.7599995136261, + "p90": 198.46400618553162, + "p95": 200.3519982099533, + "p99": 208.00000429153442 + }, + "isolatedSum": { + "p50": 169.28000003099442, + "p90": 176.86400562524796, + "p95": 180.9919998049736, + "p99": 191.80800020694733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + 
"combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.10400015115738, + "p90": 92.41600334644318, + "p95": 94.01600062847137, + "p99": 97.59999811649323 + }, + "combine": { + "p50": 105.66399991512299, + "p90": 108.73600095510483, + "p95": 110.62400043010712, + "p99": 115.32799899578094 + }, + "roundtrip": { + "p50": 212.44800090789795, + "p90": 219.87199783325195, + "p95": 222.1119999885559, + "p99": 225.055992603302 + }, + "isolatedSum": { + "p50": 192.76800006628036, + "p90": 201.152004301548, + "p95": 204.6400010585785, + "p99": 212.92799711227417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-23b215a1", + "identity": "h100|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_f8a94d59", + "comparisonKey": "6a11ee326cd2620e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:18.498153+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + 
"kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 30.527999624609947, + "p90": 32.99200162291527, + "p95": 34.40000116825104, + "p99": 37.31200098991394 + }, + "combine": { + "p50": 34.36800092458725, + "p90": 36.73600032925606, + "p95": 37.82400116324425, + "p99": 45.791998505592346 + }, + "roundtrip": { + "p50": 2063.29607963562, + "p90": 2065.664052963257, + "p95": 2067.744016647339, + "p99": 2075.808048248291 + }, + "isolatedSum": { + "p50": 64.8960005491972, + "p90": 69.72800195217133, + "p95": 72.22400233149529, + "p99": 83.10399949550629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 30.49599938094616, + "p90": 33.28000009059906, + "p95": 35.61599925160408, + "p99": 
40.89599847793579 + }, + "combine": { + "p50": 34.623999148607254, + "p90": 36.896001547575, + "p95": 37.66399994492531, + "p99": 41.31200164556503 + }, + "roundtrip": { + "p50": 2063.999891281128, + "p90": 2065.984010696411, + "p95": 2067.3279762268066, + "p99": 2071.455955505371 + }, + "isolatedSum": { + "p50": 65.11999852955341, + "p90": 70.17600163817406, + "p95": 73.27999919652939, + "p99": 82.20800012350082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 30.880000442266464, + "p90": 32.99200162291527, + "p95": 35.61599925160408, + "p99": 40.44799879193306 + }, + "combine": { + "p50": 35.392001271247864, + "p90": 37.43999823927879, + "p95": 38.43199834227562, + "p99": 42.49599948525429 + }, + "roundtrip": { + "p50": 2065.216064453125, + "p90": 2067.6798820495605, + "p95": 2069.1521167755127, + "p99": 2114.6559715270996 + }, + "isolatedSum": { + "p50": 66.27200171351433, + "p90": 70.43199986219406, + "p95": 74.0479975938797, + "p99": 82.94399827718735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 31.039999797940254, + "p90": 33.440001308918, + "p95": 35.61599925160408, + "p99": 38.07999938726425 + }, + "combine": { + "p50": 36.06399893760681, + "p90": 38.24000060558319, + "p95": 39.61599990725517, + "p99": 60.99199876189232 + }, + "roundtrip": { + "p50": 2065.984010696411, + "p90": 2068.000078201294, + "p95": 2069.632053375244, + "p99": 2074.2719173431396 + }, + "isolatedSum": { + "p50": 67.10399873554707, + "p90": 71.68000191450119, + "p95": 
75.23199915885925, + "p99": 99.07199814915657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 32.607998698949814, + "p90": 35.26400029659271, + "p95": 37.856001406908035, + "p99": 42.87999868392944 + }, + "combine": { + "p50": 39.135999977588654, + "p90": 41.4079986512661, + "p95": 43.96799951791763, + "p99": 61.24800071120262 + }, + "roundtrip": { + "p50": 2071.1679458618164, + "p90": 2076.7359733581543, + "p95": 2085.7279300689697, + "p99": 2130.847930908203 + }, + "isolatedSum": { + "p50": 71.74399867653847, + "p90": 76.67199894785881, + "p95": 81.82400092482567, + "p99": 104.12799939513206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 35.13599932193756, + "p90": 36.320000886917114, + "p95": 39.0079990029335, + "p99": 42.17600077390671 + }, + "combine": { + "p50": 45.791998505592346, + "p90": 47.61600121855736, + "p95": 48.67200180888176, + "p99": 51.10400170087814 + }, + "roundtrip": { + "p50": 2080.0321102142334, + "p90": 2081.9199085235596, + "p95": 2082.911968231201, + "p99": 2087.2321128845215 + }, + "isolatedSum": { + "p50": 80.92799782752991, + "p90": 83.93600210547447, + "p95": 87.68000081181526, + "p99": 93.28000247478485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
41.919998824596405, + "p90": 52.76799947023392, + "p95": 53.18399891257286, + "p99": 55.03999814391136 + }, + "combine": { + "p50": 60.864001512527466, + "p90": 66.97600334882736, + "p95": 69.11999732255936, + "p99": 73.37599992752075 + }, + "roundtrip": { + "p50": 2101.1838912963867, + "p90": 2108.5119247436523, + "p95": 2110.464096069336, + "p99": 2116.863965988159 + }, + "isolatedSum": { + "p50": 102.78400033712387, + "p90": 119.74400281906128, + "p95": 122.30399623513222, + "p99": 128.4159980714321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 53.85600030422211, + "p90": 55.36000058054924, + "p95": 58.36800113320351, + "p99": 62.55999952554703 + }, + "combine": { + "p50": 87.48800307512283, + "p90": 89.85599875450134, + "p95": 90.46400338411331, + "p99": 92.41600334644318 + }, + "roundtrip": { + "p50": 2141.632080078125, + "p90": 2144.063949584961, + "p95": 2145.024061203003, + "p99": 2149.183988571167 + }, + "isolatedSum": { + "p50": 141.34400337934494, + "p90": 145.21599933505058, + "p95": 148.83200451731682, + "p99": 154.9760028719902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f8bdb429", + "identity": "h100|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_d12f3fb9", + "comparisonKey": "9235e11e6343b4c5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:02.813372+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": 
"deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 30.30399978160858, + "p90": 34.11199897527695, + "p95": 36.479998379945755, + "p99": 42.97599941492081 + }, + "combine": { + "p50": 34.07999873161316, + "p90": 37.18400001525879, + "p95": 40.44799879193306, + "p99": 105.05600273609161 + }, + "roundtrip": { + "p50": 2064.0320777893066, + "p90": 2066.4000511169434, + "p95": 
2068.6399936676025, + "p99": 2096.384048461914 + }, + "isolatedSum": { + "p50": 64.38399851322174, + "p90": 71.29599899053574, + "p95": 76.92799717187881, + "p99": 148.03200215101242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 30.400000512599945, + "p90": 33.15199911594391, + "p95": 34.65599939227104, + "p99": 37.34400123357773 + }, + "combine": { + "p50": 33.56799855828285, + "p90": 36.19199991226196, + "p95": 37.28000074625015, + "p99": 54.59199845790863 + }, + "roundtrip": { + "p50": 2064.3200874328613, + "p90": 2066.5600299835205, + "p95": 2068.959951400757, + "p99": 2073.6000537872314 + }, + "isolatedSum": { + "p50": 63.9679990708828, + "p90": 69.34399902820587, + "p95": 71.9360001385212, + "p99": 91.93599969148636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 30.62400035560131, + "p90": 34.88000109791756, + "p95": 39.29600119590759, + "p99": 49.12000149488449 + }, + "combine": { + "p50": 35.32800078392029, + "p90": 37.79200091958046, + "p95": 39.135999977588654, + "p99": 47.58400097489357 + }, + "roundtrip": { + "p50": 2065.3440952301025, + "p90": 2067.840099334717, + "p95": 2069.279909133911, + "p99": 2072.9598999023438 + }, + "isolatedSum": { + "p50": 65.9520011395216, + "p90": 72.67200201749802, + "p95": 78.43200117349625, + "p99": 96.70400246977806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 31.168000772595406, + "p90": 32.92800113558769, + "p95": 35.0399985909462, + "p99": 38.2080003619194 + }, + "combine": { + "p50": 35.360001027584076, + "p90": 37.91999816894531, + "p95": 39.423998445272446, + "p99": 46.78399860858917 + }, + "roundtrip": { + "p50": 2066.5600299835205, + "p90": 2069.119930267334, + "p95": 2071.5839862823486, + "p99": 2093.8880443573 + }, + "isolatedSum": { + "p50": 66.52800180017948, + "p90": 70.847999304533, + "p95": 74.46399703621864, + "p99": 84.99199897050858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 32.607998698949814, + "p90": 34.4959981739521, + "p95": 36.320000886917114, + "p99": 38.68800029158592 + }, + "combine": { + "p50": 39.0079990029335, + "p90": 41.439998894929886, + "p95": 43.136000633239746, + "p99": 63.19999694824219 + }, + "roundtrip": { + "p50": 2071.647882461548, + "p90": 2074.0480422973633, + "p95": 2075.999975204468, + "p99": 2087.615966796875 + }, + "isolatedSum": { + "p50": 71.61599770188332, + "p90": 75.93599706888199, + "p95": 79.45600152015686, + "p99": 101.88799723982811 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 34.752000123262405, + "p90": 36.639999598264694, + "p95": 39.61599990725517, + "p99": 44.64000090956688 + }, + "combine": { + "p50": 45.08800059556961, + "p90": 47.16800153255463, + "p95": 48.48000034689903, + "p99": 57.023998349905014 + }, + "roundtrip": { 
+ "p50": 2081.0561180114746, + "p90": 2083.1680297851562, + "p95": 2084.575891494751, + "p99": 2088.320016860962 + }, + "isolatedSum": { + "p50": 79.84000071883202, + "p90": 83.80800113081932, + "p95": 88.0960002541542, + "p99": 101.6639992594719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 41.728001087903976, + "p90": 51.711998879909515, + "p95": 52.960000932216644, + "p99": 56.8000003695488 + }, + "combine": { + "p50": 59.26400050520897, + "p90": 67.391999065876, + "p95": 68.80000233650208, + "p99": 75.68000257015228 + }, + "roundtrip": { + "p50": 2101.344108581543, + "p90": 2110.016107559204, + "p95": 2111.423969268799, + "p99": 2115.4561042785645 + }, + "isolatedSum": { + "p50": 100.99200159311295, + "p90": 119.10399794578552, + "p95": 121.76000326871872, + "p99": 132.48000293970108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 53.568001836538315, + "p90": 55.10399863123894, + "p95": 57.40800127387047, + "p99": 59.93599817156792 + }, + "combine": { + "p50": 85.15200018882751, + "p90": 89.05600011348724, + "p95": 90.17600119113922, + "p99": 115.55200070142746 + }, + "roundtrip": { + "p50": 2140.768051147461, + "p90": 2143.6479091644287, + "p95": 2144.7999477386475, + "p99": 2148.319959640503 + }, + "isolatedSum": { + "p50": 138.72000202536583, + "p90": 144.15999874472618, + "p95": 147.5840024650097, + "p99": 175.48799887299538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + 
"fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1f69f33f", + "identity": "h100|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_e51abe52", + "comparisonKey": "eae4a3564d8405b3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:28.382329+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.98400104045868, + "p90": 105.92000186443329, + "p95": 107.96800255775452, + "p99": 114.04799669981003 + }, + "combine": { + "p50": 73.27999919652939, + "p90": 75.13599842786789, + "p95": 75.55200159549713, + "p99": 81.28000050783157 + }, + "roundtrip": { + "p50": 139.42399621009827, + "p90": 146.97599411010742, + "p95": 148.22399616241455, + "p99": 154.4959992170334 + }, + "isolatedSum": { + "p50": 171.26400023698807, + "p90": 181.05600029230118, + "p95": 183.52000415325165, + "p99": 195.3279972076416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.14400225877762, + "p90": 104.38399761915207, + "p95": 105.92000186443329, + "p99": 109.50399935245514 + }, + "combine": { + "p50": 73.44000041484833, + "p90": 75.16799867153168, + "p95": 75.71200281381607, + "p99": 80.99199831485748 + }, + "roundtrip": { + "p50": 144.96000111103058, + "p90": 150.78400075435638, + "p95": 152.0320028066635, + "p99": 156.12800419330597 + }, + "isolatedSum": { + "p50": 171.58400267362595, + "p90": 179.55199629068375, + "p95": 181.63200467824936, + "p99": 190.49599766731262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.9200005531311, + "p90": 105.50399869680405, + "p95": 106.84800148010254, + "p99": 
112.22399771213531 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 74.97599720954895, + "p95": 77.60000228881836, + "p99": 83.13599973917007 + }, + "roundtrip": { + "p50": 138.72000575065613, + "p90": 151.13599598407745, + "p95": 155.008003115654, + "p99": 163.96799683570862 + }, + "isolatedSum": { + "p50": 170.3680008649826, + "p90": 180.479995906353, + "p95": 184.4480037689209, + "p99": 195.3599974513054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.29600358009338, + "p90": 103.84000092744827, + "p95": 105.85600137710571, + "p99": 114.84800279140472 + }, + "combine": { + "p50": 73.31199944019318, + "p90": 75.1039981842041, + "p95": 75.77600330114365, + "p99": 82.5280025601387 + }, + "roundtrip": { + "p50": 144.22400295734406, + "p90": 151.39199793338776, + "p95": 154.2080044746399, + "p99": 161.3440066576004 + }, + "isolatedSum": { + "p50": 172.60800302028656, + "p90": 178.94399911165237, + "p95": 181.63200467824936, + "p99": 197.37600535154343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.02400118112564, + "p90": 102.75200009346008, + "p95": 105.98400235176086, + "p99": 116.80000275373459 + }, + "combine": { + "p50": 72.60800153017044, + "p90": 75.26399940252304, + "p95": 80.89599758386612, + "p99": 82.40000158548355 + }, + "roundtrip": { + "p50": 144.51199769973755, + "p90": 154.40000593662262, + "p95": 156.41599893569946, + "p99": 160.99199652671814 + }, + "isolatedSum": { + "p50": 169.63200271129608, + "p90": 178.01599949598312, 
+ "p95": 186.87999993562698, + "p99": 199.20000433921814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.2080020904541, + "p90": 100.99200159311295, + "p95": 105.6319996714592, + "p99": 118.30399930477142 + }, + "combine": { + "p50": 74.94399696588516, + "p90": 82.56000280380249, + "p95": 82.8159973025322, + "p99": 84.19200032949448 + }, + "roundtrip": { + "p50": 148.03199470043182, + "p90": 157.24800527095795, + "p95": 160.3199988603592, + "p99": 173.21600019931793 + }, + "isolatedSum": { + "p50": 169.15199905633926, + "p90": 183.55200439691544, + "p95": 188.4479969739914, + "p99": 202.4959996342659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.6639992594719, + "p90": 108.70400071144104, + "p95": 111.51999980211258, + "p99": 115.4559999704361 + }, + "combine": { + "p50": 82.36800134181976, + "p90": 89.79199826717377, + "p95": 90.68799763917923, + "p99": 92.25600212812424 + }, + "roundtrip": { + "p50": 155.13600409030914, + "p90": 164.86400365829468, + "p95": 167.67999529838562, + "p99": 246.94399535655975 + }, + "isolatedSum": { + "p50": 184.03200060129166, + "p90": 198.4959989786148, + "p95": 202.2079974412918, + "p99": 207.71200209856033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 109.72800105810165, + "p90": 115.64800143241882, + "p95": 118.52800101041794, + "p99": 123.83999675512314 + }, + "combine": { + "p50": 97.02400118112564, + "p90": 99.45599734783173, + "p95": 100.03200173377991, + "p99": 106.23999685049057 + }, + "roundtrip": { + "p50": 172.83199727535248, + "p90": 179.71199750900269, + "p95": 182.3039948940277, + "p99": 188.9919936656952 + }, + "isolatedSum": { + "p50": 206.7520022392273, + "p90": 215.10399878025055, + "p95": 218.56000274419785, + "p99": 230.0799936056137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-440f901a", + "identity": "h100|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_e51abe52", + "comparisonKey": "91f8858f51a98b66", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:20.268885+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + 
"deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.96000355482101, + "p90": 76.03199779987335, + "p95": 80.76799660921097, + "p99": 133.85599851608276 + }, + "combine": { + "p50": 65.88800251483917, + "p90": 67.10399687290192, + "p95": 67.45599955320358, + "p99": 73.53600114583969 + }, + "roundtrip": { + "p50": 121.91999703645706, + "p90": 125.72799623012543, + "p95": 128.1919926404953, + "p99": 135.83999872207642 + }, + "isolatedSum": { + "p50": 134.8480060696602, + "p90": 143.13599467277527, + "p95": 148.22399616241455, + "p99": 207.39199966192245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.83200258016586, + "p90": 74.52800124883652, + "p95": 77.2159993648529, + "p99": 80.83199709653854 + }, + "combine": { + "p50": 66.59200042486191, + "p90": 67.61600077152252, + "p95": 68.67200136184692, + "p99": 74.33599978685379 + }, + "roundtrip": { + "p50": 
121.24799937009811, + "p90": 125.50400197505951, + "p95": 127.13600695133209, + "p99": 132.89600610733032 + }, + "isolatedSum": { + "p50": 135.42400300502777, + "p90": 142.14400202035904, + "p95": 145.88800072669983, + "p99": 155.16799688339233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.07199728488922, + "p90": 105.21599650382996, + "p95": 106.39999806880951, + "p99": 116.54400080442429 + }, + "combine": { + "p50": 67.32799857854843, + "p90": 81.7599967122078, + "p95": 82.36800134181976, + "p99": 83.13599973917007 + }, + "roundtrip": { + "p50": 122.78400361537933, + "p90": 156.80000185966492, + "p95": 158.30400586128235, + "p99": 161.47199273109436 + }, + "isolatedSum": { + "p50": 138.39999586343765, + "p90": 186.97599321603775, + "p95": 188.76799941062927, + "p99": 199.68000054359436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.37599927186966, + "p90": 74.49600100517273, + "p95": 77.27999985218048, + "p99": 84.25600081682205 + }, + "combine": { + "p50": 67.00800359249115, + "p90": 71.45600020885468, + "p95": 72.83200323581696, + "p99": 74.8480036854744 + }, + "roundtrip": { + "p50": 120.70400267839432, + "p90": 126.88000500202179, + "p95": 129.4720023870468, + "p99": 135.55200397968292 + }, + "isolatedSum": { + "p50": 136.3840028643608, + "p90": 145.9520012140274, + "p95": 150.11200308799744, + "p99": 159.10400450229645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 
5.296875, + "recvTokensMax": 50, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.26400005817413, + "p90": 101.3759970664978, + "p95": 103.35999727249146, + "p99": 108.47999900579453 + }, + "combine": { + "p50": 67.64800101518631, + "p90": 82.97599852085114, + "p95": 83.39200168848038, + "p99": 84.41600203514099 + }, + "roundtrip": { + "p50": 126.08000636100769, + "p90": 152.92799472808838, + "p95": 154.52800691127777, + "p99": 158.91200304031372 + }, + "isolatedSum": { + "p50": 146.91200107336044, + "p90": 184.35199558734894, + "p95": 186.75199896097183, + "p99": 192.89600104093552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.64800298213959, + "p90": 99.20000284910202, + "p95": 101.72799974679947, + "p99": 106.1440035700798 + }, + "combine": { + "p50": 74.75200295448303, + "p90": 83.3280012011528, + "p95": 83.80799740552902, + "p99": 90.65599739551544 + }, + "roundtrip": { + "p50": 130.17599284648895, + "p90": 133.69600474834442, + "p95": 135.55200397968292, + "p99": 140.54399728775024 + }, + "isolatedSum": { + "p50": 154.40000593662262, + "p90": 182.52800405025482, + "p95": 185.5359971523285, + "p99": 196.80000096559525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.02400052547455, + "p90": 110.72000116109848, + "p95": 114.14399743080139, + "p99": 117.53600090742111 + }, + "combine": { + "p50": 83.13599973917007, + 
"p90": 91.61599725484848, + "p95": 92.92799979448318, + "p99": 98.68799895048141 + }, + "roundtrip": { + "p50": 147.2959965467453, + "p90": 169.72799599170685, + "p95": 172.06400632858276, + "p99": 175.9359985589981 + }, + "isolatedSum": { + "p50": 176.16000026464462, + "p90": 202.33599841594696, + "p95": 207.07199722528458, + "p99": 216.22399985790253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.84800148010254, + "p90": 123.10399860143661, + "p95": 125.82400441169739, + "p99": 133.82400572299957 + }, + "combine": { + "p50": 99.23200309276581, + "p90": 107.77600109577179, + "p95": 108.47999900579453, + "p99": 114.9120032787323 + }, + "roundtrip": { + "p50": 174.3360012769699, + "p90": 193.40799748897552, + "p95": 195.360004901886, + "p99": 198.81600141525269 + }, + "isolatedSum": { + "p50": 206.08000457286835, + "p90": 230.8799996972084, + "p95": 234.3040034174919, + "p99": 248.73600900173187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b0602a1f", + "identity": "h100|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_e51abe52", + "comparisonKey": "c4409574a57cfa95", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:13.461513+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.29600292444229, + "p90": 102.68799960613251, + "p95": 103.45599800348282, + "p99": 109.15199667215347 + }, + "combine": { + "p50": 75.03999769687653, + "p90": 81.66400343179703, + "p95": 82.65600353479385, + "p99": 83.36000144481659 + }, + "roundtrip": { + "p50": 146.01600170135498, + "p90": 153.21600437164307, + "p95": 154.52800691127777, + "p99": 160.67199409008026 + }, + "isolatedSum": { + "p50": 170.33600062131882, + 
"p90": 184.35200303792953, + "p95": 186.11200153827667, + "p99": 192.51199811697006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.35999947786331, + "p90": 102.46399790048599, + "p95": 103.74400019645691, + "p99": 107.45599865913391 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 81.216000020504, + "p95": 82.36800134181976, + "p99": 83.36000144481659 + }, + "roundtrip": { + "p50": 128.4160017967224, + "p90": 150.176003575325, + "p95": 152.8320014476776, + "p99": 178.81600558757782 + }, + "isolatedSum": { + "p50": 144.06400173902512, + "p90": 183.67999792099, + "p95": 186.11200153827667, + "p99": 190.8160001039505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.29599899053574, + "p90": 104.06400263309479, + "p95": 106.6880002617836, + "p99": 173.21600019931793 + }, + "combine": { + "p50": 72.86400347948074, + "p90": 83.03999900817871, + "p95": 84.48000252246857, + "p99": 235.55199801921844 + }, + "roundtrip": { + "p50": 128.67200374603271, + "p90": 156.09599649906158, + "p95": 159.58400070667267, + "p99": 166.52800142765045 + }, + "isolatedSum": { + "p50": 144.16000247001648, + "p90": 187.1040016412735, + "p95": 191.16800278425217, + "p99": 408.7679982185364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": 
{ + "p50": 73.05599749088287, + "p90": 103.29599678516388, + "p95": 104.96000200510025, + "p99": 220.47999501228333 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 82.24000036716461, + "p95": 82.94399827718735, + "p99": 107.58399963378906 + }, + "roundtrip": { + "p50": 130.3360015153885, + "p90": 154.88000214099884, + "p95": 158.65600109100342, + "p99": 308.28800797462463 + }, + "isolatedSum": { + "p50": 146.2399959564209, + "p90": 185.5359971523285, + "p95": 187.9040002822876, + "p99": 328.0639946460724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 81.4720019698143, + "p90": 100.44799745082855, + "p95": 104.19200360774994, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 73.63200187683105, + "p90": 83.52000266313553, + "p95": 84.22400057315826, + "p99": 90.14400094747543 + }, + "roundtrip": { + "p50": 131.96800649166107, + "p90": 159.55199301242828, + "p95": 162.84799575805664, + "p99": 166.97600483894348 + }, + "isolatedSum": { + "p50": 155.10400384664536, + "p90": 183.96800011396408, + "p95": 188.4160041809082, + "p99": 200.96000283956528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.60800349712372, + "p90": 108.03200304508209, + "p95": 109.53599959611893, + "p99": 116.83200299739838 + }, + "combine": { + "p50": 80.25600016117096, + "p90": 89.66399729251862, + "p95": 91.51999652385712, + "p99": 97.4079966545105 + }, + "roundtrip": { + "p50": 137.40800321102142, + "p90": 161.24799847602844, + "p95": 165.3120070695877, + 
"p99": 307.99999833106995 + }, + "isolatedSum": { + "p50": 164.86400365829468, + "p90": 197.6960003376007, + "p95": 201.05599611997604, + "p99": 214.23999965190887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.1200025677681, + "p90": 116.19199812412262, + "p95": 117.98399686813354, + "p99": 122.23999947309494 + }, + "combine": { + "p50": 89.59999680519104, + "p90": 98.01600128412247, + "p95": 99.5199978351593, + "p99": 106.78400099277496 + }, + "roundtrip": { + "p50": 154.6880006790161, + "p90": 175.84000527858734, + "p95": 181.60000443458557, + "p99": 369.1520094871521 + }, + "isolatedSum": { + "p50": 190.71999937295914, + "p90": 214.2079994082451, + "p95": 217.50399470329285, + "p99": 229.0240004658699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.7120019197464, + "p90": 130.5599957704544, + "p95": 131.45600259304047, + "p99": 134.49600338935852 + }, + "combine": { + "p50": 104.44799810647964, + "p90": 114.97599631547928, + "p95": 115.48800021409988, + "p99": 116.41599982976913 + }, + "roundtrip": { + "p50": 188.1600022315979, + "p90": 205.88800311088562, + "p95": 207.7759951353073, + "p99": 213.28000724315643 + }, + "isolatedSum": { + "p50": 220.16000002622604, + "p90": 245.53599208593369, + "p95": 246.94400280714035, + "p99": 250.91200321912766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3a2e815c", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_0b58f4de", + "comparisonKey": "2b24bee4ac6d8f67", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:15.496048+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.88000041246414, + "p90": 105.21599650382996, + "p95": 106.59199953079224, + "p99": 111.96800321340561 + }, + "combine": { + "p50": 82.11199939250946, + "p90": 83.80799740552902, + "p95": 85.95199882984161, + "p99": 91.20000153779984 + }, + "roundtrip": { + "p50": 153.31199765205383, + "p90": 159.96800363063812, + "p95": 161.5999937057495, + "p99": 164.89599645137787 + }, + "isolatedSum": { + "p50": 180.9919998049736, + "p90": 189.02399390935898, + "p95": 192.54399836063385, + "p99": 203.16800475120544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 100.16000270843506, + "p90": 105.95200210809708, + "p95": 107.80800133943558, + "p99": 112.86400258541107 + }, + "combine": { + "p50": 81.82399719953537, + "p90": 83.61600339412689, + "p95": 84.16000008583069, + "p99": 90.55999666452408 + }, + "roundtrip": { + "p50": 153.9199948310852, + "p90": 162.4000072479248, + "p95": 163.42400014400482, + "p99": 169.69600319862366 + }, + "isolatedSum": { + "p50": 181.98399990797043, + "p90": 189.56800550222397, + "p95": 191.96800142526627, + "p99": 203.42399924993515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.52799773216248, + "p90": 105.85600137710571, + "p95": 107.29599744081497, + "p99": 113.79200220108032 + }, + 
"combine": { + "p50": 82.43200182914734, + "p90": 88.51200342178345, + "p95": 89.53599631786346, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 157.31200575828552, + "p90": 165.21599888801575, + "p95": 166.46400094032288, + "p99": 174.112007021904 + }, + "isolatedSum": { + "p50": 180.95999956130981, + "p90": 194.36800479888916, + "p95": 196.83199375867844, + "p99": 204.73600178956985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.64799880981445, + "p90": 104.06400263309479, + "p95": 106.08000308275223, + "p99": 112.86400258541107 + }, + "combine": { + "p50": 83.00799876451492, + "p90": 89.08800035715103, + "p95": 90.01599997282028, + "p99": 91.80799871683121 + }, + "roundtrip": { + "p50": 157.95199573040009, + "p90": 164.95999693870544, + "p95": 166.36799275875092, + "p99": 171.424001455307 + }, + "isolatedSum": { + "p50": 182.65599757432938, + "p90": 193.15200299024582, + "p95": 196.0960030555725, + "p99": 204.67200130224228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.30400347709656, + "p90": 102.75200009346008, + "p95": 104.86400127410889, + "p99": 108.09600353240967 + }, + "combine": { + "p50": 83.48800241947174, + "p90": 90.7519981265068, + "p95": 91.32800251245499, + "p99": 97.15200215578079 + }, + "roundtrip": { + "p50": 158.1439971923828, + "p90": 164.41600024700165, + "p95": 165.6319946050644, + "p99": 169.91999745368958 + }, + "isolatedSum": { + "p50": 181.7920058965683, + "p90": 193.5039982199669, + "p95": 
196.19200378656387, + "p99": 205.24800568819046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.04800218343735, + "p90": 105.98400235176086, + "p95": 107.68000036478043, + "p99": 111.23199760913849 + }, + "combine": { + "p50": 89.28000181913376, + "p90": 91.96799993515015, + "p95": 96.99200093746185, + "p99": 98.62399846315384 + }, + "roundtrip": { + "p50": 161.31199896335602, + "p90": 170.52799463272095, + "p95": 172.38399386405945, + "p99": 177.85599827766418 + }, + "isolatedSum": { + "p50": 191.3280040025711, + "p90": 197.952002286911, + "p95": 204.67200130224228, + "p99": 209.85599607229233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.6000007390976, + "p90": 117.85600334405899, + "p95": 120.44800072908401, + "p99": 126.78399682044983 + }, + "combine": { + "p50": 98.88000041246414, + "p90": 105.98400235176086, + "p95": 106.84800148010254, + "p99": 108.09600353240967 + }, + "roundtrip": { + "p50": 177.66399681568146, + "p90": 183.00800025463104, + "p95": 185.248002409935, + "p99": 188.960000872612 + }, + "isolatedSum": { + "p50": 212.48000115156174, + "p90": 223.84000569581985, + "p95": 227.29600220918655, + "p99": 234.8800003528595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + 
"p50": 128.06400656700134, + "p90": 134.11200046539307, + "p95": 136.09600067138672, + "p99": 139.8400068283081 + }, + "combine": { + "p50": 116.28799885511398, + "p90": 123.6800029873848, + "p95": 124.32000041007996, + "p99": 125.15200674533844 + }, + "roundtrip": { + "p50": 212.99199759960175, + "p90": 220.96000611782074, + "p95": 223.13599288463593, + "p99": 229.312002658844 + }, + "isolatedSum": { + "p50": 244.35200542211533, + "p90": 257.79200345277786, + "p95": 260.4160010814667, + "p99": 264.99201357364655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4b66f8a8", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_e51abe52", + "comparisonKey": "57182dd5971ef442", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:07.451487+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + 
"deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.47999703884125, + "p90": 104.41599786281586, + "p95": 106.27199709415436, + "p99": 112.44799941778183 + }, + "combine": { + "p50": 80.79999685287476, + "p90": 83.10399949550629, + "p95": 83.36000144481659, + "p99": 84.1279998421669 + }, + "roundtrip": { + "p50": 145.50399780273438, + "p90": 152.8320014476776, + "p95": 154.08000349998474, + "p99": 159.55199301242828 + }, + "isolatedSum": { + "p50": 177.279993891716, + "p90": 187.51999735832214, + "p95": 189.63199853897095, + "p99": 196.57599925994873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.77600264549255, + "p90": 102.81600058078766, + "p95": 104.00000214576721, + "p99": 107.77600109577179 + }, + "combine": { + "p50": 74.30399954319, + "p90": 82.84799754619598, + "p95": 83.23200047016144, + "p99": 84.3840017914772 + }, + "roundtrip": { + "p50": 132.22399353981018, 
+ "p90": 153.1199961900711, + "p95": 154.78399395942688, + "p99": 160.5760008096695 + }, + "isolatedSum": { + "p50": 146.08000218868256, + "p90": 185.66399812698364, + "p95": 187.23200261592865, + "p99": 192.160002887249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.120001912117, + "p90": 103.87200117111206, + "p95": 106.08000308275223, + "p99": 114.84800279140472 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 84.6719965338707, + "p95": 89.02399986982346, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 132.60799646377563, + "p90": 165.3439998626709, + "p95": 167.55199432373047, + "p99": 175.32800137996674 + }, + "isolatedSum": { + "p50": 178.65600436925888, + "p90": 188.54399770498276, + "p95": 195.10400295257568, + "p99": 205.72800189256668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.24800288677216, + "p90": 103.7760004401207, + "p95": 105.59999942779541, + "p99": 110.59200018644333 + }, + "combine": { + "p50": 81.18399977684021, + "p90": 83.45600217580795, + "p95": 88.25600147247314, + "p99": 90.97599983215332 + }, + "roundtrip": { + "p50": 153.82400155067444, + "p90": 161.3759994506836, + "p95": 162.75200247764587, + "p99": 166.24000668525696 + }, + "isolatedSum": { + "p50": 178.43200266361237, + "p90": 187.23200261592865, + "p95": 193.85600090026855, + "p99": 201.56800001859665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + 
"recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.6079980134964, + "p90": 102.62399911880493, + "p95": 105.15200346708298, + "p99": 112.06399649381638 + }, + "combine": { + "p50": 82.14399963617325, + "p90": 89.9839997291565, + "p95": 91.0400003194809, + "p99": 92.12800115346909 + }, + "roundtrip": { + "p50": 157.02399611473083, + "p90": 165.3120070695877, + "p95": 167.67999529838562, + "p99": 174.40000176429749 + }, + "isolatedSum": { + "p50": 178.75199764966965, + "p90": 192.60799884796143, + "p95": 196.19200378656387, + "p99": 204.19199764728546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.28800237178802, + "p90": 106.1440035700798, + "p95": 110.52799969911575, + "p99": 230.3999960422516 + }, + "combine": { + "p50": 82.97599852085114, + "p90": 91.87199920415878, + "p95": 97.08800166845322, + "p99": 99.67999905347824 + }, + "roundtrip": { + "p50": 143.74400675296783, + "p90": 168.2240068912506, + "p95": 173.24799299240112, + "p99": 179.51999604701996 + }, + "isolatedSum": { + "p50": 175.26400089263916, + "p90": 198.0160027742386, + "p95": 207.61600136756897, + "p99": 330.0799950957298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 111.51999980211258, + "p90": 117.91999638080597, + "p95": 119.77600306272507, + "p99": 127.80800461769104 + }, + "combine": { + "p50": 97.88800030946732, + "p90": 105.05600273609161, 
+ "p95": 105.95200210809708, + "p99": 107.93600231409073 + }, + "roundtrip": { + "p50": 172.19200730323792, + "p90": 183.74399840831757, + "p95": 186.0159933567047, + "p99": 190.14400243759155 + }, + "isolatedSum": { + "p50": 209.4080001115799, + "p90": 222.97599911689758, + "p95": 225.72800517082214, + "p99": 235.74400693178177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.68800157308578, + "p90": 134.62400436401367, + "p95": 137.60000467300415, + "p99": 172.38399386405945 + }, + "combine": { + "p50": 108.41599851846695, + "p90": 122.52800166606903, + "p95": 123.3920007944107, + "p99": 205.9199959039688 + }, + "roundtrip": { + "p50": 200.095996260643, + "p90": 220.73599696159363, + "p95": 222.6559966802597, + "p99": 320.76799869537354 + }, + "isolatedSum": { + "p50": 223.10400009155273, + "p90": 257.1520060300827, + "p95": 260.99200546741486, + "p99": 378.30398976802826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8f7bd789", + "identity": "h100|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_e51abe52", + "comparisonKey": "5afcc954ab595c4a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:35.413481+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.88000327348709, + "p90": 77.11999863386154, + "p95": 79.32800054550171, + "p99": 85.66399663686752 + }, + "combine": { + "p50": 72.86400347948074, + "p90": 75.19999891519547, + "p95": 75.6160020828247, + "p99": 76.92799717187881 + }, + "roundtrip": { + "p50": 128.86400520801544, + "p90": 134.43200290203094, + "p95": 136.19199395179749, + "p99": 144.3520039319992 + }, + "isolatedSum": { + "p50": 143.74400675296783, + "p90": 152.319997549057, + "p95": 
154.94400262832642, + "p99": 162.59199380874634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.23199850320816, + "p90": 76.57600194215775, + "p95": 79.3600007891655, + "p99": 83.52000266313553 + }, + "combine": { + "p50": 73.69600236415863, + "p90": 75.29599964618683, + "p95": 75.83999633789062, + "p99": 83.45600217580795 + }, + "roundtrip": { + "p50": 130.46400249004364, + "p90": 135.3919953107834, + "p95": 137.43999600410461, + "p99": 142.94399321079254 + }, + "isolatedSum": { + "p50": 144.9280008673668, + "p90": 151.87200158834457, + "p95": 155.19999712705612, + "p99": 166.97600483894348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.80000299215317, + "p90": 107.80800133943558, + "p95": 109.18399691581726, + "p99": 115.77600240707397 + }, + "combine": { + "p50": 74.94399696588516, + "p90": 83.67999643087387, + "p95": 84.48000252246857, + "p99": 90.40000289678574 + }, + "roundtrip": { + "p50": 133.760005235672, + "p90": 163.2319986820221, + "p95": 166.07999801635742, + "p99": 170.56000232696533 + }, + "isolatedSum": { + "p50": 147.74399995803833, + "p90": 191.48799777030945, + "p95": 193.66399943828583, + "p99": 206.1760053038597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.72800260782242, + 
"p90": 104.38399761915207, + "p95": 105.50399869680405, + "p99": 109.72800105810165 + }, + "combine": { + "p50": 75.32799988985062, + "p90": 83.64800363779068, + "p95": 83.96799862384796, + "p99": 84.60800349712372 + }, + "roundtrip": { + "p50": 131.9040060043335, + "p90": 136.86400651931763, + "p95": 139.13600146770477, + "p99": 156.6080003976822 + }, + "isolatedSum": { + "p50": 149.05600249767303, + "p90": 188.03200125694275, + "p95": 189.471997320652, + "p99": 194.33600455522537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 85.28000116348267, + "p90": 101.6319990158081, + "p95": 103.32799702882767, + "p99": 108.41599851846695 + }, + "combine": { + "p50": 76.35200023651123, + "p90": 89.75999802350998, + "p95": 91.20000153779984, + "p99": 92.03200042247772 + }, + "roundtrip": { + "p50": 132.4480026960373, + "p90": 164.70399498939514, + "p95": 166.24000668525696, + "p99": 168.83200407028198 + }, + "isolatedSum": { + "p50": 161.6320013999939, + "p90": 191.39199703931808, + "p95": 194.5279985666275, + "p99": 200.44799894094467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.73599833250046, + "p90": 107.10400342941284, + "p95": 108.83200168609619, + "p99": 113.92000317573547 + }, + "combine": { + "p50": 83.71199667453766, + "p90": 92.41600334644318, + "p95": 92.86399930715561, + "p99": 98.59199821949005 + }, + "roundtrip": { + "p50": 140.70400595664978, + "p90": 173.2800006866455, + "p95": 174.5920032262802, + "p99": 177.76000499725342 + }, + 
"isolatedSum": { + "p50": 176.44799500703812, + "p90": 199.52000677585602, + "p95": 201.6960009932518, + "p99": 212.51200139522552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.27200323343277, + "p90": 116.57600104808807, + "p95": 117.98399686813354, + "p99": 122.84799665212631 + }, + "combine": { + "p50": 92.03200042247772, + "p90": 104.92800176143646, + "p95": 105.98400235176086, + "p99": 107.32799768447876 + }, + "roundtrip": { + "p50": 168.09600591659546, + "p90": 183.58400464057922, + "p95": 185.82400679588318, + "p99": 188.57599794864655 + }, + "isolatedSum": { + "p50": 190.3040036559105, + "p90": 221.50400280952454, + "p95": 223.9679992198944, + "p99": 230.17599433660507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.26399850845337, + "p90": 134.46399569511414, + "p95": 135.5839967727661, + "p99": 141.12000167369843 + }, + "combine": { + "p50": 108.57599973678589, + "p90": 122.72000312805176, + "p95": 124.41600114107132, + "p99": 126.3359934091568 + }, + "roundtrip": { + "p50": 201.63199305534363, + "p90": 216.19200706481934, + "p95": 218.87999773025513, + "p99": 224.2240011692047 + }, + "isolatedSum": { + "p50": 223.83999824523926, + "p90": 257.1839988231659, + "p95": 259.99999791383743, + "p99": 267.4559950828552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-af86800a", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h100_a0b9cab6", + "comparisonKey": "7264926f63286527", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:35.077114+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 101.05600208044052, + "p90": 108.06400328874588, + "p95": 110.55999994277954, + "p99": 114.46399986743927 + }, + "combine": { + "p50": 82.20800012350082, + "p90": 84.22400057315826, + "p95": 85.7279971241951, + "p99": 90.55999666452408 + }, + "roundtrip": { + "p50": 153.60000729560852, + "p90": 159.743994474411, + "p95": 161.9199961423874, + "p99": 166.20799899101257 + }, + "isolatedSum": { + "p50": 183.26400220394135, + "p90": 192.28800386190414, + "p95": 196.28799706697464, + "p99": 205.02399653196335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 106.72000050544739, + "p90": 111.96800321340561, + "p95": 114.23999816179276, + "p99": 120.64000219106674 + }, + "combine": { + "p50": 89.82399851083755, + "p90": 92.54399687051773, + "p95": 96.70399874448776, + "p99": 97.69599884748459 + }, + "roundtrip": { + "p50": 164.09599781036377, + "p90": 169.8240041732788, + "p95": 171.61600291728973, + "p99": 176.06399953365326 + }, + "isolatedSum": { + "p50": 196.54399901628494, + "p90": 204.51200008392334, + "p95": 210.94399690628052, + "p99": 218.33600103855133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 143.39199662208557, + "p90": 148.3200043439865, + "p95": 150.07999539375305, + "p99": 
154.78399395942688 + }, + "combine": { + "p50": 122.3360002040863, + "p90": 124.95999783277512, + "p95": 128.60800325870514, + "p99": 130.46400249004364 + }, + "roundtrip": { + "p50": 227.07200050354004, + "p90": 232.2559952735901, + "p95": 233.69599878787994, + "p99": 236.76800727844238 + }, + "isolatedSum": { + "p50": 265.7279968261719, + "p90": 273.2800021767616, + "p95": 278.6879986524582, + "p99": 285.2479964494705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-497fe37b", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h100_55323c56", + "comparisonKey": "241d869f8e6b833d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:58.984057+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.71199864149094, + "p90": 103.00800204277039, + "p95": 104.60799932479858, + "p99": 108.76800119876862 + }, + "combine": { + "p50": 81.4720019698143, + "p90": 83.96799862384796, + "p95": 84.41600203514099, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 146.5280055999756, + "p90": 154.62400019168854, + "p95": 157.44000673294067, + "p99": 162.08000481128693 + }, + "isolatedSum": { + "p50": 177.18400061130524, + "p90": 186.97600066661835, + "p95": 189.02400135993958, + "p99": 198.62399995326996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.49599814414978, + "p90": 207.35999941825867, + "p95": 209.98400449752808, + "p99": 214.59199488162994 + }, + "combine": { + "p50": 91.5519967675209, + "p90": 131.58400356769562, + "p95": 138.43199610710144, + "p99": 143.0719941854477 + }, + "roundtrip": { + "p50": 160.22400557994843, + 
"p90": 272.8320062160492, + "p95": 278.1760096549988, + "p99": 331.87198638916016 + }, + "isolatedSum": { + "p50": 194.04799491167068, + "p90": 338.9440029859543, + "p95": 348.4160006046295, + "p99": 357.66398906707764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 136.6720050573349, + "p90": 144.80000734329224, + "p95": 147.13600277900696, + "p99": 151.16800367832184 + }, + "combine": { + "p50": 123.55200201272964, + "p90": 125.18399953842163, + "p95": 125.76000392436981, + "p99": 130.65600395202637 + }, + "roundtrip": { + "p50": 223.00800681114197, + "p90": 229.72799837589264, + "p95": 231.58399760723114, + "p99": 235.1360023021698 + }, + "isolatedSum": { + "p50": 260.22400707006454, + "p90": 269.98400688171387, + "p95": 272.89600670337677, + "p99": 281.8240076303482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a6243da1", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h100_54323ac3", + "comparisonKey": "5b9b8c3890bf9efd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:21.857981+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.59199756383896, + "p90": 103.74400019645691, + "p95": 105.3759977221489, + "p99": 120.7680031657219 + }, + "combine": { + "p50": 81.69600367546082, + "p90": 83.52000266313553, + "p95": 84.16000008583069, + "p99": 89.50400352478027 + }, + "roundtrip": { + "p50": 149.05600249767303, + "p90": 155.71199357509613, + "p95": 157.50400722026825, + "p99": 161.6320013999939 + }, + "isolatedSum": { + "p50": 176.28800123929977, + "p90": 187.26400285959244, + "p95": 189.53599780797958, + "p99": 
210.27200669050217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.62399911880493, + "p90": 108.83200168609619, + "p95": 110.49599945545197, + "p99": 114.52800035476685 + }, + "combine": { + "p50": 90.27200192213058, + "p90": 91.74399822950363, + "p95": 92.22400188446045, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 157.75999426841736, + "p90": 165.43999314308167, + "p95": 167.55199432373047, + "p99": 172.70399630069733 + }, + "isolatedSum": { + "p50": 192.89600104093552, + "p90": 200.57599991559982, + "p95": 202.72000133991241, + "p99": 211.99999749660492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.12800705432892, + "p90": 146.97599411010742, + "p95": 148.3519971370697, + "p99": 150.39999783039093 + }, + "combine": { + "p50": 122.72000312805176, + "p90": 124.51200187206268, + "p95": 124.95999783277512, + "p99": 130.2720010280609 + }, + "roundtrip": { + "p50": 224.86400604248047, + "p90": 231.32799565792084, + "p95": 232.28800296783447, + "p99": 237.63200640678406 + }, + "isolatedSum": { + "p50": 250.84801018238068, + "p90": 271.4879959821701, + "p95": 273.3119949698448, + "p99": 280.67199885845184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d0ab8873", + "identity": 
"h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_53323930", + "comparisonKey": "be12000e65b38632", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:45.094955+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": 
"2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.28800302743912, + "p90": 103.00800204277039, + "p95": 106.1440035700798, + "p99": 145.4080045223236 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 83.20000022649765, + "p95": 83.74399691820145, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 149.05600249767303, + "p90": 156.15999698638916, + "p95": 158.720001578331, + "p99": 167.52000153064728 + }, + "isolatedSum": { + "p50": 177.37600207328796, + "p90": 186.20800226926804, + "p95": 189.88800048828125, + "p99": 234.8800078034401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.98400169610977, + "p90": 106.91200196743011, + "p95": 109.66400057077408, + "p99": 117.37599968910217 + }, + "combine": { + "p50": 89.59999680519104, + "p90": 91.45600348711014, + "p95": 93.66399794816971, + "p99": 98.36799651384354 + }, + "roundtrip": { + "p50": 160.16000509262085, + "p90": 165.21599888801575, + "p95": 167.9680049419403, + "p99": 171.87200486660004 + }, + "isolatedSum": { + "p50": 191.5839985013008, + "p90": 198.36800545454025, + "p95": 203.3279985189438, + "p99": 215.7439962029457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 139.93600010871887, + "p90": 144.51199769973755, + "p95": 146.08000218868256, + "p99": 151.45599842071533 + }, + "combine": { + "p50": 122.36800044775009, + "p90": 124.4800016283989, + 
"p95": 127.45599448680878, + "p99": 131.00799918174744 + }, + "roundtrip": { + "p50": 225.72800517082214, + "p90": 230.880007147789, + "p95": 233.5679978132248, + "p99": 239.23200368881226 + }, + "isolatedSum": { + "p50": 262.30400055646896, + "p90": 268.99199932813644, + "p95": 273.53599667549133, + "p99": 282.46399760246277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-87618cbf", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_f262fa06", + "comparisonKey": "2c637cf894c41c65", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:07.784757+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.22399860620499, + "p90": 102.65599936246872, + "p95": 103.93600165843964, + "p99": 108.06400328874588 + }, + "combine": { + "p50": 75.16799867153168, + "p90": 83.20000022649765, + "p95": 83.67999643087387, + "p99": 90.97599983215332 + }, + "roundtrip": { + "p50": 130.40000200271606, + "p90": 159.36000645160675, + "p95": 160.44799983501434, + "p99": 163.00800442695618 + }, + "isolatedSum": { + "p50": 147.39199727773666, + "p90": 185.85599958896637, + "p95": 187.6159980893135, + "p99": 199.0400031208992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.96799665689468, + "p90": 79.16799932718277, + "p95": 81.4720019698143, + "p99": 101.18400305509567 + }, + "combine": { + "p50": 75.07199794054031, + "p90": 76.60800218582153, + "p95": 80.89599758386612, + "p99": 83.3280012011528 + }, + "roundtrip": { + "p50": 127.96799838542938, + "p90": 134.62400436401367, + "p95": 136.57599687576294, + "p99": 142.62400567531586 + }, + "isolatedSum": { + "p50": 147.039994597435, + "p90": 155.7760015130043, + "p95": 
162.36799955368042, + "p99": 184.51200425624847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.19199901819229, + "p90": 100.35199671983719, + "p95": 103.10400277376175, + "p99": 109.21599715948105 + }, + "combine": { + "p50": 75.71200281381607, + "p90": 90.46400338411331, + "p95": 91.16800129413605, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 130.8159977197647, + "p90": 163.455992937088, + "p95": 164.86400365829468, + "p99": 169.95200514793396 + }, + "isolatedSum": { + "p50": 151.90400183200836, + "p90": 190.8160001039505, + "p95": 194.2720040678978, + "p99": 201.1519968509674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 80.03199845552444, + "p90": 99.23200309276581, + "p95": 101.56799852848053, + "p99": 106.175996363163 + }, + "combine": { + "p50": 75.52000135183334, + "p90": 90.65599739551544, + "p95": 91.20000153779984, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 131.9040060043335, + "p90": 160.12799739837646, + "p95": 162.4000072479248, + "p99": 166.20799899101257 + }, + "isolatedSum": { + "p50": 155.5519998073578, + "p90": 189.88800048828125, + "p95": 192.76800006628036, + "p99": 197.79199361801147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 80.1599994301796, + "p90": 
102.27199643850327, + "p95": 104.86400127410889, + "p99": 112.12799698114395 + }, + "combine": { + "p50": 81.69600367546082, + "p90": 91.23200178146362, + "p95": 91.5519967675209, + "p99": 116.28799885511398 + }, + "roundtrip": { + "p50": 137.5039964914322, + "p90": 163.00800442695618, + "p95": 166.9120043516159, + "p99": 171.36000096797943 + }, + "isolatedSum": { + "p50": 161.8560031056404, + "p90": 193.5039982199669, + "p95": 196.4159980416298, + "p99": 228.41599583625793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 88.54400366544724, + "p90": 106.97600245475769, + "p95": 109.15199667215347, + "p99": 113.88800293207169 + }, + "combine": { + "p50": 84.86399799585342, + "p90": 99.35999661684036, + "p95": 99.71199929714203, + "p99": 100.63999891281128 + }, + "roundtrip": { + "p50": 147.90399372577667, + "p90": 170.3680008649826, + "p95": 171.87200486660004, + "p99": 175.00799894332886 + }, + "isolatedSum": { + "p50": 173.40800166130066, + "p90": 206.33599907159805, + "p95": 208.8639959692955, + "p99": 214.52800184488297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.15200346708298, + "p90": 128.1599998474121, + "p95": 130.68799674510956, + "p99": 137.82399892807007 + }, + "combine": { + "p50": 99.2640033364296, + "p90": 107.90400207042694, + "p95": 109.47199910879135, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 182.14400112628937, + "p90": 197.9839950799942, + "p95": 207.5520008802414, + "p99": 322.84799218177795 + }, + "isolatedSum": 
{ + "p50": 204.41600680351257, + "p90": 236.06400191783905, + "p95": 240.1599958539009, + "p99": 253.66400182247162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.56800377368927, + "p90": 146.84799313545227, + "p95": 148.92800152301788, + "p99": 154.4319987297058 + }, + "combine": { + "p50": 132.32000172138214, + "p90": 140.9280002117157, + "p95": 143.10400187969208, + "p99": 145.60000598430634 + }, + "roundtrip": { + "p50": 231.36000335216522, + "p90": 248.06399643421173, + "p95": 250.71999430656433, + "p99": 254.88001108169556 + }, + "isolatedSum": { + "p50": 265.8880054950714, + "p90": 287.77599334716797, + "p95": 292.03200340270996, + "p99": 300.03200471401215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-38177f28", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_fea85c39", + "comparisonKey": "dc1306fe292ff900", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:03.205817+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · balanced-rank-local", + 
"model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.54399752616882, + "p90": 103.71199995279312, + "p95": 107.2319969534874, + "p99": 113.37599903345108 + }, + "combine": { + "p50": 72.60800153017044, + "p90": 75.29599964618683, + "p95": 81.31200075149536, + "p99": 363.74399065971375 + }, + "roundtrip": { + "p50": 140.79999923706055, + "p90": 150.30400454998016, + "p95": 152.70400047302246, + "p99": 158.49600732326508 + }, + "isolatedSum": { + "p50": 169.15199905633926, + "p90": 179.00799959897995, + "p95": 188.54399770498276, + "p99": 477.1199896931648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + 
"fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.14400225877762, + "p90": 105.53599894046783, + "p95": 108.67200046777725, + "p99": 116.38399958610535 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 75.13599842786789, + "p95": 77.37600058317184, + "p99": 83.23200047016144 + }, + "roundtrip": { + "p50": 146.14400267601013, + "p90": 153.4080058336258, + "p95": 156.63999319076538, + "p99": 161.69600188732147 + }, + "isolatedSum": { + "p50": 170.56000232696533, + "p90": 180.67199736833572, + "p95": 186.0480010509491, + "p99": 199.61600005626678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.82400047779083, + "p90": 108.8000014424324, + "p95": 118.6240017414093, + "p99": 164.32000696659088 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 83.3280012011528, + "p95": 88.92799913883209, + "p99": 89.56799656152725 + }, + "roundtrip": { + "p50": 155.83999454975128, + "p90": 164.000004529953, + "p95": 166.07999801635742, + "p99": 173.47200214862823 + }, + "isolatedSum": { + "p50": 182.78399854898453, + "p90": 192.1280026435852, + "p95": 207.5520008802414, + "p99": 253.88800352811813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.64799946546555, + "p90": 108.5439994931221, + "p95": 110.49599945545197, + "p99": 116.99199676513672 + }, + "combine": { + "p50": 81.7599967122078, + "p90": 89.21600133180618, + 
"p95": 89.79199826717377, + "p99": 91.23200178146362 + }, + "roundtrip": { + "p50": 156.95999562740326, + "p90": 164.8000031709671, + "p95": 167.23200678825378, + "p99": 173.8239973783493 + }, + "isolatedSum": { + "p50": 185.40799617767334, + "p90": 197.76000082492828, + "p95": 200.28799772262573, + "p99": 208.22399854660034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cd5c85e9", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_14949248", + "comparisonKey": "0525008a21dcb08b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:38.535427+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.55999732017517, + "p90": 98.04800152778625, + "p95": 101.75999999046326, + "p99": 108.89600217342377 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 73.53600114583969, + "p95": 74.43200051784515, + "p99": 79.9039974808693 + }, + "roundtrip": { + "p50": 137.31199502944946, + "p90": 145.7280069589615, + "p95": 148.41599762439728, + "p99": 153.98399531841278 + }, + "isolatedSum": { + "p50": 161.85599565505981, + "p90": 171.58400267362595, + "p95": 176.1920005083084, + "p99": 188.79999965429306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 95.58399766683578, + "p90": 102.39999741315842, + "p95": 103.55199873447418, + "p99": 108.09600353240967 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 74.5600014925003, + "p95": 74.97599720954895, + "p99": 76.4160007238388 + }, + "roundtrip": { + "p50": 144.9279934167862, + "p90": 150.78400075435638, + "p95": 152.19199657440186, + "p99": 156.41599893569946 + }, + "isolatedSum": { + "p50": 168.09599846601486, + "p90": 
176.95999890565872, + "p95": 178.52799594402313, + "p99": 184.51200425624847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.30400347709656, + "p90": 104.19200360774994, + "p95": 106.20799660682678, + "p99": 113.15199732780457 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 80.70400357246399, + "p95": 81.15199953317642, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 146.04799449443817, + "p90": 157.02399611473083, + "p95": 158.62399339675903, + "p99": 165.18400609493256 + }, + "isolatedSum": { + "p50": 170.68800330162048, + "p90": 184.89600718021393, + "p95": 187.3599961400032, + "p99": 195.0719952583313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 101.05600208044052, + "p90": 104.2879968881607, + "p95": 105.76000064611435, + "p99": 110.01600325107574 + }, + "combine": { + "p50": 72.38399982452393, + "p90": 74.94399696588516, + "p95": 75.45600086450577, + "p99": 81.11999928951263 + }, + "roundtrip": { + "p50": 144.80000734329224, + "p90": 152.48000621795654, + "p95": 153.50399911403656, + "p99": 157.6640009880066 + }, + "isolatedSum": { + "p50": 173.44000190496445, + "p90": 179.23199385404587, + "p95": 181.21600151062012, + "p99": 191.13600254058838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + 
"p50": 98.39999675750732, + "p90": 104.35199737548828, + "p95": 107.10400342941284, + "p99": 114.33599889278412 + }, + "combine": { + "p50": 79.8719972372055, + "p90": 82.8159973025322, + "p95": 83.26400071382523, + "p99": 88.83199840784073 + }, + "roundtrip": { + "p50": 147.90399372577667, + "p90": 155.4879993200302, + "p95": 157.6319932937622, + "p99": 166.9439971446991 + }, + "isolatedSum": { + "p50": 178.27199399471283, + "p90": 187.16799467802048, + "p95": 190.36800414323807, + "p99": 203.16799730062485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.8639999628067, + "p90": 102.49599814414978, + "p95": 104.38399761915207, + "p99": 112.06399649381638 + }, + "combine": { + "p50": 80.6720033288002, + "p90": 83.03999900817871, + "p95": 83.64800363779068, + "p99": 107.80800133943558 + }, + "roundtrip": { + "p50": 147.2640037536621, + "p90": 153.18399667739868, + "p95": 154.7199934720993, + "p99": 157.75999426841736 + }, + "isolatedSum": { + "p50": 177.5360032916069, + "p90": 185.5359971523285, + "p95": 188.03200125694275, + "p99": 219.87199783325195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.39999675750732, + "p90": 136.9280070066452, + "p95": 142.59199798107147, + "p99": 251.80798768997192 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 83.48800241947174, + "p95": 88.8959988951683, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 154.2080044746399, + "p90": 166.49599373340607, + "p95": 169.91999745368958, + "p99": 497.8240132331848 + 
}, + "isolatedSum": { + "p50": 179.1040003299713, + "p90": 220.41600942611694, + "p95": 231.48799687623978, + "p99": 341.66398644447327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.29599809646606, + "p90": 116.22399836778641, + "p95": 118.49600076675415, + "p99": 123.71200323104858 + }, + "combine": { + "p50": 96.89600020647049, + "p90": 99.45599734783173, + "p95": 99.90400075912476, + "p99": 105.59999942779541 + }, + "roundtrip": { + "p50": 172.06400632858276, + "p90": 180.60800433158875, + "p95": 182.3039948940277, + "p99": 187.48800456523895 + }, + "isolatedSum": { + "p50": 208.19199830293655, + "p90": 215.67999571561813, + "p95": 218.4000015258789, + "p99": 229.312002658844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-28ffd907", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_c2e2c386", + "comparisonKey": "3ddcfed38d56b530", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:03.109427+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-moving", 
+ "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.20000284910202, + "p90": 106.55999928712845, + "p95": 108.89600217342377, + "p99": 115.58400094509125 + }, + "combine": { + "p50": 82.30400085449219, + "p90": 88.99199962615967, + "p95": 89.85599875450134, + "p99": 114.46399986743927 + }, + "roundtrip": { + "p50": 157.3439985513687, + "p90": 165.02399742603302, + "p95": 166.33599996566772, + "p99": 171.61600291728973 + }, + "isolatedSum": { + "p50": 181.5040037035942, + "p90": 195.55199891328812, + "p95": 198.7520009279251, + "p99": 230.04800081253052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 
5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 105.85600137710571, + "p90": 139.1039937734604, + "p95": 141.34399592876434, + "p99": 146.88000082969666 + }, + "combine": { + "p50": 91.74399822950363, + "p90": 108.60799998044968, + "p95": 113.63200098276138, + "p99": 115.10399729013443 + }, + "roundtrip": { + "p50": 169.11999881267548, + "p90": 205.34400641918182, + "p95": 208.6080014705658, + "p99": 212.73599565029144 + }, + "isolatedSum": { + "p50": 197.59999960660934, + "p90": 247.71199375391006, + "p95": 254.97599691152573, + "p99": 261.9839981198311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.7359974384308, + "p90": 158.720001578331, + "p95": 164.41600024700165, + "p99": 253.02401185035706 + }, + "combine": { + "p50": 129.7920048236847, + "p90": 132.25600123405457, + "p95": 132.9600065946579, + "p99": 138.62399756908417 + }, + "roundtrip": { + "p50": 232.03200101852417, + "p90": 240.6720072031021, + "p95": 243.20000410079956, + "p99": 247.39199876785278 + }, + "isolatedSum": { + "p50": 262.5280022621155, + "p90": 290.97600281238556, + "p95": 297.37600684165955, + "p99": 391.6480094194412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-66743855", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h100_c85c10c6", + 
"comparisonKey": "fcb68e93e737ff3c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:26.279771+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.93600100278854, + 
"p90": 106.23999685049057, + "p95": 108.12799632549286, + "p99": 115.9679964184761 + }, + "combine": { + "p50": 83.00799876451492, + "p90": 85.05599945783615, + "p95": 88.92799913883209, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 158.07999670505524, + "p90": 163.96799683570862, + "p95": 165.53600132465363, + "p99": 170.59199512004852 + }, + "isolatedSum": { + "p50": 182.94399976730347, + "p90": 191.29599630832672, + "p95": 197.05599546432495, + "p99": 206.84799551963806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.59199887514114, + "p90": 107.64800012111664, + "p95": 111.10399663448334, + "p99": 132.79999792575836 + }, + "combine": { + "p50": 91.36000275611877, + "p90": 96.99200093746185, + "p95": 97.79199957847595, + "p99": 100.35199671983719 + }, + "roundtrip": { + "p50": 166.62399470806122, + "p90": 172.2240000963211, + "p95": 173.8239973783493, + "p99": 176.96000635623932 + }, + "isolatedSum": { + "p50": 193.95200163125992, + "p90": 204.6400010585785, + "p95": 208.8959962129593, + "p99": 233.15199464559555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.63200426101685, + "p90": 138.46400380134583, + "p95": 141.7279988527298, + "p99": 149.31200444698334 + }, + "combine": { + "p50": 129.56799566745758, + "p90": 132.47999548912048, + "p95": 132.9279989004135, + "p99": 137.79200613498688 + }, + "roundtrip": { + "p50": 234.8800003528595, + "p90": 241.2479966878891, + "p95": 242.8479939699173, + "p99": 247.1040040254593 
+ }, + "isolatedSum": { + "p50": 263.1999999284744, + "p90": 270.9439992904663, + "p95": 274.6559977531433, + "p99": 287.1040105819702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da50f390", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_c75c0f33", + "comparisonKey": "68a11cf37c601086", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:49.603335+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.84000027179718, + "p90": 105.3759977221489, + "p95": 108.19199681282043, + "p99": 113.53600025177002 + }, + "combine": { + "p50": 82.94399827718735, + "p90": 89.15200084447861, + "p95": 89.88799899816513, + "p99": 91.64799749851227 + }, + "roundtrip": { + "p50": 157.6640009880066, + "p90": 165.40800034999847, + "p95": 167.71200299263, + "p99": 175.20000040531158 + }, + "isolatedSum": { + "p50": 182.78399854898453, + "p90": 194.5279985666275, + "p95": 198.07999581098557, + "p99": 205.1839977502823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 105.24799674749374, + "p90": 109.37599837779999, + "p95": 111.58400028944016, + "p99": 118.6240017414093 + }, + "combine": { + "p50": 90.14400094747543, + "p90": 97.59999811649323, + "p95": 98.04800152778625, + "p99": 99.32799637317657 + }, + "roundtrip": { + "p50": 167.58400201797485, + "p90": 172.67200350761414, + "p95": 173.98400604724884, + "p99": 177.5680035352707 + }, + "isolatedSum": { + "p50": 195.39199769496918, + "p90": 206.9759964942932, + "p95": 209.6320018172264, + "p99": 217.95199811458588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + 
"fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.59999656677246, + "p90": 149.34399724006653, + "p95": 153.4080058336258, + "p99": 156.38400614261627 + }, + "combine": { + "p50": 129.82399761676788, + "p90": 132.38400220870972, + "p95": 133.08799266815186, + "p99": 139.16799426078796 + }, + "roundtrip": { + "p50": 234.01600122451782, + "p90": 242.08000302314758, + "p95": 244.7039932012558, + "p99": 249.08800423145294 + }, + "isolatedSum": { + "p50": 263.42399418354034, + "p90": 281.72799944877625, + "p95": 286.49599850177765, + "p99": 295.55200040340424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-757ff96d", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_c65c0da0", + "comparisonKey": "e695e3e1f603dafc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:12.098698+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.51199793815613, + "p90": 108.70400071144104, + "p95": 129.56799566745758, + "p99": 352.2239923477173 + }, + "combine": { + "p50": 82.43200182914734, + "p90": 89.4400030374527, + "p95": 89.9519994854927, + "p99": 91.67999774217606 + }, + "roundtrip": { + "p50": 159.42400693893433, + "p90": 167.39200055599213, + "p95": 170.75200378894806, + "p99": 326.880007982254 + }, + "isolatedSum": { + "p50": 182.94399976730347, + "p90": 198.14400374889374, + "p95": 219.5199951529503, + "p99": 443.90399008989334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 105.56799918413162, 
+ "p90": 110.52799969911575, + "p95": 113.27999830245972, + "p99": 119.6800023317337 + }, + "combine": { + "p50": 91.67999774217606, + "p90": 97.63199836015701, + "p95": 98.14400225877762, + "p99": 100.25600343942642 + }, + "roundtrip": { + "p50": 168.83200407028198, + "p90": 175.26400089263916, + "p95": 176.96000635623932, + "p99": 182.68799781799316 + }, + "isolatedSum": { + "p50": 197.24799692630768, + "p90": 208.15999805927277, + "p95": 211.42400056123734, + "p99": 219.93600577116013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.4719955921173, + "p90": 145.7280069589615, + "p95": 147.10399508476257, + "p99": 150.30400454998016 + }, + "combine": { + "p50": 129.92000579833984, + "p90": 132.60799646377563, + "p95": 133.31200182437897, + "p99": 138.36799561977386 + }, + "roundtrip": { + "p50": 234.1119945049286, + "p90": 241.60000681877136, + "p95": 242.88000166416168, + "p99": 247.3600059747696 + }, + "isolatedSum": { + "p50": 263.39200139045715, + "p90": 278.3360034227371, + "p95": 280.41599690914154, + "p99": 288.672000169754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8530da58", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_63e16ccc", + "comparisonKey": "aaa9574d7cbda5d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:45.445245+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": 
"h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 107.00800269842148, + "p90": 115.61600118875504, + "p95": 121.69600278139114, + "p99": 167.4879938364029 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 84.06399935483932, + "p95": 88.83199840784073, + "p99": 89.72799777984619 + }, + "roundtrip": { 
+ "p50": 155.83999454975128, + "p90": 162.81600296497345, + "p95": 165.53600132465363, + "p99": 168.47999393939972 + }, + "isolatedSum": { + "p50": 187.96800076961517, + "p90": 199.68000054359436, + "p95": 210.52800118923187, + "p99": 257.2159916162491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.57600128650665, + "p90": 102.84800082445145, + "p95": 105.47199845314026, + "p99": 117.18399822711945 + }, + "combine": { + "p50": 74.30399954319, + "p90": 87.71199733018875, + "p95": 89.02399986982346, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 131.04000687599182, + "p90": 161.3440066576004, + "p95": 164.38399255275726, + "p99": 169.98399794101715 + }, + "isolatedSum": { + "p50": 146.88000082969666, + "p90": 190.5599981546402, + "p95": 194.49599832296371, + "p99": 224.89599883556366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 99.04000163078308, + "p90": 104.09600287675858, + "p95": 106.175996363163, + "p99": 113.47199976444244 + }, + "combine": { + "p50": 80.6720033288002, + "p90": 88.06400001049042, + "p95": 88.70399743318558, + "p99": 89.4400030374527 + }, + "roundtrip": { + "p50": 153.9520025253296, + "p90": 162.33600676059723, + "p95": 165.27999937534332, + "p99": 172.03199863433838 + }, + "isolatedSum": { + "p50": 179.71200495958328, + "p90": 192.160002887249, + "p95": 194.87999379634857, + "p99": 202.91200280189514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 
5.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.22400319576263, + "p90": 105.24799674749374, + "p95": 107.35999792814255, + "p99": 113.50400000810623 + }, + "combine": { + "p50": 81.727996468544, + "p90": 88.99199962615967, + "p95": 89.40800279378891, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 159.10400450229645, + "p90": 165.8560037612915, + "p95": 167.71200299263, + "p99": 170.49600183963776 + }, + "isolatedSum": { + "p50": 181.95199966430664, + "p90": 194.2399963736534, + "p95": 196.76800072193146, + "p99": 205.05599677562714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 99.93600100278854, + "p90": 104.38399761915207, + "p95": 107.84000158309937, + "p99": 114.56000059843063 + }, + "combine": { + "p50": 88.03199976682663, + "p90": 91.0400003194809, + "p95": 91.74399822950363, + "p99": 97.18400239944458 + }, + "roundtrip": { + "p50": 160.09600460529327, + "p90": 167.32800006866455, + "p95": 168.60799491405487, + "p99": 172.83199727535248 + }, + "isolatedSum": { + "p50": 187.96800076961517, + "p90": 195.42399793863297, + "p95": 199.583999812603, + "p99": 211.7440029978752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 91.93599969148636, + "p90": 107.29599744081497, + "p95": 109.92000252008438, + "p99": 133.02400708198547 + }, + "combine": { + "p50": 83.23200047016144, + "p90": 
97.08800166845322, + "p95": 97.37599641084671, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 145.85599303245544, + "p90": 169.91999745368958, + "p95": 171.83999717235565, + "p99": 175.99999904632568 + }, + "isolatedSum": { + "p50": 175.1680001616478, + "p90": 204.3839991092682, + "p95": 207.2959989309311, + "p99": 231.26401007175446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 112.28799819946289, + "p90": 123.03999811410904, + "p95": 124.9919980764389, + "p99": 202.62399315834045 + }, + "combine": { + "p50": 104.96000200510025, + "p90": 107.4879989027977, + "p95": 112.67200112342834, + "p99": 201.9519954919815 + }, + "roundtrip": { + "p50": 184.12800133228302, + "p90": 190.46400487422943, + "p95": 193.53599846363068, + "p99": 335.80800890922546 + }, + "isolatedSum": { + "p50": 217.24800020456314, + "p90": 230.52799701690674, + "p95": 237.66399919986725, + "p99": 404.57598865032196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.66399705410004, + "p90": 149.08799529075623, + "p95": 153.6960005760193, + "p99": 155.96799552440643 + }, + "combine": { + "p50": 129.7920048236847, + "p90": 132.09599256515503, + "p95": 132.83200562000275, + "p99": 138.84800672531128 + }, + "roundtrip": { + "p50": 223.23200106620789, + "p90": 240.92799425125122, + "p95": 244.28799748420715, + "p99": 248.416006565094 + }, + "isolatedSum": { + "p50": 263.45600187778473, + "p90": 281.18398785591125, + "p95": 286.52800619602203, + "p99": 
294.8160022497177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a37f0c89", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h100_7fe650e2", + "comparisonKey": "87fb01af25e79130", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:07.663239+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 
1.0007595486111112, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.23999840021133, + "p90": 77.95199751853943, + "p95": 81.88799768686295, + "p99": 106.55999928712845 + }, + "combine": { + "p50": 72.76800274848938, + "p90": 74.91199672222137, + "p95": 75.29599964618683, + "p99": 80.6720033288002 + }, + "roundtrip": { + "p50": 129.15199995040894, + "p90": 133.7279975414276, + "p95": 136.25599443912506, + "p99": 192.1280026435852 + }, + "isolatedSum": { + "p50": 143.0080011487007, + "p90": 152.8639942407608, + "p95": 157.18399733304977, + "p99": 187.23200261592865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.27199864387512, + "p90": 77.37600058317184, + "p95": 79.83999699354172, + "p99": 86.68799698352814 + }, + "combine": { + "p50": 72.76800274848938, + "p90": 74.87999647855759, + "p95": 75.3600001335144, + "p99": 80.03199845552444 + }, + "roundtrip": { + "p50": 131.71200454235077, + "p90": 134.97599959373474, + "p95": 136.89599931240082, + "p99": 142.07999408245087 + }, + "isolatedSum": { + "p50": 143.0400013923645, + "p90": 152.25599706172943, + "p95": 155.19999712705612, + "p99": 166.71999543905258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.91999661922455, + "p90": 104.86400127410889, + "p95": 109.53599959611893, + "p99": 160.0639969110489 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 88.73599767684937, + "p95": 89.08800035715103, + "p99": 91.32800251245499 + }, + "roundtrip": { + "p50": 133.12000036239624, + "p90": 162.20800578594208, + "p95": 163.55200111865997, + "p99": 167.35999286174774 + }, + "isolatedSum": { + "p50": 148.79999309778214, + "p90": 193.59999895095825, + "p95": 198.62399995326996, + "p99": 251.39199942350388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.0799971818924, + "p90": 77.05599814653397, + "p95": 78.78399640321732, + "p99": 88.8959988951683 + }, + "combine": { + "p50": 74.0479975938797, + "p90": 75.52000135183334, + "p95": 80.76799660921097, + "p99": 156.22399747371674 + }, + "roundtrip": { + "p50": 130.5599957704544, + "p90": 135.83999872207642, + "p95": 137.7280056476593, + "p99": 143.16800236701965 + }, + "isolatedSum": { + "p50": 144.1279947757721, + "p90": 152.5759994983673, + "p95": 159.55199301242828, + "p99": 245.11999636888504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 82.59200304746628, + "p90": 85.21600067615509, + "p95": 87.26400136947632, + "p99": 91.48799628019333 + }, + "combine": { + "p50": 75.03999769687653, + "p90": 77.60000228881836, + "p95": 80.83199709653854, + "p99": 83.5840031504631 + }, + "roundtrip": { 
+ "p50": 129.72800433635712, + "p90": 136.89599931240082, + "p95": 138.84800672531128, + "p99": 144.0960019826889 + }, + "isolatedSum": { + "p50": 157.6320007443428, + "p90": 162.81600296497345, + "p95": 168.09599846601486, + "p99": 175.07199943065643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.20800143480301, + "p90": 93.72799843549728, + "p95": 95.0080007314682, + "p99": 99.90400075912476 + }, + "combine": { + "p50": 81.56800270080566, + "p90": 83.55200290679932, + "p95": 85.24800091981888, + "p99": 90.71999788284302 + }, + "roundtrip": { + "p50": 143.2960033416748, + "p90": 147.35999703407288, + "p95": 149.1840034723282, + "p99": 156.12800419330597 + }, + "isolatedSum": { + "p50": 171.77600413560867, + "p90": 177.2800013422966, + "p95": 180.25600165128708, + "p99": 190.62399864196777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.99200093746185, + "p90": 116.54400080442429, + "p95": 118.14399808645248, + "p99": 121.60000205039978 + }, + "combine": { + "p50": 91.48799628019333, + "p90": 105.18400371074677, + "p95": 105.56799918413162, + "p99": 108.73600095510483 + }, + "roundtrip": { + "p50": 165.8879965543747, + "p90": 182.36799538135529, + "p95": 184.28799510002136, + "p99": 187.19999492168427 + }, + "isolatedSum": { + "p50": 188.47999721765518, + "p90": 221.72800451517105, + "p95": 223.7119972705841, + "p99": 230.3360030055046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, 
+ "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.32799899578094, + "p90": 130.46400249004364, + "p95": 131.58400356769562, + "p99": 136.76799833774567 + }, + "combine": { + "p50": 108.31999778747559, + "p90": 122.23999947309494, + "p95": 123.3920007944107, + "p99": 124.64000284671783 + }, + "roundtrip": { + "p50": 201.24800503253937, + "p90": 214.33599293231964, + "p95": 218.30399334430695, + "p99": 220.768004655838 + }, + "isolatedSum": { + "p50": 223.64799678325653, + "p90": 252.70400196313858, + "p95": 254.97600436210632, + "p99": 261.4080011844635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-270a8e08", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h100_7f35e3bd", + "comparisonKey": "64ab51f8bfc524e8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:08:31.243497+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": 
"empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 98.78399968147278, + "p90": 105.85600137710571, + "p95": 107.2319969534874, + "p99": 112.31999844312668 + }, + "combine": { + "p50": 82.30400085449219, + "p90": 84.16000008583069, + "p95": 88.95999938249588, + "p99": 91.45600348711014 + }, + "roundtrip": { + "p50": 136.57599687576294, + "p90": 164.44799304008484, + "p95": 165.79200327396393, + "p99": 170.23999989032745 + }, + "isolatedSum": { + "p50": 181.08800053596497, + "p90": 190.0160014629364, + "p95": 196.19199633598328, + "p99": 203.77600193023682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { 
+ "p50": 100.8640006184578, + "p90": 107.93600231409073, + "p95": 109.21599715948105, + "p99": 119.87199634313583 + }, + "combine": { + "p50": 90.08000046014786, + "p90": 92.22400188446045, + "p95": 92.99200028181076, + "p99": 97.79199957847595 + }, + "roundtrip": { + "p50": 163.71199488639832, + "p90": 171.64799571037292, + "p95": 173.24799299240112, + "p99": 177.34399437904358 + }, + "isolatedSum": { + "p50": 190.94400107860565, + "p90": 200.16000419855118, + "p95": 202.2079974412918, + "p99": 217.6639959216118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 113.21599781513214, + "p90": 131.8719983100891, + "p95": 135.0719928741455, + "p99": 139.0399932861328 + }, + "combine": { + "p50": 115.61600118875504, + "p90": 124.1919994354248, + "p95": 124.60800260305405, + "p99": 128.7039965391159 + }, + "roundtrip": { + "p50": 207.74400234222412, + "p90": 220.12799978256226, + "p95": 221.8559980392456, + "p99": 225.40800273418427 + }, + "isolatedSum": { + "p50": 228.83199900388718, + "p90": 256.0639977455139, + "p95": 259.67999547719955, + "p99": 267.7439898252487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6b6ff3de", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h100_42ed6e56", + "comparisonKey": "f067daba8458a77f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:08:08.402445+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.58399832248688, + "p90": 103.42399775981903, + "p95": 105.72800040245056, + "p99": 112.35199868679047 + }, + "combine": { + "p50": 81.31200075149536, + "p90": 89.31200206279755, + "p95": 89.66399729251862, + "p99": 
91.2960022687912 + }, + "roundtrip": { + "p50": 157.6640009880066, + "p90": 163.32800686359406, + "p95": 165.15199840068817, + "p99": 171.9679981470108 + }, + "isolatedSum": { + "p50": 180.89599907398224, + "p90": 192.73599982261658, + "p95": 195.39199769496918, + "p99": 203.64800095558167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 102.7199998497963, + "p90": 107.90400207042694, + "p95": 109.72800105810165, + "p99": 120.44800072908401 + }, + "combine": { + "p50": 96.19200229644775, + "p90": 98.11200201511383, + "p95": 98.65599870681763, + "p99": 100.80000013113022 + }, + "roundtrip": { + "p50": 167.00799763202667, + "p90": 172.7360039949417, + "p95": 174.01599884033203, + "p99": 179.61600422859192 + }, + "isolatedSum": { + "p50": 198.91200214624405, + "p90": 206.01600408554077, + "p95": 208.38399976491928, + "p99": 221.24800086021423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 135.6160044670105, + "p90": 143.61600577831268, + "p95": 145.85599303245544, + "p99": 175.35999417304993 + }, + "combine": { + "p50": 128.9920061826706, + "p90": 132.60799646377563, + "p95": 133.59999656677246, + "p99": 137.56799697875977 + }, + "roundtrip": { + "p50": 229.0239930152893, + "p90": 235.48799753189087, + "p95": 236.76800727844238, + "p99": 238.78400027751923 + }, + "isolatedSum": { + "p50": 264.6080106496811, + "p90": 276.2240022420883, + "p95": 279.4559895992279, + "p99": 312.9279911518097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5fcf163e", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_11265150", + "comparisonKey": "d43a567a9c9ff588", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:43.094944+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.37599927186966, + "p90": 76.19199901819229, + "p95": 77.82399654388428, + "p99": 82.0159986615181 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 73.37599992752075, + "p95": 74.14399832487106, + "p99": 75.45600086450577 + }, + "roundtrip": { + "p50": 126.01600587368011, + "p90": 131.58400356769562, + "p95": 133.12000036239624, + "p99": 138.33600282669067 + }, + "isolatedSum": { + "p50": 136.51199638843536, + "p90": 149.56799894571304, + "p95": 151.96799486875534, + "p99": 157.47199952602386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.37599927186966, + "p90": 76.86399668455124, + "p95": 78.46400141716003, + "p99": 84.51200276613235 + }, + "combine": { + "p50": 72.83200323581696, + "p90": 74.52800124883652, + "p95": 75.00799745321274, + "p99": 80.64000308513641 + }, + "roundtrip": { + "p50": 128.63999605178833, + "p90": 134.2719942331314, + "p95": 136.00000739097595, + "p99": 140.06400108337402 + }, + "isolatedSum": { + "p50": 142.20800250768661, + "p90": 151.39199793338776, + "p95": 153.47199887037277, + "p99": 165.15200585126877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 72.73600250482559, + "p90": 103.35999727249146, + "p95": 105.3759977221489, + "p99": 109.8560020327568 + }, + "combine": { + "p50": 73.91999661922455, + "p90": 81.95199817419052, + "p95": 88.79999816417694, + "p99": 102.88000106811523 + }, + "roundtrip": { + "p50": 133.34399461746216, + "p90": 164.95999693870544, + "p95": 167.64800250530243, + "p99": 176.86399817466736 + }, + "isolatedSum": { + "p50": 146.65599912405014, + "p90": 185.31199544668198, + "p95": 194.17599588632584, + "p99": 212.73600310087204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.76000219583511, + "p90": 77.2479996085167, + "p95": 79.45600152015686, + "p99": 83.52000266313553 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 75.23199915885925, + "p95": 76.31999999284744, + "p99": 82.30400085449219 + }, + "roundtrip": { + "p50": 130.2720010280609, + "p90": 135.6160044670105, + "p95": 137.95199990272522, + "p99": 142.7839994430542 + }, + "isolatedSum": { + "p50": 143.71199905872345, + "p90": 152.47999876737595, + "p95": 155.7760015130043, + "p99": 165.82400351762772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.90399813652039, + "p90": 102.9760017991066, + "p95": 106.1440035700798, + "p99": 128.89599800109863 + }, + "combine": { + "p50": 75.16799867153168, + "p90": 89.59999680519104, + "p95": 90.59199690818787, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 132.35199451446533, + "p90": 164.000004529953, + "p95": 
165.8560037612915, + "p99": 170.3680008649826 + }, + "isolatedSum": { + "p50": 159.07199680805206, + "p90": 192.57599860429764, + "p95": 196.73600047826767, + "p99": 220.76799720525742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.38400310277939, + "p90": 104.67199981212616, + "p95": 106.81600123643875, + "p99": 110.72000116109848 + }, + "combine": { + "p50": 82.43200182914734, + "p90": 91.26400202512741, + "p95": 92.16000139713287, + "p99": 97.9200005531311 + }, + "roundtrip": { + "p50": 138.46400380134583, + "p90": 163.83999586105347, + "p95": 168.67199540138245, + "p99": 173.6000031232834 + }, + "isolatedSum": { + "p50": 174.81600493192673, + "p90": 195.93600183725357, + "p95": 198.97600263357162, + "p99": 208.64000171422958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.73599898815155, + "p90": 117.21599847078323, + "p95": 119.64800208806992, + "p99": 123.74400347471237 + }, + "combine": { + "p50": 91.2960022687912, + "p90": 103.42399775981903, + "p95": 105.34399747848511, + "p99": 106.11200332641602 + }, + "roundtrip": { + "p50": 165.66400229930878, + "p90": 182.81599879264832, + "p95": 184.54399704933167, + "p99": 188.80000710487366 + }, + "isolatedSum": { + "p50": 188.03200125694275, + "p90": 220.63999623060226, + "p95": 224.99199956655502, + "p99": 229.8560068011284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.17599767446518, + "p90": 135.48800349235535, + "p95": 137.85600662231445, + "p99": 143.90400052070618 + }, + "combine": { + "p50": 110.1439967751503, + "p90": 122.27199971675873, + "p95": 122.84799665212631, + "p99": 124.35200065374374 + }, + "roundtrip": { + "p50": 197.31199741363525, + "p90": 218.20800006389618, + "p95": 220.5439954996109, + "p99": 223.80800545215607 + }, + "isolatedSum": { + "p50": 224.31999444961548, + "p90": 257.7600032091141, + "p95": 260.70400327444077, + "p99": 268.2560011744499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9f5f9950", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_c103770d", + "comparisonKey": "47da937e68d060d8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:51.370135+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.03200107812881, + "p90": 103.29599678516388, + "p95": 104.67199981212616, + "p99": 109.69600081443787 + }, + "combine": { + "p50": 79.71200346946716, + "p90": 82.84799754619598, + "p95": 83.23200047016144, + "p99": 84.89599823951721 + }, + "roundtrip": { + "p50": 152.5759994983673, + "p90": 159.64800119400024, + "p95": 161.0880047082901, + "p99": 165.3120070695877 + }, + "isolatedSum": { + "p50": 175.74400454759598, + "p90": 186.14399433135986, + "p95": 187.9040002822876, + "p99": 194.59199905395508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.84000313282013, + "p90": 103.90400141477585, + "p95": 105.50399869680405, + "p99": 111.26399785280228 + }, + "combine": { + 
"p50": 73.72800260782242, + "p90": 82.87999778985977, + "p95": 83.23200047016144, + "p99": 84.63999629020691 + }, + "roundtrip": { + "p50": 129.69599664211273, + "p90": 150.14399588108063, + "p95": 151.48800611495972, + "p99": 156.89599514007568 + }, + "isolatedSum": { + "p50": 145.56800574064255, + "p90": 186.78399920463562, + "p95": 188.73599916696548, + "p99": 195.9039941430092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.71200215816498, + "p90": 104.12800312042236, + "p95": 105.43999820947647, + "p99": 110.27199774980545 + }, + "combine": { + "p50": 73.02399724721909, + "p90": 82.84799754619598, + "p95": 83.42400193214417, + "p99": 88.8959988951683 + }, + "roundtrip": { + "p50": 131.42399489879608, + "p90": 159.16800498962402, + "p95": 163.29599916934967, + "p99": 173.18400740623474 + }, + "isolatedSum": { + "p50": 144.73599940538406, + "p90": 186.97600066661835, + "p95": 188.86400014162064, + "p99": 199.16799664497375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.3200032711029, + "p90": 101.82400047779083, + "p95": 103.2319962978363, + "p99": 108.06400328874588 + }, + "combine": { + "p50": 79.9039974808693, + "p90": 83.16799998283386, + "p95": 83.61600339412689, + "p99": 89.08800035715103 + }, + "roundtrip": { + "p50": 130.94399869441986, + "p90": 155.64799308776855, + "p95": 159.07199680805206, + "p99": 162.432000041008 + }, + "isolatedSum": { + "p50": 176.2240007519722, + "p90": 184.9920004606247, + "p95": 186.8479996919632, + "p99": 
197.1520036458969 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.25600278377533, + "p90": 102.11200267076492, + "p95": 104.2879968881607, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 82.24000036716461, + "p90": 89.82399851083755, + "p95": 98.55999797582626, + "p99": 296.25600576400757 + }, + "roundtrip": { + "p50": 154.59200739860535, + "p90": 164.09599781036377, + "p95": 165.6000018119812, + "p99": 171.10399901866913 + }, + "isolatedSum": { + "p50": 178.49600315093994, + "p90": 191.93600118160248, + "p95": 202.84799486398697, + "p99": 404.2240083217621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.05600011348724, + "p90": 99.42399710416794, + "p95": 102.49599814414978, + "p99": 106.49599879980087 + }, + "combine": { + "p50": 81.88799768686295, + "p90": 91.61599725484848, + "p95": 92.16000139713287, + "p99": 98.59199821949005 + }, + "roundtrip": { + "p50": 142.81600713729858, + "p90": 164.8319959640503, + "p95": 168.2880073785782, + "p99": 171.48800194263458 + }, + "isolatedSum": { + "p50": 170.9439978003502, + "p90": 191.03999435901642, + "p95": 194.65599954128265, + "p99": 205.08799701929092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 104.5759990811348, + "p90": 
144.83200013637543, + "p95": 147.10399508476257, + "p99": 151.99999511241913 + }, + "combine": { + "p50": 91.26400202512741, + "p90": 116.86400324106216, + "p95": 122.36800044775009, + "p99": 122.8799968957901 + }, + "roundtrip": { + "p50": 167.67999529838562, + "p90": 215.2000069618225, + "p95": 218.52800250053406, + "p99": 350.3359854221344 + }, + "isolatedSum": { + "p50": 195.8400011062622, + "p90": 261.6960033774376, + "p95": 269.47199553251266, + "p99": 274.87999200820923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.34400141239166, + "p90": 140.1280015707016, + "p95": 143.45599710941315, + "p99": 147.5840061903 + }, + "combine": { + "p50": 115.29599875211716, + "p90": 124.12799894809723, + "p95": 124.67200309038162, + "p99": 130.8159977197647 + }, + "roundtrip": { + "p50": 219.200000166893, + "p90": 235.10399460792542, + "p95": 237.37600445747375, + "p99": 241.18399620056152 + }, + "isolatedSum": { + "p50": 244.64000016450882, + "p90": 264.2560005187988, + "p95": 268.12800019979477, + "p99": 278.4000039100647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c877cc8b", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_b2b419cf", + "comparisonKey": "fa3c84b63dcf7858", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:48.108372+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + 
"backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.92800331115723, + "p90": 102.75200009346008, + "p95": 104.22399640083313, + "p99": 106.97600245475769 + }, + "combine": { + "p50": 66.17599725723267, + "p90": 74.5600014925003, + "p95": 75.03999769687653, + "p99": 79.26400005817413 + }, + "roundtrip": { + "p50": 
121.0239976644516, + "p90": 153.9520025253296, + "p95": 155.13600409030914, + "p99": 157.85600244998932 + }, + "isolatedSum": { + "p50": 135.1040005683899, + "p90": 177.3120015859604, + "p95": 179.26399409770966, + "p99": 186.24000251293182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 67.391999065876, + "p90": 72.83200323581696, + "p95": 75.03999769687653, + "p99": 81.82399719953537 + }, + "combine": { + "p50": 65.5680000782013, + "p90": 66.94400310516357, + "p95": 67.4239993095398, + "p99": 72.67200201749802 + }, + "roundtrip": { + "p50": 118.72000247240067, + "p90": 125.63200294971466, + "p95": 131.6480040550232, + "p99": 221.53599560260773 + }, + "isolatedSum": { + "p50": 132.9599991440773, + "p90": 139.77600634098053, + "p95": 142.46399700641632, + "p99": 154.4959992170334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.43199986219406, + "p90": 101.3759970664978, + "p95": 104.41599786281586, + "p99": 110.43199896812439 + }, + "combine": { + "p50": 66.880002617836, + "p90": 80.60800284147263, + "p95": 81.11999928951263, + "p99": 81.7599967122078 + }, + "roundtrip": { + "p50": 120.67200243473053, + "p90": 154.78399395942688, + "p95": 156.41599893569946, + "p99": 160.64000129699707 + }, + "isolatedSum": { + "p50": 137.31200248003006, + "p90": 181.98399990797043, + "p95": 185.5359971523285, + "p99": 192.19199568033218 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 
32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.43199986219406, + "p90": 99.23200309276581, + "p95": 104.06400263309479, + "p99": 108.03200304508209 + }, + "combine": { + "p50": 66.78400188684464, + "p90": 74.17599856853485, + "p95": 74.81600344181061, + "p99": 80.76799660921097 + }, + "roundtrip": { + "p50": 123.29600006341934, + "p90": 151.45599842071533, + "p95": 153.50399911403656, + "p99": 157.02399611473083 + }, + "isolatedSum": { + "p50": 137.2160017490387, + "p90": 173.40800166130066, + "p95": 178.8800060749054, + "p99": 188.79999965429306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.99199765920639, + "p90": 99.42399710416794, + "p95": 103.61599922180176, + "p99": 109.53599959611893 + }, + "combine": { + "p50": 72.64000177383423, + "p90": 81.82399719953537, + "p95": 82.07999914884567, + "p99": 88.83199840784073 + }, + "roundtrip": { + "p50": 127.36000120639801, + "p90": 153.3759981393814, + "p95": 156.70399367809296, + "p99": 176.79999768733978 + }, + "isolatedSum": { + "p50": 149.63199943304062, + "p90": 181.2479943037033, + "p95": 185.69599837064743, + "p99": 198.36799800395966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 86.04799956083298, + "p90": 101.24800354242325, + "p95": 104.63999956846237, + "p99": 112.96000331640244 + }, + "combine": { + "p50": 75.32799988985062, + "p90": 89.28000181913376, + "p95": 
89.85599875450134, + "p99": 90.62399715185165 + }, + "roundtrip": { + "p50": 138.2399946451187, + "p90": 159.4880074262619, + "p95": 162.84799575805664, + "p99": 167.61599481105804 + }, + "isolatedSum": { + "p50": 161.3759994506836, + "p90": 190.528005361557, + "p95": 194.49599832296371, + "p99": 203.5840004682541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.28800368309021, + "p90": 115.7120019197464, + "p95": 117.0239970088005, + "p99": 121.24799937009811 + }, + "combine": { + "p50": 84.79999750852585, + "p90": 97.98400104045868, + "p95": 98.27200323343277, + "p99": 98.9760011434555 + }, + "roundtrip": { + "p50": 162.01600432395935, + "p90": 178.3680021762848, + "p95": 180.60800433158875, + "p99": 185.98400056362152 + }, + "isolatedSum": { + "p50": 185.08800119161606, + "p90": 213.69600296020508, + "p95": 215.29600024223328, + "p99": 220.22400051355362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.18399888277054, + "p90": 139.55199718475342, + "p95": 140.9599930047989, + "p99": 147.5840061903 + }, + "combine": { + "p50": 108.41599851846695, + "p90": 122.3360002040863, + "p95": 122.68800288438797, + "p99": 123.58400225639343 + }, + "roundtrip": { + "p50": 212.12799847126007, + "p90": 227.87199914455414, + "p95": 229.0560007095337, + "p99": 231.07199370861053 + }, + "isolatedSum": { + "p50": 229.5999974012375, + "p90": 261.8879973888397, + "p95": 263.64799588918686, + "p99": 271.1680084466934 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6c4f36f1", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_fe0d696f", + "comparisonKey": "f32714d21b77e2d4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:24:09.233237+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 100.28800368309021, + "p90": 107.96800255775452, + "p95": 109.27999764680862, + "p99": 112.47999966144562 + }, + "combine": { + "p50": 82.33600109815598, + "p90": 84.3840017914772, + "p95": 84.79999750852585, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 152.3520052433014, + "p90": 159.0079963207245, + "p95": 160.19199788570404, + "p99": 163.4880006313324 + }, + "isolatedSum": { + "p50": 182.62400478124619, + "p90": 192.35200434923172, + "p95": 194.07999515533447, + "p99": 203.35999876260757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.7600028514862, + "p90": 107.96800255775452, + "p95": 109.0560033917427, + "p99": 112.06399649381638 + }, + "combine": { + "p50": 74.33599978685379, + "p90": 83.96799862384796, + "p95": 84.57600325345993, + "p99": 89.05600011348724 + }, + "roundtrip": { + "p50": 133.15199315547943, + "p90": 160.5760008096695, + "p95": 164.76799547672272, + "p99": 167.71200299263 + }, + "isolatedSum": { + "p50": 148.09600263834, + "p90": 191.93600118160248, + "p95": 193.63200664520264, + "p99": 201.11999660730362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 77.05599814653397, + "p90": 104.09600287675858, + "p95": 107.04000294208527, + "p99": 113.6000007390976 + }, + "combine": { + "p50": 75.6480023264885, + "p90": 88.73599767684937, + "p95": 89.05600011348724, + "p99": 114.17599767446518 + }, + "roundtrip": { + "p50": 136.00000739097595, + "p90": 167.58400201797485, + "p95": 169.69600319862366, + "p99": 222.56000339984894 + }, + "isolatedSum": { + "p50": 152.70400047302246, + "p90": 192.83200055360794, + "p95": 196.0960030555725, + "p99": 227.77599841356277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.80000364780426, + "p90": 104.89600151777267, + "p95": 106.49599879980087, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 75.87199658155441, + "p90": 85.7279971241951, + "p95": 88.8959988951683, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 134.94400680065155, + "p90": 162.6880019903183, + "p95": 165.79200327396393, + "p99": 170.81600427627563 + }, + "isolatedSum": { + "p50": 152.67200022935867, + "p90": 190.62399864196777, + "p95": 195.39199769496918, + "p99": 201.05600357055664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 99.93600100278854, + "p90": 104.35199737548828, + "p95": 106.33599758148193, + "p99": 111.39199882745743 + }, + "combine": { + "p50": 76.9599974155426, + "p90": 89.47200328111649, + "p95": 91.45600348711014, + "p99": 93.98400038480759 + }, + "roundtrip": { + "p50": 137.28000223636627, + "p90": 164.48000073432922, + 
"p95": 167.29600727558136, + "p99": 171.10399901866913 + }, + "isolatedSum": { + "p50": 176.89599841833115, + "p90": 193.82400065660477, + "p95": 197.79200106859207, + "p99": 205.37599921226501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.71999788284302, + "p90": 108.83200168609619, + "p95": 109.79200154542923, + "p99": 114.04799669981003 + }, + "combine": { + "p50": 83.55200290679932, + "p90": 92.99200028181076, + "p95": 97.05600142478943, + "p99": 98.55999797582626 + }, + "roundtrip": { + "p50": 144.67200636863708, + "p90": 172.83199727535248, + "p95": 174.9120056629181, + "p99": 177.279993891716 + }, + "isolatedSum": { + "p50": 174.27200078964233, + "p90": 201.82400196790695, + "p95": 206.84800297021866, + "p99": 212.6079946756363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.53599828481674, + "p90": 117.53600090742111, + "p95": 118.56000125408173, + "p99": 123.55200201272964 + }, + "combine": { + "p50": 92.73599833250046, + "p90": 106.01600259542465, + "p95": 107.29599744081497, + "p99": 111.61600053310394 + }, + "roundtrip": { + "p50": 167.4560010433197, + "p90": 184.79999899864197, + "p95": 187.96800076961517, + "p99": 194.33599710464478 + }, + "isolatedSum": { + "p50": 194.2719966173172, + "p90": 223.55200350284576, + "p95": 225.8559986948967, + "p99": 235.1680025458336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.04799735546112, + "p90": 135.80800592899323, + "p95": 139.55199718475342, + "p99": 145.1520025730133 + }, + "combine": { + "p50": 109.8880022764206, + "p90": 122.11199849843979, + "p95": 124.12799894809723, + "p99": 125.08800625801086 + }, + "roundtrip": { + "p50": 203.36000621318817, + "p90": 220.35199403762817, + "p95": 222.6880043745041, + "p99": 227.03999280929565 + }, + "isolatedSum": { + "p50": 227.9359996318817, + "p90": 257.920004427433, + "p95": 263.67999613285065, + "p99": 270.24000883102417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-61405eb2", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_6145a872", + "comparisonKey": "5ef36f503e717ed8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:25.765797+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.92800331115723, + "p90": 75.32799988985062, + "p95": 77.63200253248215, + "p99": 83.64800363779068 + }, + "combine": { + "p50": 73.44000041484833, + "p90": 74.94399696588516, + "p95": 75.32799988985062, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 125.85599720478058, + "p90": 130.87999820709229, + "p95": 132.64000415802002, + "p99": 137.92000710964203 + }, + "isolatedSum": { + "p50": 142.36800372600555, + "p90": 150.27199685573578, + "p95": 152.96000242233276, + "p99": 164.86400365829468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.31199878454208, + "p90": 75.55200159549713, + "p95": 78.07999849319458, + "p99": 87.45600283145905 + }, 
+ "combine": { + "p50": 73.37599992752075, + "p90": 75.16799867153168, + "p95": 75.58400183916092, + "p99": 81.05599880218506 + }, + "roundtrip": { + "p50": 131.23199343681335, + "p90": 134.75200533866882, + "p95": 136.48000359535217, + "p99": 142.14399456977844 + }, + "isolatedSum": { + "p50": 142.68799871206284, + "p90": 150.7200002670288, + "p95": 153.6640003323555, + "p99": 168.5120016336441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.55200093984604, + "p90": 102.1760031580925, + "p95": 105.3759977221489, + "p99": 109.0880036354065 + }, + "combine": { + "p50": 74.65600222349167, + "p90": 82.71999657154083, + "p95": 88.8959988951683, + "p99": 89.56799656152725 + }, + "roundtrip": { + "p50": 132.83200562000275, + "p90": 164.44799304008484, + "p95": 165.69599509239197, + "p99": 169.0240055322647 + }, + "isolatedSum": { + "p50": 146.2080031633377, + "p90": 184.89599972963333, + "p95": 194.2719966173172, + "p99": 198.65600019693375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.68000191450119, + "p90": 101.24800354242325, + "p95": 103.74400019645691, + "p99": 107.90400207042694 + }, + "combine": { + "p50": 74.07999783754349, + "p90": 75.52000135183334, + "p95": 75.96799731254578, + "p99": 81.7599967122078 + }, + "roundtrip": { + "p50": 131.84000551700592, + "p90": 136.1600011587143, + "p95": 138.49599659442902, + "p99": 210.27199923992157 + }, + "isolatedSum": { + "p50": 145.75999975204468, + "p90": 176.7680048942566, + "p95": 179.71199750900269, 
+ "p99": 189.66399878263474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 83.80799740552902, + "p90": 101.24800354242325, + "p95": 103.20000350475311, + "p99": 107.58399963378906 + }, + "combine": { + "p50": 75.80800354480743, + "p90": 83.61600339412689, + "p95": 84.25600081682205, + "p99": 90.59199690818787 + }, + "roundtrip": { + "p50": 131.00799918174744, + "p90": 165.53600132465363, + "p95": 166.87999665737152, + "p99": 172.7679967880249 + }, + "isolatedSum": { + "p50": 159.61600095033646, + "p90": 184.86400693655014, + "p95": 187.45600432157516, + "p99": 198.17599654197693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.67199784517288, + "p90": 106.52799904346466, + "p95": 107.35999792814255, + "p99": 111.58400028944016 + }, + "combine": { + "p50": 83.36000144481659, + "p90": 91.71199798583984, + "p95": 92.32000261545181, + "p99": 97.53599762916565 + }, + "roundtrip": { + "p50": 145.6640064716339, + "p90": 166.4000004529953, + "p95": 167.90400445461273, + "p99": 173.3119934797287 + }, + "isolatedSum": { + "p50": 176.03199928998947, + "p90": 198.2399970293045, + "p95": 199.68000054359436, + "p99": 209.1199979186058 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
102.33599692583084, + "p90": 121.5360015630722, + "p95": 122.6240023970604, + "p99": 125.72799623012543 + }, + "combine": { + "p50": 92.16000139713287, + "p90": 105.92000186443329, + "p95": 106.49599879980087, + "p99": 107.35999792814255 + }, + "roundtrip": { + "p50": 170.9440052509308, + "p90": 189.05599415302277, + "p95": 191.26400351524353, + "p99": 197.34400510787964 + }, + "isolatedSum": { + "p50": 194.49599832296371, + "p90": 227.4560034275055, + "p95": 229.12000119686127, + "p99": 233.08799415826797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.56800246238708, + "p90": 140.00000059604645, + "p95": 141.184002161026, + "p99": 146.01600170135498 + }, + "combine": { + "p50": 116.70400202274323, + "p90": 130.36799430847168, + "p95": 131.8719983100891, + "p99": 133.37600231170654 + }, + "roundtrip": { + "p50": 219.200000166893, + "p90": 235.35999655723572, + "p95": 240.48000574111938, + "p99": 243.3599978685379 + }, + "isolatedSum": { + "p50": 242.2720044851303, + "p90": 270.3679949045181, + "p95": 273.0560004711151, + "p99": 279.3920040130615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e3e3bdde", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_de79407c", + "comparisonKey": "2b1280ce47952ca9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:49.964797+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 99.42399710416794, + "p90": 106.4319983124733, + "p95": 107.87200182676315, + "p99": 111.87200248241425 + }, + "combine": { + "p50": 82.07999914884567, + "p90": 83.67999643087387, + "p95": 
84.22400057315826, + "p99": 89.02399986982346 + }, + "roundtrip": { + "p50": 152.0639955997467, + "p90": 160.25599837303162, + "p95": 164.19200599193573, + "p99": 166.75199568271637 + }, + "isolatedSum": { + "p50": 181.5039962530136, + "p90": 190.11199474334717, + "p95": 192.09600239992142, + "p99": 200.8960023522377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.40000027418137, + "p90": 105.59999942779541, + "p95": 106.65600001811981, + "p99": 110.1439967751503 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 83.3280012011528, + "p95": 83.71199667453766, + "p99": 89.4400030374527 + }, + "roundtrip": { + "p50": 133.88800621032715, + "p90": 159.32799875736237, + "p95": 163.61600160598755, + "p99": 166.75199568271637 + }, + "isolatedSum": { + "p50": 149.27999675273895, + "p90": 188.9280006289482, + "p95": 190.36799669265747, + "p99": 199.583999812603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.85600358247757, + "p90": 105.05600273609161, + "p95": 106.55999928712845, + "p99": 110.84800213575363 + }, + "combine": { + "p50": 74.91199672222137, + "p90": 88.03199976682663, + "p95": 88.83199840784073, + "p99": 89.75999802350998 + }, + "roundtrip": { + "p50": 134.39999520778656, + "p90": 164.19200599193573, + "p95": 165.40800034999847, + "p99": 168.41599345207214 + }, + "isolatedSum": { + "p50": 148.76800030469894, + "p90": 193.08800250291824, + "p95": 195.39199769496918, + "p99": 200.6080001592636 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.52000135183334, + "p90": 106.75200074911118, + "p95": 107.51999914646149, + "p99": 110.68800091743469 + }, + "combine": { + "p50": 75.26399940252304, + "p90": 83.20000022649765, + "p95": 83.48800241947174, + "p99": 84.63999629020691 + }, + "roundtrip": { + "p50": 134.88000631332397, + "p90": 164.76799547672272, + "p95": 166.04800522327423, + "p99": 168.60799491405487 + }, + "isolatedSum": { + "p50": 150.78400075435638, + "p90": 189.95200097560883, + "p95": 191.00800156593323, + "p99": 195.3279972076416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 99.32799637317657, + "p90": 103.39199751615524, + "p95": 106.11200332641602, + "p99": 111.32799834012985 + }, + "combine": { + "p50": 83.39200168848038, + "p90": 91.39200299978256, + "p95": 91.77599847316742, + "p99": 96.3520035147667 + }, + "roundtrip": { + "p50": 133.37600231170654, + "p90": 164.44799304008484, + "p95": 166.59200191497803, + "p99": 171.74400389194489 + }, + "isolatedSum": { + "p50": 182.71999806165695, + "p90": 194.7840005159378, + "p95": 197.88800179958344, + "p99": 207.68000185489655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 93.08800101280212, + "p90": 104.5759990811348, + "p95": 106.46399855613708, + "p99": 
110.68800091743469 + }, + "combine": { + "p50": 83.16799998283386, + "p90": 91.64799749851227, + "p95": 92.06400066614151, + "p99": 97.9200005531311 + }, + "roundtrip": { + "p50": 145.1520025730133, + "p90": 173.08799922466278, + "p95": 174.5920032262802, + "p99": 176.256000995636 + }, + "isolatedSum": { + "p50": 176.256000995636, + "p90": 196.22399657964706, + "p95": 198.5279992222786, + "p99": 208.6080014705658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.75999933481216, + "p90": 119.07199770212173, + "p95": 121.05599790811539, + "p99": 126.78399682044983 + }, + "combine": { + "p50": 91.71199798583984, + "p90": 105.59999942779541, + "p95": 106.11200332641602, + "p99": 107.13600367307663 + }, + "roundtrip": { + "p50": 165.6319946050644, + "p90": 183.61599743366241, + "p95": 186.27199530601501, + "p99": 190.33600389957428 + }, + "isolatedSum": { + "p50": 189.471997320652, + "p90": 224.67199712991714, + "p95": 227.1680012345314, + "p99": 233.92000049352646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.94399672746658, + "p90": 134.5600038766861, + "p95": 138.14400136470795, + "p99": 140.8960074186325 + }, + "combine": { + "p50": 108.22399705648422, + "p90": 121.8239963054657, + "p95": 122.8799968957901, + "p99": 125.21600723266602 + }, + "roundtrip": { + "p50": 198.33600521087646, + "p90": 216.44799411296844, + "p95": 217.50399470329285, + "p99": 224.12799298763275 + }, + "isolatedSum": { + "p50": 227.1679937839508, + "p90": 
256.3840001821518, + "p95": 261.02399826049805, + "p99": 266.1120146512985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2cd815b6", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_03e5f4f9", + "comparisonKey": "02f89b5297454219", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:17.375105+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.1359977722168, + "p90": 104.44799810647964, + "p95": 105.59999942779541, + "p99": 112.5119999051094 + }, + "combine": { + "p50": 68.70400160551071, + "p90": 81.7599967122078, + "p95": 83.23200047016144, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 127.80800461769104, + "p90": 154.6880006790161, + "p95": 156.2879979610443, + "p99": 160.51200032234192 + }, + "isolatedSum": { + "p50": 139.8399993777275, + "p90": 186.20799481868744, + "p95": 188.83199989795685, + "p99": 196.44799828529358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.39999961853027, + "p90": 103.71199995279312, + "p95": 104.92800176143646, + "p99": 110.91200262308121 + }, + "combine": { + "p50": 73.21599870920181, + "p90": 82.56000280380249, + "p95": 83.13599973917007, + "p99": 84.19200032949448 + }, + "roundtrip": { + "p50": 127.00800597667694, + "p90": 133.5040032863617, + "p95": 134.62400436401367, + "p99": 137.82399892807007 + }, + "isolatedSum": { + "p50": 143.6159983277321, + "p90": 186.2720027565956, + "p95": 188.06400150060654, + "p99": 195.10400295257568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.87200337648392, + "p90": 104.25599664449692, + "p95": 106.01600259542465, + "p99": 109.56799983978271 + }, + "combine": { + "p50": 73.85600358247757, + "p90": 82.07999914884567, + "p95": 82.87999778985977, + "p99": 88.83199840784073 + }, + "roundtrip": { + "p50": 132.4480026960373, + "p90": 157.21599757671356, + "p95": 159.45599973201752, + "p99": 165.82399606704712 + }, + "isolatedSum": { + "p50": 145.7280069589615, + "p90": 186.3359957933426, + "p95": 188.89600038528442, + "p99": 198.39999824762344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.06399738788605, + "p90": 101.15200281143188, + "p95": 104.22399640083313, + "p99": 112.38399893045425 + }, + "combine": { + "p50": 73.44000041484833, + "p90": 83.26400071382523, + "p95": 83.71199667453766, + "p99": 88.99199962615967 + }, + "roundtrip": { + "p50": 131.52000308036804, + "p90": 162.4000072479248, + "p95": 163.83999586105347, + "p99": 168.44800114631653 + }, + "isolatedSum": { + "p50": 145.50399780273438, + "p90": 184.4160035252571, + "p95": 187.9359930753708, + "p99": 201.37599855661392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 84.06399935483932, + "p90": 100.41599720716476, + "p95": 103.39199751615524, + "p99": 108.57599973678589 + }, + "combine": { + "p50": 75.13599842786789, + "p90": 88.83199840784073, + "p95": 89.31200206279755, + "p99": 90.17600119113922 + }, + 
"roundtrip": { + "p50": 132.64000415802002, + "p90": 164.38399255275726, + "p95": 165.92000424861908, + "p99": 169.72799599170685 + }, + "isolatedSum": { + "p50": 159.19999778270721, + "p90": 189.2479956150055, + "p95": 192.7039995789528, + "p99": 198.7520009279251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 87.36000210046768, + "p90": 105.21599650382996, + "p95": 106.84800148010254, + "p99": 112.41599917411804 + }, + "combine": { + "p50": 82.36800134181976, + "p90": 91.23200178146362, + "p95": 91.64799749851227, + "p99": 97.53599762916565 + }, + "roundtrip": { + "p50": 141.7279988527298, + "p90": 165.40800034999847, + "p95": 170.1440066099167, + "p99": 174.30399358272552 + }, + "isolatedSum": { + "p50": 169.72800344228745, + "p90": 196.44799828529358, + "p95": 198.4959989786148, + "p99": 209.9519968032837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.31199723482132, + "p90": 120.80000340938568, + "p95": 121.95199728012085, + "p99": 128.48000228405 + }, + "combine": { + "p50": 91.0400003194809, + "p90": 100.44799745082855, + "p95": 105.56799918413162, + "p99": 106.81600123643875 + }, + "roundtrip": { + "p50": 166.27199947834015, + "p90": 184.1599941253662, + "p95": 187.00799345970154, + "p99": 190.97599387168884 + }, + "isolatedSum": { + "p50": 196.35199755430222, + "p90": 221.24800086021423, + "p95": 227.51999646425247, + "p99": 235.29600352048874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + 
"combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.4480026960373, + "p90": 140.32000303268433, + "p95": 144.57599818706512, + "p99": 420.9280014038086 + }, + "combine": { + "p50": 115.29599875211716, + "p90": 124.44800138473511, + "p95": 125.15200674533844, + "p99": 130.5920034646988 + }, + "roundtrip": { + "p50": 218.59200298786163, + "p90": 233.40800404548645, + "p95": 236.86400055885315, + "p99": 239.77600038051605 + }, + "isolatedSum": { + "p50": 247.74400144815445, + "p90": 264.76800441741943, + "p95": 269.72800493240356, + "p99": 551.5200048685074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dc067e0e", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_ce177875", + "comparisonKey": "76a730518aa88f4d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:41.594786+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.09599763154984, + "p90": 101.79200023412704, + "p95": 105.43999820947647, + "p99": 107.71200060844421 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 82.94399827718735, + "p95": 83.23200047016144, + "p99": 84.99199897050858 + }, + "roundtrip": { + "p50": 130.048006772995, + "p90": 155.7759940624237, + "p95": 156.89599514007568, + "p99": 159.2320054769516 + }, + "isolatedSum": { + "p50": 145.27999609708786, + "p90": 184.7359985113144, + "p95": 188.6719986796379, + "p99": 192.7039995789528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + 
"globalTokens": 16, + "dispatch": { + "p50": 72.41600006818771, + "p90": 105.8880016207695, + "p95": 106.84800148010254, + "p99": 112.22399771213531 + }, + "combine": { + "p50": 73.72800260782242, + "p90": 81.85599744319916, + "p95": 82.75199681520462, + "p99": 84.06399935483932 + }, + "roundtrip": { + "p50": 131.77600502967834, + "p90": 135.68000495433807, + "p95": 137.53600418567657, + "p99": 141.12000167369843 + }, + "isolatedSum": { + "p50": 146.14400267601013, + "p90": 187.74399906396866, + "p95": 189.59999829530716, + "p99": 196.28799706697464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.66400212049484, + "p90": 103.55199873447418, + "p95": 105.76000064611435, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 74.30399954319, + "p90": 84.16000008583069, + "p95": 88.92799913883209, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 133.63200426101685, + "p90": 164.19200599193573, + "p95": 165.50399363040924, + "p99": 171.51999473571777 + }, + "isolatedSum": { + "p50": 147.96800166368484, + "p90": 187.71199882030487, + "p95": 194.68799978494644, + "p99": 199.93600249290466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.36800003051758, + "p90": 100.51199793815613, + "p95": 104.25599664449692, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 82.68799632787704, + "p95": 83.26400071382523, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 133.95200669765472, + "p90": 163.32800686359406, + 
"p95": 164.99200463294983, + "p99": 167.58400201797485 + }, + "isolatedSum": { + "p50": 149.24799650907516, + "p90": 183.19999426603317, + "p95": 187.51999735832214, + "p99": 197.50399887561798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 84.1279998421669, + "p90": 100.3199964761734, + "p95": 102.94400155544281, + "p99": 108.86400192975998 + }, + "combine": { + "p50": 75.23199915885925, + "p90": 89.63199704885483, + "p95": 90.40000289678574, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 133.05599987506866, + "p90": 163.455992937088, + "p95": 164.92800414562225, + "p99": 169.3120002746582 + }, + "isolatedSum": { + "p50": 159.35999900102615, + "p90": 189.95199352502823, + "p95": 193.34400445222855, + "p99": 200.41599869728088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 91.87199920415878, + "p90": 107.04000294208527, + "p95": 108.22399705648422, + "p99": 113.50400000810623 + }, + "combine": { + "p50": 82.43200182914734, + "p90": 91.2960022687912, + "p95": 91.90399944782257, + "p99": 97.24800288677216 + }, + "roundtrip": { + "p50": 140.73599874973297, + "p90": 171.6800034046173, + "p95": 173.3119934797287, + "p99": 177.69600450992584 + }, + "isolatedSum": { + "p50": 174.30400103330612, + "p90": 198.33600521087646, + "p95": 200.1279965043068, + "p99": 210.7520028948784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + 
"stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.15200215578079, + "p90": 117.0239970088005, + "p95": 118.97599697113037, + "p99": 123.55200201272964 + }, + "combine": { + "p50": 91.39200299978256, + "p90": 105.40799796581268, + "p95": 106.175996363163, + "p99": 106.59199953079224 + }, + "roundtrip": { + "p50": 166.55999422073364, + "p90": 181.88799917697906, + "p95": 185.31200289726257, + "p99": 189.85599279403687 + }, + "isolatedSum": { + "p50": 188.54400515556335, + "p90": 222.4319949746132, + "p95": 225.15199333429337, + "p99": 230.14400154352188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.46399986743927, + "p90": 132.76800513267517, + "p95": 134.94400680065155, + "p99": 139.55199718475342 + }, + "combine": { + "p50": 108.0000028014183, + "p90": 121.56800180673599, + "p95": 122.27199971675873, + "p99": 123.48800152540207 + }, + "roundtrip": { + "p50": 200.83199441432953, + "p90": 214.6880030632019, + "p95": 216.70399606227875, + "p99": 220.44800221920013 + }, + "isolatedSum": { + "p50": 222.46400266885757, + "p90": 254.33600693941116, + "p95": 257.2160065174103, + "p99": 263.0399987101555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8072d7aa", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_e5da7451", + "comparisonKey": "bd5fbda893e6ce4a", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:29.612054+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.34399902820587, + "p90": 75.99999755620956, + "p95": 
78.78399640321732, + "p99": 87.07199990749359 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 74.01599735021591, + "p95": 74.49600100517273, + "p99": 80.76799660921097 + }, + "roundtrip": { + "p50": 127.77599692344666, + "p90": 131.96800649166107, + "p95": 133.2480013370514, + "p99": 138.2399946451187 + }, + "isolatedSum": { + "p50": 141.79199934005737, + "p90": 150.01599490642548, + "p95": 153.27999740839005, + "p99": 167.83999651670456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.69600170850754, + "p90": 76.31999999284744, + "p95": 78.59200239181519, + "p99": 84.16000008583069 + }, + "combine": { + "p50": 73.02399724721909, + "p90": 74.72000271081924, + "p95": 75.16799867153168, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 132.1599930524826, + "p90": 135.0719928741455, + "p95": 137.05599308013916, + "p99": 141.27999544143677 + }, + "isolatedSum": { + "p50": 142.71999895572662, + "p90": 151.0400027036667, + "p95": 153.76000106334686, + "p99": 165.3760001063347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.25599884986877, + "p90": 103.96800190210342, + "p95": 106.36799782514572, + "p99": 110.49599945545197 + }, + "combine": { + "p50": 73.95199686288834, + "p90": 87.90399879217148, + "p95": 88.92799913883209, + "p99": 89.72799777984619 + }, + "roundtrip": { + "p50": 133.5040032863617, + "p90": 165.79200327396393, + "p95": 167.10400581359863, + "p99": 170.84799706935883 + }, + "isolatedSum": { + "p50": 146.2079957127571, + "p90": 
191.8720006942749, + "p95": 195.2959969639778, + "p99": 200.22399723529816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.82400333881378, + "p90": 101.72799974679947, + "p95": 104.22399640083313, + "p99": 108.19199681282043 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 82.78399705886841, + "p95": 88.0960002541542, + "p99": 90.17600119113922 + }, + "roundtrip": { + "p50": 133.44000279903412, + "p90": 163.455992937088, + "p95": 164.57599401474, + "p99": 166.81599617004395 + }, + "isolatedSum": { + "p50": 148.3840048313141, + "p90": 184.51199680566788, + "p95": 192.31999665498734, + "p99": 198.36799800395966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 84.03199911117554, + "p90": 103.26399654150009, + "p95": 105.40799796581268, + "p99": 111.16799712181091 + }, + "combine": { + "p50": 75.26399940252304, + "p90": 90.2400016784668, + "p95": 90.65599739551544, + "p99": 97.43999689817429 + }, + "roundtrip": { + "p50": 130.97600638866425, + "p90": 163.7759953737259, + "p95": 167.00799763202667, + "p99": 189.66400623321533 + }, + "isolatedSum": { + "p50": 159.29599851369858, + "p90": 193.5039982199669, + "p95": 196.06399536132812, + "p99": 208.6079940199852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + 
"dispatch": { + "p50": 92.22400188446045, + "p90": 106.4319983124733, + "p95": 108.41599851846695, + "p99": 113.47199976444244 + }, + "combine": { + "p50": 82.24000036716461, + "p90": 90.91199934482574, + "p95": 97.02400118112564, + "p99": 98.2080027461052 + }, + "roundtrip": { + "p50": 140.22399485111237, + "p90": 172.60800302028656, + "p95": 174.6239960193634, + "p99": 178.94400656223297 + }, + "isolatedSum": { + "p50": 174.46400225162506, + "p90": 197.34399765729904, + "p95": 205.4399996995926, + "p99": 211.68000251054764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.63999825716019, + "p90": 116.73600226640701, + "p95": 119.03999745845795, + "p99": 123.26399981975555 + }, + "combine": { + "p50": 91.26400202512741, + "p90": 105.27999699115753, + "p95": 105.6319996714592, + "p99": 106.4319983124733 + }, + "roundtrip": { + "p50": 165.82399606704712, + "p90": 181.08800053596497, + "p95": 183.20000171661377, + "p99": 188.4160041809082 + }, + "isolatedSum": { + "p50": 187.9040002822876, + "p90": 222.01599925756454, + "p95": 224.67199712991714, + "p99": 229.69599813222885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.98400366306305, + "p90": 134.62400436401367, + "p95": 136.80000603199005, + "p99": 144.44799721240997 + }, + "combine": { + "p50": 107.87200182676315, + "p90": 121.95199728012085, + "p95": 122.5920021533966, + "p99": 123.23199957609177 + }, + "roundtrip": { + "p50": 198.68800044059753, + "p90": 217.31199324131012, + 
"p95": 220.09600698947906, + "p99": 222.84799814224243 + }, + "isolatedSum": { + "p50": 221.8560054898262, + "p90": 256.5760016441345, + "p95": 259.39200818538666, + "p99": 267.67999678850174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-151751b4", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_882de7be", + "comparisonKey": "2d3a35e74821a916", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:11.972051+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 79.52000200748444, + "p90": 83.61600339412689, + "p95": 86.36800199747086, + "p99": 93.72799843549728 + }, + "combine": { + "p50": 81.56800270080566, + "p90": 83.45600217580795, + "p95": 87.93599903583527, + "p99": 90.7839983701706 + }, + "roundtrip": { + "p50": 137.28000223636627, + "p90": 142.81600713729858, + "p95": 145.53600549697876, + "p99": 150.2400040626526 + }, + "isolatedSum": { + "p50": 161.0880047082901, + "p90": 167.07200556993484, + "p95": 174.30400103330612, + "p99": 184.51199680566788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 60.95999851822853, + "p90": 85.79199761152267, + "p95": 87.26400136947632, + "p99": 90.7839983701706 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 82.94399827718735, + "p95": 83.20000022649765, + "p99": 83.90399813652039 + }, + "roundtrip": { + "p50": 121.66400253772736, + "p90": 145.9520012140274, + "p95": 147.5519984960556, + "p99": 157.24800527095795 + }, + "isolatedSum": { + "p50": 135.1039968430996, + "p90": 168.73599588871002, + "p95": 170.46400159597397, + "p99": 174.68799650669098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 
1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 78.62400263547897, + "p90": 84.51200276613235, + "p95": 86.62399649620056, + "p99": 90.59199690818787 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 88.35200220346451, + "p95": 89.31200206279755, + "p99": 89.88799899816513 + }, + "roundtrip": { + "p50": 134.8479986190796, + "p90": 146.11199498176575, + "p95": 148.0640023946762, + "p99": 153.47200632095337 + }, + "isolatedSum": { + "p50": 159.13600474596024, + "p90": 172.86400496959686, + "p95": 175.9359985589981, + "p99": 180.479995906353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 77.66400277614594, + "p90": 82.75199681520462, + "p95": 85.21600067615509, + "p99": 90.36800265312195 + }, + "combine": { + "p50": 80.99199831485748, + "p90": 88.86399865150452, + "p95": 91.51999652385712, + "p99": 134.0160071849823 + }, + "roundtrip": { + "p50": 136.7039978504181, + "p90": 145.91999351978302, + "p95": 147.77599275112152, + "p99": 238.68800699710846 + }, + "isolatedSum": { + "p50": 158.65600109100342, + "p90": 171.61599546670914, + "p95": 176.7359972000122, + "p99": 224.38400983810425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.53600180149078, + "p90": 83.5840031504631, + "p95": 86.97599917650223, + "p99": 94.08000111579895 + }, + "combine": { + "p50": 83.48800241947174, + "p90": 
89.79199826717377, + "p95": 90.40000289678574, + "p99": 96.99200093746185 + }, + "roundtrip": { + "p50": 138.94400000572205, + "p90": 144.73600685596466, + "p95": 147.32800424098969, + "p99": 151.58399939537048 + }, + "isolatedSum": { + "p50": 161.02400422096252, + "p90": 173.37600141763687, + "p95": 177.37600207328796, + "p99": 191.0720020532608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.2159993648529, + "p90": 89.63199704885483, + "p95": 91.10400080680847, + "p99": 95.93600034713745 + }, + "combine": { + "p50": 82.8159973025322, + "p90": 92.03200042247772, + "p95": 97.18400239944458, + "p99": 98.11200201511383 + }, + "roundtrip": { + "p50": 132.9600065946579, + "p90": 154.62400019168854, + "p95": 155.58399260044098, + "p99": 160.60799360275269 + }, + "isolatedSum": { + "p50": 160.0319966673851, + "p90": 181.66399747133255, + "p95": 188.28800320625305, + "p99": 194.04800236225128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.02400052547455, + "p90": 102.75200009346008, + "p95": 104.8320010304451, + "p99": 109.43999886512756 + }, + "combine": { + "p50": 97.75999933481216, + "p90": 105.98400235176086, + "p95": 106.36799782514572, + "p99": 109.95200276374817 + }, + "roundtrip": { + "p50": 160.38399934768677, + "p90": 166.46400094032288, + "p95": 169.69600319862366, + "p99": 172.83199727535248 + }, + "isolatedSum": { + "p50": 190.7839998602867, + "p90": 208.73600244522095, + "p95": 211.19999885559082, + "p99": 219.39200162887573 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.15999722480774, + "p90": 120.19199877977371, + "p95": 121.8239963054657, + "p99": 136.7039978504181 + }, + "combine": { + "p50": 115.23199826478958, + "p90": 122.78400361537933, + "p95": 123.32800030708313, + "p99": 126.81600451469421 + }, + "roundtrip": { + "p50": 190.68799912929535, + "p90": 202.4960070848465, + "p95": 205.6960016489029, + "p99": 209.1200053691864 + }, + "isolatedSum": { + "p50": 227.39199548959732, + "p90": 242.97600239515305, + "p95": 245.15199661254883, + "p99": 263.5200023651123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6657fd27", + "identity": "h100|deepep|v2|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_61b3262b", + "comparisonKey": "5b8eb9bb4300fe2c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:59.604775+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 42.208001017570496, + "p90": 49.056001007556915, + "p95": 49.27999898791313, + "p99": 52.5440014898777 + }, + "combine": { + "p50": 36.959998309612274, + "p90": 44.256001710891724, + "p95": 44.64000090956688, + "p99": 45.1200008392334 + }, + "roundtrip": { + "p50": 59.7120001912117, + "p90": 67.16799736022949, + "p95": 67.71200150251389, + "p99": 69.18399780988693 + }, + "isolatedSum": { + "p50": 79.16799932718277, + "p90": 93.31200271844864, + "p95": 93.91999989748001, + "p99": 97.6640023291111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + 
"p50": 41.21600091457367, + "p90": 48.79999905824661, + "p95": 48.99200052022934, + "p99": 51.80799961090088 + }, + "combine": { + "p50": 30.047999694943428, + "p90": 37.151999771595, + "p95": 38.07999938726425, + "p99": 45.024000108242035 + }, + "roundtrip": { + "p50": 57.760000228881836, + "p90": 62.04799935221672, + "p95": 67.48799979686737, + "p99": 68.12799721956253 + }, + "isolatedSum": { + "p50": 71.2640006095171, + "p90": 85.95199882984161, + "p95": 87.07199990749359, + "p99": 96.83199971914291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 41.21600091457367, + "p90": 48.64000156521797, + "p95": 48.895999789237976, + "p99": 50.87999999523163 + }, + "combine": { + "p50": 36.99199855327606, + "p90": 37.66399994492531, + "p95": 38.015998899936676, + "p99": 45.24800181388855 + }, + "roundtrip": { + "p50": 59.51999872922897, + "p90": 65.72800129652023, + "p95": 67.61600077152252, + "p99": 68.64000111818314 + }, + "isolatedSum": { + "p50": 78.20799946784973, + "p90": 86.30400151014328, + "p95": 86.91199868917465, + "p99": 96.12800180912018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 47.74399846792221, + "p90": 49.056001007556915, + "p95": 49.375999718904495, + "p99": 56.543998420238495 + }, + "combine": { + "p50": 36.86400130391121, + "p90": 44.28799822926521, + "p95": 44.576000422239304, + "p99": 45.152001082897186 + }, + "roundtrip": { + "p50": 66.78400188684464, + "p90": 68.00000369548798, + "p95": 68.35199892520905, + "p99": 74.01599735021591 + 
}, + "isolatedSum": { + "p50": 84.60799977183342, + "p90": 93.34399923682213, + "p95": 93.9520001411438, + "p99": 101.69599950313568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 48.767998814582825, + "p90": 53.05600166320801, + "p95": 56.32000043988228, + "p99": 57.53599852323532 + }, + "combine": { + "p50": 44.60800066590309, + "p90": 45.24800181388855, + "p95": 45.53600028157234, + "p99": 48.67200180888176 + }, + "roundtrip": { + "p50": 67.84000247716904, + "p90": 75.48800110816956, + "p95": 75.80800354480743, + "p99": 78.84799689054489 + }, + "isolatedSum": { + "p50": 93.37599948048592, + "p90": 98.30400347709656, + "p95": 101.85600072145462, + "p99": 106.20800033211708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 49.18399825692177, + "p90": 56.41600117087364, + "p95": 56.992001831531525, + "p99": 57.34400078654289 + }, + "combine": { + "p50": 52.12799832224846, + "p90": 53.37600037455559, + "p95": 53.53600159287453, + "p99": 53.92000079154968 + }, + "roundtrip": { + "p50": 82.20800012350082, + "p90": 84.51200276613235, + "p95": 84.79999750852585, + "p99": 87.2960016131401 + }, + "isolatedSum": { + "p50": 101.31199657917023, + "p90": 109.79200154542923, + "p95": 110.52800342440605, + "p99": 111.26400157809258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 65.11999666690826, + "p90": 66.39999896287918, + "p95": 67.61600077152252, + "p99": 73.34399968385696 + }, + "combine": { + "p50": 61.28000095486641, + "p90": 63.45599889755249, + "p95": 68.9919963479042, + "p99": 77.40800082683563 + }, + "roundtrip": { + "p50": 101.82400047779083, + "p90": 109.11999642848969, + "p95": 109.69600081443787, + "p99": 111.77600175142288 + }, + "isolatedSum": { + "p50": 126.39999762177467, + "p90": 129.85599786043167, + "p95": 136.60799711942673, + "p99": 150.7520005106926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.00799942016602, + "p90": 94.52799707651138, + "p95": 95.04000097513199, + "p99": 97.18400239944458 + }, + "combine": { + "p50": 88.67199718952179, + "p90": 95.64799815416336, + "p95": 96.03200107812881, + "p99": 96.79999947547913 + }, + "roundtrip": { + "p50": 152.5759994983673, + "p90": 159.36000645160675, + "p95": 160.16000509262085, + "p99": 161.9199961423874 + }, + "isolatedSum": { + "p50": 175.6799966096878, + "p90": 190.17599523067474, + "p95": 191.0720020532608, + "p99": 193.9840018749237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cce56cc3", + "identity": "h100|deepep|v2|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_f9bbecdf", + "comparisonKey": "227bf8e96f877b38", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:25.866278+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 44.03200000524521, + "p90": 49.40799996256828, + "p95": 49.82399940490723, + "p99": 56.543998420238495 + }, + "combine": { + "p50": 37.59999945759773, + "p90": 43.90399903059006, + "p95": 45.21600157022476, + "p99": 
46.14400118589401 + }, + "roundtrip": { + "p50": 61.08799949288368, + "p90": 68.67200136184692, + "p95": 69.60000097751617, + "p99": 77.91999727487564 + }, + "isolatedSum": { + "p50": 81.63199946284294, + "p90": 93.31199899315834, + "p95": 95.04000097513199, + "p99": 102.68799960613251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.247998744249344, + "p90": 50.335999578237534, + "p95": 51.392000168561935, + "p99": 57.88800120353699 + }, + "combine": { + "p50": 38.336001336574554, + "p90": 45.75999826192856, + "p95": 46.112000942230225, + "p99": 46.68800160288811 + }, + "roundtrip": { + "p50": 61.76000088453293, + "p90": 68.12799721956253, + "p95": 68.92800331115723, + "p99": 70.3359991312027 + }, + "isolatedSum": { + "p50": 87.5840000808239, + "p90": 96.09599784016609, + "p95": 97.50400111079216, + "p99": 104.5760028064251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 43.136000633239746, + "p90": 49.82399940490723, + "p95": 50.175998359918594, + "p99": 51.83999985456467 + }, + "combine": { + "p50": 37.82400116324425, + "p90": 39.135999977588654, + "p95": 44.44799944758415, + "p99": 45.88799923658371 + }, + "roundtrip": { + "p50": 60.127999633550644, + "p90": 66.65600091218948, + "p95": 67.6800012588501, + "p99": 70.01599669456482 + }, + "isolatedSum": { + "p50": 80.960001796484, + "p90": 88.95999938249588, + "p95": 94.62399780750275, + "p99": 97.72799909114838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 
2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 47.42399975657463, + "p90": 49.82399940490723, + "p95": 50.335999578237534, + "p99": 55.96800148487091 + }, + "combine": { + "p50": 43.35999861359596, + "p90": 44.89599913358688, + "p95": 45.184001326560974, + "p99": 47.00800031423569 + }, + "roundtrip": { + "p50": 65.95200300216675, + "p90": 68.96000355482101, + "p95": 69.43999975919724, + "p99": 74.5920017361641 + }, + "isolatedSum": { + "p50": 90.7839983701706, + "p90": 94.71999853849411, + "p95": 95.52000090479851, + "p99": 102.9760017991066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 48.16000163555145, + "p90": 55.64799904823303, + "p95": 56.28800019621849, + "p99": 58.01599845290184 + }, + "combine": { + "p50": 43.96799951791763, + "p90": 46.112000942230225, + "p95": 46.78399860858917, + "p99": 52.480001002550125 + }, + "roundtrip": { + "p50": 68.03199648857117, + "p90": 74.94399696588516, + "p95": 75.52000135183334, + "p99": 78.46400141716003 + }, + "isolatedSum": { + "p50": 92.12800115346909, + "p90": 101.75999999046326, + "p95": 103.07199880480766, + "p99": 110.49599945545197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 50.783999264240265, + "p90": 57.11999908089638, + "p95": 57.72799998521805, + "p99": 58.848001062870026 + }, + "combine": { + "p50": 52.319999784231186, + 
"p90": 53.727999329566956, + "p95": 53.98400127887726, + "p99": 54.91200089454651 + }, + "roundtrip": { + "p50": 78.11199873685837, + "p90": 84.73599702119827, + "p95": 85.08799970149994, + "p99": 86.7839977145195 + }, + "isolatedSum": { + "p50": 103.10399904847145, + "p90": 110.84799841046333, + "p95": 111.7120012640953, + "p99": 113.76000195741653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 61.43999844789505, + "p90": 67.6800012588501, + "p95": 68.60800087451935, + "p99": 72.76800274848938 + }, + "combine": { + "p50": 62.33600154519081, + "p90": 66.81600213050842, + "p95": 68.1919977068901, + "p99": 69.11999732255936 + }, + "roundtrip": { + "p50": 101.85600072145462, + "p90": 107.744000852108, + "p95": 108.60799998044968, + "p99": 110.11199653148651 + }, + "isolatedSum": { + "p50": 123.77599999308586, + "p90": 134.49600338935852, + "p95": 136.79999858140945, + "p99": 141.88800007104874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.20800077915192, + "p90": 90.7519981265068, + "p95": 94.68799829483032, + "p99": 96.16000205278397 + }, + "combine": { + "p50": 89.6959975361824, + "p90": 95.23200243711472, + "p95": 95.64799815416336, + "p99": 97.85600006580353 + }, + "roundtrip": { + "p50": 153.3759981393814, + "p90": 158.62399339675903, + "p95": 159.84000265598297, + "p99": 161.24799847602844 + }, + "isolatedSum": { + "p50": 175.90399831533432, + "p90": 185.98400056362152, + "p95": 190.33599644899368, + "p99": 194.0160021185875 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e5f0db2", + "identity": "h100|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_557e3a7d", + "comparisonKey": "7b472a5d21d5aa77", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:32.738351+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 197.24799692630768, + "p90": 207.42399990558624, + "p95": 212.0320051908493, + "p99": 233.63199830055237 + }, + "combine": { + "p50": 52.70399898290634, + "p90": 55.64799904823303, + "p95": 58.079998940229416, + "p99": 62.01599910855293 + }, + "roundtrip": { + "p50": 233.66400599479675, + "p90": 242.68800020217896, + "p95": 261.9520127773285, + "p99": 357.91999101638794 + }, + "isolatedSum": { + "p50": 249.95199590921402, + "p90": 263.0719989538193, + "p95": 270.1120041310787, + "p99": 295.6479974091053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 196.73599302768707, + "p90": 204.70400154590607, + "p95": 208.80000293254852, + "p99": 217.43999421596527 + }, + "combine": { + "p50": 53.3440001308918, + "p90": 56.44800141453743, + "p95": 57.8560009598732, + "p99": 61.95199862122536 + }, + "roundtrip": { + "p50": 234.68799889087677, + "p90": 240.60800671577454, + "p95": 242.2720044851303, + "p99": 246.94399535655975 + }, + "isolatedSum": { + "p50": 250.07999315857887, + "p90": 261.1520029604435, + "p95": 266.6560038924217, + "p99": 279.3919928371906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 200.95999538898468, + "p90": 291.1039888858795, + "p95": 295.74400186538696, + "p99": 310.7840120792389 + }, + "combine": { + "p50": 54.976001381874084, + "p90": 67.32799857854843, + "p95": 68.67200136184692, + "p99": 73.82400333881378 + }, + "roundtrip": { + "p50": 239.00799453258514, + "p90": 335.3919982910156, + "p95": 338.0799889564514, + "p99": 349.98399019241333 + }, + "isolatedSum": { + "p50": 255.93599677085876, + "p90": 358.43198746442795, + "p95": 364.4160032272339, + "p99": 384.6080154180527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 197.79199361801147, + "p90": 284.960001707077, + "p95": 288.9919877052307, + "p99": 298.8159954547882 + }, + "combine": { + "p50": 54.23999950289726, + "p90": 57.82400071620941, + "p95": 59.808000922203064, + "p99": 63.00800293684006 + }, + "roundtrip": { + "p50": 235.52000522613525, + "p90": 241.69600009918213, + "p95": 243.77599358558655, + "p99": 248.3839988708496 + }, + "isolatedSum": { + "p50": 252.03199312090874, + "p90": 342.78400242328644, + "p95": 348.7999886274338, + "p99": 361.82399839162827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 201.05600357055664, + "p90": 288.4800136089325, + "p95": 293.8559949398041, + "p99": 303.42400074005127 + }, + "combine": { + "p50": 57.023998349905014, + "p90": 69.47200000286102, + "p95": 72.06399738788605, + "p99": 284.5439910888672 + }, + "roundtrip": { + "p50": 238.97600173950195, + "p90": 334.84798669815063, + 
"p95": 337.15200424194336, + "p99": 341.3439989089966 + }, + "isolatedSum": { + "p50": 258.08000192046165, + "p90": 357.9520136117935, + "p95": 365.9199923276901, + "p99": 587.9679918289185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.99200308322906, + "p90": 288.86398673057556, + "p95": 293.7279939651489, + "p99": 298.7839877605438 + }, + "combine": { + "p50": 60.54399907588959, + "p90": 72.48000055551529, + "p95": 73.66400212049484, + "p99": 78.87999713420868 + }, + "roundtrip": { + "p50": 242.88000166416168, + "p90": 335.999995470047, + "p95": 338.9759957790375, + "p99": 344.1919982433319 + }, + "isolatedSum": { + "p50": 261.53600215911865, + "p90": 361.34398728609085, + "p95": 367.39199608564377, + "p99": 377.6639848947525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 202.11200416088104, + "p90": 288.7040078639984, + "p95": 293.8239872455597, + "p99": 301.82400345802307 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 79.8719972372055, + "p95": 80.9599980711937, + "p99": 84.83199775218964 + }, + "roundtrip": { + "p50": 251.3599991798401, + "p90": 338.3679986000061, + "p95": 341.15201234817505, + "p99": 345.8240032196045 + }, + "isolatedSum": { + "p50": 270.176000893116, + "p90": 368.5760051012039, + "p95": 374.7839853167534, + "p99": 386.6560012102127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 207.2640061378479, + "p90": 292.5119996070862, + "p95": 296.4160144329071, + "p99": 302.68800258636475 + }, + "combine": { + "p50": 81.37600123882294, + "p90": 91.16800129413605, + "p95": 92.41600334644318, + "p99": 97.43999689817429 + }, + "roundtrip": { + "p50": 268.67198944091797, + "p90": 353.4719944000244, + "p95": 356.28798604011536, + "p99": 360.73601245880127 + }, + "isolatedSum": { + "p50": 288.64000737667084, + "p90": 383.68000090122223, + "p95": 388.8320177793503, + "p99": 400.12799948453903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6f6e7201", + "identity": "h100|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_557e3a7d", + "comparisonKey": "b8d9289173cdacfd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:22.856269+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + 
}, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 186.91200017929077, + "p90": 195.360004901886, + "p95": 198.7839937210083, + "p99": 205.47200739383698 + }, + "combine": { + "p50": 54.976001381874084, + "p90": 57.11999908089638, + "p95": 59.967998415231705, + "p99": 62.591999769210815 + }, + "roundtrip": { + "p50": 227.23199427127838, + "p90": 235.00800132751465, + "p95": 237.05600202083588, + "p99": 245.37600576877594 + }, + "isolatedSum": { + "p50": 241.88800156116486, + "p90": 252.48000398278236, + "p95": 258.75199213624, + "p99": 268.0640071630478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 186.78399920463562, + "p90": 195.13599574565887, + "p95": 197.66399264335632, + "p99": 208.25600624084473 + }, + "combine": { + "p50": 56.384000927209854, + "p90": 
59.20000001788139, + "p95": 61.37600168585777, + "p99": 64.06400352716446 + }, + "roundtrip": { + "p50": 229.88800704479218, + "p90": 236.32000386714935, + "p95": 239.51999843120575, + "p99": 246.5600073337555 + }, + "isolatedSum": { + "p50": 243.16800013184547, + "p90": 254.33599576354027, + "p95": 259.0399943292141, + "p99": 272.3200097680092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 191.26400351524353, + "p90": 275.2639949321747, + "p95": 280.9920012950897, + "p99": 293.4719920158386 + }, + "combine": { + "p50": 57.88800120353699, + "p90": 71.07199728488922, + "p95": 72.48000055551529, + "p99": 75.83999633789062 + }, + "roundtrip": { + "p50": 233.5360050201416, + "p90": 320.41600346565247, + "p95": 326.81599259376526, + "p99": 345.15199065208435 + }, + "isolatedSum": { + "p50": 249.15200471878052, + "p90": 346.3359922170639, + "p95": 353.472001850605, + "p99": 369.31198835372925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 187.51999735832214, + "p90": 195.360004901886, + "p95": 198.33600521087646, + "p99": 216.2880003452301 + }, + "combine": { + "p50": 57.792000472545624, + "p90": 60.47999858856201, + "p95": 62.94400244951248, + "p99": 65.5359998345375 + }, + "roundtrip": { + "p50": 231.99999332427979, + "p90": 242.71999299526215, + "p95": 252.76800990104675, + "p99": 363.45601081848145 + }, + "isolatedSum": { + "p50": 245.31199783086777, + "p90": 255.840003490448, + "p95": 261.28000766038895, + "p99": 281.8240001797676 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 191.64800643920898, + "p90": 271.84000611305237, + "p95": 277.3439884185791, + "p99": 459.3600034713745 + }, + "combine": { + "p50": 59.487998485565186, + "p90": 71.26399874687195, + "p95": 71.96799665689468, + "p99": 78.5600021481514 + }, + "roundtrip": { + "p50": 235.74399948120117, + "p90": 316.22400879859924, + "p95": 319.39199566841125, + "p99": 325.6959915161133 + }, + "isolatedSum": { + "p50": 251.13600492477417, + "p90": 343.1040048599243, + "p95": 349.3119850754738, + "p99": 537.9200056195259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 190.0479942560196, + "p90": 270.04799246788025, + "p95": 273.9520072937012, + "p99": 281.5999984741211 + }, + "combine": { + "p50": 63.00800293684006, + "p90": 65.85600227117538, + "p95": 68.09599697589874, + "p99": 70.91200351715088 + }, + "roundtrip": { + "p50": 237.05600202083588, + "p90": 244.4159984588623, + "p95": 248.09600412845612, + "p99": 260.8639895915985 + }, + "isolatedSum": { + "p50": 253.05599719285965, + "p90": 335.90399473905563, + "p95": 342.0480042695999, + "p99": 352.512001991272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 191.6159987449646, + "p90": 275.32801032066345, + "p95": 280.0639867782593, + 
"p99": 296.9920039176941 + }, + "combine": { + "p50": 73.66400212049484, + "p90": 86.68799698352814, + "p95": 87.99999952316284, + "p99": 90.84799885749817 + }, + "roundtrip": { + "p50": 249.66399371623993, + "p90": 335.04000306129456, + "p95": 339.04001116752625, + "p99": 344.4159924983978 + }, + "isolatedSum": { + "p50": 265.28000086545944, + "p90": 362.0160073041916, + "p95": 368.0639863014221, + "p99": 387.84000277519226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 200.99200308322906, + "p90": 276.16000175476074, + "p95": 280.09599447250366, + "p99": 292.89600253105164 + }, + "combine": { + "p50": 87.45600283145905, + "p90": 101.27999633550644, + "p95": 102.20800340175629, + "p99": 107.4879989027977 + }, + "roundtrip": { + "p50": 271.39198780059814, + "p90": 346.0800051689148, + "p95": 349.8240113258362, + "p99": 358.65598917007446 + }, + "isolatedSum": { + "p50": 288.4480059146881, + "p90": 377.4399980902672, + "p95": 382.30399787425995, + "p99": 400.38400143384933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-596f2b4a", + "identity": "h100|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_557e3a7d", + "comparisonKey": "20f8562e9a9763a2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:15.067599+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 195.39199769496918, + "p90": 207.2959989309311, + "p95": 227.80799865722656, + "p99": 255.77598810195923 + }, + "combine": { + "p50": 59.967998415231705, + "p90": 62.20800057053566, + "p95": 64.31999802589417, + "p99": 68.57600063085556 + }, + "roundtrip": { + "p50": 238.49600553512573, + "p90": 263.39200139045715, + "p95": 274.4320034980774, + "p99": 395.52000164985657 
+ }, + "isolatedSum": { + "p50": 255.35999611020088, + "p90": 269.50399950146675, + "p95": 292.1279966831207, + "p99": 324.3519887328148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 195.8719938993454, + "p90": 214.7199958562851, + "p95": 228.09599339962006, + "p99": 237.40799725055695 + }, + "combine": { + "p50": 61.08799949288368, + "p90": 64.25599753856659, + "p95": 66.11199676990509, + "p99": 70.39999961853027 + }, + "roundtrip": { + "p50": 240.7039999961853, + "p90": 249.63200092315674, + "p95": 264.0959918498993, + "p99": 370.2400028705597 + }, + "isolatedSum": { + "p50": 256.9599933922291, + "p90": 278.9759933948517, + "p95": 294.20799016952515, + "p99": 307.8079968690872 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 201.1519968509674, + "p90": 273.21600914001465, + "p95": 277.3120105266571, + "p99": 290.336012840271 + }, + "combine": { + "p50": 62.30400130152702, + "p90": 73.91999661922455, + "p95": 74.52800124883652, + "p99": 77.63200253248215 + }, + "roundtrip": { + "p50": 245.56800723075867, + "p90": 316.76799058914185, + "p95": 319.64799761772156, + "p99": 323.5839903354645 + }, + "isolatedSum": { + "p50": 263.45599815249443, + "p90": 347.1360057592392, + "p95": 351.8400117754936, + "p99": 367.96801537275314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 196.51199877262115, + "p90": 225.43999552726746, + "p95": 235.07200181484222, + "p99": 338.0799889564514 + }, + "combine": { + "p50": 61.85600161552429, + "p90": 66.91200286149979, + "p95": 68.76800209283829, + "p99": 71.58400118350983 + }, + "roundtrip": { + "p50": 241.88800156116486, + "p90": 253.82399559020996, + "p95": 273.9199995994568, + "p99": 282.368004322052 + }, + "isolatedSum": { + "p50": 258.36800038814545, + "p90": 292.35199838876724, + "p95": 303.8400039076805, + "p99": 409.66399013996124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 195.0400024652481, + "p90": 207.93600380420685, + "p95": 224.8000055551529, + "p99": 234.592005610466 + }, + "combine": { + "p50": 63.07200342416763, + "p90": 67.9360032081604, + "p95": 70.23999840021133, + "p99": 73.08799773454666 + }, + "roundtrip": { + "p50": 244.57600712776184, + "p90": 254.5279860496521, + "p95": 263.2319927215576, + "p99": 284.0000092983246 + }, + "isolatedSum": { + "p50": 258.11200588941574, + "p90": 275.87200701236725, + "p95": 295.0400039553642, + "p99": 307.68000334501266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 196.06399536132812, + "p90": 208.76799523830414, + "p95": 218.23999285697937, + "p99": 231.83999955654144 + }, + "combine": { + "p50": 68.44799965620041, + "p90": 72.57600128650665, + "p95": 74.68800246715546, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 249.7600018978119, + "p90": 
259.74398851394653, + "p95": 278.9759933948517, + "p99": 295.77600955963135 + }, + "isolatedSum": { + "p50": 264.51199501752853, + "p90": 281.3439965248108, + "p95": 292.9279953241348, + "p99": 312.25600093603134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.93600249290466, + "p90": 287.51999139785767, + "p95": 292.1920120716095, + "p99": 348.25599193573 + }, + "combine": { + "p50": 78.49600166082382, + "p90": 91.13600105047226, + "p95": 92.6079973578453, + "p99": 97.02400118112564 + }, + "roundtrip": { + "p50": 262.4320089817047, + "p90": 349.2479920387268, + "p95": 352.2239923477173, + "p99": 358.0799996852875 + }, + "isolatedSum": { + "p50": 278.4320041537285, + "p90": 378.6559924483299, + "p95": 384.8000094294548, + "p99": 445.2799931168556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 213.95200490951538, + "p90": 269.6959972381592, + "p95": 272.4800109863281, + "p99": 279.1999876499176 + }, + "combine": { + "p50": 94.24000233411789, + "p90": 102.81600058078766, + "p95": 103.80800068378448, + "p99": 106.81600123643875 + }, + "roundtrip": { + "p50": 288.1599962711334, + "p90": 312.8319978713989, + "p95": 340.2880132198334, + "p99": 349.40800070762634 + }, + "isolatedSum": { + "p50": 308.19200724363327, + "p90": 372.51199781894684, + "p95": 376.2880116701126, + "p99": 386.01598888635635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 
723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a22d173f", + "identity": "h100|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_317edb15", + "comparisonKey": "9848e6847763223a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:04.008358+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.99200093746185, + "p90": 101.56799852848053, + "p95": 104.67199981212616, + "p99": 112.96000331640244 + }, + "combine": { + "p50": 71.03999704122543, + "p90": 75.26399940252304, + "p95": 75.80800354480743, + "p99": 78.43200117349625 + }, + "roundtrip": { + "p50": 192.19200313091278, + "p90": 199.96799528598785, + "p95": 201.92000269889832, + "p99": 206.68800175189972 + }, + "isolatedSum": { + "p50": 168.0319979786873, + "p90": 176.83199793100357, + "p95": 180.4800033569336, + "p99": 191.39200448989868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.96799665689468, + "p90": 100.28800368309021, + "p95": 101.79200023412704, + "p99": 106.78400099277496 + }, + "combine": { + "p50": 64.09599632024765, + "p90": 74.91199672222137, + "p95": 75.58400183916092, + "p99": 78.87999713420868 + }, + "roundtrip": { + "p50": 155.87200224399567, + "p90": 196.86399400234222, + "p95": 198.7839937210083, + "p99": 204.41600680351257 + }, + "isolatedSum": { + "p50": 136.06399297714233, + "p90": 175.20000040531158, + "p95": 177.37600207328796, + "p99": 185.66399812698364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.86400347948074, + "p90": 102.1760031580925, + "p95": 104.99200224876404, + "p99": 109.6000000834465 + }, + 
"combine": { + "p50": 65.63200056552887, + "p90": 78.20799946784973, + "p95": 79.9039974808693, + "p99": 83.16799998283386 + }, + "roundtrip": { + "p50": 158.1760048866272, + "p90": 208.12800526618958, + "p95": 210.4640007019043, + "p99": 214.6880030632019 + }, + "isolatedSum": { + "p50": 138.4960040450096, + "p90": 180.38400262594223, + "p95": 184.89599972963333, + "p99": 192.76800006628036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.84000313282013, + "p90": 101.72799974679947, + "p95": 103.84000092744827, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 65.8240020275116, + "p90": 77.11999863386154, + "p95": 77.91999727487564, + "p99": 82.40000158548355 + }, + "roundtrip": { + "p50": 158.6879938840866, + "p90": 207.48800039291382, + "p95": 209.24800634384155, + "p99": 213.47199380397797 + }, + "isolatedSum": { + "p50": 137.66400516033173, + "p90": 178.847998380661, + "p95": 181.7599982023239, + "p99": 190.5599981546402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.98400104045868, + "p90": 103.71199995279312, + "p95": 106.81600123643875, + "p99": 111.90400272607803 + }, + "combine": { + "p50": 67.64800101518631, + "p90": 80.60800284147263, + "p95": 81.44000172615051, + "p99": 85.05599945783615 + }, + "roundtrip": { + "p50": 160.288006067276, + "p90": 209.6319943666458, + "p95": 212.79999613761902, + "p99": 216.67200326919556 + }, + "isolatedSum": { + "p50": 165.632002055645, + "p90": 184.32000279426575, + "p95": 188.25600296258926, 
+ "p99": 196.96000218391418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 73.98399710655212, + "p90": 102.88000106811523, + "p95": 105.3759977221489, + "p99": 111.32799834012985 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 84.86399799585342, + "p95": 85.69599688053131, + "p99": 88.95999938249588 + }, + "roundtrip": { + "p50": 166.84800386428833, + "p90": 215.5199944972992, + "p95": 218.52800250053406, + "p99": 253.08799743652344 + }, + "isolatedSum": { + "p50": 147.23199605941772, + "p90": 187.74399906396866, + "p95": 191.0719946026802, + "p99": 200.28799772262573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 82.8159973025322, + "p90": 106.1440035700798, + "p95": 108.89600217342377, + "p99": 113.6000007390976 + }, + "combine": { + "p50": 82.94399827718735, + "p90": 95.74399888515472, + "p95": 97.21600264310837, + "p99": 99.90400075912476 + }, + "roundtrip": { + "p50": 176.67199671268463, + "p90": 227.07200050354004, + "p95": 229.5999974012375, + "p99": 232.28800296783447 + }, + "isolatedSum": { + "p50": 165.75999557971954, + "p90": 201.88800245523453, + "p95": 206.11200481653214, + "p99": 213.50400149822235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.17600119113922, + 
"p90": 112.09599673748016, + "p95": 114.81600254774094, + "p99": 124.03199821710587 + }, + "combine": { + "p50": 101.27999633550644, + "p90": 111.96800321340561, + "p95": 113.88800293207169, + "p99": 117.53600090742111 + }, + "roundtrip": { + "p50": 219.00799870491028, + "p90": 247.0400035381317, + "p95": 250.84799528121948, + "p99": 673.6639738082886 + }, + "isolatedSum": { + "p50": 191.45599752664566, + "p90": 224.06399995088577, + "p95": 228.70400547981262, + "p99": 241.56799912452698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-be3b04eb", + "identity": "h100|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_557e3a7d", + "comparisonKey": "3a0306239b35d618", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:59.096740+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 193.92000138759613, + "p90": 201.664000749588, + "p95": 204.41600680351257, + "p99": 210.65600216388702 + }, + "combine": { + "p50": 62.72000074386597, + "p90": 65.34399837255478, + "p95": 67.35999882221222, + "p99": 70.52800059318542 + }, + "roundtrip": { + "p50": 241.34400486946106, + "p90": 247.71200120449066, + "p95": 250.07998943328857, + "p99": 257.82400369644165 + }, + "isolatedSum": { + "p50": 256.6400021314621, + "p90": 267.0079991221428, + "p95": 271.7760056257248, + "p99": 281.18400275707245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 194.2719966173172, + "p90": 203.2639980316162, + "p95": 206.91199600696564, + "p99": 217.8560048341751 + }, + "combine": { + "p50": 63.74400109052658, + "p90": 66.39999896287918, + "p95": 69.21599805355072, + "p99": 74.43200051784515 + }, + "roundtrip": { + "p50": 242.2720044851303, + "p90": 249.15200471878052, + "p95": 
251.45599246025085, + "p99": 263.839989900589 + }, + "isolatedSum": { + "p50": 258.0159977078438, + "p90": 269.6639969944954, + "p95": 276.12799406051636, + "p99": 292.28800535202026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 197.82400131225586, + "p90": 282.9119861125946, + "p95": 286.24001145362854, + "p99": 295.1039969921112 + }, + "combine": { + "p50": 65.85600227117538, + "p90": 77.27999985218048, + "p95": 78.5600021481514, + "p99": 85.21600067615509 + }, + "roundtrip": { + "p50": 246.0159957408905, + "p90": 331.8080008029938, + "p95": 335.1359963417053, + "p99": 339.58399295806885 + }, + "isolatedSum": { + "p50": 263.68000358343124, + "p90": 360.1919859647751, + "p95": 364.80001360177994, + "p99": 380.3199976682663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 194.11200284957886, + "p90": 202.2400051355362, + "p95": 204.92799580097198, + "p99": 210.7519954442978 + }, + "combine": { + "p50": 64.89600241184235, + "p90": 67.48799979686737, + "p95": 70.17599791288376, + "p99": 73.27999919652939 + }, + "roundtrip": { + "p50": 244.3840056657791, + "p90": 251.0400116443634, + "p95": 253.4399926662445, + "p99": 800.2240061759949 + }, + "isolatedSum": { + "p50": 259.0080052614212, + "p90": 269.72800493240356, + "p95": 275.10399371385574, + "p99": 284.0319946408272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 198.65599274635315, + "p90": 282.0799946784973, + "p95": 286.01598739624023, + "p99": 298.2400059700012 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 78.94399762153625, + "p95": 79.77599650621414, + "p99": 82.46400207281113 + }, + "roundtrip": { + "p50": 246.7840015888214, + "p90": 325.1839876174927, + "p95": 328.67199182510376, + "p99": 596.8639850616455 + }, + "isolatedSum": { + "p50": 266.2399932742119, + "p90": 361.02399230003357, + "p95": 365.7919839024544, + "p99": 380.70400804281235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 194.62400674819946, + "p90": 202.43200659751892, + "p95": 205.24799823760986, + "p99": 221.5999960899353 + }, + "combine": { + "p50": 72.15999811887741, + "p90": 75.3600001335144, + "p95": 77.2479996085167, + "p99": 84.86399799585342 + }, + "roundtrip": { + "p50": 252.99200415611267, + "p90": 259.45600867271423, + "p95": 261.59998774528503, + "p99": 269.6639895439148 + }, + "isolatedSum": { + "p50": 266.7840048670769, + "p90": 277.7920067310333, + "p95": 282.49599784612656, + "p99": 306.4639940857887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 198.81600141525269, + "p90": 286.624014377594, + "p95": 291.1359965801239, + "p99": 316.32000207901 + }, + "combine": { + "p50": 83.42400193214417, + "p90": 95.13600170612335, + "p95": 96.44799679517746, + "p99": 100.80000013113022 + }, + 
"roundtrip": { + "p50": 266.55998826026917, + "p90": 352.6400029659271, + "p95": 355.1360070705414, + "p99": 359.96800661087036 + }, + "isolatedSum": { + "p50": 282.24000334739685, + "p90": 381.76001608371735, + "p95": 387.58399337530136, + "p99": 417.1200022101402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 221.27999365329742, + "p90": 287.23201155662537, + "p95": 290.43200612068176, + "p99": 296.1919903755188 + }, + "combine": { + "p50": 100.00000149011612, + "p90": 113.11999708414078, + "p95": 114.27199840545654, + "p99": 119.10399794578552 + }, + "roundtrip": { + "p50": 306.335985660553, + "p90": 372.5759983062744, + "p95": 375.61601400375366, + "p99": 380.3839981555939 + }, + "isolatedSum": { + "p50": 321.27999514341354, + "p90": 400.35200864076614, + "p95": 404.7040045261383, + "p99": 415.2959883213043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4108170e", + "identity": "h100|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_557e3a7d", + "comparisonKey": "1c15ec560f33cb68", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:35.512188+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 185.69600582122803, + "p90": 195.68000733852386, + "p95": 198.7839937210083, + "p99": 207.87200331687927 + }, + "combine": { + "p50": 62.49599903821945, + "p90": 64.89600241184235, + "p95": 66.46399945020676, + "p99": 69.88800317049026 + }, + "roundtrip": { + "p50": 233.63199830055237, + "p90": 241.7919933795929, + "p95": 244.4480061531067, + "p99": 251.23199820518494 + }, + "isolatedSum": { + "p50": 248.19200485944748, + "p90": 260.5760097503662, + "p95": 265.24799317121506, + "p99": 277.76000648736954 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 185.7600063085556, + "p90": 195.19999623298645, + "p95": 199.13600385189056, + "p99": 210.01599729061127 + }, + "combine": { + "p50": 63.07200342416763, + "p90": 65.2799978852272, + "p95": 66.78400188684464, + "p99": 70.36799937486649 + }, + "roundtrip": { + "p50": 234.65600609779358, + "p90": 242.8479939699173, + "p95": 246.33599817752838, + "p99": 252.9279887676239 + }, + "isolatedSum": { + "p50": 248.83200973272324, + "p90": 260.47999411821365, + "p95": 265.9200057387352, + "p99": 280.38399666547775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 190.0479942560196, + "p90": 270.55999636650085, + "p95": 274.3679881095886, + "p99": 283.00800919532776 + }, + "combine": { + "p50": 64.25599753856659, + "p90": 76.7040029168129, + "p95": 77.66400277614594, + "p99": 81.44000172615051 + }, + "roundtrip": { + "p50": 240.7039999961853, + "p90": 310.88000535964966, + "p95": 313.8880133628845, + "p99": 319.7759985923767 + }, + "isolatedSum": { + "p50": 254.30399179458618, + "p90": 347.26399928331375, + "p95": 352.03199088573456, + "p99": 364.44801092147827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 186.88000738620758, + "p90": 198.4959989786148, + "p95": 201.9519954919815, + 
"p99": 252.86400318145752 + }, + "combine": { + "p50": 64.2239972949028, + "p90": 67.16799736022949, + "p95": 69.50400024652481, + "p99": 71.58400118350983 + }, + "roundtrip": { + "p50": 237.85600066184998, + "p90": 245.44000625610352, + "p95": 248.99199604988098, + "p99": 256.415992975235 + }, + "isolatedSum": { + "p50": 251.10400468111038, + "p90": 265.6639963388443, + "p95": 271.4559957385063, + "p99": 324.44800436496735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 186.68800592422485, + "p90": 197.31199741363525, + "p95": 201.34399831295013, + "p99": 209.88799631595612 + }, + "combine": { + "p50": 66.39999896287918, + "p90": 68.92800331115723, + "p95": 71.23199850320816, + "p99": 74.46400076150894 + }, + "roundtrip": { + "p50": 240.4160052537918, + "p90": 247.871994972229, + "p95": 252.22399830818176, + "p99": 257.9199969768524 + }, + "isolatedSum": { + "p50": 253.08800488710403, + "p90": 266.2400007247925, + "p95": 272.5759968161583, + "p99": 284.35199707746506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 187.23200261592865, + "p90": 198.40000569820404, + "p95": 202.7519941329956, + "p99": 573.1520056724548 + }, + "combine": { + "p50": 72.35199958086014, + "p90": 75.07199794054031, + "p95": 76.83199644088745, + "p99": 80.57600259780884 + }, + "roundtrip": { + "p50": 246.5279996395111, + "p90": 254.4960081577301, + "p95": 256.8320035934448, + "p99": 262.2720003128052 + }, + "isolatedSum": { + "p50": 259.5840021967888, + "p90": 
273.47200363874435, + "p95": 279.58399057388306, + "p99": 653.7280082702637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 193.4719979763031, + "p90": 277.24799513816833, + "p95": 280.70399165153503, + "p99": 288.5439991950989 + }, + "combine": { + "p50": 82.78399705886841, + "p90": 96.47999703884125, + "p95": 97.85600006580353, + "p99": 124.1919994354248 + }, + "roundtrip": { + "p50": 261.56800985336304, + "p90": 344.0000116825104, + "p95": 348.7359881401062, + "p99": 357.66398906707764 + }, + "isolatedSum": { + "p50": 276.2559950351715, + "p90": 373.7279921770096, + "p95": 378.55999171733856, + "p99": 412.7359986305237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 216.2880003452301, + "p90": 264.3519937992096, + "p95": 266.59199595451355, + "p99": 271.263986825943 + }, + "combine": { + "p50": 100.28800368309021, + "p90": 109.98400300741196, + "p95": 111.10399663448334, + "p99": 115.10399729013443 + }, + "roundtrip": { + "p50": 300.54399371147156, + "p90": 348.89599680900574, + "p95": 351.936012506485, + "p99": 357.5359880924225 + }, + "isolatedSum": { + "p50": 316.5760040283203, + "p90": 374.33599680662155, + "p95": 377.6959925889969, + "p99": 386.3679841160774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a95aa69f", + 
"identity": "h100|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_6c115061", + "comparisonKey": "22b301360200e405", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:25.378541+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_07", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": 
"07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 57.472001761198044, + "p90": 64.54399973154068, + "p95": 68.28799843788147, + "p99": 83.8719978928566 + }, + "combine": { + "p50": 63.48799914121628, + "p90": 68.09599697589874, + "p95": 72.54400104284286, + "p99": 79.00799810886383 + }, + "roundtrip": { + "p50": 145.11999487876892, + "p90": 171.26399278640747, + "p95": 177.44000256061554, + "p99": 191.13600254058838 + }, + "isolatedSum": { + "p50": 120.96000090241432, + "p90": 132.63999670743942, + "p95": 140.83199948072433, + "p99": 162.87999600172043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 57.37600103020668, + "p90": 62.752000987529755, + "p95": 64.60800021886826, + "p99": 69.43999975919724 + }, + "combine": { + "p50": 64.44799900054932, + "p90": 66.49599969387054, + "p95": 68.64000111818314, + "p99": 71.55200093984604 + }, + "roundtrip": { + "p50": 145.6640064716339, + "p90": 150.11200308799744, + "p95": 152.5759994983673, + "p99": 161.47199273109436 + }, + "isolatedSum": { + "p50": 121.824000030756, + "p90": 129.2480006814003, + "p95": 133.2480013370514, + "p99": 140.99200069904327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 80.6720033288002, + "p90": 86.84799820184708, + "p95": 90.04800021648407, + "p99": 97.18400239944458 + }, + "combine": { + "p50": 75.6480023264885, + "p90": 79.3600007891655, + "p95": 80.54400235414505, + "p99": 84.57600325345993 + }, + 
"roundtrip": { + "p50": 173.40800166130066, + "p90": 185.2799952030182, + "p95": 187.16800212860107, + "p99": 191.103994846344 + }, + "isolatedSum": { + "p50": 156.3200056552887, + "p90": 166.20799899101257, + "p95": 170.59200257062912, + "p99": 181.7600056529045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 57.82400071620941, + "p90": 63.61600011587143, + "p95": 65.98400324583054, + "p99": 74.14399832487106 + }, + "combine": { + "p50": 65.15199691057205, + "p90": 67.77600198984146, + "p95": 69.66400146484375, + "p99": 71.84000313282013 + }, + "roundtrip": { + "p50": 146.33600413799286, + "p90": 151.16800367832184, + "p95": 153.28000485897064, + "p99": 158.04800391197205 + }, + "isolatedSum": { + "p50": 122.97599762678146, + "p90": 131.3920021057129, + "p95": 135.6480047106743, + "p99": 145.9840014576912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 62.01599910855293, + "p90": 81.08799904584885, + "p95": 82.87999778985977, + "p99": 92.54399687051773 + }, + "combine": { + "p50": 68.60800087451935, + "p90": 77.63200253248215, + "p95": 79.29600030183792, + "p99": 83.13599973917007 + }, + "roundtrip": { + "p50": 150.751993060112, + "p90": 180.00000715255737, + "p95": 182.3039948940277, + "p99": 188.09600174427032 + }, + "isolatedSum": { + "p50": 130.62399998307228, + "p90": 158.720001578331, + "p95": 162.1759980916977, + "p99": 175.6799966096878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + 
"fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 63.10400366783142, + "p90": 80.86399734020233, + "p95": 82.43200182914734, + "p99": 87.99999952316284 + }, + "combine": { + "p50": 73.08799773454666, + "p90": 82.71999657154083, + "p95": 83.42400193214417, + "p99": 86.81599795818329 + }, + "roundtrip": { + "p50": 154.81600165367126, + "p90": 184.06400084495544, + "p95": 185.40799617767334, + "p99": 191.42399728298187 + }, + "isolatedSum": { + "p50": 136.19200140237808, + "p90": 163.58399391174316, + "p95": 165.8560037612915, + "p99": 174.81599748134613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 72.89600372314453, + "p90": 87.42400258779526, + "p95": 90.62399715185165, + "p99": 96.00000083446503 + }, + "combine": { + "p50": 83.90399813652039, + "p90": 95.48799693584442, + "p95": 96.38399630784988, + "p99": 100.35199671983719 + }, + "roundtrip": { + "p50": 167.52000153064728, + "p90": 203.96800339221954, + "p95": 206.9759964942932, + "p99": 212.12799847126007 + }, + "isolatedSum": { + "p50": 156.80000185966492, + "p90": 182.91199952363968, + "p95": 187.00799345970154, + "p99": 196.35199755430222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 79.39200103282928, + "p90": 92.47999638319016, + "p95": 94.52799707651138, + "p99": 97.98400104045868 + }, + "combine": { + "p50": 100.63999891281128, + 
"p90": 110.88000237941742, + "p95": 111.64800077676773, + "p99": 115.90400338172913 + }, + "roundtrip": { + "p50": 208.19200575351715, + "p90": 220.2560007572174, + "p95": 223.4240025281906, + "p99": 234.23999547958374 + }, + "isolatedSum": { + "p50": 180.03199994564056, + "p90": 203.35999876260757, + "p95": 206.1759978532791, + "p99": 213.8880044221878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2c566c40", + "identity": "h100|deepep|v2|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_8812d550", + "comparisonKey": "09d43c3f68fa7ce0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:16.429775+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + 
"nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 31.55200183391571, + "p90": 33.79200026392937, + "p95": 35.00799834728241, + "p99": 37.79200091958046 + }, + "combine": { + "p50": 35.96799820661545, + "p90": 38.336001336574554, + "p95": 39.16800022125244, + "p99": 41.21600091457367 + }, + "roundtrip": { + "p50": 2064.255952835083, + "p90": 2067.0719146728516, + "p95": 2067.9678916931152, + "p99": 2071.7759132385254 + }, + "isolatedSum": { + "p50": 67.52000004053116, + "p90": 72.12800160050392, + "p95": 74.17599856853485, + "p99": 79.00800183415413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 31.647998839616776, + "p90": 33.76000002026558, + "p95": 36.0959991812706, + "p99": 42.33599826693535 + }, + "combine": { + "p50": 37.53599897027016, + "p90": 39.583999663591385, + "p95": 40.22400081157684, + "p99": 42.30400174856186 + }, + "roundtrip": { + "p50": 2065.632104873657, + "p90": 2068.0320262908936, + "p95": 2068.9918994903564, + "p99": 2078.144073486328 + }, + "isolatedSum": { + "p50": 69.18399780988693, + "p90": 73.34399968385696, + "p95": 
76.31999999284744, + "p99": 84.64000001549721 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 31.5839983522892, + "p90": 33.88800099492073, + "p95": 35.10399907827377, + "p99": 38.46399858593941 + }, + "combine": { + "p50": 37.63199970126152, + "p90": 39.96799886226654, + "p95": 40.76800122857094, + "p99": 45.632001012563705 + }, + "roundtrip": { + "p50": 2065.664052963257, + "p90": 2068.4800148010254, + "p95": 2069.6640014648438, + "p99": 2071.9680786132812 + }, + "isolatedSum": { + "p50": 69.21599805355072, + "p90": 73.85599985718727, + "p95": 75.87200030684471, + "p99": 84.09599959850311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 32.19199925661087, + "p90": 34.752000123262405, + "p95": 37.248000502586365, + "p99": 44.67200115323067 + }, + "combine": { + "p50": 38.43199834227562, + "p90": 40.608000010252, + "p95": 41.85599833726883, + "p99": 44.544000178575516 + }, + "roundtrip": { + "p50": 2067.487955093384, + "p90": 2069.8559284210205, + "p95": 2070.43194770813, + "p99": 2072.9598999023438 + }, + "isolatedSum": { + "p50": 70.62399759888649, + "p90": 75.3600001335144, + "p95": 79.1039988398552, + "p99": 89.21600133180618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 33.984001725912094, + 
"p90": 35.87200120091438, + "p95": 37.087999284267426, + "p99": 41.88799858093262 + }, + "combine": { + "p50": 40.800001472234726, + "p90": 43.007999658584595, + "p95": 43.776001781225204, + "p99": 46.46399989724159 + }, + "roundtrip": { + "p50": 2071.295976638794, + "p90": 2073.823928833008, + "p95": 2075.0720500946045, + "p99": 2078.9120197296143 + }, + "isolatedSum": { + "p50": 74.78400319814682, + "p90": 78.88000085949898, + "p95": 80.86400106549263, + "p99": 88.35199847817421 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 35.96799820661545, + "p90": 37.28000074625015, + "p95": 38.72000053524971, + "p99": 43.776001781225204 + }, + "combine": { + "p50": 47.26399853825569, + "p90": 49.215998500585556, + "p95": 49.95200037956238, + "p99": 54.46400120854378 + }, + "roundtrip": { + "p50": 2080.127954483032, + "p90": 2081.8240642547607, + "p95": 2082.688093185425, + "p99": 2085.69598197937 + }, + "isolatedSum": { + "p50": 83.23199674487114, + "p90": 86.49599924683571, + "p95": 88.67200091481209, + "p99": 98.24000298976898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 43.20000112056732, + "p90": 53.63199859857559, + "p95": 54.368000477552414, + "p99": 58.01599845290184 + }, + "combine": { + "p50": 59.647999703884125, + "p90": 69.98399645090103, + "p95": 71.29599899053574, + "p99": 75.03999769687653 + }, + "roundtrip": { + "p50": 2098.560094833374, + "p90": 2102.8800010681152, + "p95": 2107.583999633789, + "p99": 2110.208034515381 + }, + 
"isolatedSum": { + "p50": 102.84800082445145, + "p90": 123.61599504947662, + "p95": 125.66399946808815, + "p99": 133.05599614977837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 54.59199845790863, + "p90": 56.32000043988228, + "p95": 57.440001517534256, + "p99": 61.184000223875046 + }, + "combine": { + "p50": 86.36800199747086, + "p90": 89.75999802350998, + "p95": 90.87999910116196, + "p99": 154.36799824237823 + }, + "roundtrip": { + "p50": 2139.3280029296875, + "p90": 2142.688035964966, + "p95": 2143.807888031006, + "p99": 2147.007942199707 + }, + "isolatedSum": { + "p50": 140.9600004553795, + "p90": 146.07999846339226, + "p95": 148.3200006186962, + "p99": 215.55199846625328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a7339e4", + "identity": "h100|deepep|v2|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_7d89a2f8", + "comparisonKey": "407b92d1dd957b2b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:07.321219+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { 
+ "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 31.10400028526783, + "p90": 33.02399814128876, + "p95": 34.94400158524513, + "p99": 38.30400109291077 + }, + "combine": { + "p50": 37.856001406908035, + "p90": 40.25600105524063, + "p95": 42.55999997258186, + "p99": 67.77600198984146 + }, + "roundtrip": { + "p50": 2065.664052963257, + "p90": 2095.968008041382, + "p95": 2100.7039546966553, + "p99": 2104.032039642334 + }, + "isolatedSum": { + "p50": 68.96000169217587, + "p90": 73.27999919652939, + "p95": 77.504001557827, + "p99": 106.08000308275223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 31.072000041604042, + "p90": 32.92800113558769, + "p95": 34.84800085425377, + "p99": 39.45599868893623 + }, + "combine": { + "p50": 37.31200098991394, + "p90": 39.5519994199276, + "p95": 40.352001786231995, + "p99": 43.96799951791763 + }, + "roundtrip": { + "p50": 2065.023899078369, + "p90": 2067.0719146728516, + "p95": 2068.25590133667, + "p99": 2070.688009262085 + }, + "isolatedSum": { + "p50": 68.38400103151798, + "p90": 72.48000055551529, + "p95": 75.20000264048576, + "p99": 83.42399820685387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 31.039999797940254, + "p90": 33.055998384952545, + "p95": 34.71999987959862, + "p99": 37.59999945759773 + }, + "combine": { + "p50": 38.14399987459183, + "p90": 40.511999279260635, + "p95": 42.11200028657913, + "p99": 60.864001512527466 + }, + "roundtrip": { + "p50": 2066.5600299835205, + "p90": 2068.671941757202, + "p95": 2070.1119899749756, + "p99": 2085.0560665130615 + }, + "isolatedSum": { + "p50": 69.18399967253208, + "p90": 73.56799766421318, + "p95": 76.83200016617775, + "p99": 98.4640009701252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 31.488001346588135, + "p90": 33.76000002026558, + "p95": 36.448001861572266, + "p99": 41.05599969625473 + }, + "combine": { + "p50": 38.91199827194214, + "p90": 41.31200164556503, + "p95": 43.136000633239746, + "p99": 110.6560006737709 + }, + "roundtrip": { + 
"p50": 2068.063974380493, + "p90": 2069.920063018799, + "p95": 2070.847988128662, + "p99": 2075.455904006958 + }, + "isolatedSum": { + "p50": 70.39999961853027, + "p90": 75.07200166583061, + "p95": 79.58400249481201, + "p99": 151.71200037002563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 33.535998314619064, + "p90": 35.32800078392029, + "p95": 36.54399886727333, + "p99": 40.991999208927155 + }, + "combine": { + "p50": 41.760001331567764, + "p90": 43.83999854326248, + "p95": 45.05600035190582, + "p99": 48.73599857091904 + }, + "roundtrip": { + "p50": 2072.927951812744, + "p90": 2075.1359462738037, + "p95": 2077.471971511841, + "p99": 2091.871976852417 + }, + "isolatedSum": { + "p50": 75.29599964618683, + "p90": 79.16799932718277, + "p95": 81.59999921917915, + "p99": 89.72799777984619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 35.51999852061272, + "p90": 36.80000081658363, + "p95": 38.55999931693077, + "p99": 42.17600077390671 + }, + "combine": { + "p50": 48.19199815392494, + "p90": 50.27199909090996, + "p95": 51.58400163054466, + "p99": 57.08799883723259 + }, + "roundtrip": { + "p50": 2080.8959007263184, + "p90": 2082.848072052002, + "p95": 2083.712100982666, + "p99": 2086.4639282226562 + }, + "isolatedSum": { + "p50": 83.71199667453766, + "p90": 87.07199990749359, + "p95": 90.14400094747543, + "p99": 99.2639996111393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 
5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 42.11200028657913, + "p90": 48.48000034689903, + "p95": 49.47200044989586, + "p99": 53.18399891257286 + }, + "combine": { + "p50": 60.896001756191254, + "p90": 71.26399874687195, + "p95": 73.40800017118454, + "p99": 75.93599706888199 + }, + "roundtrip": { + "p50": 2100.192070007324, + "p90": 2109.8239421844482, + "p95": 2111.743927001953, + "p99": 2129.7600269317627 + }, + "isolatedSum": { + "p50": 103.00800204277039, + "p90": 119.74399909377098, + "p95": 122.8800006210804, + "p99": 129.11999598145485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 53.98400127887726, + "p90": 55.456001311540604, + "p95": 56.76800012588501, + "p99": 60.92799827456474 + }, + "combine": { + "p50": 87.52000331878662, + "p90": 90.04800021648407, + "p95": 91.36000275611877, + "p99": 94.4959968328476 + }, + "roundtrip": { + "p50": 2140.160083770752, + "p90": 2142.6239013671875, + "p95": 2143.7759399414062, + "p99": 2148.7998962402344 + }, + "isolatedSum": { + "p50": 141.50400459766388, + "p90": 145.50400152802467, + "p95": 148.12800288200378, + "p99": 155.42399510741234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-070b3105", + "identity": "h100|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_3514e1c5", + "comparisonKey": 
"91f570988565de58", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:00.420132+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.18400174379349, + "p90": 98.01600128412247, + "p95": 99.67999905347824, + "p99": 
103.10400277376175 + }, + "combine": { + "p50": 87.99999952316284, + "p90": 89.9839997291565, + "p95": 90.33600240945816, + "p99": 95.87199985980988 + }, + "roundtrip": { + "p50": 158.87999534606934, + "p90": 163.16799819469452, + "p95": 164.41600024700165, + "p99": 168.19199919700623 + }, + "isolatedSum": { + "p50": 181.18400126695633, + "p90": 188.00000101327896, + "p95": 190.0160014629364, + "p99": 198.97600263357162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 109.72800105810165, + "p90": 114.49600011110306, + "p95": 116.67200177907944, + "p99": 122.46400117874146 + }, + "combine": { + "p50": 113.34399878978729, + "p90": 115.55200070142746, + "p95": 120.31999975442886, + "p99": 122.46400117874146 + }, + "roundtrip": { + "p50": 200.47999918460846, + "p90": 204.22400534152985, + "p95": 205.47200739383698, + "p99": 212.3199999332428 + }, + "isolatedSum": { + "p50": 223.07199984788895, + "p90": 230.04800081253052, + "p95": 236.9920015335083, + "p99": 244.9280023574829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 152.28800475597382, + "p90": 157.56799280643463, + "p95": 158.24000537395477, + "p99": 160.47999262809753 + }, + "combine": { + "p50": 170.33599317073822, + "p90": 173.92000555992126, + "p95": 175.1679927110672, + "p99": 180.03199994564056 + }, + "roundtrip": { + "p50": 287.87198662757874, + "p90": 293.7600016593933, + "p95": 295.199990272522, + "p99": 299.48800802230835 + }, + "isolatedSum": { + "p50": 
322.62399792671204, + "p90": 331.4879983663559, + "p95": 333.407998085022, + "p99": 340.5119925737381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 218.4319943189621, + "p90": 221.98399901390076, + "p95": 224.2559939622879, + "p99": 234.68799889087677 + }, + "combine": { + "p50": 275.39199590682983, + "p90": 281.5040051937103, + "p95": 282.71999955177307, + "p99": 285.18399596214294 + }, + "roundtrip": { + "p50": 467.9040014743805, + "p90": 474.62400794029236, + "p95": 475.96800327301025, + "p99": 479.2320132255554 + }, + "isolatedSum": { + "p50": 493.82399022579193, + "p90": 503.4880042076111, + "p95": 506.975993514061, + "p99": 519.8719948530197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 357.5359880924225, + "p90": 371.36000394821167, + "p95": 373.4079897403717, + "p99": 378.464013338089 + }, + "combine": { + "p50": 469.05601024627686, + "p90": 477.5039851665497, + "p95": 480.1279902458191, + "p99": 483.7760031223297 + }, + "roundtrip": { + "p50": 794.8799729347229, + "p90": 802.7200102806091, + "p95": 806.1760067939758, + "p99": 816.7359828948975 + }, + "isolatedSum": { + "p50": 826.5919983386993, + "p90": 848.8639891147614, + "p95": 853.5359799861908, + "p99": 862.2400164604187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 639.0720009803772, + "p90": 646.5920209884644, + "p95": 648.639976978302, + "p99": 656.607985496521 + }, + "combine": { + "p50": 847.9359745979309, + "p90": 855.6159734725952, + "p95": 858.3359718322754, + "p99": 862.5280261039734 + }, + "roundtrip": { + "p50": 1463.6800289154053, + "p90": 1475.551962852478, + "p95": 1478.7520170211792, + "p99": 1484.0960502624512 + }, + "isolatedSum": { + "p50": 1487.007975578308, + "p90": 1502.2079944610596, + "p95": 1506.9759488105774, + "p99": 1519.1360116004944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e0a10fb", + "identity": "h100|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_3514e1c5", + "comparisonKey": "25e98e4d3ca0144e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:25.799075+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.58399897813797, + "p90": 107.68000036478043, + "p95": 108.76800119876862, + "p99": 113.43999952077866 + }, + "combine": { + "p50": 91.20000153779984, + "p90": 97.18400239944458, + "p95": 97.79199957847595, + "p99": 98.65599870681763 + }, + "roundtrip": { + "p50": 170.04799842834473, + "p90": 174.55999553203583, + "p95": 175.80799758434296, + "p99": 181.47200345993042 + }, + "isolatedSum": { + "p50": 194.7840005159378, + "p90": 204.864002764225, + "p95": 206.56000077724457, + "p99": 212.09599822759628 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 121.63200229406357, + "p90": 126.3359934091568, + "p95": 128.51199507713318, + "p99": 131.071999669075 + }, + "combine": { + "p50": 123.32800030708313, + "p90": 128.54400277137756, 
+ "p95": 129.85600531101227, + "p99": 131.55199587345123 + }, + "roundtrip": { + "p50": 220.32000124454498, + "p90": 223.29600155353546, + "p95": 224.57599639892578, + "p99": 230.9119999408722 + }, + "isolatedSum": { + "p50": 244.9600026011467, + "p90": 254.87999618053436, + "p95": 258.36800038814545, + "p99": 262.62399554252625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 164.60800170898438, + "p90": 168.06399822235107, + "p95": 169.11999881267548, + "p99": 172.38399386405945 + }, + "combine": { + "p50": 190.36799669265747, + "p90": 195.2960044145584, + "p95": 197.1839964389801, + "p99": 201.4400064945221 + }, + "roundtrip": { + "p50": 326.84800028800964, + "p90": 331.4560055732727, + "p95": 333.0560028553009, + "p99": 335.35999059677124 + }, + "isolatedSum": { + "p50": 354.97599840164185, + "p90": 363.3600026369095, + "p95": 366.3039952516556, + "p99": 373.82400035858154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 242.5280064344406, + "p90": 247.51999974250793, + "p95": 249.7279942035675, + "p99": 253.88801097869873 + }, + "combine": { + "p50": 298.0160117149353, + "p90": 303.3280074596405, + "p95": 304.86398935317993, + "p99": 310.68798899650574 + }, + "roundtrip": { + "p50": 515.6800150871277, + "p90": 522.3039984703064, + "p95": 525.2159833908081, + "p99": 534.0160131454468 + }, + "isolatedSum": { + "p50": 540.5440181493759, + "p90": 550.8480072021484, + "p95": 554.5919835567474, + "p99": 564.5759999752045 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 413.567990064621, + "p90": 422.0159947872162, + "p95": 424.9280095100403, + "p99": 629.5040249824524 + }, + "combine": { + "p50": 511.680006980896, + "p90": 519.4560289382935, + "p95": 521.1520195007324, + "p99": 566.2720203399658 + }, + "roundtrip": { + "p50": 899.071991443634, + "p90": 907.7119827270508, + "p95": 910.7840061187744, + "p99": 919.2320108413696 + }, + "isolatedSum": { + "p50": 925.247997045517, + "p90": 941.4720237255096, + "p95": 946.0800290107727, + "p99": 1195.7760453224182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 746.1119890213013, + "p90": 760.479986667633, + "p95": 764.2880082130432, + "p99": 773.1199860572815 + }, + "combine": { + "p50": 935.2319836616516, + "p90": 944.383978843689, + "p95": 947.1039772033691, + "p99": 953.0559778213501 + }, + "roundtrip": { + "p50": 1648.2880115509033, + "p90": 1661.5359783172607, + "p95": 1665.3759479522705, + "p99": 1673.9519834518433 + }, + "isolatedSum": { + "p50": 1681.3439726829529, + "p90": 1704.863965511322, + "p95": 1711.3919854164124, + "p99": 1726.1759638786316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bdb6c78d", + "identity": 
"h100|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_3514e1c5", + "comparisonKey": "547c78b118144bdc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:54.773649+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ 
+ { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.2879975438118, + "p90": 112.64000087976456, + "p95": 113.92000317573547, + "p99": 118.17599833011627 + }, + "combine": { + "p50": 98.30400347709656, + "p90": 104.19200360774994, + "p95": 105.69600015878677, + "p99": 114.62400108575821 + }, + "roundtrip": { + "p50": 183.20000171661377, + "p90": 187.55200505256653, + "p95": 188.31999599933624, + "p99": 193.53599846363068 + }, + "isolatedSum": { + "p50": 206.59200102090836, + "p90": 216.8320044875145, + "p95": 219.61600333452225, + "p99": 232.79999941587448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 131.55199587345123, + "p90": 136.9599997997284, + "p95": 139.8400068283081, + "p99": 155.68000078201294 + }, + "combine": { + "p50": 136.83199882507324, + "p90": 138.72000575065613, + "p95": 139.26400244235992, + "p99": 142.5279974937439 + }, + "roundtrip": { + "p50": 239.07199501991272, + "p90": 243.45600605010986, + "p95": 245.66400051116943, + "p99": 252.86400318145752 + }, + "isolatedSum": { + "p50": 268.3839946985245, + "p90": 275.6800055503845, + "p95": 279.10400927066803, + "p99": 298.20799827575684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 180.4800033569336, + "p90": 184.28799510002136, + "p95": 185.40799617767334, + "p99": 188.38399648666382 + }, + "combine": { + "p50": 208.48000049591064, + "p90": 212.8639966249466, + "p95": 215.32799303531647, + "p99": 217.15199947357178 + }, + 
"roundtrip": { + "p50": 361.6960048675537, + "p90": 366.7199909687042, + "p95": 368.5440123081207, + "p99": 370.4319894313812 + }, + "isolatedSum": { + "p50": 388.96000385284424, + "p90": 397.15199172496796, + "p95": 400.7359892129898, + "p99": 405.5359959602356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 270.81599831581116, + "p90": 275.29600262641907, + "p95": 276.92800760269165, + "p99": 281.823992729187 + }, + "combine": { + "p50": 330.55999875068665, + "p90": 336.67200803756714, + "p95": 338.01600337028503, + "p99": 341.91998839378357 + }, + "roundtrip": { + "p50": 575.5519866943359, + "p90": 581.0880064964294, + "p95": 582.9439759254456, + "p99": 594.5280194282532 + }, + "isolatedSum": { + "p50": 601.3759970664978, + "p90": 611.9680106639862, + "p95": 614.9440109729767, + "p99": 623.7439811229706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 463.1359875202179, + "p90": 471.42401337623596, + "p95": 473.88800978660583, + "p99": 479.8080027103424 + }, + "combine": { + "p50": 577.6960253715515, + "p90": 584.2880010604858, + "p95": 586.8480205535889, + "p99": 589.9519920349121 + }, + "roundtrip": { + "p50": 1015.328049659729, + "p90": 1023.2959985733032, + "p95": 1026.3999700546265, + "p99": 1031.4240455627441 + }, + "isolatedSum": { + "p50": 1040.8320128917694, + "p90": 1055.7120144367218, + "p95": 1060.7360303401947, + "p99": 1069.7599947452545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, 
+ "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 868.3199882507324, + "p90": 894.6239948272705, + "p95": 902.2079706192017, + "p99": 917.3439741134644 + }, + "combine": { + "p50": 1059.6799850463867, + "p90": 1067.4879550933838, + "p95": 1069.85604763031, + "p99": 1074.6560096740723 + }, + "roundtrip": { + "p50": 1892.7680253982544, + "p90": 1914.1119718551636, + "p95": 1919.935941696167, + "p99": 1940.224051475525 + }, + "isolatedSum": { + "p50": 1927.9999732971191, + "p90": 1962.1119499206543, + "p95": 1972.0640182495117, + "p99": 1991.9999837875366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cdc50de8", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_e08c0fcd", + "comparisonKey": "e30d720b8c20c3ec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:15.810555+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 109.69600081443787, + "p90": 113.69600147008896, + "p95": 115.68000167608261, + "p99": 120.51200121641159 + }, + "combine": { + "p50": 105.66399991512299, + "p90": 107.10400342941284, + "p95": 109.63200032711029, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 192.06400215625763, + "p90": 198.17599654197693, + "p95": 199.072003364563, + "p99": 201.82399451732635 + }, + "isolatedSum": { + "p50": 215.36000072956085, + "p90": 220.8000048995018, + "p95": 225.3120020031929, + "p99": 233.47200453281403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
141.59999787807465, + "p90": 146.17599546909332, + "p95": 147.16799557209015, + "p99": 148.80000054836273 + }, + "combine": { + "p50": 149.31200444698334, + "p90": 153.6320000886917, + "p95": 154.4640064239502, + "p99": 156.3519984483719 + }, + "roundtrip": { + "p50": 262.5280022621155, + "p90": 265.79201221466064, + "p95": 266.81599020957947, + "p99": 269.9519991874695 + }, + "isolatedSum": { + "p50": 290.912002325058, + "p90": 299.80799555778503, + "p95": 301.63200199604034, + "p99": 305.1519989967346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 196.3520050048828, + "p90": 200.95999538898468, + "p95": 202.07999646663666, + "p99": 204.0960043668747 + }, + "combine": { + "p50": 229.50400412082672, + "p90": 235.20000278949738, + "p95": 236.12800240516663, + "p99": 239.26399648189545 + }, + "roundtrip": { + "p50": 400.89601278305054, + "p90": 405.37598729133606, + "p95": 406.97601437568665, + "p99": 410.43201088905334 + }, + "isolatedSum": { + "p50": 425.85600912570953, + "p90": 436.15999817848206, + "p95": 438.2079988718033, + "p99": 443.36000084877014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 300.6719946861267, + "p90": 305.08801341056824, + "p95": 306.5280020236969, + "p99": 311.74400448799133 + }, + "combine": { + "p50": 364.4160032272339, + "p90": 369.53601241111755, + "p95": 372.3840117454529, + "p99": 375.7759928703308 + }, + "roundtrip": { + "p50": 639.3920183181763, + "p90": 645.6000208854675, + "p95": 
647.6799845695496, + "p99": 652.4159908294678 + }, + "isolatedSum": { + "p50": 665.0879979133606, + "p90": 674.6240258216858, + "p95": 678.9120137691498, + "p99": 687.5199973583221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 530.1439762115479, + "p90": 538.3679866790771, + "p95": 540.8639907836914, + "p99": 545.9520220756531 + }, + "combine": { + "p50": 635.3600025177002, + "p90": 642.304003238678, + "p95": 644.9599862098694, + "p99": 649.0240097045898 + }, + "roundtrip": { + "p50": 1139.5519971847534, + "p90": 1149.1520404815674, + "p95": 1152.7680158615112, + "p99": 1165.2799844741821 + }, + "isolatedSum": { + "p50": 1165.503978729248, + "p90": 1180.6719899177551, + "p95": 1185.8239769935608, + "p99": 1194.976031780243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.1759705543518, + "p90": 1019.3920135498047, + "p95": 1025.7920026779175, + "p99": 1050.2079725265503 + }, + "combine": { + "p50": 1165.4080152511597, + "p90": 1175.0080585479736, + "p95": 1179.0080070495605, + "p99": 1184.3199729919434 + }, + "roundtrip": { + "p50": 2121.3760375976562, + "p90": 2140.223979949951, + "p95": 2144.9599266052246, + "p99": 2152.992010116577 + }, + "isolatedSum": { + "p50": 2159.5839858055115, + "p90": 2194.4000720977783, + "p95": 2204.800009727478, + "p99": 2234.5279455184937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + 
"recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-04a06d2c", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_3514e1c5", + "comparisonKey": "aec63ef0f5fc00e5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:40.751390+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.55999994277954, + "p90": 115.23199826478958, + "p95": 117.27999895811081, + "p99": 122.40000069141388 + }, + "combine": { + "p50": 106.11200332641602, + "p90": 107.04000294208527, + "p95": 111.07199639081955, + "p99": 112.5440001487732 + }, + "roundtrip": { + "p50": 193.66399943828583, + "p90": 198.59200716018677, + "p95": 200.6080001592636, + "p99": 205.4080069065094 + }, + "isolatedSum": { + "p50": 216.67200326919556, + "p90": 222.27200120687485, + "p95": 228.35199534893036, + "p99": 234.94400084018707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 142.91200041770935, + "p90": 147.13600277900696, + "p95": 148.6400067806244, + "p99": 153.6960005760193 + }, + "combine": { + "p50": 149.4400054216385, + "p90": 154.01600301265717, + "p95": 154.94400262832642, + "p99": 157.3439985513687 + }, + "roundtrip": { + "p50": 262.11199164390564, + "p90": 266.6879892349243, + "p95": 268.0320143699646, + "p99": 271.232008934021 + }, + "isolatedSum": { + "p50": 292.35200583934784, + "p90": 301.1520057916641, + "p95": 303.5840094089508, + "p99": 311.039999127388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 195.360004901886, + "p90": 199.48799908161163, + "p95": 201.27999782562256, + 
"p99": 204.48000729084015 + }, + "combine": { + "p50": 230.27199506759644, + "p90": 235.87200045585632, + "p95": 236.9920015335083, + "p99": 239.45599794387817 + }, + "roundtrip": { + "p50": 400.9599983692169, + "p90": 406.0479998588562, + "p95": 407.61598944664, + "p99": 412.80001401901245 + }, + "isolatedSum": { + "p50": 425.6319999694824, + "p90": 435.35999953746796, + "p95": 438.27199935913086, + "p99": 443.9360052347183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 301.15199089050293, + "p90": 306.40000104904175, + "p95": 308.22399258613586, + "p99": 313.6959969997406 + }, + "combine": { + "p50": 363.99999260902405, + "p90": 370.5599904060364, + "p95": 372.73600697517395, + "p99": 407.3280096054077 + }, + "roundtrip": { + "p50": 639.7119760513306, + "p90": 646.0480093955994, + "p95": 647.9679942131042, + "p99": 652.7360081672668 + }, + "isolatedSum": { + "p50": 665.151983499527, + "p90": 676.9599914550781, + "p95": 680.9599995613098, + "p99": 721.0240066051483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 530.0800204277039, + "p90": 538.2400155067444, + "p95": 541.2480235099792, + "p99": 545.2479720115662 + }, + "combine": { + "p50": 634.8479986190796, + "p90": 642.911970615387, + "p95": 644.864022731781, + "p99": 648.4479904174805 + }, + "roundtrip": { + "p50": 1139.1040086746216, + "p90": 1148.9280462265015, + "p95": 1153.3119678497314, + "p99": 1158.3679914474487 + }, + "isolatedSum": { + "p50": 
1164.9280190467834, + "p90": 1181.1519861221313, + "p95": 1186.1120462417603, + "p99": 1193.6959624290466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.1120147705078, + "p90": 1021.3760137557983, + "p95": 1030.7519435882568, + "p99": 1078.1760215759277 + }, + "combine": { + "p50": 1165.2159690856934, + "p90": 1174.4639873504639, + "p95": 1179.3279647827148, + "p99": 1186.079978942871 + }, + "roundtrip": { + "p50": 2119.9679374694824, + "p90": 2137.3438835144043, + "p95": 2140.415906906128, + "p99": 2161.151885986328 + }, + "isolatedSum": { + "p50": 2159.327983856201, + "p90": 2195.840001106262, + "p95": 2210.0799083709717, + "p99": 2264.256000518799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2220a97", + "identity": "h100|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_3514e1c5", + "comparisonKey": "52185211b58dd4e5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:37.017763+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16", + "model": 
"Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.24799805879593, + "p90": 118.46400052309036, + "p95": 121.21599912643433, + "p99": 138.40000331401825 + }, + "combine": { + "p50": 106.49599879980087, + "p90": 111.455999314785, + "p95": 112.28799819946289, + "p99": 114.84800279140472 + }, + "roundtrip": { + "p50": 195.90400159358978, + "p90": 201.9519954919815, + "p95": 202.78400182724, + "p99": 206.7839950323105 + }, + "isolatedSum": { + "p50": 219.7439968585968, + "p90": 229.91999983787537, + "p95": 233.50399732589722, + "p99": 253.24800610542297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, 
+ "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.5520052909851, + "p90": 147.90399372577667, + "p95": 149.72800016403198, + "p99": 152.319997549057 + }, + "combine": { + "p50": 151.7760008573532, + "p90": 155.32800555229187, + "p95": 156.00000321865082, + "p99": 159.04000401496887 + }, + "roundtrip": { + "p50": 266.52801036834717, + "p90": 271.7440128326416, + "p95": 273.0880081653595, + "p99": 516.0319805145264 + }, + "isolatedSum": { + "p50": 295.3280061483383, + "p90": 303.23199927806854, + "p95": 305.7280033826828, + "p99": 311.3600015640259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 199.0399956703186, + "p90": 203.45599949359894, + "p95": 204.83200252056122, + "p99": 207.96799659729004 + }, + "combine": { + "p50": 229.08799350261688, + "p90": 232.83199965953827, + "p95": 235.20000278949738, + "p99": 238.8480007648468 + }, + "roundtrip": { + "p50": 402.6240110397339, + "p90": 408.06400775909424, + "p95": 410.17600893974304, + "p99": 416.76801443099976 + }, + "isolatedSum": { + "p50": 428.1279891729355, + "p90": 436.2879991531372, + "p95": 440.0320053100586, + "p99": 446.81599736213684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.5840094089508, + "p90": 308.25600028038025, + "p95": 309.7600042819977, + "p99": 311.8720054626465 + }, + "combine": { + "p50": 365.7279908657074, + "p90": 
370.84800004959106, + "p95": 372.96000123023987, + "p99": 375.0720024108887 + }, + "roundtrip": { + "p50": 643.9039707183838, + "p90": 649.6959924697876, + "p95": 652.2560119628906, + "p99": 656.3519835472107 + }, + "isolatedSum": { + "p50": 669.3120002746582, + "p90": 679.1040003299713, + "p95": 682.7200055122375, + "p99": 686.9440078735352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 520.7359790802002, + "p90": 529.695987701416, + "p95": 533.2800149917603, + "p99": 537.9199981689453 + }, + "combine": { + "p50": 632.5759887695312, + "p90": 640.1600241661072, + "p95": 643.4879899024963, + "p99": 649.6319770812988 + }, + "roundtrip": { + "p50": 1125.440001487732, + "p90": 1135.200023651123, + "p95": 1137.8560066223145, + "p99": 1145.0560092926025 + }, + "isolatedSum": { + "p50": 1153.3119678497314, + "p90": 1169.8560118675232, + "p95": 1176.7680048942566, + "p99": 1187.5519752502441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1001.5679597854614, + "p90": 1026.4960527420044, + "p95": 1032.256007194519, + "p99": 1042.5599813461304 + }, + "combine": { + "p50": 1154.528021812439, + "p90": 1162.9120111465454, + "p95": 1165.7600402832031, + "p99": 1171.7760562896729 + }, + "roundtrip": { + "p50": 2114.8478984832764, + "p90": 2138.9760971069336, + "p95": 2146.559953689575, + "p99": 2333.8561058044434 + }, + "isolatedSum": { + "p50": 2156.0959815979004, + "p90": 2189.40806388855, + "p95": 2198.016047477722, + "p99": 
2214.336037635803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0ac6c6eb", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_f3bcaf4f", + "comparisonKey": "aea3fa702d4eedb8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:39.244098+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.4160017967224, + "p90": 132.54399597644806, + "p95": 133.85599851608276, + "p99": 137.85600662231445 + }, + "combine": { + "p50": 127.51999497413635, + "p90": 131.04000687599182, + "p95": 132.57600367069244, + "p99": 138.72000575065613 + }, + "roundtrip": { + "p50": 229.50400412082672, + "p90": 233.8239997625351, + "p95": 234.9119931459427, + "p99": 237.18400299549103 + }, + "isolatedSum": { + "p50": 255.93599677085876, + "p90": 263.5840028524399, + "p95": 266.4320021867752, + "p99": 276.5760123729706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 178.30400168895721, + "p90": 193.27999651432037, + "p95": 194.62400674819946, + "p99": 196.99199497699738 + }, + "combine": { + "p50": 182.78400599956512, + "p90": 186.0159933567047, + "p95": 189.88800048828125, + "p99": 193.27999651432037 + }, + "roundtrip": { + "p50": 330.4319977760315, + "p90": 343.423992395401, + "p95": 345.08800506591797, + "p99": 360.25598645210266 + }, + "isolatedSum": { + "p50": 361.08800768852234, + "p90": 379.2959898710251, + "p95": 384.5120072364807, + "p99": 390.27199149131775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 269.9199914932251, + "p90": 283.07199478149414, + "p95": 285.6000065803528, + "p99": 287.9039943218231 + }, + "combine": { + "p50": 275.7120132446289, + "p90": 282.27201104164124, + "p95": 284.0319871902466, + "p99": 286.4319980144501 + }, + "roundtrip": { + "p50": 517.952024936676, + "p90": 530.1439762115479, + "p95": 531.8719744682312, + "p99": 535.0080132484436 + }, + "isolatedSum": { + "p50": 545.632004737854, + "p90": 565.3440058231354, + "p95": 569.6319937705994, + "p99": 574.3359923362732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 450.8480131626129, + "p90": 463.00798654556274, + "p95": 465.63199162483215, + "p99": 469.02400255203247 + }, + "combine": { + "p50": 469.2800045013428, + "p90": 476.6719937324524, + "p95": 478.7519872188568, + "p99": 484.8639965057373 + }, + "roundtrip": { + "p50": 891.3599848747253, + "p90": 904.5439958572388, + "p95": 907.5520038604736, + "p99": 914.6559834480286 + }, + "isolatedSum": { + "p50": 920.1280176639557, + "p90": 939.6799802780151, + "p95": 944.383978843689, + "p99": 953.8879990577698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 811.2319707870483, + "p90": 827.072024345398, + "p95": 829.4079899787903, + "p99": 836.0639810562134 + }, + "combine": { + "p50": 855.2320003509521, + "p90": 862.7520203590393, + "p95": 866.1440014839172, + "p99": 870.9120154380798 + }, + "roundtrip": { + "p50": 1636.7679834365845, + "p90": 
1649.440050125122, + "p95": 1652.0320177078247, + "p99": 1661.56804561615 + }, + "isolatedSum": { + "p50": 1666.4639711380005, + "p90": 1689.8240447044373, + "p95": 1695.5519914627075, + "p99": 1706.9759964942932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1540.6080484390259, + "p90": 1549.5680570602417, + "p95": 1552.0000457763672, + "p99": 1557.1520328521729 + }, + "combine": { + "p50": 1598.207950592041, + "p90": 1606.0160398483276, + "p95": 1609.1840267181396, + "p99": 1615.3919696807861 + }, + "roundtrip": { + "p50": 3117.3439025878906, + "p90": 3126.944065093994, + "p95": 3132.4799060821533, + "p99": 3175.2960681915283 + }, + "isolatedSum": { + "p50": 3138.815999031067, + "p90": 3155.5840969085693, + "p95": 3161.184072494507, + "p99": 3172.544002532959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6801056f", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_5dd7acd6", + "comparisonKey": "f045b60f606c95ff", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:13.131093+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 99.67999905347824, + "p90": 103.35999727249146, + "p95": 104.47999835014343, + "p99": 109.79200154542923 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 87.5839963555336, + "p95": 88.19200098514557, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 153.02400290966034, + "p90": 159.96800363063812, + "p95": 160.89600324630737, + "p99": 165.40800034999847 + }, + "isolatedSum": { + "p50": 180.1920011639595, + "p90": 190.94399362802505, + "p95": 192.671999335289, + "p99": 199.13600385189056 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 120.12799829244614, + "p90": 126.94400548934937, + "p95": 128.54400277137756, + "p99": 132.76800513267517 + }, + "combine": { + "p50": 128.38399410247803, + "p90": 131.00799918174744, + "p95": 138.3039951324463, + "p99": 310.3039860725403 + }, + "roundtrip": { + "p50": 212.5760018825531, + "p90": 217.3440009355545, + "p95": 218.87999773025513, + "p99": 223.03999960422516 + }, + "isolatedSum": { + "p50": 248.51199239492416, + "p90": 257.9520046710968, + "p95": 266.84799790382385, + "p99": 443.07199120521545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 200.6399929523468, + "p90": 207.45599269866943, + "p95": 209.1200053691864, + "p99": 212.96000480651855 + }, + "combine": { + "p50": 287.87198662757874, + "p90": 297.60000109672546, + "p95": 298.8159954547882, + "p99": 301.2160062789917 + }, + "roundtrip": { + "p50": 452.63999700546265, + "p90": 465.5359983444214, + "p95": 467.3280119895935, + "p99": 470.68798542022705 + }, + "isolatedSum": { + "p50": 488.51197957992554, + "p90": 505.0559937953949, + "p95": 507.9360008239746, + "p99": 514.1760110855103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1b2eabea", + "identity": 
"h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_6f8dc9ef", + "comparisonKey": "e6739f1faab3a3bf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:48.508227+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.52799707651138, + "p90": 98.94400089979172, + "p95": 101.1200025677681, + "p99": 105.12000322341919 + }, + "combine": { + "p50": 82.62400329113007, + "p90": 88.06400001049042, + "p95": 89.05600011348724, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 156.67200088500977, + "p90": 161.79199516773224, + "p95": 165.3439998626709, + "p99": 183.07200074195862 + }, + "isolatedSum": { + "p50": 177.15200036764145, + "p90": 187.00800091028214, + "p95": 190.17600268125534, + "p99": 196.19200378656387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 120.89599668979645, + "p90": 125.2799928188324, + "p95": 136.1279934644699, + "p99": 148.76799285411835 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 114.9120032787323, + "p95": 116.15999788045883, + "p99": 121.63200229406357 + }, + "roundtrip": { + "p50": 200.76799392700195, + "p90": 214.36800062656403, + "p95": 218.52800250053406, + "p99": 220.86399793624878 + }, + "isolatedSum": { + "p50": 227.1679937839508, + "p90": 240.1919960975647, + "p95": 252.28799134492874, + "p99": 270.3999951481819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 150.07999539375305, + "p90": 155.4879993200302, + "p95": 156.99200332164764, + "p99": 162.20800578594208 + }, + "combine": { + "p50": 144.99199390411377, + "p90": 148.19200336933136, + "p95": 152.38399803638458, + "p99": 
153.82400155067444 + }, + "roundtrip": { + "p50": 266.975998878479, + "p90": 279.1360020637512, + "p95": 282.4319899082184, + "p99": 288.35201263427734 + }, + "isolatedSum": { + "p50": 295.0719892978668, + "p90": 303.6800026893616, + "p95": 309.3760013580322, + "p99": 316.0320073366165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 207.7759951353073, + "p90": 213.50400149822235, + "p95": 215.32799303531647, + "p99": 218.59200298786163 + }, + "combine": { + "p50": 219.93599832057953, + "p90": 224.09600019454956, + "p95": 226.68799757957458, + "p99": 236.89599335193634 + }, + "roundtrip": { + "p50": 405.023992061615, + "p90": 410.1119935512543, + "p95": 411.8080139160156, + "p99": 414.0479862689972 + }, + "isolatedSum": { + "p50": 427.71199345588684, + "p90": 437.6000016927719, + "p95": 442.01599061489105, + "p99": 455.487996339798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 332.0640027523041, + "p90": 341.8880105018616, + "p95": 345.3119993209839, + "p99": 352.8960049152374 + }, + "combine": { + "p50": 367.64800548553467, + "p90": 374.39998984336853, + "p95": 377.56800651550293, + "p99": 383.2319974899292 + }, + "roundtrip": { + "p50": 671.3280081748962, + "p90": 680.6079745292664, + "p95": 684.1279864311218, + "p99": 691.8399930000305 + }, + "isolatedSum": { + "p50": 699.7120082378387, + "p90": 716.2880003452301, + "p95": 722.8800058364868, + "p99": 736.1280024051666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + 
"combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 580.8320045471191, + "p90": 589.6959900856018, + "p95": 592.5440192222595, + "p99": 598.8159775733948 + }, + "combine": { + "p50": 647.9679942131042, + "p90": 656.544029712677, + "p95": 660.0639820098877, + "p99": 667.5199866294861 + }, + "roundtrip": { + "p50": 1205.3120136260986, + "p90": 1214.0799760818481, + "p95": 1215.9039974212646, + "p99": 1222.9119539260864 + }, + "isolatedSum": { + "p50": 1228.7999987602234, + "p90": 1246.2400197982788, + "p95": 1252.6080012321472, + "p99": 1266.3359642028809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c3e14a83", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h100_4d411159", + "comparisonKey": "a1b735cb0ba49c78", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:24.815036+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": 
"none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.80799651145935, + "p90": 128.31999361515045, + "p95": 131.9040060043335, + "p99": 133.95200669765472 + }, + "combine": { + "p50": 116.28799885511398, + "p90": 121.0239976644516, + "p95": 122.20799922943115, + "p99": 123.77600371837616 + }, + "roundtrip": { + "p50": 216.73600375652313, + "p90": 221.82400524616241, + "p95": 222.71999716758728, + "p99": 226.97600722312927 + }, + "isolatedSum": { + "p50": 240.09599536657333, + "p90": 249.34399127960205, + "p95": 254.11200523376465, + "p99": 257.7280104160309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
164.2560064792633, + "p90": 170.68800330162048, + "p95": 171.83999717235565, + "p99": 174.3679940700531 + }, + "combine": { + "p50": 172.28800058364868, + "p90": 176.09600722789764, + "p95": 177.824005484581, + "p99": 180.63999712467194 + }, + "roundtrip": { + "p50": 308.57598781585693, + "p90": 312.4159872531891, + "p95": 314.07999992370605, + "p99": 321.6640055179596 + }, + "isolatedSum": { + "p50": 336.544007062912, + "p90": 346.7840105295181, + "p95": 349.66400265693665, + "p99": 355.00799119472504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.85600066184998, + "p90": 243.74400079250336, + "p95": 245.53599953651428, + "p99": 248.51199984550476 + }, + "combine": { + "p50": 270.9439992904663, + "p90": 274.944007396698, + "p95": 276.38399600982666, + "p99": 280.0000011920929 + }, + "roundtrip": { + "p50": 485.79201102256775, + "p90": 490.27198553085327, + "p95": 492.7999973297119, + "p99": 495.6800043582916 + }, + "isolatedSum": { + "p50": 508.7999999523163, + "p90": 518.6880081892014, + "p95": 521.9199955463409, + "p99": 528.5120010375977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 390.0800049304962, + "p90": 396.35199308395386, + "p95": 398.8800048828125, + "p99": 402.5599956512451 + }, + "combine": { + "p50": 460.54399013519287, + "p90": 465.472012758255, + "p95": 467.20001101493835, + "p99": 471.42401337623596 + }, + "roundtrip": { + "p50": 826.528012752533, + "p90": 833.3759903907776, + "p95": 
835.3599905967712, + "p99": 838.591992855072 + }, + "isolatedSum": { + "p50": 850.6239950656891, + "p90": 861.8240058422089, + "p95": 866.0800158977509, + "p99": 873.9840090274811 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 715.9680128097534, + "p90": 724.2879867553711, + "p95": 727.1680235862732, + "p99": 731.4239740371704 + }, + "combine": { + "p50": 838.5279774665833, + "p90": 847.8400111198425, + "p95": 850.6240248680115, + "p99": 855.679988861084 + }, + "roundtrip": { + "p50": 1525.7279872894287, + "p90": 1535.4880094528198, + "p95": 1538.0159616470337, + "p99": 1542.8800582885742 + }, + "isolatedSum": { + "p50": 1554.4959902763367, + "p90": 1572.1279978752136, + "p95": 1577.7920484542847, + "p99": 1587.1039628982544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1380.511999130249, + "p90": 1393.183946609497, + "p95": 1396.7039585113525, + "p99": 1404.2240381240845 + }, + "combine": { + "p50": 1564.5760297775269, + "p90": 1572.8319883346558, + "p95": 1576.640009880066, + "p99": 1588.4159803390503 + }, + "roundtrip": { + "p50": 2917.248010635376, + "p90": 2926.8798828125, + "p95": 2930.2079677581787, + "p99": 2940.5438899993896 + }, + "isolatedSum": { + "p50": 2945.088028907776, + "p90": 2966.015934944153, + "p95": 2973.3439683914185, + "p99": 2992.6400184631348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, 
+ "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5ff1a12", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_d237e055", + "comparisonKey": "67a850f63f729849", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:50.747787+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.35999858379364, + "p90": 115.64800143241882, + "p95": 117.63200163841248, + "p99": 121.5360015630722 + }, + "combine": { + "p50": 106.36799782514572, + "p90": 111.96800321340561, + "p95": 112.44799941778183, + "p99": 114.43199962377548 + }, + "roundtrip": { + "p50": 198.40000569820404, + "p90": 203.23200523853302, + "p95": 205.1520049571991, + "p99": 217.53600239753723 + }, + "isolatedSum": { + "p50": 217.72799640893936, + "p90": 227.61600464582443, + "p95": 230.0800010561943, + "p99": 235.9680011868477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 140.44800400733948, + "p90": 145.56799829006195, + "p95": 147.2959965467453, + "p99": 151.2320041656494 + }, + "combine": { + "p50": 154.04799580574036, + "p90": 156.19200468063354, + "p95": 159.0079963207245, + "p99": 163.32800686359406 + }, + "roundtrip": { + "p50": 261.9839906692505, + "p90": 266.9439911842346, + "p95": 268.67198944091797, + "p99": 274.7200131416321 + }, + "isolatedSum": { + "p50": 294.49599981307983, + "p90": 301.7600029706955, + "p95": 306.3039928674698, + "p99": 314.56001102924347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { 
+ "p50": 199.96799528598785, + "p90": 204.0639966726303, + "p95": 205.4080069065094, + "p99": 209.9519968032837 + }, + "combine": { + "p50": 228.0000001192093, + "p90": 231.48800432682037, + "p95": 234.43199694156647, + "p99": 241.5039986371994 + }, + "roundtrip": { + "p50": 401.7280042171478, + "p90": 406.17600083351135, + "p95": 407.61598944664, + "p99": 411.42401099205017 + }, + "isolatedSum": { + "p50": 427.96799540519714, + "p90": 435.5520009994507, + "p95": 439.84000384807587, + "p99": 451.4559954404831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.3839931488037, + "p90": 309.85599756240845, + "p95": 311.2959861755371, + "p99": 316.48001074790955 + }, + "combine": { + "p50": 365.88799953460693, + "p90": 370.5280125141144, + "p95": 373.4079897403717, + "p99": 376.6080141067505 + }, + "roundtrip": { + "p50": 644.3520188331604, + "p90": 651.7120003700256, + "p95": 653.6639928817749, + "p99": 656.7680239677429 + }, + "isolatedSum": { + "p50": 670.2719926834106, + "p90": 680.3840100765228, + "p95": 684.7039759159088, + "p99": 693.08802485466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 521.5039849281311, + "p90": 529.9519896507263, + "p95": 532.8320264816284, + "p99": 540.4800176620483 + }, + "combine": { + "p50": 640.2559876441956, + "p90": 648.6080288887024, + "p95": 650.65598487854, + "p99": 654.0480256080627 + }, + "roundtrip": { + "p50": 1135.7760429382324, + "p90": 1145.9840536117554, + "p95": 
1149.6000289916992, + "p99": 1153.2479524612427 + }, + "isolatedSum": { + "p50": 1161.7599725723267, + "p90": 1178.5600185394287, + "p95": 1183.4880113601685, + "p99": 1194.528043270111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 983.7120175361633, + "p90": 1006.943941116333, + "p95": 1016.1279439926147, + "p99": 1036.352038383484 + }, + "combine": { + "p50": 1153.2479524612427, + "p90": 1162.6880168914795, + "p95": 1165.120005607605, + "p99": 1173.0560064315796 + }, + "roundtrip": { + "p50": 2099.071979522705, + "p90": 2116.28794670105, + "p95": 2120.3200817108154, + "p99": 2131.488084793091 + }, + "isolatedSum": { + "p50": 2136.959969997406, + "p90": 2169.6319580078125, + "p95": 2181.2479496002197, + "p99": 2209.4080448150635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-532e88a3", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_1687ef91", + "comparisonKey": "cf3185b7374a4bbf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:09.708403+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", 
+ "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.455999314785, + "p90": 116.73600226640701, + "p95": 119.29599940776825, + "p99": 121.8239963054657 + }, + "combine": { + "p50": 105.85600137710571, + "p90": 107.87200182676315, + "p95": 111.23199760913849, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 193.34399700164795, + "p90": 198.94400238990784, + "p95": 201.63199305534363, + "p99": 479.8400104045868 + }, + "isolatedSum": { + "p50": 217.31200069189072, + "p90": 224.60800409317017, + "p95": 230.52799701690674, + "p99": 235.07199436426163 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.7200070619583, + "p90": 149.88799393177032, + "p95": 152.6080071926117, + "p99": 176.7680048942566 + }, + "combine": { + "p50": 149.75999295711517, + "p90": 154.1759967803955, + "p95": 155.13600409030914, + "p99": 157.53600001335144 + }, + "roundtrip": { + "p50": 263.71198892593384, + "p90": 268.15998554229736, + "p95": 269.3760097026825, + "p99": 271.7759907245636 + }, + "isolatedSum": { + "p50": 296.4800000190735, + "p90": 304.06399071216583, + "p95": 307.74401128292084, + "p99": 334.30400490760803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 198.2399970293045, + "p90": 202.14399695396423, + "p95": 203.2960057258606, + "p99": 207.10399746894836 + }, + "combine": { + "p50": 230.71999847888947, + "p90": 236.54399812221527, + "p95": 237.5359982252121, + "p99": 240.38399755954742 + }, + "roundtrip": { + "p50": 402.3360013961792, + "p90": 407.45601058006287, + "p95": 409.15200114250183, + "p99": 411.6159975528717 + }, + "isolatedSum": { + "p50": 428.95999550819397, + "p90": 438.6879950761795, + "p95": 440.8320039510727, + "p99": 447.4879950284958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 301.9520044326782, 
+ "p90": 306.17600679397583, + "p95": 308.0959916114807, + "p99": 311.5519881248474 + }, + "combine": { + "p50": 366.87999963760376, + "p90": 373.82400035858154, + "p95": 375.10401010513306, + "p99": 376.8320083618164 + }, + "roundtrip": { + "p50": 643.0720090866089, + "p90": 650.4319906234741, + "p95": 652.4800062179565, + "p99": 657.9840183258057 + }, + "isolatedSum": { + "p50": 668.832004070282, + "p90": 680.0000071525574, + "p95": 683.2000017166138, + "p99": 688.3839964866638 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 537.1519923210144, + "p90": 545.1520085334778, + "p95": 548.2239723205566, + "p99": 553.9519786834717 + }, + "combine": { + "p50": 625.5679726600647, + "p90": 633.247971534729, + "p95": 636.2239718437195, + "p99": 641.4719820022583 + }, + "roundtrip": { + "p50": 1134.559988975525, + "p90": 1144.4480419158936, + "p95": 1146.8479633331299, + "p99": 1152.5440216064453 + }, + "isolatedSum": { + "p50": 1162.719964981079, + "p90": 1178.3999800682068, + "p95": 1184.4479441642761, + "p99": 1195.42396068573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1020.0639963150024, + "p90": 1047.968029975891, + "p95": 1055.13596534729, + "p99": 1074.463963508606 + }, + "combine": { + "p50": 1149.664044380188, + "p90": 1159.4879627227783, + "p95": 1162.559986114502, + "p99": 1167.5519943237305 + }, + "roundtrip": { + "p50": 2128.864049911499, + "p90": 2149.9199867248535, + "p95": 2155.168056488037, + "p99": 
2163.4879112243652 + }, + "isolatedSum": { + "p50": 2169.7280406951904, + "p90": 2207.4559926986694, + "p95": 2217.695951461792, + "p99": 2242.0159578323364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1d87f15c", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_63732aec", + "comparisonKey": "59a4eda8ee0f6bfd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:56.962782+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.81600451469421, + "p90": 134.14399325847626, + "p95": 137.79200613498688, + "p99": 151.36000514030457 + }, + "combine": { + "p50": 112.67200112342834, + "p90": 114.88000303506851, + "p95": 115.48800021409988, + "p99": 120.99199742078781 + }, + "roundtrip": { + "p50": 216.51199460029602, + "p90": 221.69600427150726, + "p95": 223.83999824523926, + "p99": 231.1680018901825 + }, + "isolatedSum": { + "p50": 239.48800563812256, + "p90": 249.02399629354477, + "p95": 253.28000634908676, + "p99": 272.3520025610924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.8880033493042, + "p90": 168.41599345207214, + "p95": 169.11999881267548, + "p99": 172.06400632858276 + }, + "combine": { + "p50": 164.8000031709671, + "p90": 169.8240041732788, + "p95": 172.63999581336975, + "p99": 194.4960057735443 + }, + "roundtrip": { + "p50": 299.4239926338196, + "p90": 303.8080036640167, + "p95": 304.8959970474243, + "p99": 308.6079955101013 + }, + "isolatedSum": { + "p50": 326.6880065202713, + "p90": 338.23999762535095, + "p95": 341.7599946260452, + "p99": 366.5600121021271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 
100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 236.80000007152557, + "p90": 241.28000438213348, + "p95": 242.08000302314758, + "p99": 244.57600712776184 + }, + "combine": { + "p50": 263.90400528907776, + "p90": 268.0000066757202, + "p95": 269.6000039577484, + "p99": 273.6319899559021 + }, + "roundtrip": { + "p50": 475.39201378822327, + "p90": 479.45600748062134, + "p95": 481.0880124568939, + "p99": 485.0879907608032 + }, + "isolatedSum": { + "p50": 500.70400536060333, + "p90": 509.2800110578537, + "p95": 511.680006980896, + "p99": 518.2079970836639 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 376.73598527908325, + "p90": 383.04001092910767, + "p95": 384.768009185791, + "p99": 389.21600580215454 + }, + "combine": { + "p50": 446.7200040817261, + "p90": 452.1920084953308, + "p95": 453.18400859832764, + "p99": 456.86399936676025 + }, + "roundtrip": { + "p50": 798.3999848365784, + "p90": 804.0639758110046, + "p95": 805.6640028953552, + "p99": 809.6640110015869 + }, + "isolatedSum": { + "p50": 823.4559893608093, + "p90": 835.2320194244385, + "p95": 837.9520177841187, + "p99": 846.0800051689148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 672.5760102272034, + "p90": 682.59197473526, + "p95": 685.2480173110962, + "p99": 690.0799870491028 + }, + 
"combine": { + "p50": 815.392017364502, + "p90": 822.4319815635681, + "p95": 824.3520259857178, + "p99": 830.5919766426086 + }, + "roundtrip": { + "p50": 1458.9760303497314, + "p90": 1468.127965927124, + "p95": 1470.52800655365, + "p99": 1477.6959419250488 + }, + "isolatedSum": { + "p50": 1487.9680275917053, + "p90": 1505.0239562988281, + "p95": 1509.600043296814, + "p99": 1520.6719636917114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1274.783968925476, + "p90": 1287.3599529266357, + "p95": 1290.4640436172485, + "p99": 1300.8639812469482 + }, + "combine": { + "p50": 1537.9199981689453, + "p90": 1546.8480587005615, + "p95": 1550.4640340805054, + "p99": 1561.951994895935 + }, + "roundtrip": { + "p50": 2785.6318950653076, + "p90": 2797.2159385681152, + "p95": 2802.3040294647217, + "p99": 2806.78391456604 + }, + "isolatedSum": { + "p50": 2812.7039670944214, + "p90": 2834.2080116271973, + "p95": 2840.928077697754, + "p99": 2862.8159761428833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-75c98e1e", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_3c21fbbe", + "comparisonKey": "3dbbcfe00b9f5bd5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:40.384126+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.08799749612808, + "p90": 119.77600306272507, + "p95": 121.5360015630722, + "p99": 125.56800246238708 + }, + "combine": { + "p50": 106.88000172376633, + "p90": 112.35199868679047, + "p95": 113.21599781513214, + "p99": 115.80800265073776 + }, + "roundtrip": { + "p50": 206.40000700950623, + "p90": 210.91200411319733, + "p95": 212.0320051908493, + 
"p99": 215.80800414085388 + }, + "isolatedSum": { + "p50": 223.9679992198944, + "p90": 232.12800174951553, + "p95": 234.75199937820435, + "p99": 241.37600511312485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 160.89600324630737, + "p90": 167.77600347995758, + "p95": 168.67199540138245, + "p99": 173.72800409793854 + }, + "combine": { + "p50": 153.9520025253296, + "p90": 156.19200468063354, + "p95": 156.95999562740326, + "p99": 169.44000124931335 + }, + "roundtrip": { + "p50": 289.08801078796387, + "p90": 293.0240035057068, + "p95": 295.199990272522, + "p99": 302.0159900188446 + }, + "isolatedSum": { + "p50": 314.84800577163696, + "p90": 323.9680081605911, + "p95": 325.6319910287857, + "p99": 343.1680053472519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.63999819755554, + "p90": 238.49600553512573, + "p95": 239.6479994058609, + "p99": 244.83199417591095 + }, + "combine": { + "p50": 251.90401077270508, + "p90": 255.36000728607178, + "p95": 259.00799036026, + "p99": 262.1760070323944 + }, + "roundtrip": { + "p50": 459.3279957771301, + "p90": 464.352011680603, + "p95": 465.5359983444214, + "p99": 468.32001209259033 + }, + "isolatedSum": { + "p50": 484.5440089702606, + "p90": 493.8560128211975, + "p95": 498.6559897661209, + "p99": 507.00800120830536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 5, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.4960129261017, + "p90": 380.95998764038086, + "p95": 382.7199935913086, + "p99": 386.52798533439636 + }, + "combine": { + "p50": 434.81600284576416, + "p90": 439.35999274253845, + "p95": 440.70398807525635, + "p99": 444.70399618148804 + }, + "roundtrip": { + "p50": 781.8880081176758, + "p90": 786.8480086326599, + "p95": 787.9679799079895, + "p99": 791.5520071983337 + }, + "isolatedSum": { + "p50": 809.3120157718658, + "p90": 820.3199803829193, + "p95": 823.4239816665649, + "p99": 831.2319815158844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 659.2000126838684, + "p90": 666.3359999656677, + "p95": 668.9280271530151, + "p99": 674.2079854011536 + }, + "combine": { + "p50": 798.8160252571106, + "p90": 804.9920201301575, + "p95": 806.4320087432861, + "p99": 809.1199994087219 + }, + "roundtrip": { + "p50": 1428.7359714508057, + "p90": 1435.6160163879395, + "p95": 1437.9199743270874, + "p99": 1441.3440227508545 + }, + "isolatedSum": { + "p50": 1458.016037940979, + "p90": 1471.3280200958252, + "p95": 1475.3600358963013, + "p99": 1483.3279848098755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1243.6480522155762, + "p90": 1256.1919689178467, + "p95": 1263.3600234985352, + "p99": 1405.5360555648804 + }, + "combine": { + "p50": 1503.7120580673218, + "p90": 1509.6319913864136, + 
"p95": 1511.9359493255615, + "p99": 1515.328049659729 + }, + "roundtrip": { + "p50": 2720.3519344329834, + "p90": 2729.055881500244, + "p95": 2731.7440509796143, + "p99": 2739.07208442688 + }, + "isolatedSum": { + "p50": 2747.360110282898, + "p90": 2765.8239603042603, + "p95": 2775.2959728240967, + "p99": 2920.8641052246094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ff84dfb1", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_905c5730", + "comparisonKey": "1b5cd2ceb486945f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:08.242431+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": 
false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.03200370073318, + "p90": 116.99199676513672, + "p95": 119.35999989509583, + "p99": 125.2480000257492 + }, + "combine": { + "p50": 106.33599758148193, + "p90": 111.35999858379364, + "p95": 112.2559979557991, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 195.00799477100372, + "p90": 200.73600113391876, + "p95": 201.82399451732635, + "p99": 206.59199357032776 + }, + "isolatedSum": { + "p50": 218.36800128221512, + "p90": 228.35199534893036, + "p95": 231.61599785089493, + "p99": 239.58399891853333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.33600413799286, + "p90": 151.0400027036667, + "p95": 152.0960032939911, + "p99": 155.008003115654 + }, + "combine": { + "p50": 151.39199793338776, + "p90": 154.36799824237823, + "p95": 155.29599785804749, + "p99": 160.35200655460358 + }, + "roundtrip": { + "p50": 268.6080038547516, + "p90": 273.0880081653595, + "p95": 274.59201216697693, + "p99": 276.8320143222809 + 
}, + "isolatedSum": { + "p50": 297.7280020713806, + "p90": 305.4080009460449, + "p95": 307.3920011520386, + "p99": 315.36000967025757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.01599597930908, + "p90": 206.2399983406067, + "p95": 208.12800526618958, + "p99": 213.53599429130554 + }, + "combine": { + "p50": 227.87199914455414, + "p90": 231.04000091552734, + "p95": 233.60000550746918, + "p99": 236.00000143051147 + }, + "roundtrip": { + "p50": 404.7360122203827, + "p90": 409.7920060157776, + "p95": 411.23199462890625, + "p99": 414.0160083770752 + }, + "isolatedSum": { + "p50": 429.8879951238632, + "p90": 437.27999925613403, + "p95": 441.72801077365875, + "p99": 449.535995721817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.51201033592224, + "p90": 317.82400608062744, + "p95": 319.3280100822449, + "p99": 322.6880133152008 + }, + "combine": { + "p50": 368.4479892253876, + "p90": 374.59200620651245, + "p95": 376.51199102401733, + "p99": 381.5680146217346 + }, + "roundtrip": { + "p50": 659.712016582489, + "p90": 666.5599942207336, + "p95": 669.152021408081, + "p99": 674.3040084838867 + }, + "isolatedSum": { + "p50": 680.9599995613098, + "p90": 692.4160122871399, + "p95": 695.8400011062622, + "p99": 704.2560279369354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 4, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 535.7120037078857, + "p90": 543.071985244751, + "p95": 544.6720123291016, + "p99": 548.0960011482239 + }, + "combine": { + "p50": 643.775999546051, + "p90": 651.5200138092041, + "p95": 653.6639928817749, + "p99": 657.3439836502075 + }, + "roundtrip": { + "p50": 1154.6239852905273, + "p90": 1164.3840074539185, + "p95": 1167.0080423355103, + "p99": 1171.839952468872 + }, + "isolatedSum": { + "p50": 1179.4880032539368, + "p90": 1194.591999053955, + "p95": 1198.3360052108765, + "p99": 1205.4399847984314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1025.9840488433838, + "p90": 1050.1439571380615, + "p95": 1056.3520193099976, + "p99": 1075.9680271148682 + }, + "combine": { + "p50": 1187.3600482940674, + "p90": 1195.3920125961304, + "p95": 1197.2800493240356, + "p99": 1204.416036605835 + }, + "roundtrip": { + "p50": 2168.447971343994, + "p90": 2186.2399578094482, + "p95": 2190.783977508545, + "p99": 2199.5840072631836 + }, + "isolatedSum": { + "p50": 2213.344097137451, + "p90": 2245.535969734192, + "p95": 2253.632068634033, + "p99": 2280.384063720703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a6e27ee2", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_eaf2a101", + "comparisonKey": "a27499dd8aa6624c", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T10:53:32.251249+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.33599954843521, + "p90": 124.41600114107132, + "p95": 125.85599720478058, + "p99": 134.71999764442444 + }, + 
"combine": { + "p50": 114.56000059843063, + "p90": 118.84800344705582, + "p95": 120.28799951076508, + "p99": 123.55200201272964 + }, + "roundtrip": { + "p50": 214.7199958562851, + "p90": 219.64800357818604, + "p95": 221.02400660514832, + "p99": 225.50399601459503 + }, + "isolatedSum": { + "p50": 232.89600014686584, + "p90": 243.26400458812714, + "p95": 246.14399671554565, + "p99": 258.2719996571541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 160.51200032234192, + "p90": 165.6319946050644, + "p95": 168.73599588871002, + "p99": 178.0800074338913 + }, + "combine": { + "p50": 169.95200514793396, + "p90": 173.88799786567688, + "p95": 176.7359972000122, + "p99": 178.39999496936798 + }, + "roundtrip": { + "p50": 302.39999294281006, + "p90": 307.74399638175964, + "p95": 309.2480003833771, + "p99": 312.5759959220886 + }, + "isolatedSum": { + "p50": 330.4640054702759, + "p90": 339.5199924707413, + "p95": 345.47199308872223, + "p99": 356.4800024032593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 233.18399488925934, + "p90": 239.1040027141571, + "p95": 240.89600145816803, + "p99": 244.09599602222443 + }, + "combine": { + "p50": 266.07999205589294, + "p90": 271.36000990867615, + "p95": 273.50398898124695, + "p99": 279.3920040130615 + }, + "roundtrip": { + "p50": 473.7600088119507, + "p90": 478.7839949131012, + "p95": 480.76799511909485, + "p99": 485.3760004043579 + }, + "isolatedSum": { + "p50": 499.2639869451523, + "p90": 
510.46401262283325, + "p95": 514.399990439415, + "p99": 523.488000035286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 375.5199909210205, + "p90": 382.6560080051422, + "p95": 384.768009185791, + "p99": 391.4879858493805 + }, + "combine": { + "p50": 449.18400049209595, + "p90": 455.7119905948639, + "p95": 457.2800099849701, + "p99": 465.05600214004517 + }, + "roundtrip": { + "p50": 800.8000254631042, + "p90": 807.7120184898376, + "p95": 810.6560111045837, + "p99": 819.0079927444458 + }, + "isolatedSum": { + "p50": 824.7039914131165, + "p90": 838.3679986000061, + "p95": 842.0480191707611, + "p99": 856.5439879894257 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 675.3280162811279, + "p90": 684.7040057182312, + "p95": 687.4880194664001, + "p99": 692.4800276756287 + }, + "combine": { + "p50": 802.1119832992554, + "p90": 809.984028339386, + "p95": 812.3520016670227, + "p99": 817.791998386383 + }, + "roundtrip": { + "p50": 1448.0639696121216, + "p90": 1458.0800533294678, + "p95": 1461.4399671554565, + "p99": 1495.6480264663696 + }, + "isolatedSum": { + "p50": 1477.4399995803833, + "p90": 1494.6880340576172, + "p95": 1499.8400211334229, + "p99": 1510.2720260620117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1283.136010169983, + "p90": 1295.3599691390991, + "p95": 1301.0239601135254, + "p99": 1310.0160360336304 + }, + "combine": { + "p50": 1514.623999595642, + "p90": 1525.056004524231, + "p95": 1528.3199548721313, + "p99": 1534.8479747772217 + }, + "roundtrip": { + "p50": 2771.7440128326416, + "p90": 2784.480094909668, + "p95": 2788.3520126342773, + "p99": 2798.1441020965576 + }, + "isolatedSum": { + "p50": 2797.760009765625, + "p90": 2820.41597366333, + "p95": 2829.3439149856567, + "p99": 2844.864010810852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-77a9c542", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_f649ca0d", + "comparisonKey": "9e4c1848209c3d40", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:57.343065+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + 
}, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.44799941778183, + "p90": 117.85600334405899, + "p95": 119.48800086975098, + "p99": 125.791996717453 + }, + "combine": { + "p50": 106.81600123643875, + "p90": 112.60800063610077, + "p95": 113.63200098276138, + "p99": 115.00799655914307 + }, + "roundtrip": { + "p50": 195.2960044145584, + "p90": 201.92000269889832, + "p95": 204.73599433898926, + "p99": 212.351992726326 + }, + "isolatedSum": { + "p50": 219.26400065422058, + "p90": 230.46400398015976, + "p95": 233.12000185251236, + "p99": 240.79999327659607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.84799313545227, + "p90": 150.751993060112, + "p95": 151.87199413776398, + "p99": 160.35200655460358 + }, + "combine": { + 
"p50": 149.75999295711517, + "p90": 154.33600544929504, + "p95": 155.07200360298157, + "p99": 157.18400478363037 + }, + "roundtrip": { + "p50": 266.81599020957947, + "p90": 272.19200134277344, + "p95": 273.69600534439087, + "p99": 278.56001257896423 + }, + "isolatedSum": { + "p50": 296.60798609256744, + "p90": 305.08799850940704, + "p95": 306.94399774074554, + "p99": 317.53601133823395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 198.33600521087646, + "p90": 203.10400426387787, + "p95": 204.28800582885742, + "p99": 207.8080028295517 + }, + "combine": { + "p50": 229.34399545192719, + "p90": 235.26400327682495, + "p95": 236.54399812221527, + "p99": 239.48800563812256 + }, + "roundtrip": { + "p50": 401.7280042171478, + "p90": 406.8480134010315, + "p95": 408.8959991931915, + "p99": 414.65601325035095 + }, + "isolatedSum": { + "p50": 427.68000066280365, + "p90": 438.3680075407028, + "p95": 440.8320039510727, + "p99": 447.29600846767426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 307.5839877128601, + "p90": 313.31199407577515, + "p95": 314.6879971027374, + "p99": 320.47998905181885 + }, + "combine": { + "p50": 366.94398522377014, + "p90": 374.2719888687134, + "p95": 376.3839900493622, + "p99": 381.056010723114 + }, + "roundtrip": { + "p50": 649.7920155525208, + "p90": 657.2160124778748, + "p95": 659.5839858055115, + "p99": 664.2559766769409 + }, + "isolatedSum": { + "p50": 674.5279729366302, + "p90": 687.5839829444885, + 
"p95": 691.0719871520996, + "p99": 701.5359997749329 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 528.1280279159546, + "p90": 540.831983089447, + "p95": 544.9920296669006, + "p99": 556.384027004242 + }, + "combine": { + "p50": 633.9840292930603, + "p90": 641.6959762573242, + "p95": 644.1280245780945, + "p99": 648.8320231437683 + }, + "roundtrip": { + "p50": 1134.2719793319702, + "p90": 1144.8639631271362, + "p95": 1148.4160423278809, + "p99": 1155.7120084762573 + }, + "isolatedSum": { + "p50": 1162.112057209015, + "p90": 1182.5279593467712, + "p95": 1189.1200542449951, + "p99": 1205.2160501480103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1005.9200525283813, + "p90": 1030.8159589767456, + "p95": 1041.599988937378, + "p99": 1063.3280277252197 + }, + "combine": { + "p50": 1170.7839965820312, + "p90": 1179.6799898147583, + "p95": 1182.8800439834595, + "p99": 1191.1040544509888 + }, + "roundtrip": { + "p50": 2141.6640281677246, + "p90": 2165.7919883728027, + "p95": 2176.095962524414, + "p99": 2198.2080936431885 + }, + "isolatedSum": { + "p50": 2176.7040491104126, + "p90": 2210.495948791504, + "p95": 2224.4800329208374, + "p99": 2254.4320821762085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { 
+ "id": "cx-6cf806f8", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_68d64fee", + "comparisonKey": "a12c9892fb7fd143", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:00.169026+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": 
"2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.6800029873848, + "p90": 129.85600531101227, + "p95": 130.65600395202637, + "p99": 136.54400408267975 + }, + "combine": { + "p50": 112.57600039243698, + "p90": 114.33599889278412, + "p95": 114.81600254774094, + "p99": 120.64000219106674 + }, + "roundtrip": { + "p50": 216.06400609016418, + "p90": 220.44800221920013, + "p95": 221.27999365329742, + "p99": 224.19199347496033 + }, + "isolatedSum": { + "p50": 236.25600337982178, + "p90": 244.1920042037964, + "p95": 245.4720064997673, + "p99": 257.1840062737465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.49600052833557, + "p90": 165.75999557971954, + "p95": 166.49599373340607, + "p99": 169.0559983253479 + }, + "combine": { + "p50": 164.19200599193573, + "p90": 169.91999745368958, + "p95": 171.07200622558594, + "p99": 172.60800302028656 + }, + "roundtrip": { + "p50": 297.791987657547, + "p90": 302.3679852485657, + "p95": 304.28799986839294, + "p99": 307.16800689697266 + }, + "isolatedSum": { + "p50": 326.6880065202713, + "p90": 335.6799930334091, + "p95": 337.567999958992, + "p99": 341.66400134563446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 234.9119931459427, + "p90": 238.3359968662262, + "p95": 239.9359941482544, + "p99": 245.15199661254883 + }, + "combine": { + "p50": 264.70398902893066, + "p90": 
268.67198944091797, + "p95": 270.27198672294617, + "p99": 273.79199862480164 + }, + "roundtrip": { + "p50": 474.4639992713928, + "p90": 479.5840084552765, + "p95": 480.99198937416077, + "p99": 483.64800214767456 + }, + "isolatedSum": { + "p50": 499.61598217487335, + "p90": 507.00798630714417, + "p95": 510.20798087120056, + "p99": 518.9439952373505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.11201000213623, + "p90": 380.2559971809387, + "p95": 381.632000207901, + "p99": 387.58400082588196 + }, + "combine": { + "p50": 447.90399074554443, + "p90": 453.3120095729828, + "p95": 455.04000782966614, + "p99": 458.079993724823 + }, + "roundtrip": { + "p50": 798.4319925308228, + "p90": 804.0639758110046, + "p95": 806.879997253418, + "p99": 831.5520286560059 + }, + "isolatedSum": { + "p50": 822.0160007476807, + "p90": 833.5680067539215, + "p95": 836.6720080375671, + "p99": 845.663994550705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 670.7519888877869, + "p90": 680.7360053062439, + "p95": 684.0000152587891, + "p99": 689.8880004882812 + }, + "combine": { + "p50": 815.6800270080566, + "p90": 822.4639892578125, + "p95": 825.1839876174927, + "p99": 830.9760093688965 + }, + "roundtrip": { + "p50": 1459.488034248352, + "p90": 1468.4480428695679, + "p95": 1471.4239835739136, + "p99": 1482.5600385665894 + }, + "isolatedSum": { + "p50": 1486.4320158958435, + "p90": 1503.1999945640564, + "p95": 1509.1840028762817, + "p99": 
1520.8640098571777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1273.5040187835693, + "p90": 1285.9519720077515, + "p95": 1289.4400358200073, + "p99": 1295.0079441070557 + }, + "combine": { + "p50": 1538.6559963226318, + "p90": 1546.336054801941, + "p95": 1548.9599704742432, + "p99": 1553.0879497528076 + }, + "roundtrip": { + "p50": 2787.008047103882, + "p90": 2798.5599040985107, + "p95": 2801.1839389801025, + "p99": 2808.799982070923 + }, + "isolatedSum": { + "p50": 2812.160015106201, + "p90": 2832.2880268096924, + "p95": 2838.4000062942505, + "p99": 2848.0958938598633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f0849152", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_3d319f20", + "comparisonKey": "55b7117a81944c02", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:25.445846+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.3119985461235, + "p90": 118.94399672746658, + "p95": 120.54400146007538, + "p99": 123.26399981975555 + }, + "combine": { + "p50": 105.95200210809708, + "p90": 107.26399719715118, + "p95": 111.42399907112122, + "p99": 113.43999952077866 + }, + "roundtrip": { + "p50": 195.93599438667297, + "p90": 200.8959949016571, + "p95": 202.14399695396423, + "p99": 206.7520022392273 + }, + "isolatedSum": { + "p50": 219.26400065422058, + "p90": 226.20799392461777, + "p95": 231.9680005311966, + "p99": 236.7039993405342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + 
"recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 141.12000167369843, + "p90": 146.7200070619583, + "p95": 147.8080004453659, + "p99": 151.58399939537048 + }, + "combine": { + "p50": 149.02399480342865, + "p90": 153.888002038002, + "p95": 154.55999970436096, + "p99": 160.25599837303162 + }, + "roundtrip": { + "p50": 264.0320062637329, + "p90": 269.53598856925964, + "p95": 272.2879946231842, + "p99": 475.9039878845215 + }, + "isolatedSum": { + "p50": 290.1439964771271, + "p90": 300.6080090999603, + "p95": 302.36800014972687, + "p99": 311.8399977684021 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.06400346755981, + "p90": 203.61599326133728, + "p95": 204.73599433898926, + "p99": 208.67200195789337 + }, + "combine": { + "p50": 227.90400683879852, + "p90": 233.63199830055237, + "p95": 234.3679964542389, + "p99": 237.0239943265915 + }, + "roundtrip": { + "p50": 402.0479917526245, + "p90": 407.4240028858185, + "p95": 408.8959991931915, + "p99": 414.14400935173035 + }, + "isolatedSum": { + "p50": 427.96801030635834, + "p90": 437.24799156188965, + "p95": 439.10399079322815, + "p99": 445.69599628448486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.20000648498535, + "p90": 308.3840012550354, + "p95": 310.4960024356842, + "p99": 314.1759932041168 + }, + "combine": { + "p50": 361.82400584220886, 
+ "p90": 367.74399876594543, + "p95": 369.4080114364624, + "p99": 371.5519905090332 + }, + "roundtrip": { + "p50": 640.3840184211731, + "p90": 646.4639902114868, + "p95": 648.1599807739258, + "p99": 654.4960141181946 + }, + "isolatedSum": { + "p50": 665.0240123271942, + "p90": 676.1280000209808, + "p95": 679.9040138721466, + "p99": 685.72798371315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.2959985733032, + "p90": 531.93598985672, + "p95": 535.3279709815979, + "p99": 539.8719906806946 + }, + "combine": { + "p50": 641.7919993400574, + "p90": 649.8240232467651, + "p95": 652.7040004730225, + "p99": 656.544029712677 + }, + "roundtrip": { + "p50": 1134.81605052948, + "p90": 1145.2800035476685, + "p95": 1149.0240097045898, + "p99": 1156.383991241455 + }, + "isolatedSum": { + "p50": 1165.0879979133606, + "p90": 1181.760013103485, + "p95": 1188.0319714546204, + "p99": 1196.4160203933716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 998.0159997940063, + "p90": 1021.5359926223755, + "p95": 1028.480052947998, + "p99": 1039.039969444275 + }, + "combine": { + "p50": 1156.5760374069214, + "p90": 1164.86394405365, + "p95": 1167.7119731903076, + "p99": 1176.095962524414 + }, + "roundtrip": { + "p50": 2115.391969680786, + "p90": 2132.352113723755, + "p95": 2137.9520893096924, + "p99": 2148.9920616149902 + }, + "isolatedSum": { + "p50": 2154.5920372009277, + "p90": 2186.3999366760254, + "p95": 2196.1920261383057, + 
"p99": 2215.135931968689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d59d494", + "identity": "h100|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_2f577c82", + "comparisonKey": "1a6e17b8aa3b7dd1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:30.385825+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, 
+ "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.67200112342834, + "p90": 117.69600212574005, + "p95": 119.61600184440613, + "p99": 121.85599654912949 + }, + "combine": { + "p50": 106.20799660682678, + "p90": 111.48799955844879, + "p95": 112.38399893045425, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 194.91200149059296, + "p90": 200.70399343967438, + "p95": 201.9519954919815, + "p99": 204.19199764728546 + }, + "isolatedSum": { + "p50": 218.87999773025513, + "p90": 229.18400168418884, + "p95": 232.00000077486038, + "p99": 236.1919954419136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 142.62400567531586, + "p90": 147.48799800872803, + "p95": 148.73600006103516, + "p99": 152.319997549057 + }, + "combine": { + "p50": 151.2320041656494, + "p90": 154.08000349998474, + "p95": 154.9759954214096, + "p99": 160.288006067276 + }, + "roundtrip": { + "p50": 264.5440101623535, + "p90": 268.3520019054413, + "p95": 269.1200077533722, + "p99": 272.7679908275604 + }, + "isolatedSum": { + "p50": 293.85600984096527, + "p90": 301.56800150871277, + "p95": 303.71199548244476, + "p99": 312.608003616333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.73600113391876, + "p90": 204.44799959659576, + "p95": 206.08000457286835, + "p99": 210.04800498485565 + }, + "combine": { + "p50": 228.2560020685196, + "p90": 234.0800017118454, + "p95": 235.4239970445633, + "p99": 238.68800699710846 + }, + "roundtrip": { + "p50": 400.736004114151, + "p90": 406.20800852775574, + "p95": 407.77599811553955, + "p99": 412.6400053501129 + }, + "isolatedSum": { + "p50": 428.99200320243835, + "p90": 438.52800130844116, + "p95": 441.50400161743164, + "p99": 448.7360119819641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.5520017147064, + "p90": 309.471994638443, + "p95": 311.16798520088196, + "p99": 315.61601161956787 + }, + "combine": { + "p50": 361.88799142837524, + "p90": 367.8399920463562, + "p95": 371.2320029735565, + "p99": 553.6320209503174 + }, + "roundtrip": { + "p50": 639.9040222167969, + "p90": 645.7920074462891, + "p95": 647.3280191421509, + "p99": 651.9359946250916 + }, + "isolatedSum": { + "p50": 665.4399931430817, + "p90": 677.3119866847992, + "p95": 682.3999881744385, + "p99": 869.2480325698853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.2639908790588, + "p90": 531.711995601654, + "p95": 534.4319939613342, + "p99": 704.5120000839233 + }, + "combine": { + "p50": 639.5840048789978, + "p90": 646.336019039154, + "p95": 
648.4799981117249, + "p99": 653.4720063209534 + }, + "roundtrip": { + "p50": 1134.4000101089478, + "p90": 1143.2960033416748, + "p95": 1146.9119787216187, + "p99": 1152.83203125 + }, + "isolatedSum": { + "p50": 1162.8479957580566, + "p90": 1178.048014640808, + "p95": 1182.911992073059, + "p99": 1357.9840064048767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 999.0079998970032, + "p90": 1019.9999809265137, + "p95": 1024.9919891357422, + "p99": 1037.8559827804565 + }, + "combine": { + "p50": 1154.8479795455933, + "p90": 1162.719964981079, + "p95": 1164.896011352539, + "p99": 1172.0000505447388 + }, + "roundtrip": { + "p50": 2113.312005996704, + "p90": 2130.176067352295, + "p95": 2136.0321044921875, + "p99": 2153.887987136841 + }, + "isolatedSum": { + "p50": 2153.8559794425964, + "p90": 2182.719945907593, + "p95": 2189.8880004882812, + "p99": 2209.8560333251953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9f657a50", + "identity": "h100|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_65927f79", + "comparisonKey": "f8b2908f08e00133", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:57.901873+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 98.49599748849869, + "p90": 103.61599922180176, + "p95": 104.8320010304451, + "p99": 108.51199924945831 + }, + "combine": { + "p50": 105.82400113344193, + "p90": 107.19999670982361, + "p95": 111.48799955844879, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 184.83200669288635, + "p90": 189.60000574588776, + "p95": 190.40000438690186, + "p99": 193.92000138759613 + }, + "isolatedSum": { + "p50": 204.3199986219406, + 
"p90": 210.81599593162537, + "p95": 216.3200005888939, + "p99": 221.53599560260773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.9600065946579, + "p90": 136.60800457000732, + "p95": 137.34400272369385, + "p99": 141.40799641609192 + }, + "combine": { + "p50": 149.6960073709488, + "p90": 154.2080044746399, + "p95": 154.81600165367126, + "p99": 157.95199573040009 + }, + "roundtrip": { + "p50": 252.54398584365845, + "p90": 256.44800066947937, + "p95": 258.65599513053894, + "p99": 269.1200077533722 + }, + "isolatedSum": { + "p50": 282.6560139656067, + "p90": 290.8160090446472, + "p95": 292.1600043773651, + "p99": 299.359992146492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 186.52799725532532, + "p90": 189.82400000095367, + "p95": 190.94400107860565, + "p99": 193.37600469589233 + }, + "combine": { + "p50": 229.12000119686127, + "p90": 234.14400219917297, + "p95": 235.80799996852875, + "p99": 238.01599442958832 + }, + "roundtrip": { + "p50": 389.5359933376312, + "p90": 394.01599764823914, + "p95": 396.09599113464355, + "p99": 406.20800852775574 + }, + "isolatedSum": { + "p50": 415.6479984521866, + "p90": 423.96800220012665, + "p95": 426.7520010471344, + "p99": 431.39199912548065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 291.48799180984497, + "p90": 302.72001028060913, + "p95": 306.11199140548706, + "p99": 317.1840012073517 + }, + "combine": { + "p50": 364.9600148200989, + "p90": 372.6080060005188, + "p95": 374.783992767334, + "p99": 381.53600692749023 + }, + "roundtrip": { + "p50": 630.0479769706726, + "p90": 636.6080045700073, + "p95": 638.2399797439575, + "p99": 643.3280110359192 + }, + "isolatedSum": { + "p50": 656.4480066299438, + "p90": 675.3280162811279, + "p95": 680.895984172821, + "p99": 698.7200081348419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 511.3919973373413, + "p90": 521.2799906730652, + "p95": 524.2559909820557, + "p99": 529.375970363617 + }, + "combine": { + "p50": 634.5279812812805, + "p90": 641.8240070343018, + "p95": 643.6480283737183, + "p99": 647.0720171928406 + }, + "roundtrip": { + "p50": 1119.9040412902832, + "p90": 1130.5279731750488, + "p95": 1133.631944656372, + "p99": 1140.768051147461 + }, + "isolatedSum": { + "p50": 1145.9199786186218, + "p90": 1163.103997707367, + "p95": 1167.904019355774, + "p99": 1176.4479875564575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 963.1999731063843, + "p90": 986.3680005073547, + "p95": 992.6720261573792, + "p99": 1017.9200172424316 + }, + "combine": { + "p50": 1164.9600267410278, + "p90": 1174.496054649353, + "p95": 1176.8959760665894, + "p99": 1184.3520402908325 + }, + "roundtrip": { + 
"p50": 2091.264009475708, + "p90": 2111.327886581421, + "p95": 2118.335962295532, + "p99": 2134.4640254974365 + }, + "isolatedSum": { + "p50": 2128.159999847412, + "p90": 2160.8640551567078, + "p95": 2169.5680022239685, + "p99": 2202.272057533264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-09d4afe4", + "identity": "h100|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_50016168", + "comparisonKey": "61169f4733612e01", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:20.963951+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 199.8399943113327, + "p90": 206.7199945449829, + "p95": 208.92800390720367, + "p99": 229.40799593925476 + }, + "combine": { + "p50": 77.69600301980972, + "p90": 80.1599994301796, + "p95": 82.07999914884567, + "p99": 86.2400010228157 + }, + "roundtrip": { + "p50": 258.62398743629456, + "p90": 264.8639976978302, + "p95": 267.5839960575104, + "p99": 276.89599990844727 + }, + "isolatedSum": { + "p50": 277.5359973311424, + "p90": 286.8799939751625, + "p95": 291.00800305604935, + "p99": 315.64799696207047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 233.5679978132248, + "p90": 238.5600060224533, + "p95": 240.38399755954742, + "p99": 252.25600600242615 + }, + "combine": { + "p50": 103.26399654150009, + "p90": 105.98400235176086, + "p95": 108.19199681282043, + "p99": 113.72800171375275 + }, + "roundtrip": { + "p50": 323.4559893608093, + "p90": 329.18399572372437, + "p95": 330.9760093688965, + "p99": 336.32001280784607 + }, + "isolatedSum": { + "p50": 336.8319943547249, + "p90": 344.5440083742142, + "p95": 348.57599437236786, + "p99": 365.9840077161789 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 319.487988948822, + "p90": 324.8000144958496, + "p95": 326.9760012626648, + "p99": 333.0560028553009 + }, + "combine": { + "p50": 161.05599701404572, + "p90": 164.57599401474, + "p95": 165.50399363040924, + "p99": 168.06399822235107 + }, + "roundtrip": { + "p50": 464.06400203704834, + "p90": 469.7279930114746, + "p95": 471.3279902935028, + "p99": 476.8959879875183 + }, + "isolatedSum": { + "p50": 480.54398596286774, + "p90": 489.3760085105896, + "p95": 492.47999489307404, + "p99": 501.120001077652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 474.3039906024933, + "p90": 480.54400086402893, + "p95": 483.16800594329834, + "p99": 491.7759895324707 + }, + "combine": { + "p50": 270.2080011367798, + "p90": 274.78399872779846, + "p95": 277.536004781723, + "p99": 474.43199157714844 + }, + "roundtrip": { + "p50": 728.8960218429565, + "p90": 736.4479899406433, + "p95": 738.4960055351257, + "p99": 795.2319979667664 + }, + "isolatedSum": { + "p50": 744.5119917392731, + "p90": 755.3279995918274, + "p95": 760.7040107250214, + "p99": 966.2079811096191 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 822.3040103912354, + "p90": 827.2960186004639, + "p95": 
828.7039995193481, + "p99": 835.2320194244385 + }, + "combine": { + "p50": 458.5280120372772, + "p90": 464.4800126552582, + "p95": 466.3040041923523, + "p99": 469.7279930114746 + }, + "roundtrip": { + "p50": 1266.3359642028809, + "p90": 1274.8479843139648, + "p95": 1276.9919633865356, + "p99": 1282.1439504623413 + }, + "isolatedSum": { + "p50": 1280.8320224285126, + "p90": 1291.776031255722, + "p95": 1295.0080037117004, + "p99": 1304.960012435913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1549.631953239441, + "p90": 1555.0719499588013, + "p95": 1557.695984840393, + "p99": 1564.0640258789062 + }, + "combine": { + "p50": 845.6000089645386, + "p90": 853.1519770622253, + "p95": 855.9039831161499, + "p99": 860.4159951210022 + }, + "roundtrip": { + "p50": 2392.1279907226562, + "p90": 2402.1120071411133, + "p95": 2405.951976776123, + "p99": 2412.544012069702 + }, + "isolatedSum": { + "p50": 2395.2319622039795, + "p90": 2408.2239270210266, + "p95": 2413.599967956543, + "p99": 2424.4800209999084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9d0726d0", + "identity": "h100|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_50016168", + "comparisonKey": "cdfb649a2a353875", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:45.546491+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + 
"phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 196.6399997472763, + "p90": 202.4960070848465, + "p95": 204.51200008392334, + "p99": 213.1199985742569 + }, + "combine": { + "p50": 84.16000008583069, + "p90": 86.46400272846222, + "p95": 88.92799913883209, + "p99": 91.51999652385712 + }, + "roundtrip": { + "p50": 265.4719948768616, + "p90": 272.38398790359497, + "p95": 
274.04800057411194, + "p99": 280.95999360084534 + }, + "isolatedSum": { + "p50": 280.799999833107, + "p90": 288.9600098133087, + "p95": 293.43999922275543, + "p99": 304.639995098114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 245.82399427890778, + "p90": 251.67998671531677, + "p95": 253.63200902938843, + "p99": 263.39200139045715 + }, + "combine": { + "p50": 117.15199798345566, + "p90": 120.03199756145477, + "p95": 121.11999839544296, + "p99": 124.57600235939026 + }, + "roundtrip": { + "p50": 351.7119884490967, + "p90": 357.2480082511902, + "p95": 359.5840036869049, + "p99": 369.56799030303955 + }, + "isolatedSum": { + "p50": 362.97599226236343, + "p90": 371.71198427677155, + "p95": 374.7520074248314, + "p99": 387.9680037498474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 339.1039967536926, + "p90": 345.7280099391937, + "p95": 348.57600927352905, + "p99": 384.799987077713 + }, + "combine": { + "p50": 184.83200669288635, + "p90": 188.06399405002594, + "p95": 189.37599658966064, + "p99": 192.03199446201324 + }, + "roundtrip": { + "p50": 504.57602739334106, + "p90": 509.5999836921692, + "p95": 511.29597425460815, + "p99": 515.1360034942627 + }, + "isolatedSum": { + "p50": 523.936003446579, + "p90": 533.7920039892197, + "p95": 537.9520058631897, + "p99": 576.8319815397263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 
2762, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 518.9759731292725, + "p90": 524.7039794921875, + "p95": 526.4959931373596, + "p99": 535.1359844207764 + }, + "combine": { + "p50": 291.83998703956604, + "p90": 296.54398560523987, + "p95": 298.40001463890076, + "p99": 301.4400005340576 + }, + "roundtrip": { + "p50": 792.4799919128418, + "p90": 798.8799810409546, + "p95": 800.7040023803711, + "p99": 809.7599744796753 + }, + "isolatedSum": { + "p50": 810.8159601688385, + "p90": 821.2479650974274, + "p95": 824.8960077762604, + "p99": 836.575984954834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 958.8159918785095, + "p90": 964.7039771080017, + "p95": 966.6560292243958, + "p99": 1062.5280141830444 + }, + "combine": { + "p50": 507.4880123138428, + "p90": 513.5999917984009, + "p95": 515.3599977493286, + "p99": 522.7839946746826 + }, + "roundtrip": { + "p50": 1450.976014137268, + "p90": 1458.240032196045, + "p95": 1460.1279497146606, + "p99": 1464.352011680603 + }, + "isolatedSum": { + "p50": 1466.3040041923523, + "p90": 1478.3039689064026, + "p95": 1482.0160269737244, + "p99": 1585.312008857727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1818.2079792022705, + "p90": 1826.8799781799316, + "p95": 1829.3440341949463, + "p99": 1835.8399868011475 + }, + "combine": { + "p50": 932.1600198745728, + "p90": 
940.9279823303223, + "p95": 944.5440173149109, + "p99": 951.9039988517761 + }, + "roundtrip": { + "p50": 2734.4961166381836, + "p90": 2746.6559410095215, + "p95": 2750.52809715271, + "p99": 2839.456081390381 + }, + "isolatedSum": { + "p50": 2750.3679990768433, + "p90": 2767.807960510254, + "p95": 2773.888051509857, + "p99": 2787.7439856529236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-48c283f0", + "identity": "h100|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_50016168", + "comparisonKey": "7d7d2aa8d144f9cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:12.930990+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 205.9520035982132, + "p90": 212.8320038318634, + "p95": 215.2319997549057, + "p99": 229.79199886322021 + }, + "combine": { + "p50": 89.66399729251862, + "p90": 92.06400066614151, + "p95": 95.32800316810608, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 282.1120023727417, + "p90": 288.7679934501648, + "p95": 290.336012840271, + "p99": 302.0159900188446 + }, + "isolatedSum": { + "p50": 295.6160008907318, + "p90": 304.8960044980049, + "p95": 310.5600029230118, + "p99": 332.2559967637062 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 266.81599020957947, + "p90": 273.24798703193665, + "p95": 275.61599016189575, + "p99": 282.3359966278076 + }, + "combine": { + "p50": 126.39999389648438, + "p90": 129.37599420547485, + "p95": 130.62399625778198, + "p99": 133.760005235672 + }, + "roundtrip": { + "p50": 382.33599066734314, + "p90": 387.935996055603, + "p95": 390.24001359939575, + "p99": 404.28799390792847 + }, + "isolatedSum": { + "p50": 393.21598410606384, + "p90": 
402.6239812374115, + "p95": 406.23998641967773, + "p99": 416.0960018634796 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 372.76801466941833, + "p90": 378.7199854850769, + "p95": 380.99199533462524, + "p99": 389.60000872612 + }, + "combine": { + "p50": 200.95999538898468, + "p90": 204.79999482631683, + "p95": 205.9839963912964, + "p99": 213.50400149822235 + }, + "roundtrip": { + "p50": 560.096025466919, + "p90": 567.1359896659851, + "p95": 570.8799958229065, + "p99": 956.5439820289612 + }, + "isolatedSum": { + "p50": 573.728010058403, + "p90": 583.5199803113937, + "p95": 586.9759917259216, + "p99": 603.1040102243423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 593.1199789047241, + "p90": 596.992015838623, + "p95": 599.3599891662598, + "p99": 608.672022819519 + }, + "combine": { + "p50": 323.199987411499, + "p90": 327.61600613594055, + "p95": 328.92799377441406, + "p99": 331.10401034355164 + }, + "roundtrip": { + "p50": 899.0399837493896, + "p90": 904.1919708251953, + "p95": 906.5920114517212, + "p99": 927.7759790420532 + }, + "isolatedSum": { + "p50": 916.3199663162231, + "p90": 924.6080219745636, + "p95": 928.2879829406738, + "p99": 939.7760331630707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 1106.0479879379272, + "p90": 1111.583948135376, + "p95": 1113.4719848632812, + "p99": 1118.1440353393555 + }, + "combine": { + "p50": 570.3679919242859, + "p90": 576.1920213699341, + "p95": 577.6640176773071, + "p99": 580.9280276298523 + }, + "roundtrip": { + "p50": 1664.255976676941, + "p90": 1675.1680374145508, + "p95": 1677.791953086853, + "p99": 1685.8880519866943 + }, + "isolatedSum": { + "p50": 1676.4159798622131, + "p90": 1687.77596950531, + "p95": 1691.1360025405884, + "p99": 1699.0720629692078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2105.2799224853516, + "p90": 2114.016056060791, + "p95": 2117.151975631714, + "p99": 2126.2080669403076 + }, + "combine": { + "p50": 1051.2640476226807, + "p90": 1059.4559907913208, + "p95": 1061.5999698638916, + "p99": 1066.431999206543 + }, + "roundtrip": { + "p50": 3195.5199241638184, + "p90": 3217.632055282593, + "p95": 3225.536108016968, + "p99": 3238.719940185547 + }, + "isolatedSum": { + "p50": 3156.543970108032, + "p90": 3173.472046852112, + "p95": 3178.7519454956055, + "p99": 3192.6400661468506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c48e98af", + "identity": "h100|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_30e588e0", + "comparisonKey": "9850b1b587e403b6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:24.496641+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.96799927949905, + "p90": 91.39200299978256, + "p95": 94.01600062847137, + "p99": 98.14400225877762 + }, + "combine": { + "p50": 97.82399982213974, + "p90": 100.00000149011612, + "p95": 
102.11200267076492, + "p99": 105.18400371074677 + }, + "roundtrip": { + "p50": 215.10399878025055, + "p90": 218.46400201320648, + "p95": 219.7760045528412, + "p99": 225.055992603302 + }, + "isolatedSum": { + "p50": 185.7919991016388, + "p90": 191.39200448989868, + "p95": 196.1280032992363, + "p99": 203.32800596952438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 106.23999685049057, + "p90": 109.95200276374817, + "p95": 111.48799955844879, + "p99": 118.59200149774551 + }, + "combine": { + "p50": 143.19999516010284, + "p90": 146.27200365066528, + "p95": 147.64800667762756, + "p99": 151.87199413776398 + }, + "roundtrip": { + "p50": 329.47200536727905, + "p90": 332.99198746681213, + "p95": 334.4320058822632, + "p99": 342.272013425827 + }, + "isolatedSum": { + "p50": 249.43999201059341, + "p90": 256.22400641441345, + "p95": 259.13600623607635, + "p99": 270.4639956355095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 142.2400027513504, + "p90": 145.79200744628906, + "p95": 147.5840061903, + "p99": 153.50399911403656 + }, + "combine": { + "p50": 222.3999947309494, + "p90": 225.69599747657776, + "p95": 226.84800624847412, + "p99": 229.88800704479218 + }, + "roundtrip": { + "p50": 526.6559720039368, + "p90": 531.7440032958984, + "p95": 534.2720150947571, + "p99": 537.1519923210144 + }, + "isolatedSum": { + "p50": 364.6399974822998, + "p90": 371.4880049228668, + "p95": 374.4320124387741, + "p99": 383.39200615882874 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 212.47999370098114, + "p90": 216.48000180721283, + "p95": 217.98400580883026, + "p99": 222.91199862957 + }, + "combine": { + "p50": 357.05599188804626, + "p90": 361.91999912261963, + "p95": 363.1359934806824, + "p99": 367.0400083065033 + }, + "roundtrip": { + "p50": 893.887996673584, + "p90": 899.5519876480103, + "p95": 900.6720185279846, + "p99": 904.5439958572388 + }, + "isolatedSum": { + "p50": 569.5359855890274, + "p90": 578.4000009298325, + "p95": 581.1199992895126, + "p99": 589.9520069360733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 354.68798875808716, + "p90": 359.935998916626, + "p95": 362.65599727630615, + "p99": 541.3439869880676 + }, + "combine": { + "p50": 629.4080018997192, + "p90": 636.9600296020508, + "p95": 639.1680240631104, + "p99": 642.9439783096313 + }, + "roundtrip": { + "p50": 1627.9679536819458, + "p90": 1635.9039545059204, + "p95": 1639.1040086746216, + "p99": 1644.8960304260254 + }, + "isolatedSum": { + "p50": 984.0959906578064, + "p90": 996.8960285186768, + "p95": 1001.8240213394165, + "p99": 1184.287965297699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
648.3520269393921, + "p90": 656.5759778022766, + "p95": 659.5199704170227, + "p99": 664.5439863204956 + }, + "combine": { + "p50": 1157.5679779052734, + "p90": 1166.208028793335, + "p95": 1168.7040328979492, + "p99": 1174.3680238723755 + }, + "roundtrip": { + "p50": 3095.7438945770264, + "p90": 3106.8480014801025, + "p95": 3111.583948135376, + "p99": 3119.0719604492188 + }, + "isolatedSum": { + "p50": 1805.9200048446655, + "p90": 1822.7840065956116, + "p95": 1828.224003314972, + "p99": 1838.912010192871 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-edced40f", + "identity": "h100|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_50016168", + "comparisonKey": "eb3107556e534be8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:56.496027+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 
132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 212.41599321365356, + "p90": 218.9439982175827, + "p95": 220.44800221920013, + "p99": 227.35999524593353 + }, + "combine": { + "p50": 96.76799923181534, + "p90": 99.64799880981445, + "p95": 101.72799974679947, + "p99": 105.56799918413162 + }, + "roundtrip": { + "p50": 297.0240116119385, + "p90": 303.23201417922974, + "p95": 305.85598945617676, + "p99": 311.74400448799133 + }, + "isolatedSum": { + "p50": 309.1839924454689, + "p90": 318.59199702739716, + "p95": 322.1760019659996, + "p99": 332.92799443006516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 279.87200021743774, + "p90": 285.63201427459717, + "p95": 287.6479923725128, + "p99": 292.4799919128418 + }, + "combine": { + "p50": 142.30400323867798, + "p90": 145.1839953660965, + "p95": 146.33600413799286, + "p99": 149.24800395965576 + }, + "roundtrip": { + "p50": 
411.23199462890625, + "p90": 416.9600009918213, + "p95": 418.62401366233826, + "p99": 422.4959909915924 + }, + "isolatedSum": { + "p50": 422.1760034561157, + "p90": 430.81600964069366, + "p95": 433.9839965105057, + "p99": 441.72799587249756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 398.0799913406372, + "p90": 403.872013092041, + "p95": 405.5039882659912, + "p99": 411.9679927825928 + }, + "combine": { + "p50": 222.33599424362183, + "p90": 226.3679951429367, + "p95": 227.4239957332611, + "p99": 230.81600666046143 + }, + "roundtrip": { + "p50": 609.5679998397827, + "p90": 616.4799928665161, + "p95": 618.7520027160645, + "p99": 625.0879764556885 + }, + "isolatedSum": { + "p50": 620.415985584259, + "p90": 630.2400082349777, + "p95": 632.9279839992523, + "p99": 642.7839994430542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 662.4000072479248, + "p90": 666.3039922714233, + "p95": 668.0960059165955, + "p99": 670.84801197052 + }, + "combine": { + "p50": 356.76801204681396, + "p90": 361.56800389289856, + "p95": 362.91199922561646, + "p99": 366.14400148391724 + }, + "roundtrip": { + "p50": 1003.5840272903442, + "p90": 1008.6400508880615, + "p95": 1010.208010673523, + "p99": 1013.7920379638672 + }, + "isolatedSum": { + "p50": 1019.1680192947388, + "p90": 1027.871996164322, + "p95": 1031.008005142212, + "p99": 1036.9920134544373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 
623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1235.0080013275146, + "p90": 1240.1920557022095, + "p95": 1241.6000366210938, + "p99": 1245.792031288147 + }, + "combine": { + "p50": 631.2000155448914, + "p90": 637.7919912338257, + "p95": 640.064001083374, + "p99": 644.0640091896057 + }, + "roundtrip": { + "p50": 1845.1839685440063, + "p90": 1852.6079654693604, + "p95": 1854.9439907073975, + "p99": 1858.016014099121 + }, + "isolatedSum": { + "p50": 1866.208016872406, + "p90": 1877.9840469360352, + "p95": 1881.6640377044678, + "p99": 1889.8560404777527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2359.935998916626, + "p90": 2370.271921157837, + "p95": 2374.1440773010254, + "p99": 2383.8400840759277 + }, + "combine": { + "p50": 1155.2640199661255, + "p90": 1164.7039651870728, + "p95": 1168.8640117645264, + "p99": 1173.248052597046 + }, + "roundtrip": { + "p50": 3513.11993598938, + "p90": 3527.8398990631104, + "p95": 3533.4720611572266, + "p99": 3544.9600219726562 + }, + "isolatedSum": { + "p50": 3515.2000188827515, + "p90": 3534.9758863449097, + "p95": 3543.0080890655518, + "p99": 3557.0881366729736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cb57e574", + "identity": "h100|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + 
"colorKey": "h100_50016168", + "comparisonKey": "467cc62b52a274a6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:53.452285+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 215.10399878025055, + "p90": 220.96000611782074, 
+ "p95": 224.0000069141388, + "p99": 232.12799429893494 + }, + "combine": { + "p50": 97.95200079679489, + "p90": 100.51199793815613, + "p95": 103.35999727249146, + "p99": 106.97600245475769 + }, + "roundtrip": { + "p50": 299.71200227737427, + "p90": 305.2479922771454, + "p95": 307.5520098209381, + "p99": 312.9279911518097 + }, + "isolatedSum": { + "p50": 313.05599957704544, + "p90": 321.47200405597687, + "p95": 327.36000418663025, + "p99": 339.1039967536926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 282.04798698425293, + "p90": 287.6800000667572, + "p95": 289.7599935531616, + "p99": 294.7840094566345 + }, + "combine": { + "p50": 142.94399321079254, + "p90": 146.17599546909332, + "p95": 147.5519984960556, + "p99": 151.0400027036667 + }, + "roundtrip": { + "p50": 411.9679927825928, + "p90": 417.9840087890625, + "p95": 419.96800899505615, + "p99": 425.53600668907166 + }, + "isolatedSum": { + "p50": 424.99198019504547, + "p90": 433.8559955358505, + "p95": 437.3119920492172, + "p99": 445.8240121603012 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 401.2799859046936, + "p90": 406.43200278282166, + "p95": 408.6399972438812, + "p99": 413.1839871406555 + }, + "combine": { + "p50": 222.24000096321106, + "p90": 226.3679951429367, + "p95": 228.09599339962006, + "p99": 230.75200617313385 + }, + "roundtrip": { + "p50": 608.3840131759644, + "p90": 614.2079830169678, + "p95": 615.9679889678955, + "p99": 621.4720010757446 + }, + 
"isolatedSum": { + "p50": 623.5199868679047, + "p90": 632.7999979257584, + "p95": 636.7359906435013, + "p99": 643.9359933137894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 662.2400283813477, + "p90": 666.4959788322449, + "p95": 668.7999963760376, + "p99": 673.2159852981567 + }, + "combine": { + "p50": 358.815997838974, + "p90": 363.42400312423706, + "p95": 364.8639917373657, + "p99": 367.8719997406006 + }, + "roundtrip": { + "p50": 1004.4480562210083, + "p90": 1010.3360414505005, + "p95": 1012.1599435806274, + "p99": 1015.455961227417 + }, + "isolatedSum": { + "p50": 1021.0560262203217, + "p90": 1029.919981956482, + "p95": 1033.6639881134033, + "p99": 1041.0879850387573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1227.8079986572266, + "p90": 1232.5119972229004, + "p95": 1234.2400550842285, + "p99": 1237.663984298706 + }, + "combine": { + "p50": 624.5120167732239, + "p90": 632.319986820221, + "p95": 635.0079774856567, + "p99": 639.8720145225525 + }, + "roundtrip": { + "p50": 1835.6800079345703, + "p90": 1844.0959453582764, + "p95": 1846.9760417938232, + "p99": 1850.592017173767 + }, + "isolatedSum": { + "p50": 1852.3200154304504, + "p90": 1864.8319840431213, + "p95": 1869.2480325698853, + "p99": 1877.5359988212585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2349.1199016571045, + "p90": 2357.311964035034, + "p95": 2359.1361045837402, + "p99": 2365.216016769409 + }, + "combine": { + "p50": 1144.927978515625, + "p90": 1153.1519889831543, + "p95": 1155.5839776992798, + "p99": 1161.3119840621948 + }, + "roundtrip": { + "p50": 3481.760025024414, + "p90": 3494.976043701172, + "p95": 3498.624086380005, + "p99": 3505.376100540161 + }, + "isolatedSum": { + "p50": 3494.0478801727295, + "p90": 3510.4639530181885, + "p95": 3514.72008228302, + "p99": 3526.528000831604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dc1d8e3b", + "identity": "h100|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_621a873c", + "comparisonKey": "506280f738fdc648", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:08.371259+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_10", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 77.11999863386154, + "p90": 79.9039974808693, + "p95": 81.60000294446945, + "p99": 83.74399691820145 + }, + "combine": { + "p50": 97.53599762916565, + "p90": 99.90400075912476, + "p95": 101.75999999046326, + "p99": 104.38399761915207 + }, + "roundtrip": { + "p50": 204.48000729084015, + "p90": 207.74400234222412, + "p95": 209.24800634384155, + "p99": 211.4879935979843 + }, + "isolatedSum": { + "p50": 174.6559962630272, + "p90": 179.80799823999405, + "p95": 183.3600029349327, + "p99": 188.12799453735352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 95.2640026807785, + "p90": 99.45599734783173, + "p95": 101.1200025677681, + "p99": 103.64799946546555 + }, + "combine": { + 
"p50": 143.16800236701965, + "p90": 146.14400267601013, + "p95": 147.48799800872803, + "p99": 152.8960019350052 + }, + "roundtrip": { + "p50": 320.3519880771637, + "p90": 324.16000962257385, + "p95": 326.6240060329437, + "p99": 460.4479968547821 + }, + "isolatedSum": { + "p50": 238.43200504779816, + "p90": 245.60000002384186, + "p95": 248.60800057649612, + "p99": 256.54400140047073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 133.12000036239624, + "p90": 137.56799697875977, + "p95": 140.83200693130493, + "p99": 347.9039967060089 + }, + "combine": { + "p50": 223.58399629592896, + "p90": 227.55199670791626, + "p95": 228.70400547981262, + "p99": 232.16000199317932 + }, + "roundtrip": { + "p50": 516.1920189857483, + "p90": 521.9200253486633, + "p95": 524.511992931366, + "p99": 558.9119791984558 + }, + "isolatedSum": { + "p50": 356.7039966583252, + "p90": 365.119993686676, + "p95": 369.53601241111755, + "p99": 580.0639986991882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 202.91200280189514, + "p90": 206.65599405765533, + "p95": 208.25600624084473, + "p99": 213.1199985742569 + }, + "combine": { + "p50": 358.3360016345978, + "p90": 363.2960021495819, + "p95": 364.76799845695496, + "p99": 367.68001317977905 + }, + "roundtrip": { + "p50": 882.7199935913086, + "p90": 887.6479864120483, + "p95": 889.0560269355774, + "p99": 891.8079733848572 + }, + "isolatedSum": { + "p50": 561.2480044364929, + "p90": 569.9519962072372, + "p95": 
573.0240046977997, + "p99": 580.800011754036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 341.40801429748535, + "p90": 349.5999872684479, + "p95": 352.8960049152374, + "p99": 359.0399920940399 + }, + "combine": { + "p50": 630.7520270347595, + "p90": 637.2159719467163, + "p95": 639.3920183181763, + "p99": 643.9039707183838 + }, + "roundtrip": { + "p50": 1612.3520135879517, + "p90": 1619.711995124817, + "p95": 1623.0080127716064, + "p99": 1627.135992050171 + }, + "isolatedSum": { + "p50": 972.1600413322449, + "p90": 986.8159592151642, + "p95": 992.2880232334137, + "p99": 1002.9439628124237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 614.4000291824341, + "p90": 622.655987739563, + "p95": 625.4400014877319, + "p99": 630.2400231361389 + }, + "combine": { + "p50": 1157.5039625167847, + "p90": 1167.1040058135986, + "p95": 1169.5040464401245, + "p99": 1173.9519834518433 + }, + "roundtrip": { + "p50": 3067.6798820495605, + "p90": 3081.1519622802734, + "p95": 3084.480047225952, + "p99": 3094.719886779785 + }, + "isolatedSum": { + "p50": 1771.9039916992188, + "p90": 1789.7599935531616, + "p95": 1794.9440479278564, + "p99": 1804.1920065879822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a4e060e4", 
+ "identity": "h100|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_e51abe52", + "comparisonKey": "70ce89485e469ab6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:57.012949+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.79199892282486, + "p90": 98.30400347709656, + "p95": 100.22400319576263, + "p99": 102.7199998497963 + }, + "combine": { + "p50": 89.63199704885483, + "p90": 91.36000275611877, + "p95": 91.61599725484848, + "p99": 96.76799923181534 + }, + "roundtrip": { + "p50": 161.53599321842194, + "p90": 166.6560024023056, + "p95": 167.84000396728516, + "p99": 169.88800466060638 + }, + "isolatedSum": { + "p50": 183.4239959716797, + "p90": 189.66400623321533, + "p95": 191.84000045061111, + "p99": 199.48799908161163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 110.36799848079681, + "p90": 115.35999923944473, + "p95": 116.64000153541565, + "p99": 119.45600062608719 + }, + "combine": { + "p50": 114.75200206041336, + "p90": 116.2559986114502, + "p95": 116.83200299739838, + "p99": 122.14399874210358 + }, + "roundtrip": { + "p50": 204.51200008392334, + "p90": 207.87200331687927, + "p95": 209.1519981622696, + "p99": 212.70400285720825 + }, + "isolatedSum": { + "p50": 225.12000054121017, + "p90": 231.61599785089493, + "p95": 233.47200453281403, + "p99": 241.59999936819077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 152.79999375343323, + "p90": 156.51200711727142, + "p95": 157.9200029373169, + "p99": 160.73599457740784 + }, + "combine": { + "p50": 168.60799491405487, + "p90": 173.75999689102173, + "p95": 
174.6560037136078, + "p99": 175.80799758434296 + }, + "roundtrip": { + "p50": 289.11998867988586, + "p90": 294.07998919487, + "p95": 295.3279912471771, + "p99": 298.6240088939667 + }, + "isolatedSum": { + "p50": 321.4079886674881, + "p90": 330.27200400829315, + "p95": 332.5760066509247, + "p99": 336.5439921617508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 217.75999665260315, + "p90": 222.20799326896667, + "p95": 224.19199347496033, + "p99": 228.28799486160278 + }, + "combine": { + "p50": 279.55201268196106, + "p90": 283.52001309394836, + "p95": 285.5679988861084, + "p99": 289.8879945278168 + }, + "roundtrip": { + "p50": 470.8479940891266, + "p90": 476.9279956817627, + "p95": 479.2320132255554, + "p99": 483.7439954280853 + }, + "isolatedSum": { + "p50": 497.3120093345642, + "p90": 505.72800636291504, + "p95": 509.7599923610687, + "p99": 518.1759893894196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 356.00000619888306, + "p90": 368.9279854297638, + "p95": 372.2879886627197, + "p99": 378.495991230011 + }, + "combine": { + "p50": 468.4799909591675, + "p90": 476.4479994773865, + "p95": 478.39999198913574, + "p99": 482.11199045181274 + }, + "roundtrip": { + "p50": 794.0160036087036, + "p90": 801.8239736557007, + "p95": 804.639995098114, + "p99": 809.5679879188538 + }, + "isolatedSum": { + "p50": 824.4799971580505, + "p90": 845.3759849071503, + "p95": 850.6879806518555, + "p99": 860.6079816818237 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 634.656012058258, + "p90": 642.304003238678, + "p95": 646.4959979057312, + "p99": 651.5840291976929 + }, + "combine": { + "p50": 850.5600094795227, + "p90": 859.0720295906067, + "p95": 862.0160222053528, + "p99": 866.1760091781616 + }, + "roundtrip": { + "p50": 1459.0400457382202, + "p90": 1469.6639776229858, + "p95": 1474.1120338439941, + "p99": 1480.031967163086 + }, + "isolatedSum": { + "p50": 1485.2160215377808, + "p90": 1501.3760328292847, + "p95": 1508.512020111084, + "p99": 1517.7600383758545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d93d708a", + "identity": "h100|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_e51abe52", + "comparisonKey": "ee429b8a71c2b0ce", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:49.418156+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": 
"uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.32799702882767, + "p90": 108.86400192975998, + "p95": 110.91200262308121, + "p99": 187.0719939470291 + }, + "combine": { + "p50": 97.21600264310837, + "p90": 99.04000163078308, + "p95": 99.48799759149551, + "p99": 105.24799674749374 + }, + "roundtrip": { + "p50": 170.68800330162048, + "p90": 176.15999281406403, + "p95": 177.21599340438843, + "p99": 182.8480064868927 + }, + "isolatedSum": { + "p50": 200.54399967193604, + "p90": 207.90400356054306, + "p95": 210.40000021457672, + "p99": 292.31999069452286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 120.25599926710129, + "p90": 126.01600587368011, + "p95": 127.48800218105316, + "p99": 132.03200697898865 + }, + "combine": { + "p50": 124.25599992275238, + "p90": 130.0799995660782, + "p95": 130.78400492668152, + "p99": 132.7040046453476 + }, + "roundtrip": { + "p50": 221.343994140625, + "p90": 226.1440008878708, + "p95": 228.12800109386444, + "p99": 232.7360063791275 + }, + "isolatedSum": { + "p50": 244.51199918985367, + "p90": 256.0960054397583, + "p95": 258.2720071077347, + "p99": 264.73601162433624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 164.70399498939514, + "p90": 168.32000017166138, + "p95": 169.5680022239685, + "p99": 174.97600615024567 + }, + "combine": { + "p50": 192.06400215625763, + "p90": 196.28800451755524, + "p95": 197.56799936294556, + "p99": 200.3840059041977 + }, + "roundtrip": { + "p50": 326.1440098285675, + "p90": 330.87998628616333, + "p95": 333.21601152420044, + "p99": 337.3439908027649 + }, + "isolatedSum": { + "p50": 356.76799714565277, + "p90": 364.6080046892166, + "p95": 367.13600158691406, + "p99": 375.36001205444336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 243.20000410079956, + "p90": 247.23200500011444, + "p95": 248.86399507522583, + "p99": 253.4720003604889 + }, + "combine": { + "p50": 300.00001192092896, + "p90": 305.6960105895996, + "p95": 307.96799063682556, + "p99": 313.27998638153076 + }, + "roundtrip": 
{ + "p50": 517.4400210380554, + "p90": 523.8720178604126, + "p95": 525.7279872894287, + "p99": 529.5040011405945 + }, + "isolatedSum": { + "p50": 543.2000160217285, + "p90": 552.928015589714, + "p95": 556.8319857120514, + "p99": 566.7519867420197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 407.1039855480194, + "p90": 414.40001130104065, + "p95": 417.9840087890625, + "p99": 422.3040044307709 + }, + "combine": { + "p50": 515.7759785652161, + "p90": 522.9439735412598, + "p95": 525.8240103721619, + "p99": 528.4159779548645 + }, + "roundtrip": { + "p50": 893.7600255012512, + "p90": 903.0399918556213, + "p95": 905.2479863166809, + "p99": 911.9679927825928 + }, + "isolatedSum": { + "p50": 922.8799641132355, + "p90": 937.3439848423004, + "p95": 943.8080191612244, + "p99": 950.7199823856354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 741.1519885063171, + "p90": 757.4719786643982, + "p95": 761.4719867706299, + "p99": 773.248016834259 + }, + "combine": { + "p50": 936.896026134491, + "p90": 945.6319808959961, + "p95": 948.0640292167664, + "p99": 954.0799856185913 + }, + "roundtrip": { + "p50": 1641.6319608688354, + "p90": 1656.1280488967896, + "p95": 1661.120057106018, + "p99": 1671.455979347229 + }, + "isolatedSum": { + "p50": 1678.048014640808, + "p90": 1703.1039595603943, + "p95": 1709.5360159873962, + "p99": 1727.3280024528503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + 
"combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-600deb0c", + "identity": "h100|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_e51abe52", + "comparisonKey": "ebcced062eefc0ef", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:42.998902+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.95200341939926, + "p90": 117.66400188207626, + "p95": 118.6240017414093, + "p99": 120.83200365304947 + }, + "combine": { + "p50": 99.42399710416794, + "p90": 105.56799918413162, + "p95": 106.46399855613708, + "p99": 113.37599903345108 + }, + "roundtrip": { + "p50": 186.0799938440323, + "p90": 189.60000574588776, + "p95": 190.59200584888458, + "p99": 195.5839991569519 + }, + "isolatedSum": { + "p50": 213.3760005235672, + "p90": 223.23200106620789, + "p95": 225.0880002975464, + "p99": 234.20800268650055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.03200697898865, + "p90": 137.79200613498688, + "p95": 139.13600146770477, + "p99": 143.0400013923645 + }, + "combine": { + "p50": 138.5599970817566, + "p90": 140.51200449466705, + "p95": 142.33599603176117, + "p99": 147.77599275112152 + }, + "roundtrip": { + "p50": 240.89600145816803, + "p90": 245.79200148582458, + "p95": 247.1040040254593, + "p99": 251.67998671531677 + }, + "isolatedSum": { + "p50": 270.59200406074524, + "p90": 278.30401062965393, + "p95": 281.47199749946594, + "p99": 290.815994143486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
181.63199722766876, + "p90": 185.56800484657288, + "p95": 187.1040016412735, + "p99": 190.17599523067474 + }, + "combine": { + "p50": 208.8640034198761, + "p90": 211.776003241539, + "p95": 212.92799711227417, + "p99": 218.81599724292755 + }, + "roundtrip": { + "p50": 363.3919954299927, + "p90": 368.4160113334656, + "p95": 369.8880076408386, + "p99": 373.50401282310486 + }, + "isolatedSum": { + "p50": 390.49600064754486, + "p90": 397.3440080881119, + "p95": 400.03199875354767, + "p99": 408.9919924736023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 271.93599939346313, + "p90": 275.87199211120605, + "p95": 277.24799513816833, + "p99": 280.44798970222473 + }, + "combine": { + "p50": 331.4560055732727, + "p90": 337.44001388549805, + "p95": 338.6879861354828, + "p99": 341.6000008583069 + }, + "roundtrip": { + "p50": 578.6240100860596, + "p90": 584.384024143219, + "p95": 585.919976234436, + "p99": 589.5040035247803 + }, + "isolatedSum": { + "p50": 603.3920049667358, + "p90": 613.3120059967041, + "p95": 615.9359812736511, + "p99": 622.0479905605316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 456.9920003414154, + "p90": 462.75201439857483, + "p95": 465.34401178359985, + "p99": 470.8159863948822 + }, + "combine": { + "p50": 579.9040198326111, + "p90": 586.687982082367, + "p95": 588.703989982605, + "p99": 592.8320288658142 + }, + "roundtrip": { + "p50": 1008.3199739456177, + "p90": 1016.319990158081, + "p95": 
1018.9759731292725, + "p99": 1022.5280523300171 + }, + "isolatedSum": { + "p50": 1036.8960201740265, + "p90": 1049.4399964809418, + "p95": 1054.0480017662048, + "p99": 1063.6480152606964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 871.8720078468323, + "p90": 894.3359851837158, + "p95": 903.8400053977966, + "p99": 1134.9120140075684 + }, + "combine": { + "p50": 1059.615969657898, + "p90": 1069.3440437316895, + "p95": 1072.7360248565674, + "p99": 1095.3279733657837 + }, + "roundtrip": { + "p50": 1896.7360258102417, + "p90": 1914.720058441162, + "p95": 1919.3919897079468, + "p99": 1925.5679845809937 + }, + "isolatedSum": { + "p50": 1931.4879775047302, + "p90": 1963.6800289154053, + "p95": 1976.576030254364, + "p99": 2230.239987373352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aeadb669", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_0b58f4de", + "comparisonKey": "cca7a3f5d9dbba36", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:46.961554+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.92000317573547, + "p90": 120.15999853610992, + "p95": 121.24799937009811, + "p99": 124.95999783277512 + }, + "combine": { + "p50": 108.09600353240967, + "p90": 109.92000252008438, + "p95": 113.50400000810623, + "p99": 115.9679964184761 + }, + "roundtrip": { + "p50": 198.71999323368073, + "p90": 203.3279985189438, + "p95": 204.25599813461304, + "p99": 207.87200331687927 + }, + "isolatedSum": { + "p50": 222.01600670814514, + "p90": 230.0800010561943, + "p95": 234.75199937820435, + "p99": 240.92799425125122 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 142.752006649971, + "p90": 147.35999703407288, + "p95": 148.70400726795197, + "p99": 154.65599298477173 + }, + "combine": { + "p50": 153.21600437164307, + "p90": 156.15999698638916, + "p95": 157.02399611473083, + "p99": 159.45599973201752 + }, + "roundtrip": { + "p50": 266.1440074443817, + "p90": 271.10400795936584, + "p95": 273.27999472618103, + "p99": 277.536004781723 + }, + "isolatedSum": { + "p50": 295.9680110216141, + "p90": 303.51999402046204, + "p95": 305.7280033826828, + "p99": 314.11199271678925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 197.9839950799942, + "p90": 202.14399695396423, + "p95": 203.90400290489197, + "p99": 216.22399985790253 + }, + "combine": { + "p50": 230.71999847888947, + "p90": 236.57600581645966, + "p95": 238.36800456047058, + "p99": 246.5600073337555 + }, + "roundtrip": { + "p50": 403.3919870853424, + "p90": 408.09598565101624, + "p95": 410.2399945259094, + "p99": 413.8239920139313 + }, + "isolatedSum": { + "p50": 428.70399355888367, + "p90": 438.7200027704239, + "p95": 442.27200746536255, + "p99": 462.784007191658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 302.592009305954, + "p90": 306.91200494766235, + 
"p95": 308.28800797462463, + "p99": 311.48800253868103 + }, + "combine": { + "p50": 366.0160005092621, + "p90": 372.0639944076538, + "p95": 374.11201000213623, + "p99": 376.73598527908325 + }, + "roundtrip": { + "p50": 644.1919803619385, + "p90": 649.6000289916992, + "p95": 652.2240042686462, + "p99": 656.9600105285645 + }, + "isolatedSum": { + "p50": 668.6080098152161, + "p90": 678.9759993553162, + "p95": 682.4000179767609, + "p99": 688.2239878177643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 517.6960229873657, + "p90": 525.4719853401184, + "p95": 527.9359817504883, + "p99": 537.1519923210144 + }, + "combine": { + "p50": 634.8479986190796, + "p90": 641.8880224227905, + "p95": 643.6480283737183, + "p99": 647.0080018043518 + }, + "roundtrip": { + "p50": 1128.8959980010986, + "p90": 1138.5600566864014, + "p95": 1142.5600051879883, + "p99": 1148.0000019073486 + }, + "isolatedSum": { + "p50": 1152.5440216064453, + "p90": 1167.360007762909, + "p95": 1171.5840101242065, + "p99": 1184.1599941253662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 996.3520169258118, + "p90": 1016.319990158081, + "p95": 1021.5679407119751, + "p99": 1036.7679595947266 + }, + "combine": { + "p50": 1165.4720306396484, + "p90": 1174.3040084838867, + "p95": 1177.9839992523193, + "p99": 1183.0400228500366 + }, + "roundtrip": { + "p50": 2119.6160316467285, + "p90": 2135.5841159820557, + "p95": 2140.7999992370605, + "p99": 2147.7439403533936 + }, 
+ "isolatedSum": { + "p50": 2161.82404756546, + "p90": 2190.623998641968, + "p95": 2199.5519399642944, + "p99": 2219.807982444763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-48275d35", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_e51abe52", + "comparisonKey": "4722c46550df5d1c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:38.377481+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.1119971871376, + "p90": 167.84000396728516, + "p95": 175.4239946603775, + "p99": 179.967999458313 + }, + "combine": { + "p50": 107.51999914646149, + "p90": 113.34399878978729, + "p95": 114.14399743080139, + "p99": 116.06399714946747 + }, + "roundtrip": { + "p50": 199.93600249290466, + "p90": 204.76800203323364, + "p95": 206.62400126457214, + "p99": 212.16000616550446 + }, + "isolatedSum": { + "p50": 221.6319963335991, + "p90": 281.18400275707245, + "p95": 289.5679920911789, + "p99": 296.03199660778046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.75999975204468, + "p90": 148.99200201034546, + "p95": 149.98400211334229, + "p99": 155.2319973707199 + }, + "combine": { + "p50": 150.78400075435638, + "p90": 155.64799308776855, + "p95": 156.51200711727142, + "p99": 159.61599349975586 + }, + "roundtrip": { + "p50": 265.855997800827, + "p90": 271.36000990867615, + "p95": 272.99201488494873, + "p99": 275.6800055503845 + }, + "isolatedSum": { + "p50": 296.54400050640106, + "p90": 304.639995098114, + "p95": 306.4960092306137, + "p99": 314.84799087047577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 
5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 198.71999323368073, + "p90": 203.61599326133728, + "p95": 205.50400018692017, + "p99": 213.95200490951538 + }, + "combine": { + "p50": 230.43200373649597, + "p90": 235.9679937362671, + "p95": 236.95999383926392, + "p99": 239.99999463558197 + }, + "roundtrip": { + "p50": 403.1040072441101, + "p90": 408.1920087337494, + "p95": 410.1119935512543, + "p99": 412.7039909362793 + }, + "isolatedSum": { + "p50": 429.1519969701767, + "p90": 439.58398699760437, + "p95": 442.4639940261841, + "p99": 453.95199954509735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 303.51999402046204, + "p90": 307.93601274490356, + "p95": 309.471994638443, + "p99": 314.36800956726074 + }, + "combine": { + "p50": 366.36799573898315, + "p90": 371.8400001525879, + "p95": 373.63201379776, + "p99": 377.0880103111267 + }, + "roundtrip": { + "p50": 645.0240015983582, + "p90": 652.288019657135, + "p95": 658.3359837532043, + "p99": 894.7839736938477 + }, + "isolatedSum": { + "p50": 669.8879897594452, + "p90": 679.7760128974915, + "p95": 683.104008436203, + "p99": 691.4560198783875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 519.3600058555603, + "p90": 528.3200144767761, + "p95": 530.5920243263245, + "p99": 536.9920134544373 + }, + "combine": { + "p50": 
636.352002620697, + "p90": 644.2559957504272, + "p95": 646.5920209884644, + "p99": 651.5520215034485 + }, + "roundtrip": { + "p50": 1128.4799575805664, + "p90": 1139.1359567642212, + "p95": 1141.6319608688354, + "p99": 1147.744059562683 + }, + "isolatedSum": { + "p50": 1155.7120084762573, + "p90": 1172.5760102272034, + "p95": 1177.1840453147888, + "p99": 1188.5440349578857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 996.6719746589661, + "p90": 1017.408013343811, + "p95": 1024.6399641036987, + "p99": 1034.1119766235352 + }, + "combine": { + "p50": 1165.120005607605, + "p90": 1174.4639873504639, + "p95": 1177.6319742202759, + "p99": 1184.8959922790527 + }, + "roundtrip": { + "p50": 2123.4560012817383, + "p90": 2159.264087677002, + "p95": 2170.1440811157227, + "p99": 2233.952045440674 + }, + "isolatedSum": { + "p50": 2161.791980266571, + "p90": 2191.872000694275, + "p95": 2202.2719383239746, + "p99": 2219.007968902588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7cc4eb26", + "identity": "h100|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_e51abe52", + "comparisonKey": "1448569dc4d99fad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:06.952579+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + 
"suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.60800129175186, + "p90": 177.15199291706085, + "p95": 179.55200374126434, + "p99": 183.55199694633484 + }, + "combine": { + "p50": 108.19199681282043, + "p90": 148.95999431610107, + "p95": 149.6639996767044, + "p99": 155.008003115654 + }, + "roundtrip": { + "p50": 196.48000597953796, + "p90": 262.9440128803253, + "p95": 267.2959864139557, + "p99": 273.0239927768707 + }, + "isolatedSum": { 
+ "p50": 224.7999981045723, + "p90": 326.1119872331619, + "p95": 329.21600341796875, + "p99": 338.56000006198883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.7280069589615, + "p90": 150.14399588108063, + "p95": 151.0079950094223, + "p99": 153.76000106334686 + }, + "combine": { + "p50": 153.53600680828094, + "p90": 156.6080003976822, + "p95": 157.3439985513687, + "p99": 162.30399906635284 + }, + "roundtrip": { + "p50": 267.7440047264099, + "p90": 271.87201380729675, + "p95": 273.0560004711151, + "p99": 275.6800055503845 + }, + "isolatedSum": { + "p50": 299.26401376724243, + "p90": 306.7519962787628, + "p95": 308.351993560791, + "p99": 316.0640001296997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 199.3280053138733, + "p90": 203.90400290489197, + "p95": 205.4399996995926, + "p99": 210.81599593162537 + }, + "combine": { + "p50": 230.1120012998581, + "p90": 235.32800376415253, + "p95": 236.89599335193634, + "p99": 239.51999843120575 + }, + "roundtrip": { + "p50": 403.4239947795868, + "p90": 408.160001039505, + "p95": 409.4719886779785, + "p99": 412.7039909362793 + }, + "isolatedSum": { + "p50": 429.4400066137314, + "p90": 439.2320066690445, + "p95": 442.33599305152893, + "p99": 450.3359943628311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.1279911994934, + "p90": 308.57598781585693, + "p95": 310.2079927921295, + "p99": 314.04799222946167 + }, + "combine": { + "p50": 368.2880103588104, + "p90": 374.36801195144653, + "p95": 376.25598907470703, + "p99": 380.12799620628357 + }, + "roundtrip": { + "p50": 644.8320150375366, + "p90": 650.5280137062073, + "p95": 652.0000100135803, + "p99": 657.1840047836304 + }, + "isolatedSum": { + "p50": 672.4160015583038, + "p90": 682.9439997673035, + "p95": 686.4639818668365, + "p99": 694.1759884357452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.0399966239929, + "p90": 532.9599976539612, + "p95": 536.0640287399292, + "p99": 541.2799715995789 + }, + "combine": { + "p50": 630.1760077476501, + "p90": 637.9520297050476, + "p95": 640.1600241661072, + "p99": 644.5440053939819 + }, + "roundtrip": { + "p50": 1125.8879899978638, + "p90": 1174.9119758605957, + "p95": 1181.1840534210205, + "p99": 1189.8880004882812 + }, + "isolatedSum": { + "p50": 1153.216004371643, + "p90": 1170.9120273590088, + "p95": 1176.2240529060364, + "p99": 1185.8239769935608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 999.0079998970032, + "p90": 1020.095944404602, + "p95": 1027.9359817504883, + "p99": 1039.7119522094727 + }, + "combine": { + "p50": 1147.1359729766846, + "p90": 1156.1280488967896, + "p95": 1157.920002937317, + "p99": 
1164.3520593643188 + }, + "roundtrip": { + "p50": 2107.1999073028564, + "p90": 2122.976064682007, + "p95": 2127.808094024658, + "p99": 2134.6240043640137 + }, + "isolatedSum": { + "p50": 2146.1439728736877, + "p90": 2176.2239933013916, + "p95": 2185.855984687805, + "p99": 2204.0640115737915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cdd39da8", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_f262fa06", + "comparisonKey": "9b66e1a939952d73", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:40.059447+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.74399733543396, + "p90": 134.97599959373474, + "p95": 136.3839954137802, + "p99": 142.59199798107147 + }, + "combine": { + "p50": 129.40800189971924, + "p90": 131.77600502967834, + "p95": 132.7359974384308, + "p99": 137.82399892807007 + }, + "roundtrip": { + "p50": 230.6240051984787, + "p90": 233.91999304294586, + "p95": 235.26400327682495, + "p99": 238.5919988155365 + }, + "isolatedSum": { + "p50": 261.1519992351532, + "p90": 266.7520046234131, + "p95": 269.119992852211, + "p99": 280.41599690914154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 175.3920018672943, + "p90": 189.05599415302277, + "p95": 191.48799777030945, + "p99": 194.04800236225128 + }, + "combine": { + "p50": 183.77600610256195, + "p90": 191.93600118160248, + "p95": 193.12000274658203, + "p99": 194.5600062608719 + }, + "roundtrip": { + "p50": 325.79201459884644, + "p90": 330.3999900817871, + "p95": 332.12798833847046, + "p99": 342.0799970626831 + }, + "isolatedSum": { + "p50": 359.16800796985626, + "p90": 380.99199533462524, + "p95": 
384.6080005168915, + "p99": 388.60800862312317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 265.4080092906952, + "p90": 279.4240117073059, + "p95": 281.21599555015564, + "p99": 284.38401222229004 + }, + "combine": { + "p50": 277.75999903678894, + "p90": 285.7919931411743, + "p95": 287.03999519348145, + "p99": 292.1920120716095 + }, + "roundtrip": { + "p50": 514.3679976463318, + "p90": 527.679979801178, + "p95": 529.9199819564819, + "p99": 537.0240211486816 + }, + "isolatedSum": { + "p50": 543.1680083274841, + "p90": 565.2160048484802, + "p95": 568.2559907436371, + "p99": 576.5760242938995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 440.6079947948456, + "p90": 446.943998336792, + "p95": 448.7999975681305, + "p99": 453.0239999294281 + }, + "combine": { + "p50": 468.4480130672455, + "p90": 472.7360010147095, + "p95": 474.07999634742737, + "p99": 477.24801301956177 + }, + "roundtrip": { + "p50": 882.752001285553, + "p90": 890.2080059051514, + "p95": 891.4240002632141, + "p99": 895.0080275535583 + }, + "isolatedSum": { + "p50": 909.0560078620911, + "p90": 919.6799993515015, + "p95": 922.8799939155579, + "p99": 930.2720129489899 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 
795.4559922218323, + "p90": 811.9999766349792, + "p95": 816.2559866905212, + "p99": 822.3040103912354 + }, + "combine": { + "p50": 856.3519716262817, + "p90": 864.0639781951904, + "p95": 866.3039803504944, + "p99": 873.2479810714722 + }, + "roundtrip": { + "p50": 1621.9840049743652, + "p90": 1636.191964149475, + "p95": 1640.6400203704834, + "p99": 1647.3280191421509 + }, + "isolatedSum": { + "p50": 1651.807963848114, + "p90": 1676.0639548301697, + "p95": 1682.5599670410156, + "p99": 1695.5519914627075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1507.904052734375, + "p90": 1519.2320346832275, + "p95": 1523.0400562286377, + "p99": 1530.079960823059 + }, + "combine": { + "p50": 1601.3760566711426, + "p90": 1608.2559823989868, + "p95": 1610.4960441589355, + "p99": 1616.7999505996704 + }, + "roundtrip": { + "p50": 3085.2479934692383, + "p90": 3097.791910171509, + "p95": 3101.151943206787, + "p99": 3110.8479499816895 + }, + "isolatedSum": { + "p50": 3109.2801094055176, + "p90": 3127.4880170822144, + "p95": 3133.5361003875732, + "p99": 3146.8799114227295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-719d2cde", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_fea85c39", + "comparisonKey": "9a35011257052b8f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:26.622667+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 86.94399893283844, + "p90": 92.06400066614151, + "p95": 93.24800223112106, + "p99": 96.3200032711029 + }, + "combine": { + "p50": 75.19999891519547, + "p90": 76.38400048017502, + "p95": 79.32800054550171, 
+ "p99": 82.59200304746628 + }, + "roundtrip": { + "p50": 136.1600011587143, + "p90": 140.06400108337402, + "p95": 141.40799641609192, + "p99": 145.37599682807922 + }, + "isolatedSum": { + "p50": 162.1439978480339, + "p90": 168.44800114631653, + "p95": 172.57600277662277, + "p99": 178.91200631856918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 115.52000045776367, + "p90": 131.45600259304047, + "p95": 132.60799646377563, + "p99": 137.37599551677704 + }, + "combine": { + "p50": 122.14399874210358, + "p90": 132.4159950017929, + "p95": 133.08799266815186, + "p99": 133.91999900341034 + }, + "roundtrip": { + "p50": 205.47200739383698, + "p90": 220.47999501228333, + "p95": 222.08000719547272, + "p99": 225.8239984512329 + }, + "isolatedSum": { + "p50": 237.66399919986725, + "p90": 263.8719975948334, + "p95": 265.6959891319275, + "p99": 271.2959945201874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 193.31200420856476, + "p90": 205.1520049571991, + "p95": 206.40000700950623, + "p99": 210.01599729061127 + }, + "combine": { + "p50": 294.14400458335876, + "p90": 299.55199360847473, + "p95": 301.08800530433655, + "p99": 305.4080009460449 + }, + "roundtrip": { + "p50": 454.5919895172119, + "p90": 458.8800072669983, + "p95": 460.09600162506104, + "p99": 463.20000290870667 + }, + "isolatedSum": { + "p50": 487.4560087919235, + "p90": 504.7039985656738, + "p95": 507.4880123138428, + "p99": 515.4239982366562 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f60aeaf3", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_14949248", + "comparisonKey": "2e0547a8b7addd8b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:05.487719+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.93600034713745, + "p90": 99.07200187444687, + "p95": 100.80000013113022, + "p99": 104.41599786281586 + }, + "combine": { + "p50": 88.19200098514557, + "p90": 89.88799899816513, + "p95": 91.10400080680847, + "p99": 94.94400024414062 + }, + "roundtrip": { + "p50": 161.24799847602844, + "p90": 164.2879992723465, + "p95": 165.72800278663635, + "p99": 170.3999936580658 + }, + "isolatedSum": { + "p50": 184.12800133228302, + "p90": 188.960000872612, + "p95": 191.9040009379387, + "p99": 199.35999810695648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 122.97599762678146, + "p90": 126.30400061607361, + "p95": 127.23200023174286, + "p99": 131.96800649166107 + }, + "combine": { + "p50": 106.20799660682678, + "p90": 107.84000158309937, + "p95": 108.60799998044968, + "p99": 115.29599875211716 + }, + "roundtrip": { + "p50": 201.12000405788422, + "p90": 204.51200008392334, + "p95": 205.56800067424774, + "p99": 209.3760073184967 + }, + "isolatedSum": { + "p50": 229.18399423360825, + "p90": 234.14400219917297, + "p95": 235.84000021219254, + "p99": 247.26400524377823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 150.9760022163391, + "p90": 156.41599893569946, + "p95": 157.69599378108978, + "p99": 160.22400557994843 + }, + "combine": { + "p50": 146.11199498176575, + "p90": 148.00000190734863, + "p95": 149.02399480342865, + "p99": 154.36799824237823 + }, + "roundtrip": { + "p50": 267.8079903125763, + "p90": 272.12798595428467, + "p95": 273.6000120639801, + "p99": 278.0480086803436 + }, + "isolatedSum": { + "p50": 297.08799719810486, + "p90": 304.4160008430481, + "p95": 306.71998858451843, + "p99": 314.59200382232666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 208.8959962129593, + "p90": 213.50400149822235, + "p95": 214.88000452518463, + "p99": 218.52800250053406 + }, + "combine": { + "p50": 223.39199483394623, + "p90": 229.15199398994446, + "p95": 230.56000471115112, + "p99": 232.35200345516205 + }, + "roundtrip": { + "p50": 407.45601058006287, + "p90": 412.1600091457367, + "p95": 413.4719967842102, + "p99": 417.1839952468872 + }, + "isolatedSum": { + "p50": 432.2879910469055, + "p90": 442.6559954881668, + "p95": 445.44000923633575, + "p99": 450.8800059556961 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 325.56799054145813, + "p90": 331.2639892101288, + "p95": 332.99198746681213, + "p99": 335.999995470047 + }, + "combine": { + "p50": 372.5759983062744, + "p90": 379.2319893836975, + "p95": 380.8639943599701, + "p99": 384.223997592926 + }, + "roundtrip": { + "p50": 
670.5600023269653, + "p90": 676.4159798622131, + "p95": 678.5920262336731, + "p99": 683.0719709396362 + }, + "isolatedSum": { + "p50": 698.1439888477325, + "p90": 710.4959785938263, + "p95": 713.8559818267822, + "p99": 720.223993062973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 570.6239938735962, + "p90": 579.0079832077026, + "p95": 581.2159776687622, + "p99": 584.2239856719971 + }, + "combine": { + "p50": 650.4960060119629, + "p90": 657.3759913444519, + "p95": 660.0959897041321, + "p99": 668.4479713439941 + }, + "roundtrip": { + "p50": 1194.1759586334229, + "p90": 1202.239990234375, + "p95": 1205.4719924926758, + "p99": 1216.863989830017 + }, + "isolatedSum": { + "p50": 1221.119999885559, + "p90": 1236.3839745521545, + "p95": 1241.3119673728943, + "p99": 1252.6719570159912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9b60ff2c", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h100_63e16ccc", + "comparisonKey": "f1ddb5acc1793c04", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:18.630975+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 128.48000228405, + "p90": 145.91999351978302, + "p95": 148.47999811172485, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 121.79200351238251, + "p90": 124.35200065374374, + "p95": 126.20800733566284, + "p99": 131.42399489879608 + }, + "roundtrip": { + "p50": 223.4240025281906, + "p90": 231.48800432682037, + "p95": 232.96000063419342, + "p99": 237.18400299549103 + }, + "isolatedSum": { + "p50": 250.2720057964325, + "p90": 270.27199417352676, + "p95": 274.6880054473877, + "p99": 285.43999791145325 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.63999450206757, + "p90": 168.44800114631653, + "p95": 170.0800061225891, + "p99": 173.08799922466278 + }, + "combine": { + "p50": 172.8000044822693, + "p90": 180.60800433158875, + "p95": 198.43199849128723, + "p99": 362.68800497055054 + }, + "roundtrip": { + "p50": 307.5520098209381, + "p90": 311.8720054626465, + "p95": 313.34400177001953, + "p99": 527.8080105781555 + }, + "isolatedSum": { + "p50": 337.43999898433685, + "p90": 349.0560054779053, + "p95": 368.51200461387634, + "p99": 535.7760041952133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.8480007648468, + "p90": 243.58400702476501, + "p95": 245.66400051116943, + "p99": 249.6960014104843 + }, + "combine": { + "p50": 271.67999744415283, + "p90": 290.97598791122437, + "p95": 301.0239899158478, + "p99": 353.0240058898926 + }, + "roundtrip": { + "p50": 484.47999358177185, + "p90": 488.95999789237976, + "p95": 490.55999517440796, + "p99": 494.1439926624298 + }, + "isolatedSum": { + "p50": 510.52799820899963, + "p90": 534.5599949359894, + "p95": 546.6879904270172, + "p99": 602.7200073003769 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 391.4240002632141, 
+ "p90": 396.8319892883301, + "p95": 398.4319865703583, + "p99": 401.95199847221375 + }, + "combine": { + "p50": 456.38400316238403, + "p90": 461.5679979324341, + "p95": 463.71200680732727, + "p99": 467.26399660110474 + }, + "roundtrip": { + "p50": 821.1519718170166, + "p90": 827.5200128555298, + "p95": 829.2800188064575, + "p99": 833.7600231170654 + }, + "isolatedSum": { + "p50": 847.8080034255981, + "p90": 858.3999872207642, + "p95": 862.1439933776855, + "p99": 869.2159950733185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 713.4720087051392, + "p90": 730.400025844574, + "p95": 733.3760261535645, + "p99": 740.3519749641418 + }, + "combine": { + "p50": 826.3999819755554, + "p90": 833.0240249633789, + "p95": 836.031973361969, + "p99": 873.0559945106506 + }, + "roundtrip": { + "p50": 1507.3920488357544, + "p90": 1514.240026473999, + "p95": 1516.800045967102, + "p99": 1520.7359790802002 + }, + "isolatedSum": { + "p50": 1539.8719906806946, + "p90": 1563.4240508079529, + "p95": 1569.4079995155334, + "p99": 1613.4079694747925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1364.4479513168335, + "p90": 1373.91996383667, + "p95": 1377.5999546051025, + "p99": 1382.7840089797974 + }, + "combine": { + "p50": 1544.0959930419922, + "p90": 1551.6159534454346, + "p95": 1554.4960498809814, + "p99": 1564.9280548095703 + }, + "roundtrip": { + "p50": 2881.279945373535, + "p90": 2890.3679847717285, + "p95": 2893.631935119629, + 
"p99": 2924.2238998413086 + }, + "isolatedSum": { + "p50": 2908.5439443588257, + "p90": 2925.5359172821045, + "p95": 2932.096004486084, + "p99": 2947.7120637893677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-228036e3", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_7fe650e2", + "comparisonKey": "c719655eca0b4564", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:39.632945+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.94400352239609, + "p90": 118.27199906110764, + "p95": 120.19199877977371, + "p99": 168.70400309562683 + }, + "combine": { + "p50": 108.31999778747559, + "p90": 113.0559965968132, + "p95": 113.56800049543381, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 198.36799800395966, + "p90": 202.39999890327454, + "p95": 204.44799959659576, + "p99": 206.68800175189972 + }, + "isolatedSum": { + "p50": 223.26400130987167, + "p90": 231.32799565792084, + "p95": 233.75999927520752, + "p99": 283.9680016040802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.48800480365753, + "p90": 149.1200029850006, + "p95": 150.27199685573578, + "p99": 154.11199629306793 + }, + "combine": { + "p50": 155.42399883270264, + "p90": 160.38399934768677, + "p95": 161.5999937057495, + "p99": 164.8319959640503 + }, + "roundtrip": { + "p50": 267.520010471344, + "p90": 272.5760042667389, + "p95": 276.1920094490051, + "p99": 596.127986907959 + }, + "isolatedSum": { + "p50": 298.91200363636017, + "p90": 309.5040023326874, + "p95": 311.8719905614853, + "p99": 318.9439922571182 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.6720006465912, + "p90": 205.1520049571991, + "p95": 206.14400506019592, + "p99": 208.80000293254852 + }, + "combine": { + "p50": 230.6559979915619, + "p90": 235.83999276161194, + "p95": 237.69600689411163, + "p99": 239.74399268627167 + }, + "roundtrip": { + "p50": 407.00799226760864, + "p90": 411.9360148906708, + "p95": 414.0160083770752, + "p99": 418.0479943752289 + }, + "isolatedSum": { + "p50": 431.3279986381531, + "p90": 440.99199771881104, + "p95": 443.84001195430756, + "p99": 448.5439956188202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 306.7519962787628, + "p90": 311.5200102329254, + "p95": 312.8960132598877, + "p99": 317.0880079269409 + }, + "combine": { + "p50": 372.5759983062744, + "p90": 379.2319893836975, + "p95": 381.47199153900146, + "p99": 384.70399379730225 + }, + "roundtrip": { + "p50": 654.2400121688843, + "p90": 661.791980266571, + "p95": 664.3199920654297, + "p99": 668.0960059165955 + }, + "isolatedSum": { + "p50": 679.3279945850372, + "p90": 690.7519996166229, + "p95": 694.3680047988892, + "p99": 701.7920017242432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 526.528000831604, 
+ "p90": 536.6399884223938, + "p95": 540.4800176620483, + "p99": 547.9360222816467 + }, + "combine": { + "p50": 635.807991027832, + "p90": 643.5199975967407, + "p95": 645.8560228347778, + "p99": 651.199996471405 + }, + "roundtrip": { + "p50": 1131.775975227356, + "p90": 1142.3039436340332, + "p95": 1145.0560092926025, + "p99": 1152.0960330963135 + }, + "isolatedSum": { + "p50": 1162.335991859436, + "p90": 1180.1599860191345, + "p95": 1186.3360404968262, + "p99": 1199.1360187530518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 982.7839732170105, + "p90": 1003.8720369338989, + "p95": 1009.7919702529907, + "p99": 1022.5280523300171 + }, + "combine": { + "p50": 1151.4559984207153, + "p90": 1159.9680185317993, + "p95": 1162.816047668457, + "p99": 1168.3520078659058 + }, + "roundtrip": { + "p50": 2094.496011734009, + "p90": 2109.0240478515625, + "p95": 2113.3759021759033, + "p99": 2122.112035751343 + }, + "isolatedSum": { + "p50": 2134.239971637726, + "p90": 2163.8400554656982, + "p95": 2172.6080179214478, + "p99": 2190.880060195923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e2de189c", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_11265150", + "comparisonKey": "1e6b21bda03c0002", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:14.535876+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.15199732780457, + "p90": 116.5120005607605, + "p95": 118.8800036907196, + "p99": 123.29600006341934 + }, + "combine": { + "p50": 107.77600109577179, + "p90": 113.43999952077866, + "p95": 
115.74400216341019, + "p99": 124.4800016283989 + }, + "roundtrip": { + "p50": 196.86399400234222, + "p90": 200.8640021085739, + "p95": 202.59200036525726, + "p99": 208.12800526618958 + }, + "isolatedSum": { + "p50": 220.92799842357635, + "p90": 229.95200008153915, + "p95": 234.6240058541298, + "p99": 247.77600169181824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.90400052070618, + "p90": 149.34399724006653, + "p95": 150.68799257278442, + "p99": 155.2640050649643 + }, + "combine": { + "p50": 152.54400670528412, + "p90": 156.15999698638916, + "p95": 157.1200042963028, + "p99": 161.02400422096252 + }, + "roundtrip": { + "p50": 268.0639922618866, + "p90": 272.352010011673, + "p95": 273.72801303863525, + "p99": 277.3439884185791 + }, + "isolatedSum": { + "p50": 296.4480072259903, + "p90": 305.5039942264557, + "p95": 307.8079968690872, + "p99": 316.2880092859268 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 199.35999810695648, + "p90": 203.42400670051575, + "p95": 205.28000593185425, + "p99": 211.16800606250763 + }, + "combine": { + "p50": 231.10400140285492, + "p90": 237.5040054321289, + "p95": 238.5600060224533, + "p99": 240.6720072031021 + }, + "roundtrip": { + "p50": 402.52798795700073, + "p90": 407.51999616622925, + "p95": 409.11999344825745, + "p99": 412.416011095047 + }, + "isolatedSum": { + "p50": 430.4639995098114, + "p90": 440.92801213264465, + "p95": 443.84001195430756, + "p99": 451.84001326560974 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 302.5279939174652, + "p90": 307.16800689697266, + "p95": 308.6720108985901, + "p99": 311.5839958190918 + }, + "combine": { + "p50": 368.19198727607727, + "p90": 375.5519986152649, + "p95": 377.53599882125854, + "p99": 383.0080032348633 + }, + "roundtrip": { + "p50": 643.5520052909851, + "p90": 650.2400040626526, + "p95": 652.6399850845337, + "p99": 657.9520106315613 + }, + "isolatedSum": { + "p50": 670.7199811935425, + "p90": 682.7200055122375, + "p95": 686.2080097198486, + "p99": 694.5919990539551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 524.6719717979431, + "p90": 532.800018787384, + "p95": 536.0000133514404, + "p99": 542.8799986839294 + }, + "combine": { + "p50": 629.9840211868286, + "p90": 637.8239989280701, + "p95": 639.5840048789978, + "p99": 643.1040167808533 + }, + "roundtrip": { + "p50": 1126.9760131835938, + "p90": 1137.9519701004028, + "p95": 1141.3439512252808, + "p99": 1172.1919775009155 + }, + "isolatedSum": { + "p50": 1154.6559929847717, + "p90": 1170.624017715454, + "p95": 1175.5840182304382, + "p99": 1185.9840154647827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1010.2399587631226, + "p90": 1031.9360494613647, + "p95": 1037.11998462677, + "p99": 1045.151948928833 + }, + "combine": { + "p50": 1149.407982826233, + "p90": 1159.6159934997559, + "p95": 1163.8400554656982, + "p99": 1244.8320388793945 + }, + "roundtrip": { + "p50": 2116.960048675537, + "p90": 2138.6559009552, + "p95": 2143.3920860290527, + "p99": 2153.6641120910645 + }, + "isolatedSum": { + "p50": 2159.6479415893555, + "p90": 2191.5520429611206, + "p95": 2200.9600400924683, + "p99": 2289.9839878082275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-75469a33", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_c103770d", + "comparisonKey": "787c2a24720b9018", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:23.806495+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + 
"deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.24000012874603, + "p90": 131.96800649166107, + "p95": 132.9919993877411, + "p99": 134.8479986190796 + }, + "combine": { + "p50": 114.3679991364479, + "p90": 116.03199690580368, + "p95": 116.38399958610535, + "p99": 122.04799801111221 + }, + "roundtrip": { + "p50": 215.7759964466095, + "p90": 219.9999988079071, + "p95": 221.18400037288666, + "p99": 224.2240011692047 + }, + "isolatedSum": { + "p50": 240.60799926519394, + "p90": 248.00000339746475, + "p95": 249.37599897384644, + "p99": 256.8959966301918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.03999722003937, + "p90": 166.17600619792938, + "p95": 167.9680049419403, + "p99": 171.26399278640747 + }, + "combine": { + "p50": 163.90399634838104, + "p90": 166.04800522327423, + "p95": 167.87199676036835, + "p99": 172.992005944252 + }, + "roundtrip": 
{ + "p50": 296.28801345825195, + "p90": 301.7919957637787, + "p95": 303.1359910964966, + "p99": 305.82401156425476 + }, + "isolatedSum": { + "p50": 326.9439935684204, + "p90": 332.2240114212036, + "p95": 335.84000170230865, + "p99": 344.2559987306595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.5359982252121, + "p90": 242.17599630355835, + "p95": 244.00000274181366, + "p99": 248.1600046157837 + }, + "combine": { + "p50": 263.10399174690247, + "p90": 267.96799898147583, + "p95": 268.95999908447266, + "p99": 271.1679935455322 + }, + "roundtrip": { + "p50": 473.6959934234619, + "p90": 478.68800163269043, + "p95": 480.22401332855225, + "p99": 483.8080108165741 + }, + "isolatedSum": { + "p50": 500.63998997211456, + "p90": 510.1439952850342, + "p95": 512.9600018262863, + "p99": 519.3279981613159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 376.73598527908325, + "p90": 382.4959993362427, + "p95": 384.4799995422363, + "p99": 389.8560106754303 + }, + "combine": { + "p50": 443.9679980278015, + "p90": 448.8320052623749, + "p95": 450.27199387550354, + "p99": 454.3359875679016 + }, + "roundtrip": { + "p50": 795.0080037117004, + "p90": 800.9920120239258, + "p95": 802.2400140762329, + "p99": 809.4080090522766 + }, + "isolatedSum": { + "p50": 820.7039833068848, + "p90": 831.3280045986176, + "p95": 834.7519934177399, + "p99": 844.1919982433319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + 
"combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 663.5839939117432, + "p90": 671.5520024299622, + "p95": 674.0800142288208, + "p99": 678.3040165901184 + }, + "combine": { + "p50": 805.9840202331543, + "p90": 813.2799863815308, + "p95": 814.9120211601257, + "p99": 818.560004234314 + }, + "roundtrip": { + "p50": 1440.000057220459, + "p90": 1447.3600387573242, + "p95": 1449.3759870529175, + "p99": 1454.751968383789 + }, + "isolatedSum": { + "p50": 1469.5680141448975, + "p90": 1484.831988811493, + "p95": 1488.9920353889465, + "p99": 1496.8640208244324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1256.5439939498901, + "p90": 1266.9440507888794, + "p95": 1270.1120376586914, + "p99": 1280.4160118103027 + }, + "combine": { + "p50": 1516.1919593811035, + "p90": 1523.9360332489014, + "p95": 1526.7200469970703, + "p99": 1536.5439653396606 + }, + "roundtrip": { + "p50": 2745.8560466766357, + "p90": 2756.0319900512695, + "p95": 2759.7439289093018, + "p99": 2764.064073562622 + }, + "isolatedSum": { + "p50": 2772.7359533309937, + "p90": 2790.8800840377808, + "p95": 2796.8320846557617, + "p99": 2816.9599771499634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c18e08ed", + "identity": 
"h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_b2b419cf", + "comparisonKey": "b55bd2ff771db02c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:20.991237+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.18399888277054, + "p90": 123.48800152540207, + "p95": 124.89599734544754, + "p99": 129.31199371814728 + }, + "combine": { + "p50": 107.58399963378906, + "p90": 109.95200276374817, + "p95": 113.8560026884079, + "p99": 115.93600362539291 + }, + "roundtrip": { + "p50": 210.24000644683838, + "p90": 213.53599429130554, + "p95": 214.91199731826782, + "p99": 220.86399793624878 + }, + "isolatedSum": { + "p50": 228.7679985165596, + "p90": 233.44000428915024, + "p95": 238.75200003385544, + "p99": 245.2479973435402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 165.95199704170227, + "p90": 168.5439944267273, + "p95": 169.24799978733063, + "p99": 172.28800058364868 + }, + "combine": { + "p50": 154.88000214099884, + "p90": 156.76799416542053, + "p95": 157.18400478363037, + "p99": 162.4000072479248 + }, + "roundtrip": { + "p50": 286.655992269516, + "p90": 290.8799946308136, + "p95": 291.9999957084656, + "p99": 295.23199796676636 + }, + "isolatedSum": { + "p50": 320.8319991827011, + "p90": 325.3119885921478, + "p95": 326.432004570961, + "p99": 334.6880078315735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 232.96000063419342, + "p90": 237.34399676322937, + "p95": 239.48800563812256, + "p99": 242.20800399780273 + }, + "combine": { + "p50": 248.73599410057068, + "p90": 254.04798984527588, + "p95": 
255.0399899482727, + "p99": 257.56800174713135 + }, + "roundtrip": { + "p50": 458.9439928531647, + "p90": 463.9360010623932, + "p95": 465.1840031147003, + "p99": 469.2800045013428 + }, + "isolatedSum": { + "p50": 481.6959947347641, + "p90": 491.39198660850525, + "p95": 494.52799558639526, + "p99": 499.7760057449341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 376.76799297332764, + "p90": 382.6879858970642, + "p95": 386.27201318740845, + "p99": 531.8400263786316 + }, + "combine": { + "p50": 429.3439984321594, + "p90": 434.33600664138794, + "p95": 436.8959963321686, + "p99": 440.95999002456665 + }, + "roundtrip": { + "p50": 776.0000228881836, + "p90": 781.8880081176758, + "p95": 783.1680178642273, + "p99": 786.2399816513062 + }, + "isolatedSum": { + "p50": 806.1119914054871, + "p90": 817.0239925384521, + "p95": 823.168009519577, + "p99": 972.8000164031982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 658.1439971923828, + "p90": 665.0559902191162, + "p95": 667.1680212020874, + "p99": 670.8160042762756 + }, + "combine": { + "p50": 783.6800217628479, + "p90": 791.0720109939575, + "p95": 794.9439883232117, + "p99": 797.5999712944031 + }, + "roundtrip": { + "p50": 1412.8960371017456, + "p90": 1419.711947441101, + "p95": 1422.2079515457153, + "p99": 1429.152011871338 + }, + "isolatedSum": { + "p50": 1441.8240189552307, + "p90": 1456.1280012130737, + "p95": 1462.112009525299, + "p99": 1468.4159755706787 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1239.1040325164795, + "p90": 1250.5919933319092, + "p95": 1253.9199590682983, + "p99": 1259.775996208191 + }, + "combine": { + "p50": 1464.4479751586914, + "p90": 1471.4879989624023, + "p95": 1474.4000434875488, + "p99": 1479.6160459518433 + }, + "roundtrip": { + "p50": 2675.6160259246826, + "p90": 2684.256076812744, + "p95": 2688.1918907165527, + "p99": 2695.136070251465 + }, + "isolatedSum": { + "p50": 2703.552007675171, + "p90": 2722.0799922943115, + "p95": 2728.320002555847, + "p99": 2739.392042160034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-548e2ce8", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_fe0d696f", + "comparisonKey": "2421cf056b45711f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:03:42.404220+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 
288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.79200220108032, + "p90": 118.72000247240067, + "p95": 120.7360029220581, + "p99": 122.65600264072418 + }, + "combine": { + "p50": 108.12799632549286, + "p90": 112.70400136709213, + "p95": 113.63200098276138, + "p99": 116.06399714946747 + }, + "roundtrip": { + "p50": 196.79999351501465, + "p90": 202.27199792861938, + "p95": 203.23200523853302, + "p99": 207.32800662517548 + }, + "isolatedSum": { + "p50": 221.91999852657318, + "p90": 231.4240038394928, + "p95": 234.3680039048195, + "p99": 238.71999979019165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 
5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 151.13599598407745, + "p90": 154.81600165367126, + "p95": 156.031996011734, + "p99": 161.3759994506836 + }, + "combine": { + "p50": 151.13599598407745, + "p90": 156.76799416542053, + "p95": 157.56799280643463, + "p99": 162.4639928340912 + }, + "roundtrip": { + "p50": 270.2080011367798, + "p90": 275.35998821258545, + "p95": 277.536004781723, + "p99": 280.4799973964691 + }, + "isolatedSum": { + "p50": 302.2719919681549, + "p90": 311.5839958190918, + "p95": 313.59998881816864, + "p99": 323.8399922847748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.96800339221954, + "p90": 207.61600136756897, + "p95": 208.95999670028687, + "p99": 214.1440063714981 + }, + "combine": { + "p50": 230.24000227451324, + "p90": 233.8239997625351, + "p95": 236.03199422359467, + "p99": 238.27199637889862 + }, + "roundtrip": { + "p50": 407.0720076560974, + "p90": 412.2239947319031, + "p95": 413.9840006828308, + "p99": 417.63201355934143 + }, + "isolatedSum": { + "p50": 434.2080056667328, + "p90": 441.44000113010406, + "p95": 444.99199092388153, + "p99": 452.41600275039673 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.0959997177124, + "p90": 316.96000695228577, + "p95": 318.2719945907593, + "p99": 322.01600074768066 + }, + "combine": { + "p50": 371.3279962539673, + "p90": 377.375990152359, + "p95": 
379.10398840904236, + "p99": 383.83999466896057 + }, + "roundtrip": { + "p50": 657.9840183258057, + "p90": 663.3920073509216, + "p95": 665.0239825248718, + "p99": 670.6560254096985 + }, + "isolatedSum": { + "p50": 683.4239959716797, + "p90": 694.3359971046448, + "p95": 697.3759829998016, + "p99": 705.8559954166412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 532.9920053482056, + "p90": 540.8959984779358, + "p95": 543.1680083274841, + "p99": 547.2319722175598 + }, + "combine": { + "p50": 645.3440189361572, + "p90": 652.5760293006897, + "p95": 655.135989189148, + "p99": 666.2399768829346 + }, + "roundtrip": { + "p50": 1152.0320177078247, + "p90": 1161.3119840621948, + "p95": 1163.2959842681885, + "p99": 1167.1359539031982 + }, + "isolatedSum": { + "p50": 1178.3360242843628, + "p90": 1193.4720277786255, + "p95": 1198.303997516632, + "p99": 1213.4719491004944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1010.3039741516113, + "p90": 1029.8880338668823, + "p95": 1039.29603099823, + "p99": 1057.7919483184814 + }, + "combine": { + "p50": 1189.728021621704, + "p90": 1198.7520456314087, + "p95": 1202.5279998779297, + "p99": 1258.3999633789062 + }, + "roundtrip": { + "p50": 2163.360118865967, + "p90": 2179.1040897369385, + "p95": 2182.528018951416, + "p99": 2195.5199241638184 + }, + "isolatedSum": { + "p50": 2200.0319957733154, + "p90": 2228.640079498291, + "p95": 2241.8240308761597, + "p99": 2316.1919116973877 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ccbf6dc7", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_6145a872", + "comparisonKey": "b59d328f50885c6e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:22.233468+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.57600170373917, + "p90": 126.56000256538391, + "p95": 127.45599448680878, + "p99": 131.55199587345123 + }, + "combine": { + "p50": 115.7120019197464, + "p90": 116.73600226640701, + "p95": 122.52800166606903, + "p99": 132.86399841308594 + }, + "roundtrip": { + "p50": 215.61600267887115, + "p90": 219.9999988079071, + "p95": 221.343994140625, + "p99": 227.1679937839508 + }, + "isolatedSum": { + "p50": 236.28800362348557, + "p90": 243.29600483179092, + "p95": 249.9839961528778, + "p99": 264.41599428653717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.35999965667725, + "p90": 168.96000504493713, + "p95": 171.23199999332428, + "p99": 536.9920134544373 + }, + "combine": { + "p50": 169.37600076198578, + "p90": 173.7920045852661, + "p95": 174.97600615024567, + "p99": 177.98399925231934 + }, + "roundtrip": { + "p50": 302.5600016117096, + "p90": 306.62399530410767, + "p95": 307.99999833106995, + "p99": 311.71199679374695 + }, + "isolatedSum": { + "p50": 332.736000418663, + "p90": 342.75200963020325, + "p95": 346.20800614356995, + "p99": 714.9760127067566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 233.08800160884857, + "p90": 239.84000086784363, + "p95": 241.82400107383728, + "p99": 247.0719963312149 + }, + "combine": { + "p50": 264.8960053920746, + "p90": 269.1519856452942, + "p95": 271.232008934021, + "p99": 273.824006319046 + }, + "roundtrip": { + "p50": 472.6400077342987, + "p90": 477.82400250434875, + "p95": 479.74398732185364, + "p99": 482.04800486564636 + }, + "isolatedSum": { + "p50": 497.98400700092316, + "p90": 508.9919865131378, + "p95": 513.0560100078583, + "p99": 520.8960026502609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.4640052318573, + "p90": 381.44001364707947, + "p95": 383.61600041389465, + "p99": 392.89599657058716 + }, + "combine": { + "p50": 446.4319944381714, + "p90": 453.98399233818054, + "p95": 458.5919976234436, + "p99": 578.8480043411255 + }, + "roundtrip": { + "p50": 794.975996017456, + "p90": 801.7600178718567, + "p95": 804.9920201301575, + "p99": 822.9439854621887 + }, + "isolatedSum": { + "p50": 820.8959996700287, + "p90": 835.42400598526, + "p95": 842.2079980373383, + "p99": 971.7440009117126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 666.815996170044, + "p90": 675.1999855041504, + "p95": 677.5040030479431, + "p99": 685.2160096168518 + }, + "combine": { + "p50": 793.9199805259705, + "p90": 800.4159927368164, + "p95": 803.0400276184082, + "p99": 810.7200264930725 + }, + 
"roundtrip": { + "p50": 1433.9519739151, + "p90": 1445.1520442962646, + "p95": 1449.3119716644287, + "p99": 1463.3599519729614 + }, + "isolatedSum": { + "p50": 1460.7359766960144, + "p90": 1475.6159782409668, + "p95": 1480.5440306663513, + "p99": 1495.9360361099243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1261.4400386810303, + "p90": 1273.7599611282349, + "p95": 1277.791976928711, + "p99": 1284.8000526428223 + }, + "combine": { + "p50": 1500.2559423446655, + "p90": 1509.503960609436, + "p95": 1512.4800205230713, + "p99": 1517.9840326309204 + }, + "roundtrip": { + "p50": 2734.272003173828, + "p90": 2745.5999851226807, + "p95": 2749.47190284729, + "p99": 2757.6000690460205 + }, + "isolatedSum": { + "p50": 2761.695981025696, + "p90": 2783.263921737671, + "p95": 2790.271997451782, + "p99": 2802.7840852737427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-edd1aded", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_de79407c", + "comparisonKey": "70ecc39cc06f3875", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:53.464461+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.3119985461235, + "p90": 117.53600090742111, + "p95": 119.90399658679962, + "p99": 123.29600006341934 + }, + "combine": { + "p50": 107.96800255775452, + "p90": 114.20799791812897, + "p95": 115.10399729013443, + "p99": 116.09599739313126 + }, + "roundtrip": { + "p50": 198.36799800395966, + "p90": 202.72000133991241, + "p95": 204.54399287700653, + "p99": 208.8640034198761 + }, + "isolatedSum": { + "p50": 
221.28000110387802, + "p90": 231.74399882555008, + "p95": 235.00799387693405, + "p99": 239.3919974565506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.5839980840683, + "p90": 148.6400067806244, + "p95": 150.4960060119629, + "p99": 153.79199385643005 + }, + "combine": { + "p50": 151.93599462509155, + "p90": 156.8319946527481, + "p95": 157.50400722026825, + "p99": 159.16800498962402 + }, + "roundtrip": { + "p50": 267.7119970321655, + "p90": 272.8959918022156, + "p95": 274.2399871349335, + "p99": 278.56001257896423 + }, + "isolatedSum": { + "p50": 295.51999270915985, + "p90": 305.4720014333725, + "p95": 308.00001323223114, + "p99": 312.9599988460541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 200.3840059041977, + "p90": 205.08800446987152, + "p95": 206.08000457286835, + "p99": 210.78400313854218 + }, + "combine": { + "p50": 231.00799322128296, + "p90": 235.3920042514801, + "p95": 237.47199773788452, + "p99": 240.12799561023712 + }, + "roundtrip": { + "p50": 403.9680063724518, + "p90": 408.80000591278076, + "p95": 410.1119935512543, + "p99": 415.2959883213043 + }, + "isolatedSum": { + "p50": 431.39199912548065, + "p90": 440.4800087213516, + "p95": 443.55200231075287, + "p99": 450.9119987487793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 304.7359883785248, + "p90": 309.4080090522766, + "p95": 310.94399094581604, + "p99": 313.6959969997406 + }, + "combine": { + "p50": 367.35999584198, + "p90": 373.31199645996094, + "p95": 374.91199374198914, + "p99": 377.47201323509216 + }, + "roundtrip": { + "p50": 646.3040113449097, + "p90": 652.0320177078247, + "p95": 654.1759967803955, + "p99": 659.8399877548218 + }, + "isolatedSum": { + "p50": 672.0959842205048, + "p90": 682.7200055122375, + "p95": 685.8559846878052, + "p99": 691.1680102348328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 521.6959714889526, + "p90": 529.9199819564819, + "p95": 532.6079726219177, + "p99": 538.8799905776978 + }, + "combine": { + "p50": 633.0879926681519, + "p90": 639.7759914398193, + "p95": 643.231987953186, + "p99": 700.6400227546692 + }, + "roundtrip": { + "p50": 1127.616047859192, + "p90": 1138.815999031067, + "p95": 1144.7999477386475, + "p99": 1231.6800355911255 + }, + "isolatedSum": { + "p50": 1154.7839641571045, + "p90": 1169.6959733963013, + "p95": 1175.8399605751038, + "p99": 1239.520013332367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 975.3919839859009, + "p90": 992.3840165138245, + "p95": 999.8080134391785, + "p99": 1176.0319471359253 + }, + "combine": { + "p50": 1163.5839939117432, + "p90": 1172.1919775009155, + "p95": 1175.6800413131714, + "p99": 
1183.4239959716797 + }, + "roundtrip": { + "p50": 2103.3918857574463, + "p90": 2118.7520027160645, + "p95": 2124.2880821228027, + "p99": 2135.3919506073 + }, + "isolatedSum": { + "p50": 2138.975977897644, + "p90": 2164.57599401474, + "p95": 2175.48805475235, + "p99": 2359.455943107605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8de1b1a9", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_03e5f4f9", + "comparisonKey": "5a23cf2f0ab9591b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:02:14.660806+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", 
+ "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.98399817943573, + "p90": 132.79999792575836, + "p95": 133.18400084972382, + "p99": 135.23200154304504 + }, + "combine": { + "p50": 114.33599889278412, + "p90": 116.12799763679504, + "p95": 116.48000031709671, + "p99": 121.50400131940842 + }, + "roundtrip": { + "p50": 216.09599888324738, + "p90": 219.9680060148239, + "p95": 221.6320037841797, + "p99": 226.81599855422974 + }, + "isolatedSum": { + "p50": 240.31999707221985, + "p90": 248.9279955625534, + "p95": 249.66400116682053, + "p99": 256.73600286245346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.88000345230103, + "p90": 167.04000532627106, + "p95": 169.3439930677414, + "p99": 301.7599880695343 + }, + "combine": { + "p50": 164.48000073432922, + "p90": 167.9999977350235, + "p95": 173.7920045852661, + "p99": 191.23199582099915 + }, + "roundtrip": { + "p50": 297.4720001220703, + "p90": 302.3679852485657, + "p95": 303.45600843429565, + "p99": 306.5280020236969 + }, + "isolatedSum": { + "p50": 327.36000418663025, + "p90": 
335.04000306129456, + "p95": 343.1359976530075, + "p99": 492.99198389053345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.2480034828186, + "p90": 240.9600019454956, + "p95": 242.08000302314758, + "p99": 247.77600169181824 + }, + "combine": { + "p50": 263.13599944114685, + "p90": 267.2320008277893, + "p95": 268.73600482940674, + "p99": 271.67999744415283 + }, + "roundtrip": { + "p50": 474.2079973220825, + "p90": 478.87998819351196, + "p95": 480.320006608963, + "p99": 483.487993478775 + }, + "isolatedSum": { + "p50": 500.38400292396545, + "p90": 508.1920027732849, + "p95": 510.8160078525543, + "p99": 519.4559991359711 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 377.56800651550293, + "p90": 383.07198882102966, + "p95": 384.95999574661255, + "p99": 387.84000277519226 + }, + "combine": { + "p50": 443.6799883842468, + "p90": 449.0880072116852, + "p95": 451.4879882335663, + "p99": 455.07198572158813 + }, + "roundtrip": { + "p50": 794.6239709854126, + "p90": 800.8319735527039, + "p95": 802.3040294647217, + "p99": 807.2959780693054 + }, + "isolatedSum": { + "p50": 821.2479948997498, + "p90": 832.1599960327148, + "p95": 836.4479839801788, + "p99": 842.9119884967804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 663.7759804725647, + "p90": 672.0640063285828, + "p95": 674.3040084838867, + "p99": 679.423987865448 + }, + "combine": { + "p50": 805.728018283844, + "p90": 811.7759823799133, + "p95": 813.8880133628845, + "p99": 819.0079927444458 + }, + "roundtrip": { + "p50": 1440.7039880752563, + "p90": 1448.2879638671875, + "p95": 1450.719952583313, + "p99": 1454.7200202941895 + }, + "isolatedSum": { + "p50": 1469.5039987564087, + "p90": 1483.839988708496, + "p95": 1488.1920218467712, + "p99": 1498.4319806098938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1256.2559843063354, + "p90": 1267.7439451217651, + "p95": 1271.0720300674438, + "p99": 1284.4159603118896 + }, + "combine": { + "p50": 1517.1840190887451, + "p90": 1526.0159969329834, + "p95": 1527.8400182724, + "p99": 1537.7919673919678 + }, + "roundtrip": { + "p50": 2746.783971786499, + "p90": 2755.808115005493, + "p95": 2759.2320442199707, + "p99": 2764.0960216522217 + }, + "isolatedSum": { + "p50": 2773.4400033950806, + "p90": 2793.7599420547485, + "p95": 2798.9120483398438, + "p99": 2822.2079277038574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b2405b71", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_ce177875", + "comparisonKey": "c09918843f53302d", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T11:02:45.824102+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.13599753379822, + "p90": 120.44800072908401, + "p95": 
122.17599898576736, + "p99": 125.66399574279785 + }, + "combine": { + "p50": 107.68000036478043, + "p90": 113.40799927711487, + "p95": 113.76000195741653, + "p99": 121.05599790811539 + }, + "roundtrip": { + "p50": 197.37599790096283, + "p90": 201.9519954919815, + "p95": 203.3279985189438, + "p99": 206.68800175189972 + }, + "isolatedSum": { + "p50": 222.81599789857864, + "p90": 233.85600000619888, + "p95": 235.9360009431839, + "p99": 246.71999365091324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.42400431632996, + "p90": 148.70400726795197, + "p95": 149.85600113868713, + "p99": 153.28000485897064 + }, + "combine": { + "p50": 154.1759967803955, + "p90": 156.54399991035461, + "p95": 157.18400478363037, + "p99": 160.09600460529327 + }, + "roundtrip": { + "p50": 267.2640085220337, + "p90": 271.93599939346313, + "p95": 272.8320062160492, + "p99": 274.944007396698 + }, + "isolatedSum": { + "p50": 297.60000109672546, + "p90": 305.2480071783066, + "p95": 307.0400059223175, + "p99": 313.3760094642639 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 201.47199928760529, + "p90": 205.1199972629547, + "p95": 205.79199492931366, + "p99": 208.3200067281723 + }, + "combine": { + "p50": 230.3999960422516, + "p90": 235.3920042514801, + "p95": 237.0239943265915, + "p99": 239.51999843120575 + }, + "roundtrip": { + "p50": 404.63998913764954, + "p90": 408.8959991931915, + "p95": 410.2399945259094, + "p99": 414.40001130104065 + }, + "isolatedSum": { 
+ "p50": 431.8719953298569, + "p90": 440.5120015144348, + "p95": 442.81598925590515, + "p99": 447.84000515937805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 302.8480112552643, + "p90": 307.16800689697266, + "p95": 308.9919984340668, + "p99": 312.6719892024994 + }, + "combine": { + "p50": 365.4080033302307, + "p90": 369.6320056915283, + "p95": 370.7520067691803, + "p99": 374.9760091304779 + }, + "roundtrip": { + "p50": 642.1440243721008, + "p90": 647.8400230407715, + "p95": 649.7920155525208, + "p99": 652.4159908294678 + }, + "isolatedSum": { + "p50": 668.256014585495, + "p90": 676.800012588501, + "p95": 679.7440052032471, + "p99": 687.6479983329773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 518.9120173454285, + "p90": 527.679979801178, + "p95": 530.7199954986572, + "p99": 537.056028842926 + }, + "combine": { + "p50": 645.0240015983582, + "p90": 651.9039869308472, + "p95": 654.8799872398376, + "p99": 659.0719819068909 + }, + "roundtrip": { + "p50": 1134.0800523757935, + "p90": 1142.2719955444336, + "p95": 1146.0800170898438, + "p99": 1149.6319770812988 + }, + "isolatedSum": { + "p50": 1163.9360189437866, + "p90": 1179.5839667320251, + "p95": 1185.5999827384949, + "p99": 1196.128010749817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 989.6960258483887, + "p90": 1010.591983795166, + "p95": 1017.1840190887451, + "p99": 1026.5599489212036 + }, + "combine": { + "p50": 1166.208028793335, + "p90": 1174.4320392608643, + "p95": 1177.8240203857422, + "p99": 1183.359980583191 + }, + "roundtrip": { + "p50": 2114.3040657043457, + "p90": 2130.8159828186035, + "p95": 2135.200023651123, + "p99": 2144.6080207824707 + }, + "isolatedSum": { + "p50": 2155.9040546417236, + "p90": 2185.0240230560303, + "p95": 2195.0080394744873, + "p99": 2209.9199295043945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-52c893fd", + "identity": "h100|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_e5da7451", + "comparisonKey": "db57186035a7e73a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:00.785428+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.75200206041336, + "p90": 120.67200243473053, + "p95": 122.5920021533966, + "p99": 125.88800489902496 + }, + "combine": { + "p50": 107.45599865913391, + "p90": 112.89600282907486, + "p95": 113.63200098276138, + "p99": 115.03999680280685 + }, + "roundtrip": { + "p50": 197.82400131225586, + "p90": 203.5519927740097, + "p95": 204.70400154590607, + "p99": 207.96799659729004 + }, + "isolatedSum": { + "p50": 222.20800071954727, + "p90": 233.5680052638054, + "p95": 236.224003136158, + "p99": 240.92800170183182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.60000598430634, + "p90": 148.99200201034546, + "p95": 149.9519944190979, + "p99": 
152.8960019350052 + }, + "combine": { + "p50": 153.72799336910248, + "p90": 156.73600137233734, + "p95": 157.79200196266174, + "p99": 161.85599565505981 + }, + "roundtrip": { + "p50": 268.095999956131, + "p90": 272.15999364852905, + "p95": 273.79199862480164, + "p99": 275.9679853916168 + }, + "isolatedSum": { + "p50": 299.3279993534088, + "p90": 305.7280033826828, + "p95": 307.74399638175964, + "p99": 314.751997590065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 201.47199928760529, + "p90": 205.47200739383698, + "p95": 206.40000700950623, + "p99": 210.24000644683838 + }, + "combine": { + "p50": 229.79199886322021, + "p90": 236.7040067911148, + "p95": 237.56800591945648, + "p99": 251.39200687408447 + }, + "roundtrip": { + "p50": 404.992014169693, + "p90": 409.9839925765991, + "p95": 411.5839898586273, + "p99": 414.2720103263855 + }, + "isolatedSum": { + "p50": 431.2639981508255, + "p90": 442.1760141849518, + "p95": 443.9680129289627, + "p99": 461.63201332092285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 302.8160035610199, + "p90": 307.0400059223175, + "p95": 308.9599907398224, + "p99": 312.8960132598877 + }, + "combine": { + "p50": 364.9280071258545, + "p90": 368.76800656318665, + "p95": 370.5280125141144, + "p99": 375.5519986152649 + }, + "roundtrip": { + "p50": 641.5039896965027, + "p90": 647.7760076522827, + "p95": 649.6319770812988, + "p99": 653.8559794425964 + }, + "isolatedSum": { + "p50": 667.7440106868744, + 
"p90": 675.8080124855042, + "p95": 679.4880032539368, + "p99": 688.4480118751526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 518.01598072052, + "p90": 527.3280143737793, + "p95": 530.7199954986572, + "p99": 537.3439788818359 + }, + "combine": { + "p50": 645.0560092926025, + "p90": 653.8559794425964, + "p95": 656.2880277633667, + "p99": 660.0639820098877 + }, + "roundtrip": { + "p50": 1133.7599754333496, + "p90": 1143.1360244750977, + "p95": 1145.3759670257568, + "p99": 1150.65598487854 + }, + "isolatedSum": { + "p50": 1163.0719900131226, + "p90": 1181.1839938163757, + "p95": 1187.008023262024, + "p99": 1197.4079608917236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 989.1840219497681, + "p90": 1011.8399858474731, + "p95": 1018.1759595870972, + "p99": 1035.9359979629517 + }, + "combine": { + "p50": 1167.2639846801758, + "p90": 1175.2640008926392, + "p95": 1178.0799627304077, + "p99": 1183.6800575256348 + }, + "roundtrip": { + "p50": 2113.503932952881, + "p90": 2129.215955734253, + "p95": 2133.280038833618, + "p99": 2137.02392578125 + }, + "isolatedSum": { + "p50": 2156.448006629944, + "p90": 2187.1039867401123, + "p95": 2196.255922317505, + "p99": 2219.6160554885864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + } + ] + }, + { + "id": "cx-67726011", + "identity": "h100|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_882de7be", + "comparisonKey": "227babaee250a297", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:42.309173+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": 
"2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.45599800348282, + "p90": 107.93600231409073, + "p95": 109.82400178909302, + "p99": 115.00799655914307 + }, + "combine": { + "p50": 107.84000158309937, + "p90": 112.96000331640244, + "p95": 113.76000195741653, + "p99": 126.3359934091568 + }, + "roundtrip": { + "p50": 188.31999599933624, + "p90": 191.9039934873581, + "p95": 192.60799884796143, + "p99": 195.68000733852386 + }, + "isolatedSum": { + "p50": 211.29599958658218, + "p90": 220.89600563049316, + "p95": 223.58400374650955, + "p99": 241.34398996829987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 134.14399325847626, + "p90": 137.79200613498688, + "p95": 139.00800049304962, + "p99": 144.03200149536133 + }, + "combine": { + "p50": 152.0960032939911, + "p90": 155.93600273132324, + "p95": 156.8640023469925, + "p99": 162.52799332141876 + }, + "roundtrip": { + "p50": 255.0719976425171, + "p90": 258.65599513053894, + "p95": 259.93600487709045, + "p99": 263.808012008667 + }, + "isolatedSum": { + "p50": 286.23999655246735, + "p90": 293.7280088663101, + "p95": 295.8720028400421, + "p99": 306.5599948167801 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 188.48000466823578, + "p90": 191.45600497722626, + "p95": 192.6400065422058, + "p99": 196.6399997472763 + }, + "combine": { + "p50": 230.30400276184082, + "p90": 
235.58400571346283, + "p95": 236.28799617290497, + "p99": 242.68800020217896 + }, + "roundtrip": { + "p50": 392.4799859523773, + "p90": 397.8559970855713, + "p95": 399.4880020618439, + "p99": 402.3039937019348 + }, + "isolatedSum": { + "p50": 418.7840074300766, + "p90": 427.0400106906891, + "p95": 428.9280027151108, + "p99": 439.32799994945526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 291.80800914764404, + "p90": 296.640008687973, + "p95": 298.4960079193115, + "p99": 307.2960078716278 + }, + "combine": { + "p50": 365.9839928150177, + "p90": 370.88000774383545, + "p95": 373.3440041542053, + "p99": 377.1519958972931 + }, + "roundtrip": { + "p50": 634.7519755363464, + "p90": 641.3120031356812, + "p95": 644.2880034446716, + "p99": 686.3679885864258 + }, + "isolatedSum": { + "p50": 657.7920019626617, + "p90": 667.5200164318085, + "p95": 671.8400120735168, + "p99": 684.4480037689209 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 504.35197353363037, + "p90": 510.68800687789917, + "p95": 512.5759840011597, + "p99": 517.7599787712097 + }, + "combine": { + "p50": 636.3199949264526, + "p90": 643.1360244750977, + "p95": 645.7920074462891, + "p99": 649.183988571167 + }, + "roundtrip": { + "p50": 1114.240050315857, + "p90": 1122.0799684524536, + "p95": 1124.5440244674683, + "p99": 1133.023977279663 + }, + "isolatedSum": { + "p50": 1140.671968460083, + "p90": 1153.8240313529968, + "p95": 1158.3679914474487, + "p99": 
1166.9439673423767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 964.6080136299133, + "p90": 983.1680059432983, + "p95": 988.4160161018372, + "p99": 996.9279766082764 + }, + "combine": { + "p50": 1164.3520593643188, + "p90": 1174.3359565734863, + "p95": 1177.7280569076538, + "p99": 1182.7199459075928 + }, + "roundtrip": { + "p50": 2089.7281169891357, + "p90": 2105.34405708313, + "p95": 2109.600067138672, + "p99": 2118.4959411621094 + }, + "isolatedSum": { + "p50": 2128.960072994232, + "p90": 2157.5039625167847, + "p95": 2166.144073009491, + "p99": 2179.647922515869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3623bfa7", + "identity": "h100|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_557e3a7d", + "comparisonKey": "686187cf16fdf9b8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:04.181238+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + 
"routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 189.66400623321533, + "p90": 197.6960003376007, + "p95": 203.5519927740097, + "p99": 215.488001704216 + }, + "combine": { + "p50": 79.9039974808693, + "p90": 82.8159973025322, + "p95": 84.51200276613235, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 253.2159984111786, + "p90": 260.9280049800873, + "p95": 263.3279860019684, + "p99": 268.0639922618866 + }, + "isolatedSum": { + "p50": 269.5680037140846, + "p90": 280.5119976401329, + "p95": 288.06399554014206, + "p99": 304.83200401067734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 226.623997092247, + "p90": 233.63199830055237, + "p95": 235.55199801921844, + "p99": 240.48000574111938 + }, + "combine": { + "p50": 104.8320010304451, + "p90": 107.96800255775452, + "p95": 110.1439967751503, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 317.6639974117279, + "p90": 323.3279883861542, + "p95": 325.53601264953613, + "p99": 330.7519853115082 + }, + "isolatedSum": { + "p50": 331.4559981226921, + "p90": 341.6000008583069, + "p95": 345.69599479436874, + "p99": 353.5040020942688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 309.85599756240845, + "p90": 315.71200489997864, + "p95": 317.440003156662, + "p99": 324.38400387763977 + }, + "combine": { + "p50": 163.455992937088, + "p90": 166.4000004529953, + "p95": 167.4560010433197, + "p99": 172.2559928894043 + }, + "roundtrip": { + "p50": 456.28800988197327, + "p90": 462.1120095252991, + "p95": 464.0960097312927, + "p99": 467.6159918308258 + }, + "isolatedSum": { + "p50": 473.31199049949646, + "p90": 482.11200535297394, + "p95": 484.8960041999817, + "p99": 496.63999676704407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 464.4800126552582, + "p90": 469.4400131702423, + "p95": 471.5520143508911, + "p99": 476.51201486587524 + }, + "combine": { + "p50": 271.39198780059814, + "p90": 276.73599123954773, + "p95": 278.0799865722656, + "p99": 281.40801191329956 + }, + "roundtrip": { + "p50": 
718.4640169143677, + "p90": 725.1520156860352, + "p95": 726.5920042991638, + "p99": 732.6080203056335 + }, + "isolatedSum": { + "p50": 735.8720004558563, + "p90": 746.17600440979, + "p95": 749.6320009231567, + "p99": 757.9200267791748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 815.3280019760132, + "p90": 822.5280046463013, + "p95": 824.288010597229, + "p99": 829.5040130615234 + }, + "combine": { + "p50": 466.3679897785187, + "p90": 477.31199860572815, + "p95": 480.54400086402893, + "p99": 483.0720126628876 + }, + "roundtrip": { + "p50": 1262.3039484024048, + "p90": 1275.5520343780518, + "p95": 1279.0720462799072, + "p99": 1288.8319492340088 + }, + "isolatedSum": { + "p50": 1281.6959917545319, + "p90": 1299.8400032520294, + "p95": 1304.832011461258, + "p99": 1312.576025724411 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1541.4079427719116, + "p90": 1545.7279682159424, + "p95": 1547.6160049438477, + "p99": 1550.6559610366821 + }, + "combine": { + "p50": 845.7279801368713, + "p90": 853.7920117378235, + "p95": 855.9039831161499, + "p99": 859.4880104064941 + }, + "roundtrip": { + "p50": 2379.487991333008, + "p90": 2390.0160789489746, + "p95": 2393.440008163452, + "p99": 2401.18408203125 + }, + "isolatedSum": { + "p50": 2387.135922908783, + "p90": 2399.519979953766, + "p95": 2403.5199880599976, + "p99": 2410.1439714431763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 
1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4c72a047", + "identity": "h100|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_557e3a7d", + "comparisonKey": "7c3983d0a03e7a7c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:55.806488+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + 
"run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 196.70400023460388, + "p90": 202.7519941329956, + "p95": 205.1839977502823, + "p99": 214.36800062656403 + }, + "combine": { + "p50": 86.81599795818329, + "p90": 89.05600011348724, + "p95": 90.62399715185165, + "p99": 93.40800344944 + }, + "roundtrip": { + "p50": 267.07199215888977, + "p90": 272.2240090370178, + "p95": 273.98398518562317, + "p99": 280.2239954471588 + }, + "isolatedSum": { + "p50": 283.51999819278717, + "p90": 291.80799424648285, + "p95": 295.80799490213394, + "p99": 307.776004076004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 246.11200392246246, + "p90": 251.2960135936737, + "p95": 253.4399926662445, + "p99": 259.10401344299316 + }, + "combine": { + "p50": 119.1679984331131, + "p90": 121.79200351238251, + "p95": 123.07199835777283, + "p99": 125.88800489902496 + }, + "roundtrip": { + "p50": 349.88799691200256, + "p90": 355.8399975299835, + "p95": 358.68799686431885, + "p99": 368.5759902000427 + }, + "isolatedSum": { + "p50": 365.28000235557556, + "p90": 373.0880171060562, + "p95": 376.51199102401733, + "p99": 384.9920183420181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 339.00800347328186, + "p90": 
344.2560136318207, + "p95": 346.6239869594574, + "p99": 352.7359962463379 + }, + "combine": { + "p50": 185.69600582122803, + "p90": 189.02400135993958, + "p95": 189.98399376869202, + "p99": 192.1599954366684 + }, + "roundtrip": { + "p50": 505.3759813308716, + "p90": 511.2000107765198, + "p95": 513.4400129318237, + "p99": 520.1600193977356 + }, + "isolatedSum": { + "p50": 524.7040092945099, + "p90": 533.2800149917603, + "p95": 536.6079807281494, + "p99": 544.8959916830063 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 515.7759785652161, + "p90": 521.6959714889526, + "p95": 523.7439870834351, + "p99": 528.5120010375977 + }, + "combine": { + "p50": 294.40000653266907, + "p90": 299.0399897098541, + "p95": 300.79999566078186, + "p99": 304.0960133075714 + }, + "roundtrip": { + "p50": 793.5680150985718, + "p90": 800.8000254631042, + "p95": 803.712010383606, + "p99": 808.8319897651672 + }, + "isolatedSum": { + "p50": 810.1759850978851, + "p90": 820.7359611988068, + "p95": 824.5439827442169, + "p99": 832.6080143451691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 956.4800262451172, + "p90": 962.2399806976318, + "p95": 963.7439846992493, + "p99": 967.4239754676819 + }, + "combine": { + "p50": 510.9120011329651, + "p90": 518.1440114974976, + "p95": 520.6080079078674, + "p99": 524.1600275039673 + }, + "roundtrip": { + "p50": 1449.5680332183838, + "p90": 1458.016037940979, + "p95": 1460.5120420455933, + "p99": 1466.1760330200195 
+ }, + "isolatedSum": { + "p50": 1467.3920273780823, + "p90": 1480.3839921951294, + "p95": 1484.3519926071167, + "p99": 1491.5840029716492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1813.3440017700195, + "p90": 1821.4399814605713, + "p95": 1825.4719972610474, + "p99": 1835.103988647461 + }, + "combine": { + "p50": 930.2080273628235, + "p90": 938.6879801750183, + "p95": 941.215991973877, + "p99": 945.6959962844849 + }, + "roundtrip": { + "p50": 2725.503921508789, + "p90": 2737.7920150756836, + "p95": 2741.2478923797607, + "p99": 2747.4238872528076 + }, + "isolatedSum": { + "p50": 2743.552029132843, + "p90": 2760.1279616355896, + "p95": 2766.6879892349243, + "p99": 2780.799984931946 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c30174ef", + "identity": "h100|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_557e3a7d", + "comparisonKey": "c6d6fcd4286b1090", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:49.010811+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 
· fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 206.81600272655487, + "p90": 214.59199488162994, + "p95": 217.53600239753723, + "p99": 405.023992061615 + }, + "combine": { + "p50": 92.73599833250046, + "p90": 96.03200107812881, + "p95": 97.9200005531311, + "p99": 100.22400319576263 + }, + "roundtrip": { + "p50": 282.78398513793945, + "p90": 289.0239953994751, + "p95": 291.4240062236786, + "p99": 307.6480031013489 + }, + "isolatedSum": { + "p50": 299.5520010590553, + "p90": 310.62399595975876, + "p95": 315.45600295066833, + "p99": 505.2479952573776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 
5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 266.400009393692, + "p90": 276.2559950351715, + "p95": 308.6079955101013, + "p99": 657.3439836502075 + }, + "combine": { + "p50": 129.82399761676788, + "p90": 133.53599607944489, + "p95": 136.1600011587143, + "p99": 233.5360050201416 + }, + "roundtrip": { + "p50": 382.52800703048706, + "p90": 388.3199989795685, + "p95": 390.9119963645935, + "p99": 395.9999978542328 + }, + "isolatedSum": { + "p50": 396.2240070104599, + "p90": 409.7919911146164, + "p95": 444.7679966688156, + "p99": 890.8799886703491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 371.64801359176636, + "p90": 377.56800651550293, + "p95": 379.71198558807373, + "p99": 387.10400462150574 + }, + "combine": { + "p50": 203.5519927740097, + "p90": 207.519993185997, + "p95": 208.67200195789337, + "p99": 210.78400313854218 + }, + "roundtrip": { + "p50": 560.6399774551392, + "p90": 569.5040225982666, + "p95": 617.0560121536255, + "p99": 945.5999732017517 + }, + "isolatedSum": { + "p50": 575.2000063657761, + "p90": 585.0879997014999, + "p95": 588.3839875459671, + "p99": 597.8880077600479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 587.1359705924988, + "p90": 591.5200114250183, + "p95": 592.6719903945923, + "p99": 596.9280004501343 + }, + "combine": { + "p50": 
325.0559866428375, + "p90": 329.69599962234497, + "p95": 331.03999495506287, + "p99": 335.87199449539185 + }, + "roundtrip": { + "p50": 894.3039774894714, + "p90": 899.5839953422546, + "p95": 901.2799859046936, + "p99": 906.65602684021 + }, + "isolatedSum": { + "p50": 912.1919572353363, + "p90": 921.2160110473633, + "p95": 923.7119853496552, + "p99": 932.7999949455261 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1095.3600406646729, + "p90": 1100.8000373840332, + "p95": 1102.687954902649, + "p99": 1105.7920455932617 + }, + "combine": { + "p50": 572.6720094680786, + "p90": 578.8159966468811, + "p95": 581.2479853630066, + "p99": 585.312008857727 + }, + "roundtrip": { + "p50": 1650.879979133606, + "p90": 1662.4959707260132, + "p95": 1667.423963546753, + "p99": 1828.4159898757935 + }, + "isolatedSum": { + "p50": 1668.0320501327515, + "p90": 1679.6160340309143, + "p95": 1683.9359402656555, + "p99": 1691.1040544509888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2072.767972946167, + "p90": 2081.631898880005, + "p95": 2083.6799144744873, + "p99": 2088.063955307007 + }, + "combine": { + "p50": 1051.9360303878784, + "p90": 1060.703992843628, + "p95": 1062.9440546035767, + "p99": 1068.287968635559 + }, + "roundtrip": { + "p50": 3168.191909790039, + "p90": 3187.6800060272217, + "p95": 3194.240093231201, + "p99": 3203.295946121216 + }, + "isolatedSum": { + "p50": 3124.7040033340454, + "p90": 3142.335891723633, + "p95": 
3146.623969078064, + "p99": 3156.351923942566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7e97b1ac", + "identity": "h100|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_317edb15", + "comparisonKey": "8f5b1e85a1ce530c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:37.472408+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 89.82399851083755, + "p90": 94.14400160312653, + "p95": 96.25600278377533, + "p99": 99.61599856615067 + }, + "combine": { + "p50": 99.84000027179718, + "p90": 102.4319976568222, + "p95": 104.60799932479858, + "p99": 114.30399864912033 + }, + "roundtrip": { + "p50": 218.23999285697937, + "p90": 223.4240025281906, + "p95": 228.38400304317474, + "p99": 474.62400794029236 + }, + "isolatedSum": { + "p50": 189.66399878263474, + "p90": 196.57599925994873, + "p95": 200.8640021085739, + "p99": 213.919997215271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 107.4879989027977, + "p90": 111.64800077676773, + "p95": 113.47199976444244, + "p99": 116.99199676513672 + }, + "combine": { + "p50": 145.21600306034088, + "p90": 148.44800531864166, + "p95": 150.14399588108063, + "p99": 155.13600409030914 + }, + "roundtrip": { + "p50": 331.5519988536835, + "p90": 335.7119858264923, + "p95": 337.3120129108429, + "p99": 340.64000844955444 + }, + "isolatedSum": { + "p50": 252.70400196313858, + "p90": 260.0960060954094, + "p95": 263.61599564552307, + "p99": 272.12800085544586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 143.26399564743042, + "p90": 148.92800152301788, + "p95": 175.04000663757324, + "p99": 362.8480136394501 + }, + "combine": { + "p50": 224.31999444961548, + "p90": 229.72799837589264, + "p95": 244.25600469112396, + "p99": 373.088002204895 + }, + "roundtrip": { + "p50": 527.7119874954224, + "p90": 532.9599976539612, + "p95": 534.6559882164001, + "p99": 540.4800176620483 + }, + "isolatedSum": { + "p50": 367.5839900970459, + "p90": 378.6559998989105, + "p95": 419.2960113286972, + "p99": 735.9360158443451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 211.5519940853119, + "p90": 215.36000072956085, + "p95": 216.70399606227875, + "p99": 221.24800086021423 + }, + "combine": { + "p50": 360.54399609565735, + "p90": 365.05600810050964, + "p95": 366.2720024585724, + "p99": 370.7199990749359 + }, + "roundtrip": { + "p50": 893.1519985198975, + "p90": 898.8159894943237, + "p95": 900.4480242729187, + "p99": 1068.0639743804932 + }, + "isolatedSum": { + "p50": 572.0959901809692, + "p90": 580.4160088300705, + "p95": 582.9759985208511, + "p99": 591.9679999351501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 347.26399183273315, + "p90": 352.1600067615509, + "p95": 353.7920117378235, + "p99": 357.7919900417328 + }, + "combine": { + "p50": 630.2719712257385, + "p90": 638.4000182151794, + "p95": 640.4160261154175, + "p99": 
645.1200246810913 + }, + "roundtrip": { + "p50": 1619.0400123596191, + "p90": 1628.1280517578125, + "p95": 1630.784034729004, + "p99": 1843.8400030136108 + }, + "isolatedSum": { + "p50": 977.5359630584717, + "p90": 990.5600249767303, + "p95": 994.208037853241, + "p99": 1002.9120147228241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 626.911997795105, + "p90": 634.335994720459, + "p95": 636.1600160598755, + "p99": 639.5840048789978 + }, + "combine": { + "p50": 1157.7919721603394, + "p90": 1165.5360460281372, + "p95": 1167.5200462341309, + "p99": 1174.8160123825073 + }, + "roundtrip": { + "p50": 3074.0480422973633, + "p90": 3086.1759185791016, + "p95": 3090.0158882141113, + "p99": 3097.0559120178223 + }, + "isolatedSum": { + "p50": 1784.7039699554443, + "p90": 1799.8720407485962, + "p95": 1803.6800622940063, + "p99": 1814.4000172615051 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8922e898", + "identity": "h100|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_557e3a7d", + "comparisonKey": "787f870d15a2a168", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:34.633441+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 214.23999965190887, + "p90": 219.61599588394165, + "p95": 221.5680032968521, + "p99": 228.7999987602234 + }, + "combine": { + "p50": 99.71199929714203, + "p90": 102.30399668216705, + "p95": 104.38399761915207, + "p99": 107.10400342941284 + }, + "roundtrip": { + "p50": 300.6080090999603, + "p90": 489.3760085105896, + "p95": 492.15999245643616, + "p99": 497.1520006656647 + }, + "isolatedSum": { + "p50": 313.9519989490509, + "p90": 321.9199925661087, + 
"p95": 325.9520009160042, + "p99": 335.90400218963623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 281.3760042190552, + "p90": 289.34401273727417, + "p95": 295.2960133552551, + "p99": 376.0960102081299 + }, + "combine": { + "p50": 145.1839953660965, + "p90": 148.28799664974213, + "p95": 149.82399344444275, + "p99": 152.8960019350052 + }, + "roundtrip": { + "p50": 412.447988986969, + "p90": 418.2080030441284, + "p95": 420.6399917602539, + "p99": 427.5200068950653 + }, + "isolatedSum": { + "p50": 426.5599995851517, + "p90": 437.6320093870163, + "p95": 445.1200067996979, + "p99": 528.9920121431351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 398.4000086784363, + "p90": 405.56800365448, + "p95": 408.70401263237, + "p99": 416.79999232292175 + }, + "combine": { + "p50": 224.2559939622879, + "p90": 228.70400547981262, + "p95": 229.88800704479218, + "p99": 234.27200317382812 + }, + "roundtrip": { + "p50": 609.9519729614258, + "p90": 616.5760159492493, + "p95": 619.5840239524841, + "p99": 628.7680268287659 + }, + "isolatedSum": { + "p50": 622.6560026407242, + "p90": 634.2720091342926, + "p95": 638.5920196771622, + "p99": 651.0719954967499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, 
+ "dispatch": { + "p50": 658.1439971923828, + "p90": 662.015974521637, + "p95": 663.2000207901001, + "p99": 666.8480038642883 + }, + "combine": { + "p50": 359.51998829841614, + "p90": 363.8080060482025, + "p95": 365.08798599243164, + "p99": 368.1600093841553 + }, + "roundtrip": { + "p50": 1000.5439519882202, + "p90": 1005.9200525283813, + "p95": 1007.8400373458862, + "p99": 1010.8480453491211 + }, + "isolatedSum": { + "p50": 1017.663985490799, + "p90": 1025.8239805698395, + "p95": 1028.2880067825317, + "p99": 1035.0080132484436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1228.2880544662476, + "p90": 1235.0720167160034, + "p95": 1237.5680208206177, + "p99": 1311.776041984558 + }, + "combine": { + "p50": 634.5279812812805, + "p90": 641.215980052948, + "p95": 643.775999546051, + "p99": 648.8000154495239 + }, + "roundtrip": { + "p50": 1836.7680311203003, + "p90": 1844.8959589004517, + "p95": 1847.7439880371094, + "p99": 1858.7839603424072 + }, + "isolatedSum": { + "p50": 1862.816035747528, + "p90": 1876.2879967689514, + "p95": 1881.3440203666687, + "p99": 1960.576057434082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2336.7679119110107, + "p90": 2346.7841148376465, + "p95": 2349.792003631592, + "p99": 2360.9280586242676 + }, + "combine": { + "p50": 1158.5279703140259, + "p90": 1166.4960384368896, + "p95": 1169.0880060195923, + "p99": 1172.4799871444702 + }, + "roundtrip": { + "p50": 3484.031915664673, + "p90": 
3495.680093765259, + "p95": 3499.5200634002686, + "p99": 3510.8160972595215 + }, + "isolatedSum": { + "p50": 3495.2958822250366, + "p90": 3513.280153274536, + "p95": 3518.880009651184, + "p99": 3533.408045768738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2e6dadf7", + "identity": "h100|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_557e3a7d", + "comparisonKey": "e2f12dc48b8c646e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:10.839426+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 222.97599911689758, + "p90": 227.29599475860596, + "p95": 229.63200509548187, + "p99": 232.70399868488312 + }, + "combine": { + "p50": 99.45599734783173, + "p90": 102.14400291442871, + "p95": 104.032002389431, + "p99": 106.9440022110939 + }, + "roundtrip": { + "p50": 309.1520071029663, + "p90": 313.9840066432953, + "p95": 316.44800305366516, + "p99": 320.47998905181885 + }, + "isolatedSum": { + "p50": 322.4319964647293, + "p90": 329.43999767303467, + "p95": 333.6640074849129, + "p99": 339.648000895977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 289.11998867988586, + "p90": 293.66400837898254, + "p95": 295.80798745155334, + "p99": 300.1280128955841 + }, + "combine": { + "p50": 144.25599575042725, + "p90": 147.23199605941772, + "p95": 148.54399859905243, + "p99": 151.61600708961487 + }, + "roundtrip": { + "p50": 421.1199879646301, + "p90": 426.144003868103, + "p95": 428.25600504875183, + "p99": 432.096004486084 + }, + "isolatedSum": { + "p50": 433.3759844303131, + "p90": 440.89600443840027, + "p95": 444.3519860506058, + "p99": 451.744019985199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + 
"combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 407.0720076560974, + "p90": 412.7359986305237, + "p95": 415.039986371994, + "p99": 423.8399863243103 + }, + "combine": { + "p50": 223.10400009155273, + "p90": 226.75199806690216, + "p95": 227.52000391483307, + "p99": 231.58399760723114 + }, + "roundtrip": { + "p50": 613.6320233345032, + "p90": 619.1040277481079, + "p95": 620.9279894828796, + "p99": 626.5280246734619 + }, + "isolatedSum": { + "p50": 630.1760077476501, + "p90": 639.4879966974258, + "p95": 642.5599902868271, + "p99": 655.4239839315414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 657.7919721603394, + "p90": 661.5359783172607, + "p95": 662.4959707260132, + "p99": 666.7199730873108 + }, + "combine": { + "p50": 361.63198947906494, + "p90": 365.88799953460693, + "p95": 367.16800928115845, + "p99": 370.9760010242462 + }, + "roundtrip": { + "p50": 999.7119903564453, + "p90": 1005.0560235977173, + "p95": 1007.1040391921997, + "p99": 1012.671947479248 + }, + "isolatedSum": { + "p50": 1019.4239616394043, + "p90": 1027.4239778518677, + "p95": 1029.6639800071716, + "p99": 1037.695974111557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1220.6720113754272, + "p90": 1226.8160581588745, + "p95": 1230.080008506775, + "p99": 
1531.4559936523438 + }, + "combine": { + "p50": 628.4160017967224, + "p90": 636.896014213562, + "p95": 639.136016368866, + "p99": 647.3600268363953 + }, + "roundtrip": { + "p50": 1828.06396484375, + "p90": 1836.9280099868774, + "p95": 1839.5839929580688, + "p99": 1845.3760147094727 + }, + "isolatedSum": { + "p50": 1849.0880131721497, + "p90": 1863.7120723724365, + "p95": 1869.2160248756409, + "p99": 2178.816020488739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2334.4318866729736, + "p90": 2341.952085494995, + "p95": 2344.9599742889404, + "p99": 2357.8879833221436 + }, + "combine": { + "p50": 1144.927978515625, + "p90": 1153.4719467163086, + "p95": 1156.5120220184326, + "p99": 1161.2800359725952 + }, + "roundtrip": { + "p50": 3466.14408493042, + "p90": 3479.2640209198, + "p95": 3484.287977218628, + "p99": 3523.616075515747 + }, + "isolatedSum": { + "p50": 3479.3598651885986, + "p90": 3495.4240322113037, + "p95": 3501.471996307373, + "p99": 3519.1680192947388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d25fe88c", + "identity": "h100|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_6c115061", + "comparisonKey": "c6ccc081ec9bd6b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:33.582160+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_16", + "sku": "h100", + "backend": "deepep", + "phase": "prefill", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 78.20799946784973, + "p90": 80.92799782752991, + "p95": 82.97599852085114, + "p99": 85.37600189447403 + }, + "combine": { + "p50": 99.29600358009338, + "p90": 101.85600072145462, + "p95": 104.51199859380722, + "p99": 107.96800255775452 + }, + "roundtrip": { + "p50": 206.27200603485107, + "p90": 209.72800254821777, + "p95": 
211.58400177955627, + "p99": 215.64799547195435 + }, + "isolatedSum": { + "p50": 177.50400304794312, + "p90": 182.78399854898453, + "p95": 187.48799711465836, + "p99": 193.34400445222855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 96.0640013217926, + "p90": 99.32799637317657, + "p95": 100.96000134944916, + "p99": 104.16000336408615 + }, + "combine": { + "p50": 145.56799829006195, + "p90": 148.8640010356903, + "p95": 150.30400454998016, + "p99": 154.59200739860535 + }, + "roundtrip": { + "p50": 319.9999928474426, + "p90": 323.90400767326355, + "p95": 325.439989566803, + "p99": 330.3999900817871 + }, + "isolatedSum": { + "p50": 241.63199961185455, + "p90": 248.19199740886688, + "p95": 251.26400589942932, + "p99": 258.7520107626915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 131.48799538612366, + "p90": 135.1040005683899, + "p95": 136.4160031080246, + "p99": 140.06400108337402 + }, + "combine": { + "p50": 224.31999444961548, + "p90": 228.0000001192093, + "p95": 229.72799837589264, + "p99": 233.66400599479675 + }, + "roundtrip": { + "p50": 516.6400074958801, + "p90": 530.5920243263245, + "p95": 538.0160212516785, + "p99": 557.2159886360168 + }, + "isolatedSum": { + "p50": 355.80798983573914, + "p90": 363.1040006875992, + "p95": 366.14400148391724, + "p99": 373.7280070781708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + 
"recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 200.95999538898468, + "p90": 204.99199628829956, + "p95": 206.33600652217865, + "p99": 213.05599808692932 + }, + "combine": { + "p50": 361.37598752975464, + "p90": 366.11199378967285, + "p95": 368.19198727607727, + "p99": 372.25601077079773 + }, + "roundtrip": { + "p50": 881.5039992332458, + "p90": 886.847972869873, + "p95": 888.8000249862671, + "p99": 893.5679793357849 + }, + "isolatedSum": { + "p50": 562.3359829187393, + "p90": 571.1039900779724, + "p95": 574.5279937982559, + "p99": 585.312008857727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 333.9200019836426, + "p90": 338.8479948043823, + "p95": 341.3119912147522, + "p99": 347.55200147628784 + }, + "combine": { + "p50": 631.3279867172241, + "p90": 638.4959816932678, + "p95": 644.8959708213806, + "p99": 821.7920064926147 + }, + "roundtrip": { + "p50": 1606.592059135437, + "p90": 1614.7840023040771, + "p95": 1617.8879737854004, + "p99": 1624.7999668121338 + }, + "isolatedSum": { + "p50": 965.2479887008667, + "p90": 977.3439764976501, + "p95": 986.2079620361328, + "p99": 1169.3440079689026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 598.2720255851746, + "p90": 606.0799956321716, + "p95": 609.8880171775818, + "p99": 640.1919722557068 + }, + "combine": { + "p50": 
1161.7920398712158, + "p90": 1172.0000505447388, + "p95": 1174.623966217041, + "p99": 1182.7839612960815 + }, + "roundtrip": { + "p50": 3059.0720176696777, + "p90": 3077.888011932373, + "p95": 3083.872079849243, + "p99": 3106.4000129699707 + }, + "isolatedSum": { + "p50": 1760.0640654563904, + "p90": 1778.0800461769104, + "p95": 1784.5119833946228, + "p99": 1822.9759335517883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a9b1edb", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_930e262d", + "comparisonKey": "04d8dc12f0898400", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:32.965523+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": 
true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 167.4879938364029, + "p90": 173.8239973783493, + "p95": 175.77600479125977, + "p99": 178.847998380661 + }, + "combine": { + "p50": 35.96799820661545, + "p90": 39.48799893260002, + "p95": 41.69600084424019, + "p99": 49.92000013589859 + }, + "roundtrip": { + "p50": 195.8400011062622, + "p90": 202.59200036525726, + "p95": 205.05599677562714, + "p99": 210.4319930076599 + }, + "isolatedSum": { + "p50": 203.45599204301834, + "p90": 213.31199631094933, + "p95": 217.47200563549995, + "p99": 228.7679985165596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 171.83999717235565, + "p90": 293.7279939651489, + "p95": 297.1520125865936, + "p99": 301.7919957637787 + }, + "combine": { + "p50": 80.35200089216232, + "p90": 84.57600325345993, + "p95": 85.60000360012054, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 326.30398869514465, + "p90": 340.92798829078674, + "p95": 342.72000193595886, + "p99": 348.224014043808 + }, + "isolatedSum": { + 
"p50": 252.19199806451797, + "p90": 378.30399721860886, + "p95": 382.7520161867142, + "p99": 390.52799344062805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 170.52799463272095, + "p90": 221.3120013475418, + "p95": 224.2240011692047, + "p99": 228.7359982728958 + }, + "combine": { + "p50": 39.51999917626381, + "p90": 52.51200124621391, + "p95": 53.279999643564224, + "p99": 59.55199897289276 + }, + "roundtrip": { + "p50": 197.85599410533905, + "p90": 260.5440020561218, + "p95": 264.0640139579773, + "p99": 272.70400524139404 + }, + "isolatedSum": { + "p50": 210.04799380898476, + "p90": 273.8240025937557, + "p95": 277.50400081276894, + "p99": 288.2879972457886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 168.35199296474457, + "p90": 174.55999553203583, + "p95": 177.50400304794312, + "p99": 185.7600063085556 + }, + "combine": { + "p50": 39.29600119590759, + "p90": 42.01599955558777, + "p95": 44.28799822926521, + "p99": 49.215998500585556 + }, + "roundtrip": { + "p50": 195.99999487400055, + "p90": 203.0079960823059, + "p95": 205.76000213623047, + "p99": 213.919997215271 + }, + "isolatedSum": { + "p50": 207.64799416065216, + "p90": 216.5759950876236, + "p95": 221.79200127720833, + "p99": 234.97600480914116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 177.76000499725342, + "p90": 182.75199830532074, + "p95": 185.88800728321075, + "p99": 193.95199418067932 + }, + "combine": { + "p50": 42.24000126123428, + "p90": 44.704001396894455, + "p95": 47.040000557899475, + "p99": 51.90400034189224 + }, + "roundtrip": { + "p50": 210.4319930076599, + "p90": 215.29600024223328, + "p95": 219.07199919223785, + "p99": 234.3360036611557 + }, + "isolatedSum": { + "p50": 220.0000062584877, + "p90": 227.4559997022152, + "p95": 232.92800784111023, + "p99": 245.85599452257156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 204.8960030078888, + "p90": 209.6319943666458, + "p95": 211.84000372886658, + "p99": 217.3440009355545 + }, + "combine": { + "p50": 44.224001467227936, + "p90": 48.19199815392494, + "p95": 50.27199909090996, + "p99": 55.58399856090546 + }, + "roundtrip": { + "p50": 239.71199989318848, + "p90": 245.1840043067932, + "p95": 248.06399643421173, + "p99": 260.127991437912 + }, + "isolatedSum": { + "p50": 249.12000447511673, + "p90": 257.82399252057076, + "p95": 262.11200281977654, + "p99": 272.92799949645996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 261.82401180267334, + "p90": 298.2400059700012, + "p95": 301.1839985847473, + "p99": 308.4479868412018 + }, + "combine": { + "p50": 55.84000051021576, + "p90": 66.14399701356888, + "p95": 67.58400052785873, + "p99": 79.52000200748444 + }, + "roundtrip": { + "p50": 305.6960105895996, + 
"p90": 343.3600068092346, + "p95": 347.84001111984253, + "p99": 555.5840134620667 + }, + "isolatedSum": { + "p50": 317.6640123128891, + "p90": 364.3840029835701, + "p95": 368.76799911260605, + "p99": 387.9679888486862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 264.41600918769836, + "p90": 299.80799555778503, + "p95": 304.57600951194763, + "p99": 691.968023777008 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 70.94399631023407, + "p95": 72.60800153017044, + "p99": 77.7600035071373 + }, + "roundtrip": { + "p50": 323.743999004364, + "p90": 328.4800052642822, + "p95": 331.743985414505, + "p99": 343.7120020389557 + }, + "isolatedSum": { + "p50": 332.8000083565712, + "p90": 370.7519918680191, + "p95": 377.1840110421181, + "p99": 769.7280272841454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49bde3cf", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h100_05758067", + "comparisonKey": "586491d6e7aec895", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:19.516432+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 210.01599729061127, + "p90": 218.9760059118271, + "p95": 221.91999852657318, + "p99": 244.54399943351746 + }, + "combine": { + "p50": 50.11200159788132, + "p90": 52.57600173354149, + "p95": 54.9440011382103, + "p99": 59.26400050520897 + }, + "roundtrip": { + "p50": 243.16799640655518, + "p90": 251.42401456832886, + "p95": 254.72000241279602, + "p99": 262.2720003128052 + }, + "isolatedSum": { + "p50": 260.1279988884926, + "p90": 271.5520076453686, + "p95": 276.8639996647835, + "p99": 
303.8079999387264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 223.1999933719635, + "p90": 233.37599635124207, + "p95": 236.64000630378723, + "p99": 243.77599358558655 + }, + "combine": { + "p50": 54.27199974656105, + "p90": 57.11999908089638, + "p95": 58.62399935722351, + "p99": 63.10400366783142 + }, + "roundtrip": { + "p50": 257.4400007724762, + "p90": 264.95999097824097, + "p95": 268.41598749160767, + "p99": 483.7760031223297 + }, + "isolatedSum": { + "p50": 277.47199311852455, + "p90": 290.49599543213844, + "p95": 295.26400566101074, + "p99": 306.87999725341797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 263.45598697662354, + "p90": 270.24000883102417, + "p95": 272.7360129356384, + "p99": 279.55201268196106 + }, + "combine": { + "p50": 81.40800148248672, + "p90": 83.20000022649765, + "p95": 84.22400057315826, + "p99": 89.59999680519104 + }, + "roundtrip": { + "p50": 329.15198802948, + "p90": 336.41600608825684, + "p95": 339.7440016269684, + "p99": 350.23999214172363 + }, + "isolatedSum": { + "p50": 344.86398845911026, + "p90": 353.4400090575218, + "p95": 356.9600135087967, + "p99": 369.1520094871521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1dacf01c", + "identity": 
"h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h100_8d4df269", + "comparisonKey": "45e5c30bbaddf9dc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:44.418519+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + 
"createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 151.67999267578125, + "p90": 156.8640023469925, + "p95": 159.42400693893433, + "p99": 161.8880033493042 + }, + "combine": { + "p50": 35.80800071358681, + "p90": 38.30400109291077, + "p95": 40.64000025391579, + "p99": 42.59200021624565 + }, + "roundtrip": { + "p50": 179.74400520324707, + "p90": 184.51200425624847, + "p95": 187.3600035905838, + "p99": 192.4159973859787 + }, + "isolatedSum": { + "p50": 187.48799338936806, + "p90": 195.16800343990326, + "p95": 200.0640071928501, + "p99": 204.48000356554985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 179.967999458313, + "p90": 221.82400524616241, + "p95": 224.31999444961548, + "p99": 237.18400299549103 + }, + "combine": { + "p50": 43.2640016078949, + "p90": 53.63199859857559, + "p95": 54.23999950289726, + "p99": 59.39200147986412 + }, + "roundtrip": { + "p50": 221.02400660514832, + "p90": 263.16800713539124, + "p95": 267.61600375175476, + "p99": 276.5119969844818 + }, + "isolatedSum": { + "p50": 223.23200106620789, + "p90": 275.456003844738, + "p95": 278.55999395251274, + "p99": 296.57600447535515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 222.75200486183167, + "p90": 256.6080093383789, + "p95": 261.34398579597473, + "p99": 268.5120105743408 + }, + "combine": { + "p50": 73.56800138950348, + "p90": 
78.68800312280655, + "p95": 79.55200225114822, + "p99": 82.20800012350082 + }, + "roundtrip": { + "p50": 298.68799448013306, + "p90": 339.55198526382446, + "p95": 343.29599142074585, + "p99": 357.4720025062561 + }, + "isolatedSum": { + "p50": 296.32000625133514, + "p90": 335.29601246118546, + "p95": 340.89598804712296, + "p99": 350.72001069784164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4c7f0dce", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h100_8a4dedb0", + "comparisonKey": "88f2c93f082720f9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:08.600205+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": 
"not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 164.12800550460815, + "p90": 220.12799978256226, + "p95": 223.26399385929108, + "p99": 229.79199886322021 + }, + "combine": { + "p50": 38.24000060558319, + "p90": 50.81599950790405, + "p95": 51.52000114321709, + "p99": 56.03199824690819 + }, + "roundtrip": { + "p50": 188.4479969739914, + "p90": 254.43199276924133, + "p95": 257.34400749206543, + "p99": 273.5680043697357 + }, + "isolatedSum": { + "p50": 202.36800611019135, + "p90": 270.9439992904663, + "p95": 274.78399500250816, + "p99": 285.8239971101284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 187.6479983329773, + "p90": 229.24800217151642, + "p95": 231.77599906921387, + "p99": 238.75199258327484 + }, + "combine": { + "p50": 46.01600021123886, + "p90": 57.18399956822395, + "p95": 58.27200040221214, + "p99": 63.391998410224915 + }, + "roundtrip": { + "p50": 223.87200593948364, + "p90": 266.2079930305481, + "p95": 268.2879865169525, + 
"p99": 276.12799406051636 + }, + "isolatedSum": { + "p50": 233.66399854421616, + "p90": 286.4320017397404, + "p95": 290.047999471426, + "p99": 302.14399099349976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 231.3919961452484, + "p90": 272.0640003681183, + "p95": 274.56000447273254, + "p99": 282.24000334739685 + }, + "combine": { + "p50": 72.89600372314453, + "p90": 83.3280012011528, + "p95": 83.96799862384796, + "p99": 85.63199639320374 + }, + "roundtrip": { + "p50": 296.7680096626282, + "p90": 341.15201234817505, + "p95": 344.09600496292114, + "p99": 349.11999106407166 + }, + "isolatedSum": { + "p50": 304.28799986839294, + "p90": 355.3920015692711, + "p95": 358.5280030965805, + "p99": 367.8719997406006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02f751c4", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_8b4def43", + "comparisonKey": "0d4a9670ffde50a6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:33.809908+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + 
"epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 157.56799280643463, + "p90": 199.61600005626678, + "p95": 203.16800475120544, + "p99": 209.47200059890747 + }, + "combine": { + "p50": 36.73600032925606, + "p90": 47.10400104522705, + "p95": 47.968000173568726, + "p99": 53.408000618219376 + }, + "roundtrip": { + "p50": 180.28800189495087, + "p90": 229.34399545192719, + "p95": 232.09600150585175, + "p99": 241.82400107383728 + }, + "isolatedSum": { + "p50": 194.3039931356907, + "p90": 246.72000110149384, + "p95": 251.13600492477417, + "p99": 262.88000121712685 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 184.38400328159332, + "p90": 214.27200734615326, + "p95": 219.32800114154816, + "p99": 348.35198521614075 + }, + "combine": { + "p50": 43.83999854326248, + "p90": 49.855999648571014, + "p95": 50.84799975156784, + "p99": 57.5999990105629 + }, + "roundtrip": { + "p50": 218.62399578094482, + "p90": 256.22400641441345, + "p95": 259.71201062202454, + "p99": 265.53601026535034 + }, + "isolatedSum": { + "p50": 228.2240018248558, + "p90": 264.1280069947243, + "p95": 270.176000893116, + "p99": 405.95198422670364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 228.60799729824066, + "p90": 251.8720030784607, + "p95": 255.64798712730408, + "p99": 260.0319981575012 + }, + "combine": { + "p50": 73.82400333881378, + "p90": 78.17599922418594, + "p95": 79.6160027384758, + "p99": 82.65600353479385 + }, + "roundtrip": { + "p50": 296.1280047893524, + "p90": 333.7920010089874, + "p95": 336.544007062912, + "p99": 343.77598762512207 + }, + "isolatedSum": { + "p50": 302.43200063705444, + "p90": 330.04800230264664, + "p95": 335.2639898657799, + "p99": 342.6880016922951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1ab6ad52", + "identity": 
"h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_6a473c2f", + "comparisonKey": "25241da59eb961d0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:33.395851+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 158.81599485874176, + "p90": 165.24800658226013, + "p95": 167.84000396728516, + "p99": 190.2720034122467 + }, + "combine": { + "p50": 36.86400130391121, + "p90": 39.77600112557411, + "p95": 42.27200150489807, + "p99": 47.87199944257736 + }, + "roundtrip": { + "p50": 187.1359944343567, + "p90": 193.1840032339096, + "p95": 195.8719938993454, + "p99": 209.3760073184967 + }, + "isolatedSum": { + "p50": 195.67999616265297, + "p90": 205.02400770783424, + "p95": 210.11200547218323, + "p99": 238.14400285482407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 159.13599729537964, + "p90": 166.24000668525696, + "p95": 168.70400309562683, + "p99": 176.60799622535706 + }, + "combine": { + "p50": 37.151999771595, + "p90": 40.12800008058548, + "p95": 42.27200150489807, + "p99": 48.06400090456009 + }, + "roundtrip": { + "p50": 186.8479996919632, + "p90": 192.35199689865112, + "p95": 195.10400295257568, + "p99": 198.17599654197693 + }, + "isolatedSum": { + "p50": 196.28799706697464, + "p90": 206.36800676584244, + "p95": 210.9760046005249, + "p99": 224.67199712991714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 159.96800363063812, + "p90": 166.07999801635742, + "p95": 169.11999881267548, + "p99": 177.44000256061554 + }, + "combine": { + "p50": 40.863998234272, + "p90": 43.20000112056732, + "p95": 45.31199857592583, + "p99": 49.40799996256828 + }, + 
"roundtrip": { + "p50": 189.66400623321533, + "p90": 195.00799477100372, + "p95": 198.62399995326996, + "p99": 200.95999538898468 + }, + "isolatedSum": { + "p50": 200.83200186491013, + "p90": 209.27999913692474, + "p95": 214.4319973886013, + "p99": 226.84800252318382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 163.26400637626648, + "p90": 168.64000260829926, + "p95": 171.23199999332428, + "p99": 178.3359944820404 + }, + "combine": { + "p50": 41.50399938225746, + "p90": 43.83999854326248, + "p95": 45.791998505592346, + "p99": 50.23999884724617 + }, + "roundtrip": { + "p50": 193.12000274658203, + "p90": 197.91999459266663, + "p95": 200.19200444221497, + "p99": 205.53599298000336 + }, + "isolatedSum": { + "p50": 204.76800575852394, + "p90": 212.48000115156174, + "p95": 217.02399849891663, + "p99": 228.57599332928658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 183.67999792099, + "p90": 188.1919950246811, + "p95": 191.3599967956543, + "p99": 196.6399997472763 + }, + "combine": { + "p50": 42.94399917125702, + "p90": 45.21600157022476, + "p95": 48.35199937224388, + "p99": 53.66399884223938 + }, + "roundtrip": { + "p50": 214.39999341964722, + "p90": 219.04000639915466, + "p95": 221.69600427150726, + "p99": 226.8799990415573 + }, + "isolatedSum": { + "p50": 226.623997092247, + "p90": 233.40799659490585, + "p95": 239.71199616789818, + "p99": 250.3039985895157 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + 
"fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 224.09600019454956, + "p90": 228.41599583625793, + "p95": 231.58399760723114, + "p99": 236.35199666023254 + }, + "combine": { + "p50": 48.64000156521797, + "p90": 51.392000168561935, + "p95": 53.279999643564224, + "p99": 58.27200040221214 + }, + "roundtrip": { + "p50": 261.4080011844635, + "p90": 265.50400257110596, + "p95": 268.15998554229736, + "p99": 272.15999364852905 + }, + "isolatedSum": { + "p50": 272.73600175976753, + "p90": 279.80799600481987, + "p95": 284.86399725079536, + "p99": 294.6239970624447 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 304.7359883785248, + "p90": 337.0560109615326, + "p95": 339.4879996776581, + "p99": 348.7679958343506 + }, + "combine": { + "p50": 65.15199691057205, + "p90": 68.2239979505539, + "p95": 69.95200365781784, + "p99": 73.91999661922455 + }, + "roundtrip": { + "p50": 359.42399501800537, + "p90": 364.8959994316101, + "p95": 367.2640025615692, + "p99": 376.25598907470703 + }, + "isolatedSum": { + "p50": 369.88798528909683, + "p90": 405.2800089120865, + "p95": 409.4400033354759, + "p99": 422.68799245357513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 308.31998586654663, + "p90": 313.4399950504303, + "p95": 316.6399896144867, + "p99": 334.52799916267395 + }, + "combine": { + "p50": 88.06400001049042, + "p90": 
92.00000017881393, + "p95": 94.7519987821579, + "p99": 180.89599907398224 + }, + "roundtrip": { + "p50": 390.8480107784271, + "p90": 396.38400077819824, + "p95": 399.58399534225464, + "p99": 720.2879786491394 + }, + "isolatedSum": { + "p50": 396.38398587703705, + "p90": 405.43999522924423, + "p95": 411.3919883966446, + "p99": 515.4239982366562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7ac86b7b", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_2de185b6", + "comparisonKey": "8c3297b1ad6f3b52", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:34.884020+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + 
"fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 153.21600437164307, + "p90": 160.12799739837646, + "p95": 162.52799332141876, + "p99": 170.04799842834473 + }, + "combine": { + "p50": 33.663999289274216, + "p90": 35.840000957250595, + "p95": 39.36000168323517, + "p99": 55.48800155520439 + }, + "roundtrip": { + "p50": 179.00800704956055, + "p90": 184.9920004606247, + "p95": 187.80800700187683, + "p99": 193.7599927186966 + }, + "isolatedSum": { + "p50": 186.88000366091728, + "p90": 195.96799835562706, + "p95": 201.88799500465393, + "p99": 225.53599998354912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 164.8319959640503, + "p90": 211.84000372886658, + "p95": 214.62400257587433, + "p99": 223.4880030155182 + }, + "combine": { + "p50": 35.90400144457817, + "p90": 50.71999877691269, + "p95": 51.32799968123436, + "p99": 54.4000007212162 + }, + "roundtrip": { + "p50": 191.6159987449646, + "p90": 247.77600169181824, + "p95": 251.13600492477417, + "p99": 289.4720137119293 + }, + 
"isolatedSum": { + "p50": 200.73599740862846, + "p90": 262.56000250577927, + "p95": 265.9520022571087, + "p99": 277.8880037367344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 166.4000004529953, + "p90": 213.72799575328827, + "p95": 217.53600239753723, + "p99": 234.047994017601 + }, + "combine": { + "p50": 36.768000572919846, + "p90": 51.13599821925163, + "p95": 51.67999863624573, + "p99": 57.440001517534256 + }, + "roundtrip": { + "p50": 193.50400567054749, + "p90": 248.19199740886688, + "p95": 251.16801261901855, + "p99": 255.71200251579285 + }, + "isolatedSum": { + "p50": 203.16800102591515, + "p90": 264.8639939725399, + "p95": 269.21600103378296, + "p99": 291.48799553513527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 172.38399386405945, + "p90": 209.34399962425232, + "p95": 213.85599672794342, + "p99": 217.3759937286377 + }, + "combine": { + "p50": 42.14400053024292, + "p90": 44.67200115323067, + "p95": 46.751998364925385, + "p99": 50.144001841545105 + }, + "roundtrip": { + "p50": 202.59200036525726, + "p90": 207.519993185997, + "p95": 210.11200547218323, + "p99": 215.39199352264404 + }, + "isolatedSum": { + "p50": 214.52799439430237, + "p90": 254.016000777483, + "p95": 260.6079950928688, + "p99": 267.5199955701828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { 
+ "id": "cx-f75b0466", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_29d2ddcf", + "comparisonKey": "87b739fdffd1ab91", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:34.362621+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 159.5200002193451, + "p90": 167.10400581359863, + "p95": 170.78399658203125, + "p99": 175.4239946603775 + }, + "combine": { + "p50": 34.46400165557861, + "p90": 37.05599904060364, + "p95": 38.7520007789135, + "p99": 42.047999799251556 + }, + "roundtrip": { + "p50": 186.43200397491455, + "p90": 191.93600118160248, + "p95": 195.2960044145584, + "p99": 200.3840059041977 + }, + "isolatedSum": { + "p50": 193.9840018749237, + "p90": 204.16000485420227, + "p95": 209.53599736094475, + "p99": 217.47199445962906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 160.16000509262085, + "p90": 166.75199568271637, + "p95": 169.95200514793396, + "p99": 184.03199315071106 + }, + "combine": { + "p50": 34.623999148607254, + "p90": 37.728000432252884, + "p95": 38.91199827194214, + "p99": 42.080000042915344 + }, + "roundtrip": { + "p50": 186.27199530601501, + "p90": 191.80800020694733, + "p95": 194.84800100326538, + "p99": 205.6639939546585 + }, + "isolatedSum": { + "p50": 194.7840042412281, + "p90": 204.47999611496925, + "p95": 208.8640034198761, + "p99": 226.1119931936264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 162.84799575805664, + "p90": 212.96000480651855, + "p95": 215.2319997549057, + "p99": 219.200000166893 + }, + "combine": { + 
"p50": 36.67199984192848, + "p90": 51.67999863624573, + "p95": 52.319999784231186, + "p99": 57.08799883723259 + }, + "roundtrip": { + "p50": 187.9040002822876, + "p90": 246.3040053844452, + "p95": 249.40800666809082, + "p99": 255.5519938468933 + }, + "isolatedSum": { + "p50": 199.51999559998512, + "p90": 264.6400034427643, + "p95": 267.5519995391369, + "p99": 276.2879990041256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 162.78399527072906, + "p90": 214.4639939069748, + "p95": 218.1120067834854, + "p99": 224.8000055551529 + }, + "combine": { + "p50": 36.959998309612274, + "p90": 51.93600058555603, + "p95": 52.60799825191498, + "p99": 57.28000029921532 + }, + "roundtrip": { + "p50": 187.83999979496002, + "p90": 247.19999730587006, + "p95": 250.0160038471222, + "p99": 255.295991897583 + }, + "isolatedSum": { + "p50": 199.74399358034134, + "p90": 266.3999944925308, + "p95": 270.7200050354004, + "p99": 282.0800058543682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 163.80800306797028, + "p90": 214.75200355052948, + "p95": 218.23999285697937, + "p99": 223.32799434661865 + }, + "combine": { + "p50": 37.696000188589096, + "p90": 51.93600058555603, + "p95": 52.639998495578766, + "p99": 56.352000683546066 + }, + "roundtrip": { + "p50": 189.15200233459473, + "p90": 249.56800043582916, + "p95": 253.08799743652344, + "p99": 259.7759962081909 + }, + "isolatedSum": { + "p50": 201.50400325655937, + "p90": 266.6880041360855, + "p95": 270.87999135255814, + "p99": 
279.6799950301647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 168.19199919700623, + "p90": 214.23999965190887, + "p95": 217.43999421596527, + "p99": 223.7119972705841 + }, + "combine": { + "p50": 39.135999977588654, + "p90": 51.83999985456467, + "p95": 52.73599922657013, + "p99": 58.20799991488457 + }, + "roundtrip": { + "p50": 194.5279985666275, + "p90": 248.86399507522583, + "p95": 251.42401456832886, + "p99": 256.44800066947937 + }, + "isolatedSum": { + "p50": 207.32799917459488, + "p90": 266.07999950647354, + "p95": 270.1759934425354, + "p99": 281.9199971854687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 189.88800048828125, + "p90": 306.94401264190674, + "p95": 312.7039968967438, + "p99": 322.7519989013672 + }, + "combine": { + "p50": 44.89599913358688, + "p90": 75.93599706888199, + "p95": 76.60800218582153, + "p99": 82.14399963617325 + }, + "roundtrip": { + "p50": 222.27199375629425, + "p90": 259.2960000038147, + "p95": 261.59998774528503, + "p99": 267.8399980068207 + }, + "isolatedSum": { + "p50": 234.78399962186813, + "p90": 382.8800097107887, + "p95": 389.3119990825653, + "p99": 404.89599853754044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 190.94400107860565, + "p90": 228.5120040178299, 
+ "p95": 231.32799565792084, + "p99": 236.09599471092224 + }, + "combine": { + "p50": 52.671998739242554, + "p90": 62.3680017888546, + "p95": 62.912002205848694, + "p99": 64.2239972949028 + }, + "roundtrip": { + "p50": 233.2800030708313, + "p90": 271.90399169921875, + "p95": 274.3360102176666, + "p99": 280.12800216674805 + }, + "isolatedSum": { + "p50": 243.6159998178482, + "p90": 290.8800058066845, + "p95": 294.23999786376953, + "p99": 300.31999200582504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9631a632", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_98df0737", + "comparisonKey": "a601b24931e0b5c1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:40.381529+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 
132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 162.08000481128693, + "p90": 216.73600375652313, + "p95": 222.1119999885559, + "p99": 271.32800221443176 + }, + "combine": { + "p50": 40.41599854826927, + "p90": 53.599998354911804, + "p95": 55.39200082421303, + "p99": 141.79199934005737 + }, + "roundtrip": { + "p50": 190.62399864196777, + "p90": 250.94398856163025, + "p95": 255.42399287223816, + "p99": 360.22400856018066 + }, + "isolatedSum": { + "p50": 202.4960033595562, + "p90": 270.33600211143494, + "p95": 277.50400081276894, + "p99": 413.12000155448914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 241.2160038948059, + "p90": 248.7040013074875, + "p95": 250.5599856376648, + "p99": 259.2960000038147 + }, + "combine": { + "p50": 57.440001517534256, + "p90": 59.487998485565186, + "p95": 60.256000608205795, + "p99": 65.69600105285645 + }, + "roundtrip": { + "p50": 
239.04000222682953, + "p90": 284.5120131969452, + "p95": 287.77599334716797, + "p99": 296.4479923248291 + }, + "isolatedSum": { + "p50": 298.65600541234016, + "p90": 308.1919997930527, + "p95": 310.8159862458706, + "p99": 324.99200105667114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 261.0880136489868, + "p90": 305.7279884815216, + "p95": 309.31198596954346, + "p99": 318.6880052089691 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 84.1279998421669, + "p95": 85.11999994516373, + "p99": 88.3840024471283 + }, + "roundtrip": { + "p50": 330.1759958267212, + "p90": 370.0160086154938, + "p95": 374.62401390075684, + "p99": 424.54400658607483 + }, + "isolatedSum": { + "p50": 336.09601110219955, + "p90": 389.8559883236885, + "p95": 394.4319859147072, + "p99": 407.0720076560974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-95be974e", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h100_59050659", + "comparisonKey": "47f492faa14b3f34", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:05.589110+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 159.04000401496887, + "p90": 164.95999693870544, + "p95": 167.87199676036835, + "p99": 178.46399545669556 + }, + "combine": { + "p50": 39.103999733924866, + "p90": 41.600000113248825, + "p95": 44.79999840259552, + "p99": 52.76799947023392 + }, + "roundtrip": { + "p50": 187.6479983329773, + "p90": 194.33599710464478, + "p95": 196.51199877262115, + "p99": 208.95999670028687 + }, + "isolatedSum": { + "p50": 198.14400374889374, + "p90": 206.55999705195427, + "p95": 
212.67199516296387, + "p99": 231.23199492692947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 201.85600221157074, + "p90": 439.32801485061646, + "p95": 446.1759924888611, + "p99": 523.4240293502808 + }, + "combine": { + "p50": 47.71199822425842, + "p90": 56.48000165820122, + "p95": 57.56799876689911, + "p99": 62.39999830722809 + }, + "roundtrip": { + "p50": 239.3600046634674, + "p90": 284.1919958591461, + "p95": 287.6800000667572, + "p99": 300.1280128955841 + }, + "isolatedSum": { + "p50": 249.56800043582916, + "p90": 495.8080165088177, + "p95": 503.7439912557602, + "p99": 585.8240276575089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 257.4079930782318, + "p90": 301.40799283981323, + "p95": 304.0960133075714, + "p99": 312.25600838661194 + }, + "combine": { + "p50": 73.66400212049484, + "p90": 83.10399949550629, + "p95": 83.71199667453766, + "p99": 90.87999910116196 + }, + "roundtrip": { + "p50": 327.5519907474518, + "p90": 372.3199963569641, + "p95": 376.6399919986725, + "p99": 555.5520057678223 + }, + "isolatedSum": { + "p50": 331.07199519872665, + "p90": 384.5119923353195, + "p95": 387.80800998210907, + "p99": 403.1360074877739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cc5eab8b", + "identity": 
"h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_560501a0", + "comparisonKey": "dac51cb8b692cd63", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:30.419072+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": 
"2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 168.03200542926788, + "p90": 220.15999257564545, + "p95": 222.49600291252136, + "p99": 229.18400168418884 + }, + "combine": { + "p50": 41.31200164556503, + "p90": 51.67999863624573, + "p95": 52.76799947023392, + "p99": 58.33600088953972 + }, + "roundtrip": { + "p50": 196.51199877262115, + "p90": 257.79199600219727, + "p95": 260.6399953365326, + "p99": 266.27200841903687 + }, + "isolatedSum": { + "p50": 209.34400707483292, + "p90": 271.8399912118912, + "p95": 275.2640023827553, + "p99": 287.52000257372856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 244.1280037164688, + "p90": 249.7279942035675, + "p95": 252.57599353790283, + "p99": 279.7439992427826 + }, + "combine": { + "p50": 56.96000158786774, + "p90": 58.720000088214874, + "p95": 60.095999389886856, + "p99": 64.51199948787689 + }, + "roundtrip": { + "p50": 244.1920042037964, + "p90": 283.3920121192932, + "p95": 285.63201427459717, + "p99": 291.23198986053467 + }, + "isolatedSum": { + "p50": 301.08800530433655, + "p90": 308.4479942917824, + "p95": 312.6719929277897, + "p99": 344.2559987306595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 264.51200246810913, + "p90": 300.8959889411926, + "p95": 303.71201038360596, + "p99": 313.3760094642639 + }, + "combine": { + "p50": 76.99199765920639, + "p90": 83.71199667453766, + "p95": 
85.08799970149994, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 334.56000685691833, + "p90": 372.44799733161926, + "p95": 374.6879994869232, + "p99": 381.6640079021454 + }, + "isolatedSum": { + "p50": 341.5040001273155, + "p90": 384.6079856157303, + "p95": 388.8000100851059, + "p99": 400.7680118083954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3cfaa24e", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_57050333", + "comparisonKey": "f8d21753895e7ca4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:55.049673+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": 
false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 164.8000031709671, + "p90": 224.92800652980804, + "p95": 230.14399409294128, + "p99": 234.49599742889404 + }, + "combine": { + "p50": 39.583999663591385, + "p90": 50.71999877691269, + "p95": 52.15999856591225, + "p99": 57.760000228881836 + }, + "roundtrip": { + "p50": 189.53600525856018, + "p90": 195.39199769496918, + "p95": 198.0160027742386, + "p99": 203.80799472332 + }, + "isolatedSum": { + "p50": 204.3840028345585, + "p90": 275.64800530672073, + "p95": 282.30399265885353, + "p99": 292.2559976577759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 205.9520035982132, + "p90": 250.88000297546387, + "p95": 254.33599948883057, + "p99": 268.99200677871704 + }, + "combine": { + "p50": 47.45600000023842, + "p90": 58.14399942755699, + "p95": 59.39200147986412, + "p99": 64.92800265550613 + }, + "roundtrip": { + "p50": 241.60000681877136, + "p90": 287.1040105819702, + "p95": 290.6560003757477, + "p99": 302.65599489212036 + }, + "isolatedSum": { + "p50": 
253.40800359845161, + "p90": 309.02400240302086, + "p95": 313.7280009686947, + "p99": 333.9200094342232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 261.4400088787079, + "p90": 306.2720000743866, + "p95": 309.79201197624207, + "p99": 318.56000423431396 + }, + "combine": { + "p50": 74.14399832487106, + "p90": 83.71199667453766, + "p95": 85.4720026254654, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 329.9520015716553, + "p90": 376.6080141067505, + "p95": 379.71198558807373, + "p99": 394.1439986228943 + }, + "isolatedSum": { + "p50": 335.58400720357895, + "p90": 389.98399674892426, + "p95": 395.26401460170746, + "p99": 408.6720049381256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e3e9ef9", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_1e9bbeb9", + "comparisonKey": "e336c8d26bd26ce7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:30.959446+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-single", 
+ "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 163.32800686359406, + "p90": 222.81600534915924, + "p95": 226.52800381183624, + "p99": 231.9680005311966 + }, + "combine": { + "p50": 36.57599911093712, + "p90": 50.65599828958511, + "p95": 51.263999193906784, + "p99": 55.64799904823303 + }, + "roundtrip": { + "p50": 187.6160055398941, + "p90": 252.128005027771, + "p95": 256.51198625564575, + "p99": 262.36799359321594 + }, + "isolatedSum": { + "p50": 199.90400597453117, + "p90": 273.47200363874435, + "p95": 277.792003005743, + "p99": 287.6159995794296 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + 
"recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 161.47199273109436, + "p90": 167.42399334907532, + "p95": 169.76000368595123, + "p99": 174.01599884033203 + }, + "combine": { + "p50": 35.999998450279236, + "p90": 38.72000053524971, + "p95": 40.863998234272, + "p99": 49.92000013589859 + }, + "roundtrip": { + "p50": 188.51199746131897, + "p90": 194.46399807929993, + "p95": 196.79999351501465, + "p99": 202.65600085258484 + }, + "isolatedSum": { + "p50": 197.4719911813736, + "p90": 206.14399388432503, + "p95": 210.62400192022324, + "p99": 223.93599897623062 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 163.2000058889389, + "p90": 224.35200214385986, + "p95": 228.5439968109131, + "p99": 234.23999547958374 + }, + "combine": { + "p50": 39.07199949026108, + "p90": 50.81599950790405, + "p95": 51.19999870657921, + "p99": 55.23199960589409 + }, + "roundtrip": { + "p50": 189.91999328136444, + "p90": 258.14399123191833, + "p95": 261.21601462364197, + "p99": 266.7520046234131 + }, + "isolatedSum": { + "p50": 202.27200537919998, + "p90": 275.1680016517639, + "p95": 279.7439955174923, + "p99": 289.47199508547783 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 164.70399498939514, + "p90": 222.33599424362183, + "p95": 225.055992603302, + "p99": 229.8559993505478 + }, + "combine": { + "p50": 40.063999593257904, + "p90": 50.56000128388405, + "p95": 
51.45600065588951, + "p99": 55.296000093221664 + }, + "roundtrip": { + "p50": 191.52000546455383, + "p90": 257.1839988231659, + "p95": 260.2880001068115, + "p99": 263.96799087524414 + }, + "isolatedSum": { + "p50": 204.76799458265305, + "p90": 272.8959955275059, + "p95": 276.5119932591915, + "p99": 285.15199944376945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 176.12800002098083, + "p90": 225.2800017595291, + "p95": 227.90400683879852, + "p99": 233.8559925556183 + }, + "combine": { + "p50": 43.42399910092354, + "p90": 51.10400170087814, + "p95": 52.032001316547394, + "p99": 55.67999929189682 + }, + "roundtrip": { + "p50": 207.42399990558624, + "p90": 257.82400369644165, + "p95": 260.4160010814667, + "p99": 264.22399282455444 + }, + "isolatedSum": { + "p50": 219.55199912190437, + "p90": 276.38400346040726, + "p95": 279.9360081553459, + "p99": 289.5359918475151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 203.8400024175644, + "p90": 245.53599953651428, + "p95": 248.51199984550476, + "p99": 254.07999753952026 + }, + "combine": { + "p50": 47.45600000023842, + "p90": 54.30399999022484, + "p95": 55.16799911856651, + "p99": 59.39200147986412 + }, + "roundtrip": { + "p50": 240.92799425125122, + "p90": 283.61600637435913, + "p95": 285.75998544692993, + "p99": 290.20801186561584 + }, + "isolatedSum": { + "p50": 251.2960024178028, + "p90": 299.8399995267391, + "p95": 303.6799989640713, + "p99": 313.4719990193844 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 255.71200251579285, + "p90": 302.0800054073334, + "p95": 304.9280047416687, + "p99": 318.1760013103485 + }, + "combine": { + "p50": 56.89600110054016, + "p90": 67.10399687290192, + "p95": 68.28799843788147, + "p99": 70.75200229883194 + }, + "roundtrip": { + "p50": 303.0720055103302, + "p90": 349.727988243103, + "p95": 353.4719944000244, + "p99": 359.9039912223816 + }, + "isolatedSum": { + "p50": 312.608003616333, + "p90": 369.1840022802353, + "p95": 373.21600317955017, + "p99": 388.92800360918045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 263.10399174690247, + "p90": 307.6159954071045, + "p95": 309.56798791885376, + "p99": 313.79199028015137 + }, + "combine": { + "p50": 74.87999647855759, + "p90": 84.3840017914772, + "p95": 85.28000116348267, + "p99": 87.55200356245041 + }, + "roundtrip": { + "p50": 332.5119912624359, + "p90": 377.1840035915375, + "p95": 379.07201051712036, + "p99": 384.2880129814148 + }, + "isolatedSum": { + "p50": 337.98398822546005, + "p90": 391.9999971985817, + "p95": 394.8479890823364, + "p99": 401.3439938426018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4b7f07a2", + "identity": 
"h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h100_a587a2b5", + "comparisonKey": "8c7c92c3e5383d5d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:42.591295+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 159.45599973201752, + "p90": 166.01599752902985, + "p95": 168.70400309562683, + "p99": 178.52799594402313 + }, + "combine": { + "p50": 35.360001027584076, + "p90": 37.28000074625015, + "p95": 40.063999593257904, + "p99": 43.07200014591217 + }, + "roundtrip": { + "p50": 188.4479969739914, + "p90": 195.3279972076416, + "p95": 197.79199361801147, + "p99": 209.9519968032837 + }, + "isolatedSum": { + "p50": 194.8160007596016, + "p90": 203.29599827528, + "p95": 208.76800268888474, + "p99": 221.5999960899353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 159.64800119400024, + "p90": 166.1120057106018, + "p95": 169.08800601959229, + "p99": 176.86399817466736 + }, + "combine": { + "p50": 35.392001271247864, + "p90": 38.62399980425835, + "p95": 40.89599847793579, + "p99": 47.775998711586 + }, + "roundtrip": { + "p50": 188.48000466823578, + "p90": 194.0159946680069, + "p95": 197.1520036458969, + "p99": 207.61600136756897 + }, + "isolatedSum": { + "p50": 195.0400024652481, + "p90": 204.73600551486015, + "p95": 209.98400449752808, + "p99": 224.63999688625336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 159.67999398708344, + "p90": 166.78400337696075, + "p95": 169.3120002746582, + "p99": 178.46399545669556 + }, + "combine": { 
+ "p50": 38.40000182390213, + "p90": 40.511999279260635, + "p95": 43.23200136423111, + "p99": 46.20800167322159 + }, + "roundtrip": { + "p50": 187.45599687099457, + "p90": 193.08799505233765, + "p95": 196.1279958486557, + "p99": 200.28799772262573 + }, + "isolatedSum": { + "p50": 198.07999581098557, + "p90": 207.2960026562214, + "p95": 212.5440016388893, + "p99": 224.67199712991714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 161.50400042533875, + "p90": 167.58400201797485, + "p95": 170.56000232696533, + "p99": 178.30400168895721 + }, + "combine": { + "p50": 38.656000047922134, + "p90": 41.69600084424019, + "p95": 44.415999203920364, + "p99": 49.44000020623207 + }, + "roundtrip": { + "p50": 189.11999464035034, + "p90": 194.7840005159378, + "p95": 197.63199985027313, + "p99": 201.7280012369156 + }, + "isolatedSum": { + "p50": 200.16000047326088, + "p90": 209.28000286221504, + "p95": 214.9760015308857, + "p99": 227.74400189518929 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 172.5119948387146, + "p90": 178.1120002269745, + "p95": 180.63999712467194, + "p99": 185.88800728321075 + }, + "combine": { + "p50": 41.37599840760231, + "p90": 44.224001467227936, + "p95": 46.46399989724159, + "p99": 52.51200124621391 + }, + "roundtrip": { + "p50": 204.3199986219406, + "p90": 208.8319957256317, + "p95": 211.96800470352173, + "p99": 215.61600267887115 + }, + "isolatedSum": { + "p50": 213.8879932463169, + "p90": 222.33600169420242, + "p95": 227.10399702191353, + 
"p99": 238.40000852942467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.57600736618042, + "p90": 207.13600516319275, + "p95": 210.40000021457672, + "p99": 228.64000499248505 + }, + "combine": { + "p50": 43.90399903059006, + "p90": 46.78399860858917, + "p95": 50.11200159788132, + "p99": 58.9120015501976 + }, + "roundtrip": { + "p50": 234.592005610466, + "p90": 240.63999950885773, + "p95": 245.12000381946564, + "p99": 312.54398822784424 + }, + "isolatedSum": { + "p50": 244.48000639677048, + "p90": 253.92000377178192, + "p95": 260.51200181245804, + "p99": 287.55200654268265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 254.5279860496521, + "p90": 258.9440047740936, + "p95": 260.672003030777, + "p99": 266.55998826026917 + }, + "combine": { + "p50": 54.71999943256378, + "p90": 59.74400043487549, + "p95": 89.88799899816513, + "p99": 276.3519883155823 + }, + "roundtrip": { + "p50": 298.8159954547882, + "p90": 302.72001028060913, + "p95": 306.46398663520813, + "p99": 341.0879969596863 + }, + "isolatedSum": { + "p50": 309.2479854822159, + "p90": 318.6880052089691, + "p95": 350.5600020289421, + "p99": 542.9119765758514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
256.8640112876892, + "p90": 262.9440128803253, + "p95": 267.87200570106506, + "p99": 321.02400064468384 + }, + "combine": { + "p50": 69.66400146484375, + "p90": 72.12799787521362, + "p95": 73.79200309515, + "p99": 78.87999713420868 + }, + "roundtrip": { + "p50": 318.36798787117004, + "p90": 322.4959969520569, + "p95": 326.33599638938904, + "p99": 330.0800025463104 + }, + "isolatedSum": { + "p50": 326.52801275253296, + "p90": 335.07201075553894, + "p95": 341.66400879621506, + "p99": 399.9039977788925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9eaf9a4b", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h100_66b5ad37", + "comparisonKey": "901014bd162fa98e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:59.309253+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 210.91200411319733, + "p90": 216.67200326919556, + "p95": 218.4000015258789, + "p99": 222.46399521827698 + }, + "combine": { + "p50": 51.35999992489815, + "p90": 53.31199988722801, + "p95": 54.30399999022484, + "p99": 59.90400165319443 + }, + "roundtrip": { + "p50": 245.08799612522125, + "p90": 250.75200200080872, + "p95": 253.63200902938843, + "p99": 257.7280104160309 + }, + "isolatedSum": { + "p50": 262.2720040380955, + "p90": 269.98400315642357, + "p95": 272.70400151610374, + "p99": 282.3679968714714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 262.7519965171814, + "p90": 268.99200677871704, + "p95": 272.5760042667389, + "p99": 280.41601181030273 + }, + "combine": { + "p50": 57.95200169086456, + "p90": 61.40799820423126, + "p95": 62.24000081419945, + "p99": 
66.72000139951706 + }, + "roundtrip": { + "p50": 299.9359965324402, + "p90": 305.9839904308319, + "p95": 308.31998586654663, + "p99": 311.67998909950256 + }, + "isolatedSum": { + "p50": 320.70399820804596, + "p90": 330.4000049829483, + "p95": 334.81600508093834, + "p99": 347.1360132098198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 266, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 287.58400678634644, + "p90": 296.57599329948425, + "p95": 299.1360127925873, + "p99": 307.13599920272827 + }, + "combine": { + "p50": 84.99199897050858, + "p90": 91.0400003194809, + "p95": 92.06400066614151, + "p99": 95.93600034713745 + }, + "roundtrip": { + "p50": 362.4640107154846, + "p90": 370.84800004959106, + "p95": 373.663991689682, + "p99": 395.00799775123596 + }, + "isolatedSum": { + "p50": 372.576005756855, + "p90": 387.61599361896515, + "p95": 391.2000134587288, + "p99": 403.0719995498657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 917, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ecbea087", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_abbe2ef1", + "comparisonKey": "bfeed72746113cb2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:33.416966+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 162.56000101566315, + "p90": 169.53599452972412, + "p95": 172.44799435138702, + "p99": 181.5039962530136 + }, + "combine": { + "p50": 35.45600175857544, + "p90": 38.43199834227562, + "p95": 41.08799993991852, + "p99": 44.38399896025658 + }, + "roundtrip": { + "p50": 191.64800643920898, + "p90": 196.70400023460388, + "p95": 200.06400346755981, + "p99": 207.42399990558624 + }, + "isolatedSum": { + "p50": 198.0160027742386, + 
"p90": 207.96799287199974, + "p95": 213.53599429130554, + "p99": 225.8879952132702 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 162.49600052833557, + "p90": 169.21600699424744, + "p95": 172.0000058412552, + "p99": 178.97599935531616 + }, + "combine": { + "p50": 35.58399900794029, + "p90": 38.495998829603195, + "p95": 40.76800122857094, + "p99": 45.791998505592346 + }, + "roundtrip": { + "p50": 191.39200448989868, + "p90": 197.66399264335632, + "p95": 200.15999674797058, + "p99": 206.30399882793427 + }, + "isolatedSum": { + "p50": 198.07999953627586, + "p90": 207.71200582385063, + "p95": 212.76800706982613, + "p99": 224.7679978609085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 165.98400473594666, + "p90": 221.8559980392456, + "p95": 224.57599639892578, + "p99": 229.21599447727203 + }, + "combine": { + "p50": 38.68800029158592, + "p90": 52.2879995405674, + "p95": 53.02400141954422, + "p99": 57.11999908089638 + }, + "roundtrip": { + "p50": 192.03199446201324, + "p90": 257.53599405288696, + "p95": 261.0880136489868, + "p99": 265.5999958515167 + }, + "isolatedSum": { + "p50": 204.67200502753258, + "p90": 274.143997579813, + "p95": 277.59999781847, + "p99": 286.3359935581684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + 
"dispatch": { + "p50": 164.48000073432922, + "p90": 169.98399794101715, + "p95": 172.95999825000763, + "p99": 183.07200074195862 + }, + "combine": { + "p50": 38.495998829603195, + "p90": 41.24800115823746, + "p95": 43.68000105023384, + "p99": 49.056001007556915 + }, + "roundtrip": { + "p50": 191.23199582099915, + "p90": 197.11999595165253, + "p95": 199.20000433921814, + "p99": 205.31199872493744 + }, + "isolatedSum": { + "p50": 202.97599956393242, + "p90": 211.2319990992546, + "p95": 216.63999930024147, + "p99": 232.12800174951553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 173.7920045852661, + "p90": 179.32799458503723, + "p95": 181.5679967403412, + "p99": 189.18399512767792 + }, + "combine": { + "p50": 40.41599854826927, + "p90": 42.78400167822838, + "p95": 45.024000108242035, + "p99": 49.92000013589859 + }, + "roundtrip": { + "p50": 203.80799472332, + "p90": 207.2640061378479, + "p95": 210.30400693416595, + "p99": 213.8880044221878 + }, + "isolatedSum": { + "p50": 214.20800313353539, + "p90": 222.1119962632656, + "p95": 226.59199684858322, + "p99": 239.1039952635765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 202.2400051355362, + "p90": 206.84799551963806, + "p95": 209.1519981622696, + "p99": 213.31200003623962 + }, + "combine": { + "p50": 44.544000178575516, + "p90": 46.30399867892265, + "p95": 48.767998814582825, + "p99": 53.15199866890907 + }, + "roundtrip": { + "p50": 237.37600445747375, + "p90": 241.28000438213348, + "p95": 
244.09599602222443, + "p99": 250.59199333190918 + }, + "isolatedSum": { + "p50": 246.7840053141117, + "p90": 253.15199419856071, + "p95": 257.9199969768524, + "p99": 266.4639987051487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 259.7759962081909, + "p90": 298.2400059700012, + "p95": 301.2799918651581, + "p99": 306.17600679397583 + }, + "combine": { + "p50": 55.776000022888184, + "p90": 65.2799978852272, + "p95": 65.69600105285645, + "p99": 68.80000233650208 + }, + "roundtrip": { + "p50": 304.60798740386963, + "p90": 346.24001383781433, + "p95": 349.2160141468048, + "p99": 352.7359962463379 + }, + "isolatedSum": { + "p50": 315.5519962310791, + "p90": 363.5200038552284, + "p95": 366.9759929180145, + "p99": 374.9760091304779 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 258.0159902572632, + "p90": 264.70398902893066, + "p95": 268.7999904155731, + "p99": 301.08800530433655 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 72.38399982452393, + "p95": 75.87199658155441, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 321.3759958744049, + "p90": 325.82399249076843, + "p95": 328.15998792648315, + "p99": 331.0079872608185 + }, + "isolatedSum": { + "p50": 328.19198817014694, + "p90": 337.0879888534546, + "p95": 344.67198699712753, + "p99": 380.8320090174675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4d3aac45", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_efc996cc", + "comparisonKey": "b9905b4f54992265", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:27.457679+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 154.7199934720993, + "p90": 162.49600052833557, + "p95": 168.06399822235107, + "p99": 228.06400060653687 + }, + "combine": { + "p50": 35.64799949526787, + "p90": 38.62399980425835, + "p95": 40.47999903559685, + "p99": 45.75999826192856 + }, + "roundtrip": { + "p50": 181.40800297260284, + "p90": 187.6479983329773, + "p95": 190.40000438690186, + "p99": 200.00000298023224 + }, + "isolatedSum": { + "p50": 190.36799296736717, + "p90": 201.12000033259392, + "p95": 208.54399725794792, + "p99": 273.8239988684654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 154.78399395942688, + "p90": 162.1440052986145, + "p95": 165.12000560760498, + "p99": 222.52799570560455 + }, + "combine": { + "p50": 35.551998764276505, + "p90": 38.88000175356865, + "p95": 41.56799986958504, + "p99": 48.86399954557419 + }, + "roundtrip": { + "p50": 181.95199966430664, + "p90": 187.58399784564972, + "p95": 190.91199338436127, + "p99": 199.64799284934998 + }, + "isolatedSum": { + "p50": 190.33599272370338, + "p90": 201.02400705218315, + "p95": 206.68800547719002, + "p99": 271.39199525117874 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 155.13600409030914, + "p90": 161.6639941930771, + "p95": 164.76799547672272, + "p99": 176.9919991493225 + }, + 
"combine": { + "p50": 37.88800165057182, + "p90": 40.863998234272, + "p95": 43.39199885725975, + "p99": 54.75199967622757 + }, + "roundtrip": { + "p50": 181.98400735855103, + "p90": 188.6720061302185, + "p95": 190.97599387168884, + "p99": 197.2160041332245 + }, + "isolatedSum": { + "p50": 193.02400574088097, + "p90": 202.5279924273491, + "p95": 208.15999433398247, + "p99": 231.74399882555008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 156.25600516796112, + "p90": 162.49600052833557, + "p95": 165.72800278663635, + "p99": 183.3920031785965 + }, + "combine": { + "p50": 38.27200084924698, + "p90": 40.76800122857094, + "p95": 42.94399917125702, + "p99": 46.04800045490265 + }, + "roundtrip": { + "p50": 182.94399976730347, + "p90": 188.63999843597412, + "p95": 190.75199961662292, + "p99": 196.19199633598328 + }, + "isolatedSum": { + "p50": 194.5280060172081, + "p90": 203.2640017569065, + "p95": 208.67200195789337, + "p99": 229.44000363349915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 161.8880033493042, + "p90": 167.58400201797485, + "p95": 169.8240041732788, + "p99": 174.68799650669098 + }, + "combine": { + "p50": 41.919998824596405, + "p90": 44.16000097990036, + "p95": 46.560000628232956, + "p99": 49.44000020623207 + }, + "roundtrip": { + "p50": 192.89599359035492, + "p90": 197.85599410533905, + "p95": 200.54399967193604, + "p99": 209.82399582862854 + }, + "isolatedSum": { + "p50": 203.8080021739006, + "p90": 211.7440029978752, + "p95": 
216.38400480151176, + "p99": 224.12799671292305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 179.4240027666092, + "p90": 186.3040030002594, + "p95": 189.4720047712326, + "p99": 229.8240065574646 + }, + "combine": { + "p50": 46.36799916625023, + "p90": 49.056001007556915, + "p95": 50.65599828958511, + "p99": 57.21599981188774 + }, + "roundtrip": { + "p50": 215.68000316619873, + "p90": 220.89600563049316, + "p95": 223.61600399017334, + "p99": 231.48800432682037 + }, + "isolatedSum": { + "p50": 225.79200193285942, + "p90": 235.36000400781631, + "p95": 240.12800306081772, + "p99": 287.04000636935234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 215.55200219154358, + "p90": 250.33599138259888, + "p95": 254.2400062084198, + "p99": 277.18400955200195 + }, + "combine": { + "p50": 54.52800169587135, + "p90": 64.41599875688553, + "p95": 64.99200314283371, + "p99": 68.92800331115723 + }, + "roundtrip": { + "p50": 258.2719922065735, + "p90": 264.8319900035858, + "p95": 268.2560086250305, + "p99": 301.56800150871277 + }, + "isolatedSum": { + "p50": 270.08000388741493, + "p90": 314.7519901394844, + "p95": 319.2320093512535, + "p99": 346.1120128631592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { 
+ "p50": 220.768004655838, + "p90": 226.04799270629883, + "p95": 228.4799963235855, + "p99": 237.69600689411163 + }, + "combine": { + "p50": 72.9919970035553, + "p90": 76.12799853086472, + "p95": 78.43200117349625, + "p99": 84.57600325345993 + }, + "roundtrip": { + "p50": 288.60801458358765, + "p90": 293.95198822021484, + "p95": 296.7680096626282, + "p99": 310.33599376678467 + }, + "isolatedSum": { + "p50": 293.7600016593933, + "p90": 302.17599123716354, + "p95": 306.91199749708176, + "p99": 322.27201014757156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ac6323de", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_b4518d9e", + "comparisonKey": "4995815a8f6bae1e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:29.249621+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 
null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 167.77600347995758, + "p90": 176.09600722789764, + "p95": 180.4800033569336, + "p99": 253.63200902938843 + }, + "combine": { + "p50": 34.2399999499321, + "p90": 37.79200091958046, + "p95": 41.08799993991852, + "p99": 50.52800104022026 + }, + "roundtrip": { + "p50": 194.94399428367615, + "p90": 200.44800639152527, + "p95": 203.2639980316162, + "p99": 210.62399446964264 + }, + "isolatedSum": { + "p50": 202.01600342988968, + "p90": 213.8880081474781, + "p95": 221.5680032968521, + "p99": 304.1600100696087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 168.60799491405487, + "p90": 175.20000040531158, + "p95": 178.46399545669556, + "p99": 189.56799805164337 + }, + "combine": { + "p50": 34.04799848794937, + "p90": 37.76000067591667, + "p95": 39.48799893260002, + "p99": 42.78400167822838 + }, + 
"roundtrip": { + "p50": 195.64799964427948, + "p90": 203.19999754428864, + "p95": 206.08000457286835, + "p99": 244.47999894618988 + }, + "isolatedSum": { + "p50": 202.65599340200424, + "p90": 212.96000108122826, + "p95": 217.95199438929558, + "p99": 232.35199972987175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 170.1440066099167, + "p90": 233.95200073719025, + "p95": 237.44000494480133, + "p99": 242.75200068950653 + }, + "combine": { + "p50": 36.19199991226196, + "p90": 52.15999856591225, + "p95": 53.119998425245285, + "p99": 58.687999844551086 + }, + "roundtrip": { + "p50": 197.2160041332245, + "p90": 272.09600806236267, + "p95": 275.29600262641907, + "p99": 279.07198667526245 + }, + "isolatedSum": { + "p50": 206.33600652217865, + "p90": 286.1119993031025, + "p95": 290.5600033700466, + "p99": 301.4400005340576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 169.18399930000305, + "p90": 175.87199807167053, + "p95": 180.4479956626892, + "p99": 244.83199417591095 + }, + "combine": { + "p50": 36.639999598264694, + "p90": 41.21600091457367, + "p95": 46.52800038456917, + "p99": 113.11999708414078 + }, + "roundtrip": { + "p50": 197.37599790096283, + "p90": 203.5199999809265, + "p95": 207.0399969816208, + "p99": 269.76001262664795 + }, + "isolatedSum": { + "p50": 205.82399889826775, + "p90": 217.0879989862442, + "p95": 226.97599604725838, + "p99": 357.9519912600517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 
1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 169.50400173664093, + "p90": 187.16800212860107, + "p95": 212.3199999332428, + "p99": 254.36800718307495 + }, + "combine": { + "p50": 38.656000047922134, + "p90": 42.27200150489807, + "p95": 44.51199993491173, + "p99": 49.40799996256828 + }, + "roundtrip": { + "p50": 196.25599682331085, + "p90": 201.88799500465393, + "p95": 204.19199764728546, + "p99": 208.80000293254852 + }, + "isolatedSum": { + "p50": 208.16000178456306, + "p90": 229.44000363349915, + "p95": 256.8319998681545, + "p99": 303.77600714564323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 171.58399522304535, + "p90": 178.5919964313507, + "p95": 182.3039948940277, + "p99": 233.5360050201416 + }, + "combine": { + "p50": 43.007999658584595, + "p90": 45.60000076889992, + "p95": 47.839999198913574, + "p99": 52.799999713897705 + }, + "roundtrip": { + "p50": 200.51200687885284, + "p90": 207.32800662517548, + "p95": 210.59200167655945, + "p99": 241.7919933795929 + }, + "isolatedSum": { + "p50": 214.59199488162994, + "p90": 224.19199720025063, + "p95": 230.14399409294128, + "p99": 286.3360047340393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 189.98399376869202, + "p90": 238.65599930286407, + "p95": 241.82400107383728, + "p99": 247.51999974250793 + }, + "combine": { + "p50": 
51.77599936723709, + "p90": 60.80000102519989, + "p95": 61.88800185918808, + "p99": 68.38399916887283 + }, + "roundtrip": { + "p50": 229.8240065574646, + "p90": 278.75199913978577, + "p95": 281.5360128879547, + "p99": 286.9440019130707 + }, + "isolatedSum": { + "p50": 241.7599931359291, + "p90": 299.45600032806396, + "p95": 303.71200293302536, + "p99": 315.90399891138077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 198.55999946594238, + "p90": 246.11200392246246, + "p95": 249.2160052061081, + "p99": 451.200008392334 + }, + "combine": { + "p50": 71.10399752855301, + "p90": 79.23199981451035, + "p95": 82.30400085449219, + "p99": 84.73599702119827 + }, + "roundtrip": { + "p50": 261.05600595474243, + "p90": 289.15199637413025, + "p95": 292.5119996070862, + "p99": 297.2800135612488 + }, + "isolatedSum": { + "p50": 269.6639969944954, + "p90": 325.3440037369728, + "p95": 331.5200060606003, + "p99": 535.9360054135323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-37800246", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_ba1c0a90", + "comparisonKey": "e9d3aa98ebb8524a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:40.461864+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + 
"suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 160.5760008096695, + "p90": 168.38400065898895, + "p95": 172.5119948387146, + "p99": 182.8480064868927 + }, + "combine": { + "p50": 36.320000886917114, + "p90": 39.32800143957138, + "p95": 42.11200028657913, + "p99": 49.12000149488449 + }, + "roundtrip": { + "p50": 189.31199610233307, + "p90": 196.99199497699738, + "p95": 
200.41599869728088, + "p99": 245.85600197315216 + }, + "isolatedSum": { + "p50": 196.8960016965866, + "p90": 207.71200209856033, + "p95": 214.62399512529373, + "p99": 231.9680079817772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 160.288006067276, + "p90": 167.67999529838562, + "p95": 171.32799327373505, + "p99": 223.90399873256683 + }, + "combine": { + "p50": 36.448001861572266, + "p90": 39.5519994199276, + "p95": 41.34399816393852, + "p99": 48.00000041723251 + }, + "roundtrip": { + "p50": 188.7039989233017, + "p90": 194.815993309021, + "p95": 198.43199849128723, + "p99": 207.2959989309311 + }, + "isolatedSum": { + "p50": 196.73600792884827, + "p90": 207.23199471831322, + "p95": 212.67199143767357, + "p99": 271.90399914979935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 164.09599781036377, + "p90": 211.4879935979843, + "p95": 214.39999341964722, + "p99": 221.82400524616241 + }, + "combine": { + "p50": 40.32000154256821, + "p90": 51.392000168561935, + "p95": 52.000001072883606, + "p99": 56.03199824690819 + }, + "roundtrip": { + "p50": 190.528005361557, + "p90": 244.54399943351746, + "p95": 246.3040053844452, + "p99": 252.28801369667053 + }, + "isolatedSum": { + "p50": 204.41599935293198, + "p90": 262.87999376654625, + "p95": 266.3999944925308, + "p99": 277.8560034930706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 162.27200627326965, + "p90": 171.29600048065186, + "p95": 177.44000256061554, + "p99": 185.37600338459015 + }, + "combine": { + "p50": 40.063999593257904, + "p90": 43.487999588251114, + "p95": 48.96000027656555, + "p99": 53.63199859857559 + }, + "roundtrip": { + "p50": 190.2720034122467, + "p90": 197.60000705718994, + "p95": 200.6399929523468, + "p99": 214.39999341964722 + }, + "isolatedSum": { + "p50": 202.33600586652756, + "p90": 214.78400006890297, + "p95": 226.4000028371811, + "p99": 239.00800198316574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 175.9680062532425, + "p90": 183.03999304771423, + "p95": 194.4960057735443, + "p99": 452.8000056743622 + }, + "combine": { + "p50": 43.616000562906265, + "p90": 46.560000628232956, + "p95": 50.592001527547836, + "p99": 132.86399841308594 + }, + "roundtrip": { + "p50": 207.07200467586517, + "p90": 214.20800685882568, + "p95": 220.2560007572174, + "p99": 411.16800904273987 + }, + "isolatedSum": { + "p50": 219.58400681614876, + "p90": 229.5999936759472, + "p95": 245.08800730109215, + "p99": 585.6640040874481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 201.82399451732635, + "p90": 207.5520008802414, + "p95": 209.72800254821777, + "p99": 217.75999665260315 + }, + "combine": { + "p50": 46.01600021123886, + "p90": 48.128001391887665, + "p95": 50.175998359918594, + "p99": 59.74400043487549 + 
}, + "roundtrip": { + "p50": 235.6799989938736, + "p90": 241.60000681877136, + "p95": 244.7039932012558, + "p99": 280.9920012950897 + }, + "isolatedSum": { + "p50": 247.83999472856522, + "p90": 255.68000227212906, + "p95": 259.90400090813637, + "p99": 277.50399708747864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 257.5039863586426, + "p90": 297.7280020713806, + "p95": 300.1599907875061, + "p99": 305.7920038700104 + }, + "combine": { + "p50": 57.24800005555153, + "p90": 66.27199798822403, + "p95": 66.97600334882736, + "p99": 70.65600156784058 + }, + "roundtrip": { + "p50": 304.0960133075714, + "p90": 345.2799916267395, + "p95": 354.3680012226105, + "p99": 407.3280096054077 + }, + "isolatedSum": { + "p50": 314.7519864141941, + "p90": 364.00000005960464, + "p95": 367.13599413633347, + "p99": 376.44800543785095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 258.4640085697174, + "p90": 264.44798707962036, + "p95": 267.10399985313416, + "p99": 319.13599371910095 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 74.14399832487106, + "p95": 77.91999727487564, + "p99": 83.55200290679932 + }, + "roundtrip": { + "p50": 320.70401310920715, + "p90": 326.4000117778778, + "p95": 330.78399300575256, + "p99": 372.76801466941833 + }, + "isolatedSum": { + "p50": 329.3760120868683, + "p90": 338.5919854044914, + "p95": 345.0239971280098, + "p99": 402.68799662590027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + 
"combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a97c00e9", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_add1bf61", + "comparisonKey": "d4ed5bf9fee61215", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:31.448908+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 160.0320041179657, + "p90": 167.80799627304077, + "p95": 170.97599804401398, + "p99": 175.20000040531158 + }, + "combine": { + "p50": 35.77600046992302, + "p90": 39.29600119590759, + "p95": 41.37599840760231, + "p99": 44.89599913358688 + }, + "roundtrip": { + "p50": 189.69599902629852, + "p90": 195.68000733852386, + "p95": 199.0080028772354, + "p99": 206.2080055475235 + }, + "isolatedSum": { + "p50": 195.80800458788872, + "p90": 207.10399746894836, + "p95": 212.3519964516163, + "p99": 220.09599953889847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 161.28000617027283, + "p90": 169.69600319862366, + "p95": 174.81599748134613, + "p99": 188.28800320625305 + }, + "combine": { + "p50": 35.64799949526787, + "p90": 39.0079990029335, + "p95": 40.832001715898514, + "p99": 44.67200115323067 + }, + "roundtrip": { + "p50": 189.53600525856018, + "p90": 195.93599438667297, + "p95": 199.8080015182495, + "p99": 208.80000293254852 + }, + "isolatedSum": { + "p50": 196.9280056655407, + "p90": 208.70400220155716, + "p95": 215.64799919724464, + "p99": 232.96000435948372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, 
+ "globalTokens": 32, + "dispatch": { + "p50": 161.28000617027283, + "p90": 168.32000017166138, + "p95": 171.424001455307, + "p99": 187.32799589633942 + }, + "combine": { + "p50": 38.59199956059456, + "p90": 40.863998234272, + "p95": 43.2640016078949, + "p99": 45.88799923658371 + }, + "roundtrip": { + "p50": 189.95200097560883, + "p90": 197.4399983882904, + "p95": 201.6959935426712, + "p99": 212.76800334453583 + }, + "isolatedSum": { + "p50": 199.8720057308674, + "p90": 209.18399840593338, + "p95": 214.6880030632019, + "p99": 233.21599513292313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 163.26400637626648, + "p90": 168.89600455760956, + "p95": 171.1679995059967, + "p99": 182.01600015163422 + }, + "combine": { + "p50": 39.0079990029335, + "p90": 41.85599833726883, + "p95": 43.99999976158142, + "p99": 48.19199815392494 + }, + "roundtrip": { + "p50": 190.68799912929535, + "p90": 197.76000082492828, + "p95": 200.06400346755981, + "p99": 204.3839991092682 + }, + "isolatedSum": { + "p50": 202.27200537919998, + "p90": 210.7520028948784, + "p95": 215.16799926757812, + "p99": 230.20799830555916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 170.6559956073761, + "p90": 175.9680062532425, + "p95": 178.49600315093994, + "p99": 186.0159933567047 + }, + "combine": { + "p50": 42.527999728918076, + "p90": 44.67200115323067, + "p95": 46.23999819159508, + "p99": 49.695998430252075 + }, + "roundtrip": { + "p50": 202.30400562286377, + "p90": 207.2959989309311, + "p95": 
210.07999777793884, + "p99": 214.9440050125122 + }, + "isolatedSum": { + "p50": 213.18399533629417, + "p90": 220.64000740647316, + "p95": 224.73600134253502, + "p99": 235.7119917869568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 197.50399887561798, + "p90": 202.97600328922272, + "p95": 205.28000593185425, + "p99": 208.639994263649 + }, + "combine": { + "p50": 45.951999723911285, + "p90": 48.25599864125252, + "p95": 50.464000552892685, + "p99": 55.00800162553787 + }, + "roundtrip": { + "p50": 234.46400463581085, + "p90": 240.06399512290955, + "p95": 244.159996509552, + "p99": 264.22399282455444 + }, + "isolatedSum": { + "p50": 243.45599859952927, + "p90": 251.23200193047523, + "p95": 255.74400648474693, + "p99": 263.64799588918686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 248.76800179481506, + "p90": 351.55200958251953, + "p95": 355.8399975299835, + "p99": 362.7519905567169 + }, + "combine": { + "p50": 56.063998490571976, + "p90": 81.60000294446945, + "p95": 83.29600095748901, + "p99": 86.65599673986435 + }, + "roundtrip": { + "p50": 291.5840148925781, + "p90": 296.79998755455017, + "p95": 299.1679906845093, + "p99": 308.4160089492798 + }, + "isolatedSum": { + "p50": 304.83200028538704, + "p90": 433.152012526989, + "p95": 439.13599848747253, + "p99": 449.40798729658127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 248.25599789619446, + "p90": 253.4399926662445, + "p95": 256.44800066947937, + "p99": 262.87999749183655 + }, + "combine": { + "p50": 72.31999933719635, + "p90": 74.72000271081924, + "p95": 77.69600301980972, + "p99": 81.727996468544 + }, + "roundtrip": { + "p50": 320.16000151634216, + "p90": 325.24800300598145, + "p95": 327.64801383018494, + "p99": 332.5439989566803 + }, + "isolatedSum": { + "p50": 320.5759972333908, + "p90": 328.15999537706375, + "p95": 334.1440036892891, + "p99": 344.60799396038055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b8f90d63", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_051ab86d", + "comparisonKey": "38f02f10cb0b43b8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:58.480558+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": 
"bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 165.6000018119812, + "p90": 172.0000058412552, + "p95": 175.64800381660461, + "p99": 182.8799992799759 + }, + "combine": { + "p50": 35.61599925160408, + "p90": 38.52799907326698, + "p95": 40.95999896526337, + "p99": 46.65600135922432 + }, + "roundtrip": { + "p50": 193.08799505233765, + "p90": 198.59200716018677, + "p95": 201.27999782562256, + "p99": 206.56000077724457 + }, + "isolatedSum": { + "p50": 201.21600106358528, + "p90": 210.52800491452217, + "p95": 216.60800278186798, + "p99": 229.5360006392002 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 165.24800658226013, + "p90": 171.80800437927246, 
+ "p95": 175.00799894332886, + "p99": 183.67999792099 + }, + "combine": { + "p50": 35.679999738931656, + "p90": 38.91199827194214, + "p95": 40.41599854826927, + "p99": 44.544000178575516 + }, + "roundtrip": { + "p50": 193.12000274658203, + "p90": 198.59200716018677, + "p95": 201.664000749588, + "p99": 206.1759978532791 + }, + "isolatedSum": { + "p50": 200.9280063211918, + "p90": 210.7200026512146, + "p95": 215.42399749159813, + "p99": 228.2239980995655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 168.64000260829926, + "p90": 229.12000119686127, + "p95": 231.455996632576, + "p99": 237.98400163650513 + }, + "combine": { + "p50": 39.135999977588654, + "p90": 53.53600159287453, + "p95": 54.1439987719059, + "p99": 59.67999994754791 + }, + "roundtrip": { + "p50": 195.13599574565887, + "p90": 266.4960026741028, + "p95": 269.98400688171387, + "p99": 276.63999795913696 + }, + "isolatedSum": { + "p50": 207.7760025858879, + "p90": 282.6560027897358, + "p95": 285.5999954044819, + "p99": 297.66400158405304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 166.72000288963318, + "p90": 172.60800302028656, + "p95": 174.72000420093536, + "p99": 181.98400735855103 + }, + "combine": { + "p50": 39.264000952243805, + "p90": 41.760001331567764, + "p95": 43.74400153756142, + "p99": 47.29599878191948 + }, + "roundtrip": { + "p50": 192.83199310302734, + "p90": 198.94400238990784, + "p95": 201.664000749588, + "p99": 206.9759964942932 + }, + "isolatedSum": { + "p50": 
205.98400384187698, + "p90": 214.36800435185432, + "p95": 218.46400573849678, + "p99": 229.2800061404705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 177.5359958410263, + "p90": 182.43199586868286, + "p95": 185.40799617767334, + "p99": 189.91999328136444 + }, + "combine": { + "p50": 42.17600077390671, + "p90": 44.19200122356415, + "p95": 46.49600014090538, + "p99": 49.56800118088722 + }, + "roundtrip": { + "p50": 208.064004778862, + "p90": 212.70400285720825, + "p95": 216.0319983959198, + "p99": 221.5680032968521 + }, + "isolatedSum": { + "p50": 219.711996614933, + "p90": 226.623997092247, + "p95": 231.90399631857872, + "p99": 239.48799446225166 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 206.40000700950623, + "p90": 210.55999398231506, + "p95": 213.53599429130554, + "p99": 219.04000639915466 + }, + "combine": { + "p50": 45.3759990632534, + "p90": 47.488000243902206, + "p95": 49.12000149488449, + "p99": 53.37600037455559 + }, + "roundtrip": { + "p50": 240.22400379180908, + "p90": 244.4159984588623, + "p95": 247.29600548744202, + "p99": 252.0959973335266 + }, + "isolatedSum": { + "p50": 251.77600607275963, + "p90": 258.04799422621727, + "p95": 262.65599578619003, + "p99": 272.41600677371025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 260.3839933872223, + "p90": 305.2160143852234, + "p95": 308.6720108985901, + "p99": 315.39198756217957 + }, + "combine": { + "p50": 55.26399984955788, + "p90": 66.23999774456024, + "p95": 66.78400188684464, + "p99": 70.14399766921997 + }, + "roundtrip": { + "p50": 304.57600951194763, + "p90": 351.55200958251953, + "p95": 355.23200035095215, + "p99": 363.77599835395813 + }, + "isolatedSum": { + "p50": 315.64799323678017, + "p90": 371.45601212978363, + "p95": 375.4560127854347, + "p99": 385.53598523139954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 261.4400088787079, + "p90": 265.9519910812378, + "p95": 269.6320116519928, + "p99": 277.44001150131226 + }, + "combine": { + "p50": 68.03199648857117, + "p90": 70.52800059318542, + "p95": 72.09599763154984, + "p99": 75.48800110816956 + }, + "roundtrip": { + "p50": 320.47998905181885, + "p90": 324.70399141311646, + "p95": 326.911985874176, + "p99": 332.12798833847046 + }, + "isolatedSum": { + "p50": 329.47200536727905, + "p90": 336.4799916744232, + "p95": 341.72800928354263, + "p99": 352.9280126094818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4df954d9", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_7ff3464e", + "comparisonKey": "206880ad79c2dbd4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:37.192965+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 158.07999670505524, + "p90": 166.55999422073364, + "p95": 170.46399414539337, + "p99": 184.25600230693817 + }, + "combine": { + "p50": 35.19999980926514, + 
"p90": 39.29600119590759, + "p95": 41.08799993991852, + "p99": 47.90399968624115 + }, + "roundtrip": { + "p50": 186.27199530601501, + "p90": 193.31200420856476, + "p95": 195.8400011062622, + "p99": 210.59200167655945 + }, + "isolatedSum": { + "p50": 193.27999651432037, + "p90": 205.85599541664124, + "p95": 211.5519940853119, + "p99": 232.16000199317932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 158.27199816703796, + "p90": 166.59200191497803, + "p95": 169.72799599170685, + "p99": 186.78399920463562 + }, + "combine": { + "p50": 35.26400029659271, + "p90": 39.264000952243805, + "p95": 41.34399816393852, + "p99": 46.84799909591675 + }, + "roundtrip": { + "p50": 186.46399676799774, + "p90": 193.50400567054749, + "p95": 195.96800208091736, + "p99": 204.73599433898926 + }, + "isolatedSum": { + "p50": 193.53599846363068, + "p90": 205.85600286722183, + "p95": 211.07199415564537, + "p99": 233.63199830055237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 158.9760035276413, + "p90": 165.95199704170227, + "p95": 169.15200650691986, + "p99": 199.072003364563 + }, + "combine": { + "p50": 37.91999816894531, + "p90": 41.34399816393852, + "p95": 43.71200129389763, + "p99": 51.16799846291542 + }, + "roundtrip": { + "p50": 187.96800076961517, + "p90": 194.84800100326538, + "p95": 199.3280053138733, + "p99": 302.3360073566437 + }, + "isolatedSum": { + "p50": 196.8960016965866, + "p90": 207.2959952056408, + "p95": 212.8640078008175, + "p99": 250.2400018274784 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 159.29600596427917, + "p90": 165.6319946050644, + "p95": 167.87199676036835, + "p99": 176.256000995636 + }, + "combine": { + "p50": 38.495998829603195, + "p90": 42.047999799251556, + "p95": 45.024000108242035, + "p99": 50.52800104022026 + }, + "roundtrip": { + "p50": 186.71999871730804, + "p90": 194.36800479888916, + "p95": 197.66399264335632, + "p99": 217.98400580883026 + }, + "isolatedSum": { + "p50": 197.79200479388237, + "p90": 207.67999440431595, + "p95": 212.89599686861038, + "p99": 226.78400203585625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 164.51199352741241, + "p90": 170.9440052509308, + "p95": 173.63199591636658, + "p99": 183.03999304771423 + }, + "combine": { + "p50": 41.82400181889534, + "p90": 44.544000178575516, + "p95": 46.720001846551895, + "p99": 52.928000688552856 + }, + "roundtrip": { + "p50": 194.65599954128265, + "p90": 200.41599869728088, + "p95": 203.13599705696106, + "p99": 213.34399282932281 + }, + "isolatedSum": { + "p50": 206.33599534630775, + "p90": 215.4880054295063, + "p95": 220.35199776291847, + "p99": 235.9679937362671 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 182.5920045375824, + "p90": 188.73600661754608, + 
"p95": 190.8160001039505, + "p99": 200.19200444221497 + }, + "combine": { + "p50": 46.23999819159508, + "p90": 48.8319993019104, + "p95": 50.6879985332489, + "p99": 55.615998804569244 + }, + "roundtrip": { + "p50": 217.21599996089935, + "p90": 223.39199483394623, + "p95": 225.24799406528473, + "p99": 252.70399451255798 + }, + "isolatedSum": { + "p50": 228.83200272917747, + "p90": 237.56800591945648, + "p95": 241.5039986371994, + "p99": 255.8080032467842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 218.55999529361725, + "p90": 256.0639977455139, + "p95": 258.5600018501282, + "p99": 263.96799087524414 + }, + "combine": { + "p50": 54.84800040721893, + "p90": 64.25599753856659, + "p95": 65.11999666690826, + "p99": 69.31199878454208 + }, + "roundtrip": { + "p50": 261.1519992351532, + "p90": 299.8400032520294, + "p95": 301.82400345802307, + "p99": 305.5039942264557 + }, + "isolatedSum": { + "p50": 273.4079957008362, + "p90": 320.3199952840805, + "p95": 323.67999851703644, + "p99": 333.2799896597862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 223.55200350284576, + "p90": 228.5120040178299, + "p95": 231.32799565792084, + "p99": 238.62400650978088 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 75.83999633789062, + "p95": 77.7600035071373, + "p99": 83.80799740552902 + }, + "roundtrip": { + "p50": 290.49599170684814, + "p90": 296.4479923248291, + "p95": 298.5599935054779, + "p99": 304.7040104866028 + }, + "isolatedSum": { + "p50": 
296.25600576400757, + "p90": 304.3520003557205, + "p95": 309.08799916505814, + "p99": 322.4320039153099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bd53e847", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_f56f8200", + "comparisonKey": "d0e61a9259aba44c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:04.068577+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 159.5200002193451, + "p90": 165.98400473594666, + "p95": 168.57600212097168, + "p99": 178.71999740600586 + }, + "combine": { + "p50": 34.84800085425377, + "p90": 37.82400116324425, + "p95": 40.73600098490715, + "p99": 44.256001710891724 + }, + "roundtrip": { + "p50": 187.00799345970154, + "p90": 193.08799505233765, + "p95": 195.26399672031403, + "p99": 205.59999346733093 + }, + "isolatedSum": { + "p50": 194.36800107359886, + "p90": 203.8080058991909, + "p95": 209.31200310587883, + "p99": 222.97599911689758 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 160.51200032234192, + "p90": 167.35999286174774, + "p95": 169.3120002746582, + "p99": 175.00799894332886 + }, + "combine": { + "p50": 34.94400158524513, + "p90": 36.959998309612274, + "p95": 39.84000161290169, + "p99": 44.73600164055824 + }, + "roundtrip": { + "p50": 188.28800320625305, + "p90": 194.2719966173172, + "p95": 196.70400023460388, + "p99": 200.54399967193604 + }, + "isolatedSum": { + "p50": 195.45600190758705, + "p90": 204.31999117136002, + "p95": 209.1520018875599, + "p99": 219.7440005838871 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + 
"combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 161.98399662971497, + "p90": 210.24000644683838, + "p95": 213.24799954891205, + "p99": 221.24800086021423 + }, + "combine": { + "p50": 38.495998829603195, + "p90": 50.97600072622299, + "p95": 52.000001072883606, + "p99": 55.67999929189682 + }, + "roundtrip": { + "p50": 188.31999599933624, + "p90": 244.28799748420715, + "p95": 248.86399507522583, + "p99": 253.50400805473328 + }, + "isolatedSum": { + "p50": 200.47999545931816, + "p90": 261.21600717306137, + "p95": 265.24800062179565, + "p99": 276.92800015211105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 162.04799711704254, + "p90": 166.9439971446991, + "p95": 169.72799599170685, + "p99": 178.68800461292267 + }, + "combine": { + "p50": 38.43199834227562, + "p90": 40.70400074124336, + "p95": 43.487999588251114, + "p99": 46.94399982690811 + }, + "roundtrip": { + "p50": 188.35200369358063, + "p90": 194.30400431156158, + "p95": 196.57599925994873, + "p99": 201.1840045452118 + }, + "isolatedSum": { + "p50": 200.47999545931816, + "p90": 207.64799788594246, + "p95": 213.21599557995796, + "p99": 225.63200443983078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 172.63999581336975, + "p90": 177.88800597190857, + "p95": 180.4479956626892, + "p99": 184.28799510002136 + }, + 
"combine": { + "p50": 42.080000042915344, + "p90": 43.96799951791763, + "p95": 45.85599899291992, + "p99": 49.60000142455101 + }, + "roundtrip": { + "p50": 203.93599569797516, + "p90": 208.44799280166626, + "p95": 211.90400421619415, + "p99": 216.86400473117828 + }, + "isolatedSum": { + "p50": 214.7199958562851, + "p90": 221.8560054898262, + "p95": 226.30399465560913, + "p99": 233.88799652457237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.95999538898468, + "p90": 205.1839977502823, + "p95": 207.90399610996246, + "p99": 213.6320024728775 + }, + "combine": { + "p50": 44.44799944758415, + "p90": 46.911999583244324, + "p95": 48.895999789237976, + "p99": 52.70399898290634 + }, + "roundtrip": { + "p50": 233.63199830055237, + "p90": 236.95999383926392, + "p95": 240.4160052537918, + "p99": 246.14399671554565 + }, + "isolatedSum": { + "p50": 245.40799483656883, + "p90": 252.0959973335266, + "p95": 256.79999589920044, + "p99": 266.33600145578384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 252.99200415611267, + "p90": 291.48799180984497, + "p95": 295.3279912471771, + "p99": 311.48800253868103 + }, + "combine": { + "p50": 54.59199845790863, + "p90": 65.50399959087372, + "p95": 66.6240006685257, + "p99": 71.35999947786331 + }, + "roundtrip": { + "p50": 297.7280020713806, + "p90": 336.544007062912, + "p95": 339.04001116752625, + "p99": 349.40800070762634 + }, + "isolatedSum": { + "p50": 307.5840026140213, + "p90": 356.9919914007187, + "p95": 
361.9519919157028, + "p99": 382.84800201654434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 258.14399123191833, + "p90": 297.1839904785156, + "p95": 300.35200715065, + "p99": 309.59999561309814 + }, + "combine": { + "p50": 68.64000111818314, + "p90": 78.015998005867, + "p95": 83.29600095748901, + "p99": 236.9920015335083 + }, + "roundtrip": { + "p50": 318.7519907951355, + "p90": 322.9439854621887, + "p95": 325.6320059299469, + "p99": 330.6879997253418 + }, + "isolatedSum": { + "p50": 326.78399235010147, + "p90": 375.19998848438263, + "p95": 383.64800810813904, + "p99": 546.5919971466064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a07a9f5", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_db644ee2", + "comparisonKey": "10b9acce54894dce", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:31.164336+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 160.16000509262085, + "p90": 166.84800386428833, + "p95": 170.9440052509308, + "p99": 185.248002409935 + }, + "combine": { + "p50": 35.26400029659271, + "p90": 38.59199956059456, + "p95": 41.08799993991852, + "p99": 47.231998294591904 + }, + "roundtrip": { + "p50": 188.80000710487366, + "p90": 194.0159946680069, + "p95": 197.08800315856934, + "p99": 204.6400010585785 + }, + "isolatedSum": { + "p50": 195.42400538921356, + "p90": 205.4400034248829, + "p95": 212.0320051908493, + "p99": 232.4800007045269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 160.0639969110489, + "p90": 168.32000017166138, + "p95": 172.35200107097626, + "p99": 178.49600315093994 + }, + "combine": { + "p50": 35.64799949526787, + "p90": 46.879999339580536, + "p95": 54.62399870157242, + "p99": 56.063998490571976 + }, + "roundtrip": { + "p50": 188.9919936656952, + "p90": 194.97600197792053, + "p95": 197.08800315856934, + "p99": 204.54399287700653 + }, + "isolatedSum": { + "p50": 195.71199640631676, + "p90": 215.1999995112419, + "p95": 226.97599977254868, + "p99": 234.56000164151192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 163.96799683570862, + "p90": 224.03199970722198, + "p95": 227.90400683879852, + "p99": 232.35200345516205 + }, + "combine": { + "p50": 39.16800022125244, + "p90": 52.51200124621391, + "p95": 53.408000618219376, + "p99": 58.52799862623215 + }, + "roundtrip": { + "p50": 192.00000166893005, + "p90": 260.19200682640076, + "p95": 262.65600323677063, + "p99": 269.3440020084381 + }, + "isolatedSum": { + "p50": 203.13599705696106, + "p90": 276.5440009534359, + "p95": 281.3120074570179, + "p99": 290.8800020813942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 162.1440052986145, + "p90": 166.9120043516159, + "p95": 169.24799978733063, + "p99": 173.72800409793854 + }, + "combine": { + "p50": 38.975998759269714, + "p90": 41.50399938225746, + "p95": 43.71200129389763, + "p99": 
46.751998364925385 + }, + "roundtrip": { + "p50": 189.98399376869202, + "p90": 195.48800587654114, + "p95": 197.76000082492828, + "p99": 203.77600193023682 + }, + "isolatedSum": { + "p50": 201.12000405788422, + "p90": 208.41600373387337, + "p95": 212.96000108122826, + "p99": 220.48000246286392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 172.2559928894043, + "p90": 176.83200538158417, + "p95": 179.1359931230545, + "p99": 186.62400543689728 + }, + "combine": { + "p50": 42.399998754262924, + "p90": 44.47999969124794, + "p95": 46.431999653577805, + "p99": 52.352000027894974 + }, + "roundtrip": { + "p50": 205.53599298000336, + "p90": 209.21599864959717, + "p95": 211.71200275421143, + "p99": 216.92800521850586 + }, + "isolatedSum": { + "p50": 214.65599164366722, + "p90": 221.3120050728321, + "p95": 225.5679927766323, + "p99": 238.97600546479225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.6080001592636, + "p90": 205.28000593185425, + "p95": 208.38400721549988, + "p99": 214.04799818992615 + }, + "combine": { + "p50": 45.27999833226204, + "p90": 47.07200080156326, + "p95": 49.47200044989586, + "p99": 58.400001376867294 + }, + "roundtrip": { + "p50": 235.167995095253, + "p90": 239.00799453258514, + "p95": 242.8479939699173, + "p99": 255.45600056648254 + }, + "isolatedSum": { + "p50": 245.88799849152565, + "p90": 252.3520067334175, + "p95": 257.85600766539574, + "p99": 272.44799956679344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 257.34400749206543, + "p90": 300.25601387023926, + "p95": 302.94400453567505, + "p99": 309.63200330734253 + }, + "combine": { + "p50": 55.64799904823303, + "p90": 66.3679987192154, + "p95": 66.91200286149979, + "p99": 69.40799951553345 + }, + "roundtrip": { + "p50": 300.5119860172272, + "p90": 345.7919955253601, + "p95": 347.7120101451874, + "p99": 352.9599905014038 + }, + "isolatedSum": { + "p50": 312.99200654029846, + "p90": 366.62401258945465, + "p95": 369.85600739717484, + "p99": 379.040002822876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 262.7519965171814, + "p90": 306.17600679397583, + "p95": 308.3840012550354, + "p99": 313.4720027446747 + }, + "combine": { + "p50": 69.66400146484375, + "p90": 80.6720033288002, + "p95": 81.34400099515915, + "p99": 84.3840017914772 + }, + "roundtrip": { + "p50": 322.07998633384705, + "p90": 334.46401357650757, + "p95": 368.47999691963196, + "p99": 372.6080060005188 + }, + "isolatedSum": { + "p50": 332.41599798202515, + "p90": 386.84801012277603, + "p95": 389.72800225019455, + "p99": 397.8560045361519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-58708cc8", + "identity": 
"h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_930e262d", + "comparisonKey": "2d8d821b3680de8a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:06.297597+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 256.415992975235, + "p90": 261.53600215911865, + "p95": 264.0640139579773, + "p99": 274.27199482917786 + }, + "combine": { + "p50": 68.80000233650208, + "p90": 70.72000205516815, + "p95": 72.80000299215317, + "p99": 77.15199887752533 + }, + "roundtrip": { + "p50": 320.607990026474, + "p90": 324.9279856681824, + "p95": 327.10400223731995, + "p99": 331.29599690437317 + }, + "isolatedSum": { + "p50": 325.21599531173706, + "p90": 332.2560042142868, + "p95": 336.86401695013046, + "p99": 351.4239937067032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 268.8960134983063, + "p90": 274.27199482917786, + "p95": 276.2559950351715, + "p99": 280.0000011920929 + }, + "combine": { + "p50": 105.18400371074677, + "p90": 107.42399841547012, + "p95": 108.47999900579453, + "p99": 111.1999973654747 + }, + "roundtrip": { + "p50": 368.5759902000427, + "p90": 372.8320002555847, + "p95": 375.0720024108887, + "p99": 379.5199990272522 + }, + "isolatedSum": { + "p50": 374.08001720905304, + "p90": 381.695993244648, + "p95": 384.73599404096603, + "p99": 391.1999985575676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 294.71999406814575, + "p90": 300.35200715065, + "p95": 303.0720055103302, + "p99": 309.79201197624207 + }, + "combine": { + "p50": 172.35200107097626, + "p90": 175.1679927110672, + "p95": 176.41599476337433, + 
"p99": 181.37599527835846 + }, + "roundtrip": { + "p50": 465.11998772621155, + "p90": 469.760000705719, + "p95": 472.351998090744, + "p99": 477.183997631073 + }, + "isolatedSum": { + "p50": 467.071995139122, + "p90": 475.5199998617172, + "p95": 479.48800027370453, + "p99": 491.1680072546005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 441.0879909992218, + "p90": 446.399986743927, + "p95": 448.2879936695099, + "p99": 452.2559940814972 + }, + "combine": { + "p50": 299.1679906845093, + "p90": 301.7919957637787, + "p95": 303.3599853515625, + "p99": 306.08001351356506 + }, + "roundtrip": { + "p50": 743.9680099487305, + "p90": 749.2160201072693, + "p95": 751.1039972305298, + "p99": 755.4559707641602 + }, + "isolatedSum": { + "p50": 740.2559816837311, + "p90": 748.1919825077057, + "p95": 751.6479790210724, + "p99": 758.3360075950623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 727.9040217399597, + "p90": 732.6080203056335, + "p95": 735.0080013275146, + "p99": 804.2240142822266 + }, + "combine": { + "p50": 556.1599731445312, + "p90": 560.8320236206055, + "p95": 565.4720067977905, + "p99": 631.3920021057129 + }, + "roundtrip": { + "p50": 1284.7360372543335, + "p90": 1289.504051208496, + "p95": 1291.200041770935, + "p99": 1296.7679500579834 + }, + "isolatedSum": { + "p50": 1284.063994884491, + "p90": 1293.440043926239, + "p95": 1300.4800081253052, + "p99": 1435.6160163879395 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1301.759958267212, + "p90": 1306.9440126419067, + "p95": 1309.4079494476318, + "p99": 1313.4080171585083 + }, + "combine": { + "p50": 1069.1839456558228, + "p90": 1074.0480422973633, + "p95": 1075.9040117263794, + "p99": 1177.6319742202759 + }, + "roundtrip": { + "p50": 2370.9120750427246, + "p90": 2376.447916030884, + "p95": 2381.0880184173584, + "p99": 2471.9998836517334 + }, + "isolatedSum": { + "p50": 2370.9439039230347, + "p90": 2380.99205493927, + "p95": 2385.3119611740112, + "p99": 2491.039991378784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2aa0e84b", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_6a473c2f", + "comparisonKey": "f1a143c0c771c905", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:09.506397+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + 
"routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 310.016006231308, + "p90": 314.9760067462921, + "p95": 317.21600890159607, + "p99": 324.19198751449585 + }, + "combine": { + "p50": 88.41600269079208, + "p90": 91.45600348711014, + "p95": 93.02400052547455, + "p99": 96.12800180912018 + }, + "roundtrip": { + "p50": 390.6880021095276, + "p90": 395.9360122680664, + "p95": 398.75200390815735, + "p99": 405.40799498558044 + }, + "isolatedSum": { + "p50": 398.43200892210007, + "p90": 406.43201023340225, + "p95": 410.2400094270706, + "p99": 420.319989323616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 322.33598828315735, + "p90": 326.7199993133545, + "p95": 329.02398705482483, + "p99": 334.23998951911926 + }, + "combine": { + "p50": 141.40799641609192, + "p90": 145.31199634075165, + "p95": 146.62399888038635, + "p99": 149.85600113868713 + }, + "roundtrip": { + "p50": 455.9679925441742, + "p90": 461.216002702713, + "p95": 464.57600593566895, + "p99": 470.8159863948822 + }, + "isolatedSum": { + "p50": 463.74398469924927, + "p90": 472.03199565410614, + "p95": 475.6479859352112, + "p99": 484.0959906578064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 383.67998600006104, + "p90": 389.18399810791016, + "p95": 391.29599928855896, + "p99": 398.2720077037811 + }, + "combine": { + "p50": 239.32799696922302, + "p90": 244.35199797153473, + "p95": 246.07999622821808, + "p99": 250.97599625587463 + }, + "roundtrip": { + "p50": 619.0080046653748, + "p90": 626.1759996414185, + "p95": 629.7600269317627, + "p99": 637.9520297050476 + }, + "isolatedSum": { + "p50": 623.0079829692841, + "p90": 633.5359960794449, + "p95": 637.375995516777, + "p99": 649.2480039596558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 602.7200222015381, + "p90": 611.1040115356445, + "p95": 615.0400042533875, + "p99": 911.84002161026 + }, + "combine": { + "p50": 432.73600935935974, + "p90": 443.3920085430145, + "p95": 452.32000946998596, + "p99": 472.1919894218445 + }, + "roundtrip": { + "p50": 1031.9039821624756, + 
"p90": 1043.455958366394, + "p95": 1049.407958984375, + "p99": 1059.9360466003418 + }, + "isolatedSum": { + "p50": 1035.4560315608978, + "p90": 1054.496020078659, + "p95": 1067.3600137233734, + "p99": 1384.0320110321045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1053.8560152053833, + "p90": 1060.256004333496, + "p95": 1062.6239776611328, + "p99": 1075.1999616622925 + }, + "combine": { + "p50": 821.727991104126, + "p90": 831.712007522583, + "p95": 840.5119776725769, + "p99": 868.7040209770203 + }, + "roundtrip": { + "p50": 1872.864007949829, + "p90": 1885.2800130844116, + "p95": 1893.9839601516724, + "p99": 1920.5440282821655 + }, + "isolatedSum": { + "p50": 1875.5840063095093, + "p90": 1891.968011856079, + "p95": 1903.1359553337097, + "p99": 1943.9039826393127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1930.8480024337769, + "p90": 1943.9680576324463, + "p95": 1952.7679681777954, + "p99": 2159.424066543579 + }, + "combine": { + "p50": 1597.856044769287, + "p90": 1607.2640419006348, + "p95": 1612.9599809646606, + "p99": 1655.4559469223022 + }, + "roundtrip": { + "p50": 3525.023937225342, + "p90": 3538.8479232788086, + "p95": 3546.0801124572754, + "p99": 3568.063974380493 + }, + "isolatedSum": { + "p50": 3528.704047203064, + "p90": 3551.232099533081, + "p95": 3565.727949142456, + "p99": 3814.8800134658813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + 
"recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d348f1cf", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_2de185b6", + "comparisonKey": "1a7da5f52ca40530", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:00.499201+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 170.9119975566864, + "p90": 210.4959934949875, + "p95": 213.56800198554993, + "p99": 217.24799275398254 + }, + "combine": { + "p50": 43.07200014591217, + "p90": 51.29599943757057, + "p95": 52.319999784231186, + "p99": 58.687999844551086 + }, + "roundtrip": { + "p50": 200.19200444221497, + "p90": 214.9759978055954, + "p95": 243.3599978685379, + "p99": 252.3840069770813 + }, + "isolatedSum": { + "p50": 213.98399770259857, + "p90": 261.79199293255806, + "p95": 265.8880017697811, + "p99": 275.93599259853363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.79200041294098, + "p90": 223.77599775791168, + "p95": 226.59200429916382, + "p99": 230.52799701690674 + }, + "combine": { + "p50": 59.42400172352791, + "p90": 67.391999065876, + "p95": 68.1919977068901, + "p99": 70.88000327348709 + }, + "roundtrip": { + "p50": 242.33600497245789, + "p90": 275.64799785614014, + "p95": 277.98399329185486, + "p99": 282.943993806839 + }, + "isolatedSum": { + "p50": 253.2160021364689, + "p90": 291.1679968237877, + "p95": 294.7840020060539, + "p99": 301.40800029039383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 329.3760120868683, + 
"p90": 349.88799691200256, + "p95": 352.6400029659271, + "p99": 356.79998993873596 + }, + "combine": { + "p50": 136.4160031080246, + "p90": 142.94399321079254, + "p95": 143.93599331378937, + "p99": 149.31200444698334 + }, + "roundtrip": { + "p50": 452.2880017757416, + "p90": 480.0319969654083, + "p95": 485.152006149292, + "p99": 546.6240048408508 + }, + "isolatedSum": { + "p50": 465.7920151948929, + "p90": 492.8319901227951, + "p95": 496.5759962797165, + "p99": 506.1119943857193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17ca25ca", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_29d2ddcf", + "comparisonKey": "b903a6459c7e9ad7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:03.982430+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + 
"configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 193.1840032339096, + "p90": 197.79199361801147, + "p95": 200.25600492954254, + "p99": 207.519993185997 + }, + "combine": { + "p50": 51.35999992489815, + "p90": 53.599998354911804, + "p95": 55.93600124120712, + "p99": 58.97599831223488 + }, + "roundtrip": { + "p50": 233.91999304294586, + "p90": 238.17600309848785, + "p95": 240.76800048351288, + "p99": 245.69599330425262 + }, + "isolatedSum": { + "p50": 244.54400315880775, + "p90": 251.39199197292328, + "p95": 256.19200617074966, + "p99": 266.4959914982319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 197.2160041332245, + "p90": 201.664000749588, + "p95": 204.3199986219406, + "p99": 220.09600698947906 + }, + "combine": { + "p50": 65.37599861621857, + "p90": 67.84000247716904, + "p95": 70.20799815654755, + "p99": 75.3600001335144 + }, + 
"roundtrip": { + "p50": 255.19999861717224, + "p90": 259.64799523353577, + "p95": 261.75999641418457, + "p99": 265.3439939022064 + }, + "isolatedSum": { + "p50": 262.59200274944305, + "p90": 269.50400322675705, + "p95": 274.52799677848816, + "p99": 295.45600712299347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 227.48799622058868, + "p90": 231.74400627613068, + "p95": 234.46400463581085, + "p99": 238.36800456047058 + }, + "combine": { + "p50": 102.78400033712387, + "p90": 104.86400127410889, + "p95": 106.59199953079224, + "p99": 110.3999987244606 + }, + "roundtrip": { + "p50": 322.11199402809143, + "p90": 326.7199993133545, + "p95": 328.5120129585266, + "p99": 334.23998951911926 + }, + "isolatedSum": { + "p50": 330.27199655771255, + "p90": 336.60800755023956, + "p95": 341.0560041666031, + "p99": 348.7680032849312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 309.02400612831116, + "p90": 313.24800848960876, + "p95": 315.8720135688782, + "p99": 319.8719918727875 + }, + "combine": { + "p50": 173.88799786567688, + "p90": 175.99999904632568, + "p95": 176.7680048942566, + "p99": 180.7360053062439 + }, + "roundtrip": { + "p50": 476.28799080848694, + "p90": 480.320006608963, + "p95": 481.4079999923706, + "p99": 485.53600907325745 + }, + "isolatedSum": { + "p50": 482.91200399398804, + "p90": 489.24800753593445, + "p95": 492.64001846313477, + "p99": 500.6079971790314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + 
"combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 473.91998767852783, + "p90": 478.62398624420166, + "p95": 480.44800758361816, + "p99": 483.8719964027405 + }, + "combine": { + "p50": 310.68798899650574, + "p90": 312.6719892024994, + "p95": 314.1759932041168, + "p99": 317.3440098762512 + }, + "roundtrip": { + "p50": 782.5599908828735, + "p90": 787.4879837036133, + "p95": 789.247989654541, + "p99": 796.8000173568726 + }, + "isolatedSum": { + "p50": 784.6079766750336, + "p90": 791.295975446701, + "p95": 794.624000787735, + "p99": 801.2160062789917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 781.5999984741211, + "p90": 787.0720028877258, + "p95": 788.9599800109863, + "p99": 794.8160171508789 + }, + "combine": { + "p50": 590.0800228118896, + "p90": 592.3200249671936, + "p95": 593.2480096817017, + "p99": 596.3199734687805 + }, + "roundtrip": { + "p50": 1372.480034828186, + "p90": 1378.335952758789, + "p95": 1380.2239894866943, + "p99": 1387.231945991516 + }, + "isolatedSum": { + "p50": 1371.6800212860107, + "p90": 1379.3920278549194, + "p95": 1382.207989692688, + "p99": 1391.1359906196594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e399b963", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": 
"h100_1e9bbeb9", + "comparisonKey": "123f354e99036bdd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:06.470022+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + 
"p50": 258.6880028247833, + "p90": 263.5839879512787, + "p95": 265.5999958515167, + "p99": 275.39199590682983 + }, + "combine": { + "p50": 74.11199808120728, + "p90": 76.64000242948532, + "p95": 78.59200239181519, + "p99": 81.11999928951263 + }, + "roundtrip": { + "p50": 326.4639973640442, + "p90": 331.0079872608185, + "p95": 332.92800188064575, + "p99": 336.8319869041443 + }, + "isolatedSum": { + "p50": 332.8000009059906, + "p90": 340.223990380764, + "p95": 344.1919982433319, + "p99": 356.51199519634247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 270.3680098056793, + "p90": 274.6559977531433, + "p95": 278.1440019607544, + "p99": 296.31999135017395 + }, + "combine": { + "p50": 123.64800274372101, + "p90": 125.85599720478058, + "p95": 126.62400305271149, + "p99": 130.40000200271606 + }, + "roundtrip": { + "p50": 389.9199962615967, + "p90": 395.1680064201355, + "p95": 397.43998646736145, + "p99": 422.0159947872162 + }, + "isolatedSum": { + "p50": 394.01601254940033, + "p90": 400.5119949579239, + "p95": 404.7680050134659, + "p99": 426.71999335289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 347.29599952697754, + "p90": 351.52000188827515, + "p95": 353.92001271247864, + "p99": 359.935998916626 + }, + "combine": { + "p50": 214.1440063714981, + "p90": 216.5759950876236, + "p95": 217.31199324131012, + "p99": 220.5439954996109 + }, + "roundtrip": { + "p50": 561.6000294685364, + "p90": 567.0400261878967, + "p95": 569.6319937705994, + 
"p99": 575.5519866943359 + }, + "isolatedSum": { + "p50": 561.4400058984756, + "p90": 568.0959969758987, + "p95": 571.2320059537888, + "p99": 580.4799944162369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 544.5119738578796, + "p90": 548.8960146903992, + "p95": 550.2399802207947, + "p99": 554.751992225647 + }, + "combine": { + "p50": 401.15201473236084, + "p90": 404.2240083217621, + "p95": 405.34400939941406, + "p99": 439.4240081310272 + }, + "roundtrip": { + "p50": 946.2720155715942, + "p90": 952.6399970054626, + "p95": 956.4160108566284, + "p99": 990.0799989700317 + }, + "isolatedSum": { + "p50": 945.6639885902405, + "p90": 953.1200230121613, + "p95": 955.5839896202087, + "p99": 994.1760003566742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 942.7199959754944, + "p90": 948.9279985427856, + "p95": 950.6880044937134, + "p99": 957.3119878768921 + }, + "combine": { + "p50": 769.3759799003601, + "p90": 773.0879783630371, + "p95": 774.1760015487671, + "p99": 775.7440209388733 + }, + "roundtrip": { + "p50": 1714.9120569229126, + "p90": 1721.343994140625, + "p95": 1723.296046257019, + "p99": 1727.295994758606 + }, + "isolatedSum": { + "p50": 1712.0959758758545, + "p90": 1722.0159769058228, + "p95": 1724.8640060424805, + "p99": 1733.0560088157654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1728.543996810913, + "p90": 1736.2879514694214, + "p95": 1738.9440536499023, + "p99": 1745.2479600906372 + }, + "combine": { + "p50": 1515.3599977493286, + "p90": 1521.0239887237549, + "p95": 1523.136019706726, + "p99": 1527.135968208313 + }, + "roundtrip": { + "p50": 3244.191884994507, + "p90": 3253.920078277588, + "p95": 3257.8558921813965, + "p99": 3268.064022064209 + }, + "isolatedSum": { + "p50": 3243.9039945602417, + "p90": 3257.3119401931763, + "p95": 3262.0800733566284, + "p99": 3272.38392829895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-46e94572", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_a587a2b5", + "comparisonKey": "899ee16a65e9b25c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:15.920338+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 259.96801257133484, + "p90": 265.24800062179565, + "p95": 266.36800169944763, + "p99": 270.9119915962219 + }, + "combine": { + "p50": 69.82400268316269, + "p90": 72.22399860620499, + "p95": 73.63200187683105, + "p99": 77.66400277614594 + }, + "roundtrip": { + "p50": 321.4400112628937, + "p90": 326.6240060329437, + "p95": 328.99200916290283, + "p99": 332.35201239585876 + }, + "isolatedSum": { + "p50": 329.7920152544975, + "p90": 337.47199922800064, + "p95": 340.0000035762787, + "p99": 348.57599437236786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + 
"dispatch": { + "p50": 269.0880000591278, + "p90": 275.0079929828644, + "p95": 277.1199941635132, + "p99": 285.47200560569763 + }, + "combine": { + "p50": 103.4879982471466, + "p90": 106.175996363163, + "p95": 107.93600231409073, + "p99": 111.64800077676773 + }, + "roundtrip": { + "p50": 367.19998717308044, + "p90": 372.8640079498291, + "p95": 375.5199909210205, + "p99": 382.52800703048706 + }, + "isolatedSum": { + "p50": 372.5759983062744, + "p90": 381.1839893460274, + "p95": 385.0559964776039, + "p99": 397.12000638246536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 298.8159954547882, + "p90": 304.4160008430481, + "p95": 306.2720000743866, + "p99": 312.0959997177124 + }, + "combine": { + "p50": 172.83199727535248, + "p90": 175.1679927110672, + "p95": 176.32000148296356, + "p99": 181.08800053596497 + }, + "roundtrip": { + "p50": 467.45601296424866, + "p90": 472.7360010147095, + "p95": 475.0399887561798, + "p99": 480.1599979400635 + }, + "isolatedSum": { + "p50": 471.6479927301407, + "p90": 479.5839935541153, + "p95": 482.59200155735016, + "p99": 493.18400025367737 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 443.58399510383606, + "p90": 448.60801100730896, + "p95": 451.1359930038452, + "p99": 460.00000834465027 + }, + "combine": { + "p50": 299.80799555778503, + "p90": 302.623987197876, + "p95": 304.03199791908264, + "p99": 307.23199248313904 + }, + "roundtrip": { + "p50": 742.7520155906677, + "p90": 
748.7679719924927, + "p95": 750.9440183639526, + "p99": 770.1119780540466 + }, + "isolatedSum": { + "p50": 743.3919906616211, + "p90": 751.2319982051849, + "p95": 755.1679909229279, + "p99": 767.2320008277893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 736.5120053291321, + "p90": 742.8799867630005, + "p95": 793.4079766273499, + "p99": 956.991970539093 + }, + "combine": { + "p50": 555.0079941749573, + "p90": 558.3680272102356, + "p95": 559.391975402832, + "p99": 564.0000104904175 + }, + "roundtrip": { + "p50": 1289.1199588775635, + "p90": 1294.9440479278564, + "p95": 1297.376036643982, + "p99": 1308.351993560791 + }, + "isolatedSum": { + "p50": 1291.5199995040894, + "p90": 1301.248013973236, + "p95": 1352.7999520301819, + "p99": 1520.9919810295105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1309.6959590911865, + "p90": 1315.9359693527222, + "p95": 1318.079948425293, + "p99": 1324.1920471191406 + }, + "combine": { + "p50": 1065.4720067977905, + "p90": 1069.3119764328003, + "p95": 1070.6559419631958, + "p99": 1135.3600025177002 + }, + "roundtrip": { + "p50": 2372.096061706543, + "p90": 2379.5840740203857, + "p95": 2383.19993019104, + "p99": 2414.5920276641846 + }, + "isolatedSum": { + "p50": 2375.167965888977, + "p90": 2385.2479457855225, + "p95": 2388.7358903884888, + "p99": 2459.552049636841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + 
"fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-740ef803", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_abbe2ef1", + "comparisonKey": "45b459b321a3aaad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:07.241814+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 259.0720057487488, + "p90": 262.84798979759216, + "p95": 265.1839852333069, + "p99": 269.47200298309326 + }, + "combine": { + "p50": 70.43199986219406, + "p90": 72.80000299215317, + "p95": 74.75200295448303, + "p99": 78.52800190448761 + }, + "roundtrip": { + "p50": 320.0640082359314, + "p90": 328.5120129585266, + "p95": 330.9119939804077, + "p99": 627.0080208778381 + }, + "isolatedSum": { + "p50": 329.50400561094284, + "p90": 335.64799278974533, + "p95": 339.9359881877899, + "p99": 348.0000048875809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 267.64801144599915, + "p90": 272.70400524139404, + "p95": 275.04000067710876, + "p99": 279.231995344162 + }, + "combine": { + "p50": 103.64799946546555, + "p90": 105.6319996714592, + "p95": 107.19999670982361, + "p99": 110.49599945545197 + }, + "roundtrip": { + "p50": 364.8959994316101, + "p90": 369.6640133857727, + "p95": 372.54399061203003, + "p99": 377.75999307632446 + }, + "isolatedSum": { + "p50": 371.2960109114647, + "p90": 378.33600491285324, + "p95": 382.2399973869324, + "p99": 389.72799479961395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 298.0479896068573, + "p90": 305.6960105895996, + "p95": 307.45598673820496, + "p99": 338.591992855072 + }, + "combine": { + "p50": 172.12800681591034, + "p90": 174.43199455738068, + "p95": 175.6799966096878, + "p99": 177.824005484581 + }, + "roundtrip": { + "p50": 464.92800116539, + "p90": 470.5919921398163, + "p95": 474.4639992713928, + "p99": 524.0319967269897 + }, + "isolatedSum": { + "p50": 470.17599642276764, + "p90": 480.1280051469803, + "p95": 483.13598334789276, + "p99": 516.415998339653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 446.6879963874817, + "p90": 450.97601413726807, + "p95": 452.7359902858734, + "p99": 456.28800988197327 + }, + "combine": { + "p50": 297.791987657547, + "p90": 300.57600140571594, + "p95": 301.7919957637787, + "p99": 306.7519962787628 + }, + "roundtrip": { + "p50": 742.6239848136902, + "p90": 748.4480142593384, + "p95": 751.2639760971069, + "p99": 806.7200183868408 + }, + "isolatedSum": { + "p50": 744.4799840450287, + "p90": 751.552015542984, + "p95": 754.5279860496521, + "p99": 763.0400061607361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 737.5040054321289, + "p90": 741.1199808120728, + "p95": 742.5600290298462, + "p99": 747.6159930229187 + }, + "combine": { + "p50": 554.2719960212708, + "p90": 557.5039982795715, + "p95": 559.1359734535217, + "p99": 561.1839890480042 + }, + "roundtrip": { + "p50": 
1289.6959781646729, + "p90": 1295.1680421829224, + "p95": 1297.6640462875366, + "p99": 1309.0879917144775 + }, + "isolatedSum": { + "p50": 1291.7760014533997, + "p90": 1298.6239790916443, + "p95": 1301.696002483368, + "p99": 1308.7999820709229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1311.4240169525146, + "p90": 1319.1039562225342, + "p95": 1321.8560218811035, + "p99": 1325.4719972610474 + }, + "combine": { + "p50": 1066.815972328186, + "p90": 1070.6559419631958, + "p95": 1071.5199708938599, + "p99": 1073.8879442214966 + }, + "roundtrip": { + "p50": 2376.192092895508, + "p90": 2386.3039016723633, + "p95": 2390.9759521484375, + "p99": 2503.1681060791016 + }, + "isolatedSum": { + "p50": 2378.2399892807007, + "p90": 2389.75989818573, + "p95": 2393.3759927749634, + "p99": 2399.359941482544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-db08b93c", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_efc996cc", + "comparisonKey": "b8f0232f2b66558a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:02.612499+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 223.68000447750092, + "p90": 229.15199398994446, + "p95": 231.04000091552734, + "p99": 240.7359927892685 + }, + "combine": { + "p50": 72.76800274848938, + "p90": 75.48800110816956, + "p95": 77.504001557827, + "p99": 82.5280025601387 + }, + "roundtrip": { + "p50": 293.92001032829285, + "p90": 297.69599437713623, + "p95": 299.55199360847473, + "p99": 302.5600016117096 + }, + "isolatedSum": { + "p50": 296.4480072259903, + "p90": 304.639995098114, + "p95": 
308.54400247335434, + "p99": 323.2639953494072 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 241.88800156116486, + "p90": 246.14399671554565, + "p95": 248.79999458789825, + "p99": 252.16001272201538 + }, + "combine": { + "p50": 121.2799996137619, + "p90": 123.00799787044525, + "p95": 123.77600371837616, + "p99": 127.36000120639801 + }, + "roundtrip": { + "p50": 363.77599835395813, + "p90": 368.0639863014221, + "p95": 370.0160086154938, + "p99": 373.24801087379456 + }, + "isolatedSum": { + "p50": 363.16800117492676, + "p90": 369.1519945859909, + "p95": 372.5759983062744, + "p99": 379.5200139284134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 337.69598603248596, + "p90": 341.8239951133728, + "p95": 343.4560000896454, + "p99": 346.8480110168457 + }, + "combine": { + "p50": 211.93599700927734, + "p90": 214.1440063714981, + "p95": 215.07200598716736, + "p99": 217.6000028848648 + }, + "roundtrip": { + "p50": 553.5039901733398, + "p90": 557.4079751968384, + "p95": 559.1040253639221, + "p99": 563.7440085411072 + }, + "isolatedSum": { + "p50": 549.6319830417633, + "p90": 555.9680014848709, + "p95": 558.5280060768127, + "p99": 564.4480139017105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 
8192, + "dispatch": { + "p50": 534.5600247383118, + "p90": 538.8799905776978, + "p95": 542.0160293579102, + "p99": 546.5279817581177 + }, + "combine": { + "p50": 394.463986158371, + "p90": 396.64000272750854, + "p95": 397.43998646736145, + "p99": 399.07199144363403 + }, + "roundtrip": { + "p50": 934.0479969978333, + "p90": 938.4639859199524, + "p95": 940.4799938201904, + "p99": 945.792019367218 + }, + "isolatedSum": { + "p50": 929.0240108966827, + "p90": 935.5199933052063, + "p95": 939.4560158252716, + "p99": 945.5999732017517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 915.1359796524048, + "p90": 919.8399782180786, + "p95": 921.5999841690063, + "p99": 927.5519847869873 + }, + "combine": { + "p50": 754.2080283164978, + "p90": 756.991982460022, + "p95": 757.856011390686, + "p99": 760.3520154953003 + }, + "roundtrip": { + "p50": 1675.6479740142822, + "p90": 1680.575966835022, + "p95": 1682.6239824295044, + "p99": 1689.95201587677 + }, + "isolatedSum": { + "p50": 1669.3440079689026, + "p90": 1676.8319606781006, + "p95": 1679.4559955596924, + "p99": 1687.9040002822876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1688.9280080795288, + "p90": 1694.9759721755981, + "p95": 1697.119951248169, + "p99": 1700.9919881820679 + }, + "combine": { + "p50": 1480.2240133285522, + "p90": 1483.0080270767212, + "p95": 1484.063982963562, + "p99": 1486.7199659347534 + }, + "roundtrip": { + "p50": 3174.9439239501953, + "p90": 
3182.624101638794, + "p95": 3184.448003768921, + "p99": 3190.8481121063232 + }, + "isolatedSum": { + "p50": 3169.152021408081, + "p90": 3177.9839992523193, + "p95": 3181.183934211731, + "p99": 3187.7119541168213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2328bdfe", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_b4518d9e", + "comparisonKey": "7aa0d37d58d83d38", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:04.496144+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 189.28000330924988, + "p90": 194.7840005159378, + "p95": 196.57599925994873, + "p99": 205.28000593185425 + }, + "combine": { + "p50": 70.75200229883194, + "p90": 74.68800246715546, + "p95": 77.60000228881836, + "p99": 113.66400122642517 + }, + "roundtrip": { + "p50": 259.5199942588806, + "p90": 264.0959918498993, + "p95": 266.975998878479, + "p99": 405.85601329803467 + }, + "isolatedSum": { + "p50": 260.0320056080818, + "p90": 269.47200298309326, + "p95": 274.1760015487671, + "p99": 318.9440071582794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 235.07200181484222, + "p90": 239.1359955072403, + "p95": 241.60000681877136, + "p99": 245.85600197315216 + }, + "combine": { + "p50": 118.04799735546112, + "p90": 119.39200013875961, + "p95": 119.90399658679962, + "p99": 121.88799679279327 + }, + "roundtrip": { + "p50": 354.5919954776764, + "p90": 358.43199491500854, + "p95": 361.05599999427795, + "p99": 364.80000615119934 + }, + "isolatedSum": { + "p50": 353.11999917030334, + "p90": 358.5279956459999, + "p95": 361.504003405571, + "p99": 367.74399876594543 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 330.81600069999695, + "p90": 334.81600880622864, + "p95": 337.3759984970093, + "p99": 341.43999218940735 + }, + "combine": { + "p50": 204.0960043668747, + "p90": 205.53599298000336, + "p95": 206.04799687862396, + "p99": 208.8959962129593 + }, + "roundtrip": { + "p50": 539.8719906806946, + "p90": 543.5839891433716, + "p95": 546.4320182800293, + "p99": 549.7599840164185 + }, + "isolatedSum": { + "p50": 534.9120050668716, + "p90": 540.352001786232, + "p95": 543.4239953756332, + "p99": 550.3359884023666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 522.9759812355042, + "p90": 526.6559720039368, + "p95": 529.7920107841492, + "p99": 534.1439843177795 + }, + "combine": { + "p50": 380.73599338531494, + "p90": 382.4000060558319, + "p95": 383.1999897956848, + "p99": 384.5439851284027 + }, + "roundtrip": { + "p50": 909.1519713401794, + "p90": 913.4399890899658, + "p95": 915.7119989395142, + "p99": 920.9280014038086 + }, + "isolatedSum": { + "p50": 903.7119746208191, + "p90": 909.0559780597687, + "p95": 912.992000579834, + "p99": 918.6879694461823 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 907.263994216919, + 
"p90": 911.7760062217712, + "p95": 914.2720103263855, + "p99": 920.3519821166992 + }, + "combine": { + "p50": 733.7920069694519, + "p90": 735.8400225639343, + "p95": 736.8000149726868, + "p99": 739.2640113830566 + }, + "roundtrip": { + "p50": 1644.5120573043823, + "p90": 1648.7040519714355, + "p95": 1650.4000425338745, + "p99": 1658.8159799575806 + }, + "isolatedSum": { + "p50": 1641.0560011863708, + "p90": 1647.6160287857056, + "p95": 1651.0720252990723, + "p99": 1659.6159934997559 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1672.8320121765137, + "p90": 1678.015947341919, + "p95": 1680.2239418029785, + "p99": 1686.784029006958 + }, + "combine": { + "p50": 1444.000005722046, + "p90": 1446.1760520935059, + "p95": 1447.0399618148804, + "p99": 1448.3200311660767 + }, + "roundtrip": { + "p50": 3121.824026107788, + "p90": 3128.448009490967, + "p95": 3130.3999423980713, + "p99": 3146.5280055999756 + }, + "isolatedSum": { + "p50": 3116.8320178985596, + "p90": 3124.191999435425, + "p95": 3127.263903617859, + "p99": 3135.1040601730347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9fe3b791", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_ba1c0a90", + "comparisonKey": "8ddc23592e398bfa", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:14.872334+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 260.0319981575012, + "p90": 323.68001341819763, + "p95": 346.5920090675354, + "p99": 688.1600022315979 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 
75.42400062084198, + "p95": 99.0080013871193, + "p99": 232.70399868488312 + }, + "roundtrip": { + "p50": 321.4400112628937, + "p90": 325.8560001850128, + "p95": 329.0559947490692, + "p99": 334.879994392395 + }, + "isolatedSum": { + "p50": 330.3679972887039, + "p90": 399.1040140390396, + "p95": 445.6000104546547, + "p99": 920.864000916481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 271.0399925708771, + "p90": 276.92800760269165, + "p95": 278.75199913978577, + "p99": 287.9360020160675 + }, + "combine": { + "p50": 106.88000172376633, + "p90": 109.6000000834465, + "p95": 111.23199760913849, + "p99": 114.59200084209442 + }, + "roundtrip": { + "p50": 373.82400035858154, + "p90": 377.9520094394684, + "p95": 381.21598958969116, + "p99": 392.92800426483154 + }, + "isolatedSum": { + "p50": 377.9199942946434, + "p90": 386.52800768613815, + "p95": 389.98399674892426, + "p99": 402.5280028581619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 303.6800026893616, + "p90": 308.4479868412018, + "p95": 310.7199966907501, + "p99": 317.1840012073517 + }, + "combine": { + "p50": 177.824005484581, + "p90": 180.63999712467194, + "p95": 182.68799781799316, + "p99": 187.48800456523895 + }, + "roundtrip": { + "p50": 477.5039851665497, + "p90": 481.6960096359253, + "p95": 484.16000604629517, + "p99": 513.6640071868896 + }, + "isolatedSum": { + "p50": 481.50400817394257, + "p90": 489.0879839658737, + "p95": 493.4079945087433, + "p99": 504.67200577259064 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 453.5999894142151, + "p90": 457.8559994697571, + "p95": 459.55199003219604, + "p99": 464.83200788497925 + }, + "combine": { + "p50": 309.88800525665283, + "p90": 312.9599988460541, + "p95": 313.60000371932983, + "p99": 315.744012594223 + }, + "roundtrip": { + "p50": 761.9839906692505, + "p90": 767.2320008277893, + "p95": 769.1839933395386, + "p99": 781.1840176582336 + }, + "isolatedSum": { + "p50": 763.4879946708679, + "p90": 770.8159983158112, + "p95": 773.1519937515259, + "p99": 780.5760204792023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 757.1520209312439, + "p90": 763.3280158042908, + "p95": 771.8719840049744, + "p99": 827.4880051612854 + }, + "combine": { + "p50": 581.3120007514954, + "p90": 585.0560069084167, + "p95": 586.2079858779907, + "p99": 589.631974697113 + }, + "roundtrip": { + "p50": 1337.5040292739868, + "p90": 1344.0320491790771, + "p95": 1349.0240573883057, + "p99": 1497.4080324172974 + }, + "isolatedSum": { + "p50": 1338.4640216827393, + "p90": 1348.3840227127075, + "p95": 1358.079969882965, + "p99": 1417.1199798583984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1354.81595993042, + "p90": 1359.9040508270264, + "p95": 1361.88805103302, + "p99": 1366.7839765548706 + }, + "combine": { + "p50": 1125.5359649658203, + "p90": 1130.3679943084717, + "p95": 1131.9040060043335, + "p99": 1134.81605052948 + }, + "roundtrip": { + "p50": 2482.6879501342773, + "p90": 2491.0080432891846, + "p95": 2494.01593208313, + "p99": 2608.448028564453 + }, + "isolatedSum": { + "p50": 2480.3519248962402, + "p90": 2490.272045135498, + "p95": 2493.7920570373535, + "p99": 2501.6000270843506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-62bfb942", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_add1bf61", + "comparisonKey": "d2e536a124963622", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:34.632597+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 
null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 248.35200607776642, + "p90": 254.27201390266418, + "p95": 256.9279968738556, + "p99": 624.2560148239136 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 74.8480036854744, + "p95": 77.2479996085167, + "p99": 84.89599823951721 + }, + "roundtrip": { + "p50": 318.87999176979065, + "p90": 323.42401146888733, + "p95": 326.4319896697998, + "p99": 345.8560109138489 + }, + "isolatedSum": { + "p50": 320.8640068769455, + "p90": 329.1200175881386, + "p95": 334.1759964823723, + "p99": 709.1520130634308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 258.7839961051941, + "p90": 263.16800713539124, + "p95": 265.53601026535034, + "p99": 270.6559896469116 + }, + "combine": { + "p50": 121.11999839544296, + "p90": 122.94399738311768, + "p95": 123.61600250005722, + "p99": 
127.55200266838074 + }, + "roundtrip": { + "p50": 376.1279881000519, + "p90": 380.8639943599701, + "p95": 384.0959966182709, + "p99": 395.1359987258911 + }, + "isolatedSum": { + "p50": 379.90399450063705, + "p90": 386.1120045185089, + "p95": 389.15201276540756, + "p99": 398.20799231529236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 335.83998680114746, + "p90": 339.9679958820343, + "p95": 342.272013425827, + "p99": 346.68800234794617 + }, + "combine": { + "p50": 208.25600624084473, + "p90": 210.9760046005249, + "p95": 211.71200275421143, + "p99": 218.87999773025513 + }, + "roundtrip": { + "p50": 547.4560260772705, + "p90": 553.8560152053833, + "p95": 556.5760135650635, + "p99": 570.8159804344177 + }, + "isolatedSum": { + "p50": 544.0959930419922, + "p90": 550.9440004825592, + "p95": 553.9840161800385, + "p99": 565.5680000782013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 528.5120010375977, + "p90": 533.5680246353149, + "p95": 536.0640287399292, + "p99": 550.8480072021484 + }, + "combine": { + "p50": 391.84001088142395, + "p90": 394.75199580192566, + "p95": 395.77600359916687, + "p99": 398.0799913406372 + }, + "roundtrip": { + "p50": 923.8399863243103, + "p90": 930.4320216178894, + "p95": 933.1520199775696, + "p99": 944.383978843689 + }, + "isolatedSum": { + "p50": 920.3520119190216, + "p90": 928.3200204372406, + "p95": 931.8400323390961, + "p99": 948.9279985427856 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 904.7679901123047, + "p90": 911.1999869346619, + "p95": 914.0160083770752, + "p99": 932.2879910469055 + }, + "combine": { + "p50": 740.6399846076965, + "p90": 744.2560195922852, + "p95": 745.5999851226807, + "p99": 748.7360239028931 + }, + "roundtrip": { + "p50": 1651.6799926757812, + "p90": 1657.472014427185, + "p95": 1659.775972366333, + "p99": 1670.9760427474976 + }, + "isolatedSum": { + "p50": 1645.4079747200012, + "p90": 1655.456006526947, + "p95": 1659.6159934997559, + "p99": 1681.0240149497986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1652.9920101165771, + "p90": 1659.999966621399, + "p95": 1661.8880033493042, + "p99": 1667.072057723999 + }, + "combine": { + "p50": 1450.6560564041138, + "p90": 1454.4320106506348, + "p95": 1455.9680223464966, + "p99": 1458.7199687957764 + }, + "roundtrip": { + "p50": 3109.3759536743164, + "p90": 3117.568016052246, + "p95": 3121.407985687256, + "p99": 3163.583993911743 + }, + "isolatedSum": { + "p50": 3103.648066520691, + "p90": 3114.4319772720337, + "p95": 3117.856025695801, + "p99": 3125.7920265197754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f33abfa2", + "identity": 
"h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_051ab86d", + "comparisonKey": "71469717c0f2db62", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:10.513778+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 260.25599241256714, + "p90": 264.67201113700867, + "p95": 267.36000180244446, + "p99": 270.9760069847107 + }, + "combine": { + "p50": 70.23999840021133, + "p90": 72.51200079917908, + "p95": 74.36800003051758, + "p99": 79.80799674987793 + }, + "roundtrip": { + "p50": 321.9519853591919, + "p90": 328.2879889011383, + "p95": 330.30399680137634, + "p99": 342.303991317749 + }, + "isolatedSum": { + "p50": 330.4959908127785, + "p90": 337.18401193618774, + "p95": 341.72800183296204, + "p99": 350.7840037345886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 269.0240144729614, + "p90": 275.4240036010742, + "p95": 277.69601345062256, + "p99": 283.26401114463806 + }, + "combine": { + "p50": 103.96800190210342, + "p90": 106.6880002617836, + "p95": 108.64000022411346, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 366.14400148391724, + "p90": 372.1599876880646, + "p95": 373.6000061035156, + "p99": 379.7760009765625 + }, + "isolatedSum": { + "p50": 372.99201637506485, + "p90": 382.1120038628578, + "p95": 386.336013674736, + "p99": 396.512009203434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 303.42400074005127, + "p90": 308.51200222969055, + "p95": 309.7600042819977, + "p99": 
315.0080144405365 + }, + "combine": { + "p50": 171.61600291728973, + "p90": 173.72800409793854, + "p95": 174.75199699401855, + "p99": 179.51999604701996 + }, + "roundtrip": { + "p50": 469.88800168037415, + "p90": 475.2640128135681, + "p95": 477.60000824928284, + "p99": 483.5200011730194 + }, + "isolatedSum": { + "p50": 475.040003657341, + "p90": 482.2400063276291, + "p95": 484.51200127601624, + "p99": 494.52801048755646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 447.64798879623413, + "p90": 452.2559940814972, + "p95": 454.46398854255676, + "p99": 457.5679898262024 + }, + "combine": { + "p50": 297.2480058670044, + "p90": 300.35200715065, + "p95": 301.503986120224, + "p99": 304.9600124359131 + }, + "roundtrip": { + "p50": 743.6479926109314, + "p90": 749.8239874839783, + "p95": 752.5119781494141, + "p99": 759.5199942588806 + }, + "isolatedSum": { + "p50": 744.8959946632385, + "p90": 752.6080012321472, + "p95": 755.9679746627808, + "p99": 762.5280022621155 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 737.1839880943298, + "p90": 741.1839962005615, + "p95": 743.5839772224426, + "p99": 747.4880218505859 + }, + "combine": { + "p50": 555.679976940155, + "p90": 558.4959983825684, + "p95": 559.6479773521423, + "p99": 563.4239912033081 + }, + "roundtrip": { + "p50": 1291.808009147644, + "p90": 1296.5760231018066, + "p95": 1298.5919713974, + "p99": 1303.1680583953857 + }, + "isolatedSum": { + "p50": 1292.8639650344849, + "p90": 
1299.6799945831299, + "p95": 1303.231954574585, + "p99": 1310.912013053894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1310.2400302886963, + "p90": 1319.9360370635986, + "p95": 1323.4879970550537, + "p99": 1337.3759984970093 + }, + "combine": { + "p50": 1067.903995513916, + "p90": 1071.7120170593262, + "p95": 1073.7279653549194, + "p99": 1090.7520055770874 + }, + "roundtrip": { + "p50": 2378.27205657959, + "p90": 2388.223886489868, + "p95": 2392.1918869018555, + "p99": 2432.86395072937 + }, + "isolatedSum": { + "p50": 2378.1440258026123, + "p90": 2391.648054122925, + "p95": 2397.215962409973, + "p99": 2428.1280040740967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2ebfb5d8", + "identity": "h100|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_7ff3464e", + "comparisonKey": "5e2b61a6e9b1e63e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:39.665896+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · 
zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 226.59200429916382, + "p90": 231.26399517059326, + "p95": 233.50399732589722, + "p99": 238.11200261116028 + }, + "combine": { + "p50": 73.27999919652939, + "p90": 75.45600086450577, + "p95": 77.27999985218048, + "p99": 83.03999900817871 + }, + "roundtrip": { + "p50": 291.456013917923, + "p90": 295.6799864768982, + "p95": 298.2720136642456, + "p99": 303.23201417922974 + }, + "isolatedSum": { + "p50": 299.8720034956932, + "p90": 306.71999603509903, + "p95": 310.7839971780777, + "p99": 321.152001619339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + 
"fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 245.9840029478073, + "p90": 250.36799907684326, + "p95": 253.63200902938843, + "p99": 259.10401344299316 + }, + "combine": { + "p50": 120.99199742078781, + "p90": 122.49600142240524, + "p95": 123.45600128173828, + "p99": 128.51199507713318 + }, + "roundtrip": { + "p50": 361.2799942493439, + "p90": 365.3759956359863, + "p95": 367.8719997406006, + "p99": 371.39201164245605 + }, + "isolatedSum": { + "p50": 366.9760003685951, + "p90": 372.8640004992485, + "p95": 377.0880103111267, + "p99": 387.61600852012634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 341.3439989089966, + "p90": 345.7599878311157, + "p95": 348.28799962997437, + "p99": 352.9599905014038 + }, + "combine": { + "p50": 211.42399311065674, + "p90": 214.27200734615326, + "p95": 215.58399498462677, + "p99": 266.7520046234131 + }, + "roundtrip": { + "p50": 549.9839782714844, + "p90": 554.6240210533142, + "p95": 556.5760135650635, + "p99": 560.0000023841858 + }, + "isolatedSum": { + "p50": 552.7679920196533, + "p90": 560.031995177269, + "p95": 563.8719946146011, + "p99": 619.7119951248169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 538.8479828834534, + "p90": 543.8719987869263, + "p95": 548.1280088424683, + "p99": 875.4879832267761 + }, + "combine": { + 
"p50": 393.75999569892883, + "p90": 395.87199687957764, + "p95": 396.5120017528534, + "p99": 398.5599875450134 + }, + "roundtrip": { + "p50": 930.0479888916016, + "p90": 934.7519874572754, + "p95": 936.7679953575134, + "p99": 948.9920139312744 + }, + "isolatedSum": { + "p50": 932.6079785823822, + "p90": 939.7439956665039, + "p95": 944.6400105953217, + "p99": 1274.0479707717896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 919.7440147399902, + "p90": 924.8960018157959, + "p95": 927.295982837677, + "p99": 931.3600063323975 + }, + "combine": { + "p50": 753.216028213501, + "p90": 755.9040188789368, + "p95": 756.5760016441345, + "p99": 758.9120268821716 + }, + "roundtrip": { + "p50": 1670.9120273590088, + "p90": 1676.31995677948, + "p95": 1678.4640550613403, + "p99": 1689.3119812011719 + }, + "isolatedSum": { + "p50": 1672.9600429534912, + "p90": 1680.8000206947327, + "p95": 1683.8719844818115, + "p99": 1690.272033214569 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1692.9600238800049, + "p90": 1699.3600130081177, + "p95": 1701.4080286026, + "p99": 1706.1439752578735 + }, + "combine": { + "p50": 1479.904055595398, + "p90": 1482.751965522766, + "p95": 1483.5200309753418, + "p99": 1486.464023590088 + }, + "roundtrip": { + "p50": 3173.504114151001, + "p90": 3182.3360919952393, + "p95": 3184.959888458252, + "p99": 3195.8398818969727 + }, + "isolatedSum": { + "p50": 3172.864079475403, + "p90": 3182.111978530884, + 
"p95": 3184.928059577942, + "p99": 3192.6079988479614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f9386632", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_f56f8200", + "comparisonKey": "f4a032e55e709ec2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:13.846875+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 254.33599948883057, + "p90": 260.09601354599, + "p95": 261.9520127773285, + "p99": 266.1759853363037 + }, + "combine": { + "p50": 68.51200014352798, + "p90": 70.97599655389786, + "p95": 73.85600358247757, + "p99": 79.16799932718277 + }, + "roundtrip": { + "p50": 314.14398550987244, + "p90": 413.91998529434204, + "p95": 417.9520010948181, + "p99": 423.2960045337677 + }, + "isolatedSum": { + "p50": 322.84799963235855, + "p90": 331.07201009988785, + "p95": 335.80801635980606, + "p99": 345.3439846634865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 264.6079957485199, + "p90": 269.6639895439148, + "p95": 271.87201380729675, + "p99": 279.4559895992279 + }, + "combine": { + "p50": 102.88000106811523, + "p90": 105.02400249242783, + "p95": 106.88000172376633, + "p99": 109.63200032711029 + }, + "roundtrip": { + "p50": 361.91999912261963, + "p90": 367.5839900970459, + "p95": 370.0160086154938, + "p99": 379.1680037975311 + }, + "isolatedSum": { + "p50": 367.48799681663513, + "p90": 374.6879920363426, + "p95": 378.7520155310631, + "p99": 389.0879899263382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + 
"fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 299.1679906845093, + "p90": 304.25599217414856, + "p95": 307.3279857635498, + "p99": 320.8000063896179 + }, + "combine": { + "p50": 170.9440052509308, + "p90": 173.75999689102173, + "p95": 174.78400468826294, + "p99": 179.9039989709854 + }, + "roundtrip": { + "p50": 466.3040041923523, + "p90": 472.1600115299225, + "p95": 475.3600060939789, + "p99": 482.65600204467773 + }, + "isolatedSum": { + "p50": 470.11199593544006, + "p90": 478.0159890651703, + "p95": 482.11199045181274, + "p99": 500.70400536060333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 440.2559995651245, + "p90": 445.6000030040741, + "p95": 447.80799746513367, + "p99": 451.80800557136536 + }, + "combine": { + "p50": 297.95199632644653, + "p90": 300.31999945640564, + "p95": 302.623987197876, + "p99": 329.3119966983795 + }, + "roundtrip": { + "p50": 738.3040189743042, + "p90": 743.4880137443542, + "p95": 747.2320199012756, + "p99": 753.1200051307678 + }, + "isolatedSum": { + "p50": 738.207995891571, + "p90": 745.9200024604797, + "p95": 750.4319846630096, + "p99": 781.1200022697449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 737.824022769928, + "p90": 822.5600123405457, + "p95": 825.3759741783142, + "p99": 860.4159951210022 + }, + "combine": { + 
"p50": 560.2239966392517, + "p90": 563.4559988975525, + "p95": 564.8000240325928, + "p99": 608.8640093803406 + }, + "roundtrip": { + "p50": 1297.3120212554932, + "p90": 1304.095983505249, + "p95": 1307.2320222854614, + "p99": 1358.9119911193848 + }, + "isolatedSum": { + "p50": 1298.0480194091797, + "p90": 1386.0160112380981, + "p95": 1390.175998210907, + "p99": 1469.2800045013428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1317.088007926941, + "p90": 1322.4639892578125, + "p95": 1325.7280588150024, + "p99": 1338.912010192871 + }, + "combine": { + "p50": 1079.8399448394775, + "p90": 1084.2880010604858, + "p95": 1086.8159532546997, + "p99": 1133.7920427322388 + }, + "roundtrip": { + "p50": 2397.7279663085938, + "p90": 2405.3759574890137, + "p95": 2410.304069519043, + "p99": 2435.3599548339844 + }, + "isolatedSum": { + "p50": 2396.9279527664185, + "p90": 2406.7519903182983, + "p95": 2412.544012069702, + "p99": 2472.70405292511 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-962c2c74", + "identity": "h100|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_db644ee2", + "comparisonKey": "0adaee9d6bd5b45a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:04.719509+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_09", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": 
"normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 261.7279887199402, + "p90": 266.36800169944763, + "p95": 268.92799139022827, + "p99": 278.01600098609924 + }, + "combine": { + "p50": 68.1919977068901, + "p90": 70.39999961853027, + "p95": 73.40800017118454, + "p99": 79.99999821186066 + }, + "roundtrip": { + "p50": 323.68001341819763, + "p90": 
327.0080089569092, + "p95": 328.8640081882477, + "p99": 331.7759931087494 + }, + "isolatedSum": { + "p50": 329.9199864268303, + "p90": 336.7680013179779, + "p95": 342.3359915614128, + "p99": 358.0159991979599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 275.29600262641907, + "p90": 279.90400791168213, + "p95": 282.30398893356323, + "p99": 289.37599062919617 + }, + "combine": { + "p50": 104.44799810647964, + "p90": 106.65600001811981, + "p95": 108.73600095510483, + "p99": 113.02399635314941 + }, + "roundtrip": { + "p50": 371.2640106678009, + "p90": 375.39198994636536, + "p95": 377.344012260437, + "p99": 381.53600692749023 + }, + "isolatedSum": { + "p50": 379.7440007328987, + "p90": 386.56000792980194, + "p95": 391.03998988866806, + "p99": 402.3999869823456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 305.4080009460449, + "p90": 311.13600730895996, + "p95": 313.24800848960876, + "p99": 322.1760094165802 + }, + "combine": { + "p50": 171.36000096797943, + "p90": 173.75999689102173, + "p95": 175.135999917984, + "p99": 178.27199399471283 + }, + "roundtrip": { + "p50": 474.11200404167175, + "p90": 478.33600640296936, + "p95": 480.51199316978455, + "p99": 484.41600799560547 + }, + "isolatedSum": { + "p50": 476.76800191402435, + "p90": 484.8960041999817, + "p95": 488.3840084075928, + "p99": 500.44800341129303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 
5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 446.6240108013153, + "p90": 452.41600275039673, + "p95": 454.8799991607666, + "p99": 497.1199929714203 + }, + "combine": { + "p50": 297.91998863220215, + "p90": 300.35200715065, + "p95": 301.31199955940247, + "p99": 304.51199412345886 + }, + "roundtrip": { + "p50": 744.6399927139282, + "p90": 749.4400143623352, + "p95": 751.1360049247742, + "p99": 756.1920285224915 + }, + "isolatedSum": { + "p50": 744.5439994335175, + "p90": 752.7680099010468, + "p95": 756.1919987201691, + "p99": 801.6319870948792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 741.4079904556274, + "p90": 745.7280158996582, + "p95": 747.4880218505859, + "p99": 753.6960244178772 + }, + "combine": { + "p50": 560.4479908943176, + "p90": 563.647985458374, + "p95": 564.2880201339722, + "p99": 568.9600110054016 + }, + "roundtrip": { + "p50": 1301.7280101776123, + "p90": 1306.6879510879517, + "p95": 1309.183955192566, + "p99": 1332.1919441223145 + }, + "isolatedSum": { + "p50": 1301.855981349945, + "p90": 1309.3760013580322, + "p95": 1311.776041984558, + "p99": 1322.6560354232788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1321.2480545043945, + "p90": 1326.848030090332, + "p95": 1329.7920227050781, + "p99": 1364.832043647766 + }, + "combine": { + 
"p50": 1080.415964126587, + "p90": 1084.8000049591064, + "p95": 1086.2400531768799, + "p99": 1091.968059539795 + }, + "roundtrip": { + "p50": 2404.1919708251953, + "p90": 2411.967992782593, + "p95": 2417.6321029663086, + "p99": 2442.0158863067627 + }, + "isolatedSum": { + "p50": 2401.6640186309814, + "p90": 2411.6480350494385, + "p95": 2416.032075881958, + "p99": 2456.800103187561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4f20f36d", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_4d19e0a7", + "comparisonKey": "5205049e72237a92", + "schemaVersion": 3, + "generatedAt": "2026-07-02T12:42:15.204673+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": 
true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.85600006580353, + "p90": 104.25599664449692, + "p95": 106.84800148010254, + "p99": 118.367999792099 + }, + "combine": { + "p50": 97.85600006580353, + "p90": 104.25599664449692, + "p95": 106.84800148010254, + "p99": 118.367999792099 + }, + "roundtrip": { + "p50": 97.85600006580353, + "p90": 104.25599664449692, + "p95": 106.84800148010254, + "p99": 118.367999792099 + }, + "isolatedSum": { + "p50": 195.71200013160706, + "p90": 208.51199328899384, + "p95": 213.69600296020508, + "p99": 236.735999584198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.96000069379807, + "p90": 104.25599664449692, + "p95": 108.41599851846695, + "p99": 114.04799669981003 + }, + "combine": { + "p50": 96.96000069379807, + "p90": 104.25599664449692, + "p95": 108.41599851846695, + "p99": 114.04799669981003 + }, + "roundtrip": { + "p50": 96.96000069379807, + "p90": 104.25599664449692, + "p95": 108.41599851846695, + "p99": 114.04799669981003 + }, + "isolatedSum": { + "p50": 
193.92000138759613, + "p90": 208.51199328899384, + "p95": 216.8319970369339, + "p99": 228.09599339962006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 96.0640013217926, + "p90": 102.20800340175629, + "p95": 106.20799660682678, + "p99": 113.24799805879593 + }, + "combine": { + "p50": 96.0640013217926, + "p90": 102.20800340175629, + "p95": 106.20799660682678, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 96.0640013217926, + "p90": 102.20800340175629, + "p95": 106.20799660682678, + "p99": 113.24799805879593 + }, + "isolatedSum": { + "p50": 192.1280026435852, + "p90": 204.41600680351257, + "p95": 212.41599321365356, + "p99": 226.49599611759186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 96.44799679517746, + "p90": 102.49599814414978, + "p95": 107.04000294208527, + "p99": 116.2559986114502 + }, + "combine": { + "p50": 96.44799679517746, + "p90": 102.49599814414978, + "p95": 107.04000294208527, + "p99": 116.2559986114502 + }, + "roundtrip": { + "p50": 96.44799679517746, + "p90": 102.49599814414978, + "p95": 107.04000294208527, + "p99": 116.2559986114502 + }, + "isolatedSum": { + "p50": 192.89599359035492, + "p90": 204.99199628829956, + "p95": 214.08000588417053, + "p99": 232.5119972229004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, 
+ "globalTokens": 128, + "dispatch": { + "p50": 96.96000069379807, + "p90": 107.16799646615982, + "p95": 110.68800091743469, + "p99": 116.41599982976913 + }, + "combine": { + "p50": 96.96000069379807, + "p90": 107.16799646615982, + "p95": 110.68800091743469, + "p99": 116.41599982976913 + }, + "roundtrip": { + "p50": 96.96000069379807, + "p90": 107.16799646615982, + "p95": 110.68800091743469, + "p99": 116.41599982976913 + }, + "isolatedSum": { + "p50": 193.92000138759613, + "p90": 214.33599293231964, + "p95": 221.37600183486938, + "p99": 232.83199965953827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.25600278377533, + "p90": 103.00800204277039, + "p95": 107.45599865913391, + "p99": 115.52000045776367 + }, + "combine": { + "p50": 96.25600278377533, + "p90": 103.00800204277039, + "p95": 107.45599865913391, + "p99": 115.52000045776367 + }, + "roundtrip": { + "p50": 96.25600278377533, + "p90": 103.00800204277039, + "p95": 107.45599865913391, + "p99": 115.52000045776367 + }, + "isolatedSum": { + "p50": 192.51200556755066, + "p90": 206.01600408554077, + "p95": 214.91199731826782, + "p99": 231.04000091552734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.88800030946732, + "p90": 105.05600273609161, + "p95": 108.25599730014801, + "p99": 114.20799791812897 + }, + "combine": { + "p50": 97.88800030946732, + "p90": 105.05600273609161, + "p95": 108.25599730014801, + "p99": 114.20799791812897 + }, + "roundtrip": { + "p50": 97.88800030946732, + "p90": 
105.05600273609161, + "p95": 108.25599730014801, + "p99": 114.20799791812897 + }, + "isolatedSum": { + "p50": 195.77600061893463, + "p90": 210.11200547218323, + "p95": 216.51199460029602, + "p99": 228.41599583625793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 111.42399907112122, + "p90": 116.41599982976913, + "p95": 119.6800023317337, + "p99": 128.67200374603271 + }, + "combine": { + "p50": 111.42399907112122, + "p90": 116.41599982976913, + "p95": 119.6800023317337, + "p99": 128.67200374603271 + }, + "roundtrip": { + "p50": 111.42399907112122, + "p90": 116.41599982976913, + "p95": 119.6800023317337, + "p99": 128.67200374603271 + }, + "isolatedSum": { + "p50": 222.84799814224243, + "p90": 232.83199965953827, + "p95": 239.3600046634674, + "p99": 257.34400749206543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c7a83721", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h100_850a930d", + "comparisonKey": "7d72cfe9ab5745ec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:34:53.060619+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_06", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.96000134944916, + "p90": 106.65600001811981, + "p95": 108.57599973678589, + "p99": 114.27199840545654 + }, + "combine": { + "p50": 100.96000134944916, + "p90": 106.65600001811981, + "p95": 108.57599973678589, + "p99": 114.27199840545654 + }, + "roundtrip": { + "p50": 100.96000134944916, + "p90": 106.65600001811981, + "p95": 108.57599973678589, + "p99": 114.27199840545654 + }, + "isolatedSum": { + "p50": 201.92000269889832, + "p90": 213.31200003623962, + "p95": 217.15199947357178, + "p99": 228.5439968109131 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.3759970664978, + "p90": 107.68000036478043, + "p95": 110.01600325107574, + "p99": 119.58400160074234 + }, + "combine": { + "p50": 101.3759970664978, + "p90": 107.68000036478043, + "p95": 110.01600325107574, + "p99": 119.58400160074234 + }, + "roundtrip": { + "p50": 101.3759970664978, + "p90": 107.68000036478043, + "p95": 110.01600325107574, + "p99": 119.58400160074234 + }, + "isolatedSum": { + "p50": 202.7519941329956, + "p90": 215.36000072956085, + "p95": 220.0320065021515, + "p99": 239.16800320148468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.25599926710129, + "p90": 125.56800246238708, + "p95": 126.91199779510498, + "p99": 131.9040060043335 + }, + "combine": { + "p50": 120.25599926710129, + "p90": 125.56800246238708, + "p95": 126.91199779510498, + "p99": 131.9040060043335 + }, + "roundtrip": { + "p50": 120.25599926710129, + "p90": 125.56800246238708, + "p95": 126.91199779510498, + "p99": 131.9040060043335 + }, + "isolatedSum": { + "p50": 240.51199853420258, + "p90": 251.13600492477417, + "p95": 253.82399559020996, + "p99": 263.808012008667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c90dba6a", + "identity": 
"h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_7d66862d", + "comparisonKey": "1e89e1164fe6025c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:43:40.103266+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": 
"2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.78399902582169, + "p90": 98.88000041246414, + "p95": 101.72799974679947, + "p99": 106.91200196743011 + }, + "combine": { + "p50": 94.78399902582169, + "p90": 98.88000041246414, + "p95": 101.72799974679947, + "p99": 106.91200196743011 + }, + "roundtrip": { + "p50": 94.78399902582169, + "p90": 98.88000041246414, + "p95": 101.72799974679947, + "p99": 106.91200196743011 + }, + "isolatedSum": { + "p50": 189.56799805164337, + "p90": 197.76000082492828, + "p95": 203.45599949359894, + "p99": 213.82400393486023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.64799815416336, + "p90": 99.96800124645233, + "p95": 102.81600058078766, + "p99": 109.27999764680862 + }, + "combine": { + "p50": 95.64799815416336, + "p90": 99.96800124645233, + "p95": 102.81600058078766, + "p99": 109.27999764680862 + }, + "roundtrip": { + "p50": 95.64799815416336, + "p90": 99.96800124645233, + "p95": 102.81600058078766, + "p99": 109.27999764680862 + }, + "isolatedSum": { + "p50": 191.29599630832672, + "p90": 199.93600249290466, + "p95": 205.63200116157532, + "p99": 218.55999529361725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.83200234174728, + "p90": 118.27199906110764, + "p95": 120.4800009727478, + "p99": 129.5360028743744 + }, + "combine": { + "p50": 112.83200234174728, + "p90": 118.27199906110764, + 
"p95": 120.4800009727478, + "p99": 129.5360028743744 + }, + "roundtrip": { + "p50": 112.83200234174728, + "p90": 118.27199906110764, + "p95": 120.4800009727478, + "p99": 129.5360028743744 + }, + "isolatedSum": { + "p50": 225.66400468349457, + "p90": 236.54399812221527, + "p95": 240.9600019454956, + "p99": 259.0720057487488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-534d6834", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_6e3f35b1", + "comparisonKey": "f3c21f2cb158de28", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:56:44.620600+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 95.74399888515472, + "p90": 99.74399954080582, + "p95": 102.39999741315842, + "p99": 105.31199723482132 + }, + "combine": { + "p50": 95.74399888515472, + "p90": 99.74399954080582, + "p95": 102.39999741315842, + "p99": 105.31199723482132 + }, + "roundtrip": { + "p50": 95.74399888515472, + "p90": 99.74399954080582, + "p95": 102.39999741315842, + "p99": 105.31199723482132 + }, + "isolatedSum": { + "p50": 191.48799777030945, + "p90": 199.48799908161163, + "p95": 204.79999482631683, + "p99": 210.62399446964264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.28800302743912, + "p90": 99.7759997844696, + "p95": 103.00800204277039, + "p99": 109.18399691581726 + }, + "combine": { + "p50": 96.28800302743912, + "p90": 99.7759997844696, + "p95": 103.00800204277039, + "p99": 109.18399691581726 + }, + "roundtrip": { + "p50": 96.28800302743912, + "p90": 99.7759997844696, + "p95": 103.00800204277039, + "p99": 109.18399691581726 + }, + "isolatedSum": { + "p50": 192.57600605487823, + "p90": 199.5519995689392, + "p95": 
206.01600408554077, + "p99": 218.36799383163452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.93600034713745, + "p90": 99.93600100278854, + "p95": 101.95200145244598, + "p99": 104.63999956846237 + }, + "combine": { + "p50": 95.93600034713745, + "p90": 99.93600100278854, + "p95": 101.95200145244598, + "p99": 104.63999956846237 + }, + "roundtrip": { + "p50": 95.93600034713745, + "p90": 99.93600100278854, + "p95": 101.95200145244598, + "p99": 104.63999956846237 + }, + "isolatedSum": { + "p50": 191.8720006942749, + "p90": 199.8720020055771, + "p95": 203.90400290489197, + "p99": 209.27999913692474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.8079993724823, + "p90": 103.67999970912933, + "p95": 111.455999314785, + "p99": 115.80800265073776 + }, + "combine": { + "p50": 95.8079993724823, + "p90": 103.67999970912933, + "p95": 111.455999314785, + "p99": 115.80800265073776 + }, + "roundtrip": { + "p50": 95.8079993724823, + "p90": 103.67999970912933, + "p95": 111.455999314785, + "p99": 115.80800265073776 + }, + "isolatedSum": { + "p50": 191.6159987449646, + "p90": 207.35999941825867, + "p95": 222.91199862957, + "p99": 231.61600530147552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.23200243711472, + "p90": 
98.24000298976898, + "p95": 101.53599828481674, + "p99": 108.83200168609619 + }, + "combine": { + "p50": 95.23200243711472, + "p90": 98.24000298976898, + "p95": 101.53599828481674, + "p99": 108.83200168609619 + }, + "roundtrip": { + "p50": 95.23200243711472, + "p90": 98.24000298976898, + "p95": 101.53599828481674, + "p99": 108.83200168609619 + }, + "isolatedSum": { + "p50": 190.46400487422943, + "p90": 196.48000597953796, + "p95": 203.07199656963348, + "p99": 217.66400337219238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.38399630784988, + "p90": 99.90400075912476, + "p95": 102.04800218343735, + "p99": 106.08000308275223 + }, + "combine": { + "p50": 96.38399630784988, + "p90": 99.90400075912476, + "p95": 102.04800218343735, + "p99": 106.08000308275223 + }, + "roundtrip": { + "p50": 96.38399630784988, + "p90": 99.90400075912476, + "p95": 102.04800218343735, + "p99": 106.08000308275223 + }, + "isolatedSum": { + "p50": 192.76799261569977, + "p90": 199.8080015182495, + "p95": 204.0960043668747, + "p99": 212.16000616550446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.99200159311295, + "p90": 105.59999942779541, + "p95": 107.10400342941284, + "p99": 115.07199704647064 + }, + "combine": { + "p50": 100.99200159311295, + "p90": 105.59999942779541, + "p95": 107.10400342941284, + "p99": 115.07199704647064 + }, + "roundtrip": { + "p50": 100.99200159311295, + "p90": 105.59999942779541, + "p95": 107.10400342941284, + "p99": 115.07199704647064 + }, + 
"isolatedSum": { + "p50": 201.9840031862259, + "p90": 211.19999885559082, + "p95": 214.20800685882568, + "p99": 230.14399409294128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.31199371814728, + "p90": 132.57600367069244, + "p95": 135.3600025177002, + "p99": 149.34399724006653 + }, + "combine": { + "p50": 129.31199371814728, + "p90": 132.57600367069244, + "p95": 135.3600025177002, + "p99": 149.34399724006653 + }, + "roundtrip": { + "p50": 129.31199371814728, + "p90": 132.57600367069244, + "p95": 135.3600025177002, + "p99": 149.34399724006653 + }, + "isolatedSum": { + "p50": 258.62398743629456, + "p90": 265.1520073413849, + "p95": 270.7200050354004, + "p99": 298.68799448013306 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3314cbb5", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_eccd5c68", + "comparisonKey": "3f3c175d3a12fc62", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:29.953095+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · 
balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 100.80000013113022, + "p90": 105.43999820947647, + "p95": 107.55199939012527, + "p99": 115.55200070142746 + }, + "combine": { + "p50": 100.80000013113022, + "p90": 105.43999820947647, + "p95": 107.55199939012527, + "p99": 115.55200070142746 + }, + "roundtrip": { + "p50": 100.80000013113022, + "p90": 105.43999820947647, + "p95": 107.55199939012527, + "p99": 115.55200070142746 + }, + "isolatedSum": { + "p50": 201.60000026226044, + "p90": 210.87999641895294, + "p95": 215.10399878025055, + "p99": 231.10400140285492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 
114688, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.70399940013885, + "p90": 105.02400249242783, + "p95": 107.45599865913391, + "p99": 125.791996717453 + }, + "combine": { + "p50": 100.70399940013885, + "p90": 105.02400249242783, + "p95": 107.45599865913391, + "p99": 125.791996717453 + }, + "roundtrip": { + "p50": 100.70399940013885, + "p90": 105.02400249242783, + "p95": 107.45599865913391, + "p99": 125.791996717453 + }, + "isolatedSum": { + "p50": 201.4079988002777, + "p90": 210.04800498485565, + "p95": 214.91199731826782, + "p99": 251.583993434906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.54399818181992, + "p90": 104.70400005578995, + "p95": 107.4879989027977, + "p99": 121.18399888277054 + }, + "combine": { + "p50": 100.54399818181992, + "p90": 104.70400005578995, + "p95": 107.4879989027977, + "p99": 121.18399888277054 + }, + "roundtrip": { + "p50": 100.54399818181992, + "p90": 104.70400005578995, + "p95": 107.4879989027977, + "p99": 121.18399888277054 + }, + "isolatedSum": { + "p50": 201.08799636363983, + "p90": 209.4080001115799, + "p95": 214.9759978055954, + "p99": 242.36799776554108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.80000013113022, + "p90": 103.87200117111206, + "p95": 105.98400235176086, + "p99": 109.72800105810165 + }, + "combine": { + "p50": 100.80000013113022, + "p90": 
103.87200117111206, + "p95": 105.98400235176086, + "p99": 109.72800105810165 + }, + "roundtrip": { + "p50": 100.80000013113022, + "p90": 103.87200117111206, + "p95": 105.98400235176086, + "p99": 109.72800105810165 + }, + "isolatedSum": { + "p50": 201.60000026226044, + "p90": 207.74400234222412, + "p95": 211.96800470352173, + "p99": 219.4560021162033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ae325170", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_324b2efd", + "comparisonKey": "10a6db9a3bc7bce9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:26.612041+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 99.04000163078308, + "p90": 103.26399654150009, + "p95": 105.92000186443329, + "p99": 110.3999987244606 + }, + "combine": { + "p50": 99.04000163078308, + "p90": 103.26399654150009, + "p95": 105.92000186443329, + "p99": 110.3999987244606 + }, + "roundtrip": { + "p50": 99.04000163078308, + "p90": 103.26399654150009, + "p95": 105.92000186443329, + "p99": 110.3999987244606 + }, + "isolatedSum": { + "p50": 198.08000326156616, + "p90": 206.52799308300018, + "p95": 211.84000372886658, + "p99": 220.7999974489212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 98.88000041246414, + "p90": 102.20800340175629, + "p95": 104.60799932479858, + "p99": 109.76000130176544 + }, + "combine": { + "p50": 98.88000041246414, + "p90": 102.20800340175629, + "p95": 104.60799932479858, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 98.88000041246414, + "p90": 102.20800340175629, + "p95": 104.60799932479858, + "p99": 109.76000130176544 + }, + "isolatedSum": { + "p50": 
197.76000082492828, + "p90": 204.41600680351257, + "p95": 209.21599864959717, + "p99": 219.52000260353088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.68799895048141, + "p90": 102.39999741315842, + "p95": 105.02400249242783, + "p99": 109.50399935245514 + }, + "combine": { + "p50": 98.68799895048141, + "p90": 102.39999741315842, + "p95": 105.02400249242783, + "p99": 109.50399935245514 + }, + "roundtrip": { + "p50": 98.68799895048141, + "p90": 102.39999741315842, + "p95": 105.02400249242783, + "p99": 109.50399935245514 + }, + "isolatedSum": { + "p50": 197.37599790096283, + "p90": 204.79999482631683, + "p95": 210.04800498485565, + "p99": 219.00799870491028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 99.13600236177444, + "p90": 103.39199751615524, + "p95": 107.2319969534874, + "p99": 115.00799655914307 + }, + "combine": { + "p50": 99.13600236177444, + "p90": 103.39199751615524, + "p95": 107.2319969534874, + "p99": 115.00799655914307 + }, + "roundtrip": { + "p50": 99.13600236177444, + "p90": 103.39199751615524, + "p95": 107.2319969534874, + "p99": 115.00799655914307 + }, + "isolatedSum": { + "p50": 198.2720047235489, + "p90": 206.7839950323105, + "p95": 214.4639939069748, + "p99": 230.01599311828613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 
128, + "dispatch": { + "p50": 98.24000298976898, + "p90": 103.00800204277039, + "p95": 105.3759977221489, + "p99": 115.68000167608261 + }, + "combine": { + "p50": 98.24000298976898, + "p90": 103.00800204277039, + "p95": 105.3759977221489, + "p99": 115.68000167608261 + }, + "roundtrip": { + "p50": 98.24000298976898, + "p90": 103.00800204277039, + "p95": 105.3759977221489, + "p99": 115.68000167608261 + }, + "isolatedSum": { + "p50": 196.48000597953796, + "p90": 206.01600408554077, + "p95": 210.7519954442978, + "p99": 231.36000335216522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 98.27200323343277, + "p90": 102.62399911880493, + "p95": 105.79200088977814, + "p99": 113.27999830245972 + }, + "combine": { + "p50": 98.27200323343277, + "p90": 102.62399911880493, + "p95": 105.79200088977814, + "p99": 113.27999830245972 + }, + "roundtrip": { + "p50": 98.27200323343277, + "p90": 102.62399911880493, + "p95": 105.79200088977814, + "p99": 113.27999830245972 + }, + "isolatedSum": { + "p50": 196.54400646686554, + "p90": 205.24799823760986, + "p95": 211.58400177955627, + "p99": 226.55999660491943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.65599870681763, + "p90": 103.71199995279312, + "p95": 105.92000186443329, + "p99": 119.32799965143204 + }, + "combine": { + "p50": 98.65599870681763, + "p90": 103.71199995279312, + "p95": 105.92000186443329, + "p99": 119.32799965143204 + }, + "roundtrip": { + "p50": 98.65599870681763, + "p90": 103.71199995279312, + "p95": 
105.92000186443329, + "p99": 119.32799965143204 + }, + "isolatedSum": { + "p50": 197.31199741363525, + "p90": 207.42399990558624, + "p95": 211.84000372886658, + "p99": 238.65599930286407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 99.64799880981445, + "p90": 103.87200117111206, + "p95": 105.69600015878677, + "p99": 111.7120012640953 + }, + "combine": { + "p50": 99.64799880981445, + "p90": 103.87200117111206, + "p95": 105.69600015878677, + "p99": 111.7120012640953 + }, + "roundtrip": { + "p50": 99.64799880981445, + "p90": 103.87200117111206, + "p95": 105.69600015878677, + "p99": 111.7120012640953 + }, + "isolatedSum": { + "p50": 199.2959976196289, + "p90": 207.74400234222412, + "p95": 211.39200031757355, + "p99": 223.4240025281906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5a48f51c", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_3c6e6651", + "comparisonKey": "80d71dfa83e5bb71", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:45.645056+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + 
"label": "H100 EP8 · flashinfer · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.3199964761734, + "p90": 104.60799932479858, + "p95": 106.9440022110939, + "p99": 116.92799627780914 + }, + "combine": { + "p50": 100.3199964761734, + "p90": 104.60799932479858, + "p95": 106.9440022110939, + "p99": 116.92799627780914 + }, + "roundtrip": { + "p50": 100.3199964761734, + "p90": 104.60799932479858, + "p95": 106.9440022110939, + "p99": 116.92799627780914 + }, + "isolatedSum": { + "p50": 200.6399929523468, + "p90": 209.21599864959717, + "p95": 213.8880044221878, + "p99": 233.8559925556183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + 
"combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 100.67199915647507, + "p90": 105.50399869680405, + "p95": 108.92800241708755, + "p99": 129.7920048236847 + }, + "combine": { + "p50": 100.67199915647507, + "p90": 105.50399869680405, + "p95": 108.92800241708755, + "p99": 129.7920048236847 + }, + "roundtrip": { + "p50": 100.67199915647507, + "p90": 105.50399869680405, + "p95": 108.92800241708755, + "p99": 129.7920048236847 + }, + "isolatedSum": { + "p50": 201.34399831295013, + "p90": 211.0079973936081, + "p95": 217.8560048341751, + "p99": 259.5840096473694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.26399981975555, + "p90": 126.52799487113953, + "p95": 129.05600666999817, + "p99": 134.46399569511414 + }, + "combine": { + "p50": 123.26399981975555, + "p90": 126.52799487113953, + "p95": 129.05600666999817, + "p99": 134.46399569511414 + }, + "roundtrip": { + "p50": 123.26399981975555, + "p90": 126.52799487113953, + "p95": 129.05600666999817, + "p99": 134.46399569511414 + }, + "isolatedSum": { + "p50": 246.5279996395111, + "p90": 253.05598974227905, + "p95": 258.11201333999634, + "p99": 268.92799139022827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bf6de660", + "identity": 
"h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h100_f5cccef3", + "comparisonKey": "2f5086e4fea93b80", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:04.534364+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + 
"sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.08800166845322, + "p90": 101.08800232410431, + "p95": 103.80800068378448, + "p99": 108.64000022411346 + }, + "combine": { + "p50": 97.08800166845322, + "p90": 101.08800232410431, + "p95": 103.80800068378448, + "p99": 108.64000022411346 + }, + "roundtrip": { + "p50": 97.08800166845322, + "p90": 101.08800232410431, + "p95": 103.80800068378448, + "p99": 108.64000022411346 + }, + "isolatedSum": { + "p50": 194.17600333690643, + "p90": 202.17600464820862, + "p95": 207.61600136756897, + "p99": 217.28000044822693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.47199714183807, + "p90": 102.9760017991066, + "p95": 104.89600151777267, + "p99": 109.72800105810165 + }, + "combine": { + "p50": 97.47199714183807, + "p90": 102.9760017991066, + "p95": 104.89600151777267, + "p99": 109.72800105810165 + }, + "roundtrip": { + "p50": 97.47199714183807, + "p90": 102.9760017991066, + "p95": 104.89600151777267, + "p99": 109.72800105810165 + }, + "isolatedSum": { + "p50": 194.94399428367615, + "p90": 205.9520035982132, + "p95": 209.79200303554535, + "p99": 219.4560021162033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.98399752378464, + "p90": 125.95200538635254, + "p95": 128.12800705432892, + "p99": 131.84000551700592 + }, + "combine": { + "p50": 121.98399752378464, + "p90": 125.95200538635254, + "p95": 
128.12800705432892, + "p99": 131.84000551700592 + }, + "roundtrip": { + "p50": 121.98399752378464, + "p90": 125.95200538635254, + "p95": 128.12800705432892, + "p99": 131.84000551700592 + }, + "isolatedSum": { + "p50": 243.96799504756927, + "p90": 251.90401077270508, + "p95": 256.25601410865784, + "p99": 263.68001103401184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c2c14051", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_f6ccd086", + "comparisonKey": "19782ec15a278116", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:23.272915+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + 
}, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 88.92799913883209, + "p90": 92.76799857616425, + "p95": 94.84799951314926, + "p99": 137.1839940547943 + }, + "combine": { + "p50": 88.92799913883209, + "p90": 92.76799857616425, + "p95": 94.84799951314926, + "p99": 137.1839940547943 + }, + "roundtrip": { + "p50": 88.92799913883209, + "p90": 92.76799857616425, + "p95": 94.84799951314926, + "p99": 137.1839940547943 + }, + "isolatedSum": { + "p50": 177.85599827766418, + "p90": 185.5359971523285, + "p95": 189.69599902629852, + "p99": 274.3679881095886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 89.53599631786346, + "p90": 93.88799965381622, + "p95": 96.28800302743912, + "p99": 107.71200060844421 + }, + "combine": { + "p50": 89.53599631786346, + "p90": 93.88799965381622, + "p95": 96.28800302743912, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 89.53599631786346, + "p90": 93.88799965381622, + "p95": 96.28800302743912, + "p99": 107.71200060844421 + }, + "isolatedSum": { + "p50": 179.07199263572693, + "p90": 
187.77599930763245, + "p95": 192.57600605487823, + "p99": 215.42400121688843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.656001329422, + "p90": 119.19999867677689, + "p95": 120.67200243473053, + "p99": 127.45599448680878 + }, + "combine": { + "p50": 114.656001329422, + "p90": 119.19999867677689, + "p95": 120.67200243473053, + "p99": 127.45599448680878 + }, + "roundtrip": { + "p50": 114.656001329422, + "p90": 119.19999867677689, + "p95": 120.67200243473053, + "p99": 127.45599448680878 + }, + "isolatedSum": { + "p50": 229.312002658844, + "p90": 238.39999735355377, + "p95": 241.34400486946106, + "p99": 254.91198897361755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f0b9bb9c", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_f7ccd219", + "comparisonKey": "d5e05ba8073051d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:42.483516+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + 
"shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.68799829483032, + "p90": 99.71199929714203, + "p95": 104.76800054311752, + "p99": 377.9839873313904 + }, + "combine": { + "p50": 94.68799829483032, + "p90": 99.71199929714203, + "p95": 104.76800054311752, + "p99": 377.9839873313904 + }, + "roundtrip": { + "p50": 94.68799829483032, + "p90": 99.71199929714203, + "p95": 104.76800054311752, + "p99": 377.9839873313904 + }, + "isolatedSum": { + "p50": 189.37599658966064, + "p90": 199.42399859428406, + "p95": 209.53600108623505, + "p99": 755.9679746627808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.43199634552002, + "p90": 99.16800260543823, + "p95": 101.85600072145462, + "p99": 110.46399921178818 + }, + "combine": { + "p50": 94.43199634552002, + "p90": 99.16800260543823, + "p95": 101.85600072145462, + "p99": 110.46399921178818 + }, + "roundtrip": { + "p50": 94.43199634552002, + "p90": 99.16800260543823, + "p95": 101.85600072145462, + "p99": 110.46399921178818 + }, + "isolatedSum": { + "p50": 188.86399269104004, + "p90": 198.33600521087646, + "p95": 203.71200144290924, + "p99": 220.92799842357635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.60000139474869, + "p90": 122.97599762678146, + "p95": 124.54400211572647, + "p99": 129.43999469280243 + }, + "combine": { + "p50": 117.60000139474869, + "p90": 122.97599762678146, + "p95": 124.54400211572647, + "p99": 129.43999469280243 + }, + "roundtrip": { + "p50": 117.60000139474869, + "p90": 122.97599762678146, + "p95": 124.54400211572647, + "p99": 129.43999469280243 + }, + "isolatedSum": { + "p50": 235.20000278949738, + "p90": 245.95199525356293, + "p95": 249.08800423145294, + "p99": 258.87998938560486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96da5803", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_f8534427", + "comparisonKey": "9291037e2e941076", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T10:34:57.124437+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.36799651384354, + "p90": 103.04000228643417, + "p95": 106.52799904346466, + 
"p99": 113.79200220108032 + }, + "combine": { + "p50": 98.36799651384354, + "p90": 103.04000228643417, + "p95": 106.52799904346466, + "p99": 113.79200220108032 + }, + "roundtrip": { + "p50": 98.36799651384354, + "p90": 103.04000228643417, + "p95": 106.52799904346466, + "p99": 113.79200220108032 + }, + "isolatedSum": { + "p50": 196.73599302768707, + "p90": 206.08000457286835, + "p95": 213.05599808692932, + "p99": 227.58400440216064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 97.56799787282944, + "p90": 100.8640006184578, + "p95": 103.29599678516388, + "p99": 109.31199789047241 + }, + "combine": { + "p50": 97.56799787282944, + "p90": 100.8640006184578, + "p95": 103.29599678516388, + "p99": 109.31199789047241 + }, + "roundtrip": { + "p50": 97.56799787282944, + "p90": 100.8640006184578, + "p95": 103.29599678516388, + "p99": 109.31199789047241 + }, + "isolatedSum": { + "p50": 195.13599574565887, + "p90": 201.7280012369156, + "p95": 206.59199357032776, + "p99": 218.62399578094482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.05600142478943, + "p90": 101.21600329875946, + "p95": 103.90400141477585, + "p99": 129.69599664211273 + }, + "combine": { + "p50": 97.05600142478943, + "p90": 101.21600329875946, + "p95": 103.90400141477585, + "p99": 129.69599664211273 + }, + "roundtrip": { + "p50": 97.05600142478943, + "p90": 101.21600329875946, + "p95": 103.90400141477585, + "p99": 129.69599664211273 + }, + "isolatedSum": { + "p50": 194.11200284957886, + "p90": 
202.43200659751892, + "p95": 207.8080028295517, + "p99": 259.39199328422546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.82399982213974, + "p90": 101.75999999046326, + "p95": 104.54399883747101, + "p99": 114.07999694347382 + }, + "combine": { + "p50": 97.82399982213974, + "p90": 101.75999999046326, + "p95": 104.54399883747101, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 97.82399982213974, + "p90": 101.75999999046326, + "p95": 104.54399883747101, + "p99": 114.07999694347382 + }, + "isolatedSum": { + "p50": 195.64799964427948, + "p90": 203.5199999809265, + "p95": 209.08799767494202, + "p99": 228.15999388694763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.8639999628067, + "p90": 100.41599720716476, + "p95": 102.4319976568222, + "p99": 110.46399921178818 + }, + "combine": { + "p50": 96.8639999628067, + "p90": 100.41599720716476, + "p95": 102.4319976568222, + "p99": 110.46399921178818 + }, + "roundtrip": { + "p50": 96.8639999628067, + "p90": 100.41599720716476, + "p95": 102.4319976568222, + "p99": 110.46399921178818 + }, + "isolatedSum": { + "p50": 193.7279999256134, + "p90": 200.83199441432953, + "p95": 204.8639953136444, + "p99": 220.92799842357635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + 
"dispatch": { + "p50": 96.83199971914291, + "p90": 100.54399818181992, + "p95": 104.2879968881607, + "p99": 115.00799655914307 + }, + "combine": { + "p50": 96.83199971914291, + "p90": 100.54399818181992, + "p95": 104.2879968881607, + "p99": 115.00799655914307 + }, + "roundtrip": { + "p50": 96.83199971914291, + "p90": 100.54399818181992, + "p95": 104.2879968881607, + "p99": 115.00799655914307 + }, + "isolatedSum": { + "p50": 193.66399943828583, + "p90": 201.08799636363983, + "p95": 208.5759937763214, + "p99": 230.01599311828613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 100.28800368309021, + "p90": 105.15200346708298, + "p95": 107.13600367307663, + "p99": 115.26399850845337 + }, + "combine": { + "p50": 100.28800368309021, + "p90": 105.15200346708298, + "p95": 107.13600367307663, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 100.28800368309021, + "p90": 105.15200346708298, + "p95": 107.13600367307663, + "p99": 115.26399850845337 + }, + "isolatedSum": { + "p50": 200.57600736618042, + "p90": 210.30400693416595, + "p95": 214.27200734615326, + "p99": 230.52799701690674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.23999947309494, + "p90": 126.56000256538391, + "p95": 128.57599556446075, + "p99": 134.36800241470337 + }, + "combine": { + "p50": 122.23999947309494, + "p90": 126.56000256538391, + "p95": 128.57599556446075, + "p99": 134.36800241470337 + }, + "roundtrip": { + "p50": 122.23999947309494, + "p90": 126.56000256538391, 
+ "p95": 128.57599556446075, + "p99": 134.36800241470337 + }, + "isolatedSum": { + "p50": 244.47999894618988, + "p90": 253.12000513076782, + "p95": 257.1519911289215, + "p99": 268.73600482940674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8110b558", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h100_d7c1fbf7", + "comparisonKey": "25435ff60f40d8bd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:02.298669+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 92.86399930715561, + "p90": 96.54399752616882, + "p95": 99.42399710416794, + "p99": 102.9760017991066 + }, + "combine": { + "p50": 92.86399930715561, + "p90": 96.54399752616882, + "p95": 99.42399710416794, + "p99": 102.9760017991066 + }, + "roundtrip": { + "p50": 92.86399930715561, + "p90": 96.54399752616882, + "p95": 99.42399710416794, + "p99": 102.9760017991066 + }, + "isolatedSum": { + "p50": 185.72799861431122, + "p90": 193.08799505233765, + "p95": 198.84799420833588, + "p99": 205.9520035982132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 93.79199892282486, + "p90": 97.18400239944458, + "p95": 99.5199978351593, + "p99": 103.64799946546555 + }, + "combine": { + "p50": 93.79199892282486, + "p90": 97.18400239944458, + "p95": 99.5199978351593, + "p99": 103.64799946546555 + }, + "roundtrip": { + "p50": 93.79199892282486, + "p90": 97.18400239944458, + "p95": 99.5199978351593, + "p99": 103.64799946546555 + }, + "isolatedSum": { + "p50": 187.58399784564972, + "p90": 194.36800479888916, + "p95": 199.0399956703186, + "p99": 207.2959989309311 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 92.0960009098053, + "p90": 95.77599912881851, + "p95": 97.88800030946732, + "p99": 101.88800096511841 + }, + "combine": { + "p50": 92.0960009098053, + "p90": 95.77599912881851, + "p95": 97.88800030946732, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 92.0960009098053, + "p90": 95.77599912881851, + "p95": 97.88800030946732, + "p99": 101.88800096511841 + }, + "isolatedSum": { + "p50": 184.1920018196106, + "p90": 191.55199825763702, + "p95": 195.77600061893463, + "p99": 203.77600193023682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 93.50399672985077, + "p90": 97.59999811649323, + "p95": 99.90400075912476, + "p99": 103.5199984908104 + }, + "combine": { + "p50": 93.50399672985077, + "p90": 97.59999811649323, + "p95": 99.90400075912476, + "p99": 103.5199984908104 + }, + "roundtrip": { + "p50": 93.50399672985077, + "p90": 97.59999811649323, + "p95": 99.90400075912476, + "p99": 103.5199984908104 + }, + "isolatedSum": { + "p50": 187.00799345970154, + "p90": 195.19999623298645, + "p95": 199.8080015182495, + "p99": 207.0399969816208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 92.86399930715561, + "p90": 96.79999947547913, + "p95": 99.93600100278854, + 
"p99": 104.51199859380722 + }, + "combine": { + "p50": 92.86399930715561, + "p90": 96.79999947547913, + "p95": 99.93600100278854, + "p99": 104.51199859380722 + }, + "roundtrip": { + "p50": 92.86399930715561, + "p90": 96.79999947547913, + "p95": 99.93600100278854, + "p99": 104.51199859380722 + }, + "isolatedSum": { + "p50": 185.72799861431122, + "p90": 193.59999895095825, + "p95": 199.8720020055771, + "p99": 209.02399718761444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 93.59999746084213, + "p90": 97.47199714183807, + "p95": 99.61599856615067, + "p99": 104.19200360774994 + }, + "combine": { + "p50": 93.59999746084213, + "p90": 97.47199714183807, + "p95": 99.61599856615067, + "p99": 104.19200360774994 + }, + "roundtrip": { + "p50": 93.59999746084213, + "p90": 97.47199714183807, + "p95": 99.61599856615067, + "p99": 104.19200360774994 + }, + "isolatedSum": { + "p50": 187.19999492168427, + "p90": 194.94399428367615, + "p95": 199.23199713230133, + "p99": 208.38400721549988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.30400282144547, + "p90": 98.62399846315384, + "p95": 100.80000013113022, + "p99": 108.5439994931221 + }, + "combine": { + "p50": 94.30400282144547, + "p90": 98.62399846315384, + "p95": 100.80000013113022, + "p99": 108.5439994931221 + }, + "roundtrip": { + "p50": 94.30400282144547, + "p90": 98.62399846315384, + "p95": 100.80000013113022, + "p99": 108.5439994931221 + }, + "isolatedSum": { + "p50": 188.60800564289093, + "p90": 
197.24799692630768, + "p95": 201.60000026226044, + "p99": 217.0879989862442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.73600095510483, + "p90": 113.08799684047699, + "p95": 114.97599631547928, + "p99": 118.17599833011627 + }, + "combine": { + "p50": 108.73600095510483, + "p90": 113.08799684047699, + "p95": 114.97599631547928, + "p99": 118.17599833011627 + }, + "roundtrip": { + "p50": 108.73600095510483, + "p90": 113.08799684047699, + "p95": 114.97599631547928, + "p99": 118.17599833011627 + }, + "isolatedSum": { + "p50": 217.47200191020966, + "p90": 226.17599368095398, + "p95": 229.95199263095856, + "p99": 236.35199666023254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-92930be1", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_97f2b177", + "comparisonKey": "65e833e3cd13195d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:42.796617+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB 
physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.03200107812881, + "p90": 99.20000284910202, + "p95": 102.04800218343735, + "p99": 107.2319969534874 + }, + "combine": { + "p50": 96.03200107812881, + "p90": 99.20000284910202, + "p95": 102.04800218343735, + "p99": 107.2319969534874 + }, + "roundtrip": { + "p50": 96.03200107812881, + "p90": 99.20000284910202, + "p95": 102.04800218343735, + "p99": 107.2319969534874 + }, + "isolatedSum": { + "p50": 192.06400215625763, + "p90": 198.40000569820404, + "p95": 204.0960043668747, + "p99": 214.4639939069748 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 8, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.76799923181534, + "p90": 99.84000027179718, + "p95": 102.49599814414978, + "p99": 105.15200346708298 + }, + "combine": { + "p50": 96.76799923181534, + "p90": 99.84000027179718, + "p95": 102.49599814414978, + "p99": 105.15200346708298 + }, + "roundtrip": { + "p50": 96.76799923181534, + "p90": 99.84000027179718, + "p95": 102.49599814414978, + "p99": 105.15200346708298 + }, + "isolatedSum": { + "p50": 193.53599846363068, + "p90": 199.68000054359436, + "p95": 204.99199628829956, + "p99": 210.30400693416595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 95.23200243711472, + "p90": 99.35999661684036, + "p95": 101.59999877214432, + "p99": 106.46399855613708 + }, + "combine": { + "p50": 95.23200243711472, + "p90": 99.35999661684036, + "p95": 101.59999877214432, + "p99": 106.46399855613708 + }, + "roundtrip": { + "p50": 95.23200243711472, + "p90": 99.35999661684036, + "p95": 101.59999877214432, + "p99": 106.46399855613708 + }, + "isolatedSum": { + "p50": 190.46400487422943, + "p90": 198.71999323368073, + "p95": 203.19999754428864, + "p99": 212.92799711227417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.90400010347366, + "p90": 99.07200187444687, + "p95": 102.1760031580925, + "p99": 104.89600151777267 + }, + "combine": { + "p50": 95.90400010347366, + "p90": 99.07200187444687, + "p95": 102.1760031580925, + 
"p99": 104.89600151777267 + }, + "roundtrip": { + "p50": 95.90400010347366, + "p90": 99.07200187444687, + "p95": 102.1760031580925, + "p99": 104.89600151777267 + }, + "isolatedSum": { + "p50": 191.80800020694733, + "p90": 198.14400374889374, + "p95": 204.352006316185, + "p99": 209.79200303554535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.32800316810608, + "p90": 98.62399846315384, + "p95": 100.80000013113022, + "p99": 105.92000186443329 + }, + "combine": { + "p50": 95.32800316810608, + "p90": 98.62399846315384, + "p95": 100.80000013113022, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 95.32800316810608, + "p90": 98.62399846315384, + "p95": 100.80000013113022, + "p99": 105.92000186443329 + }, + "isolatedSum": { + "p50": 190.65600633621216, + "p90": 197.24799692630768, + "p95": 201.60000026226044, + "p99": 211.84000372886658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 95.551997423172, + "p90": 99.16800260543823, + "p95": 101.43999755382538, + "p99": 109.47199910879135 + }, + "combine": { + "p50": 95.551997423172, + "p90": 99.16800260543823, + "p95": 101.43999755382538, + "p99": 109.47199910879135 + }, + "roundtrip": { + "p50": 95.551997423172, + "p90": 99.16800260543823, + "p95": 101.43999755382538, + "p99": 109.47199910879135 + }, + "isolatedSum": { + "p50": 191.103994846344, + "p90": 198.33600521087646, + "p95": 202.87999510765076, + "p99": 218.9439982175827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + 
"combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.16000205278397, + "p90": 100.73599964380264, + "p95": 103.35999727249146, + "p99": 109.76000130176544 + }, + "combine": { + "p50": 96.16000205278397, + "p90": 100.73599964380264, + "p95": 103.35999727249146, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 96.16000205278397, + "p90": 100.73599964380264, + "p95": 103.35999727249146, + "p99": 109.76000130176544 + }, + "isolatedSum": { + "p50": 192.32000410556793, + "p90": 201.47199928760529, + "p95": 206.7199945449829, + "p99": 219.52000260353088 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.1439967751503, + "p90": 113.43999952077866, + "p95": 115.4559999704361, + "p99": 123.23199957609177 + }, + "combine": { + "p50": 110.1439967751503, + "p90": 113.43999952077866, + "p95": 115.4559999704361, + "p99": 123.23199957609177 + }, + "roundtrip": { + "p50": 110.1439967751503, + "p90": 113.43999952077866, + "p95": 115.4559999704361, + "p99": 123.23199957609177 + }, + "isolatedSum": { + "p50": 220.2879935503006, + "p90": 226.8799990415573, + "p95": 230.9119999408722, + "p99": 246.46399915218353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63671253", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + 
"colorKey": "h100_1e84ea82", + "comparisonKey": "7185002a3d75769b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:08.997941+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 96.99200093746185, + "p90": 
101.05600208044052, + "p95": 103.45599800348282, + "p99": 110.1439967751503 + }, + "combine": { + "p50": 96.99200093746185, + "p90": 101.05600208044052, + "p95": 103.45599800348282, + "p99": 110.1439967751503 + }, + "roundtrip": { + "p50": 96.99200093746185, + "p90": 101.05600208044052, + "p95": 103.45599800348282, + "p99": 110.1439967751503 + }, + "isolatedSum": { + "p50": 193.9840018749237, + "p90": 202.11200416088104, + "p95": 206.91199600696564, + "p99": 220.2879935503006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 97.02400118112564, + "p90": 100.5759984254837, + "p95": 102.75200009346008, + "p99": 106.39999806880951 + }, + "combine": { + "p50": 97.02400118112564, + "p90": 100.5759984254837, + "p95": 102.75200009346008, + "p99": 106.39999806880951 + }, + "roundtrip": { + "p50": 97.02400118112564, + "p90": 100.5759984254837, + "p95": 102.75200009346008, + "p99": 106.39999806880951 + }, + "isolatedSum": { + "p50": 194.04800236225128, + "p90": 201.1519968509674, + "p95": 205.50400018692017, + "p99": 212.79999613761902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.24800288677216, + "p90": 101.34399682283401, + "p95": 103.39199751615524, + "p99": 107.68000036478043 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 101.34399682283401, + "p95": 103.39199751615524, + "p99": 107.68000036478043 + }, + "roundtrip": { + "p50": 97.24800288677216, + "p90": 101.34399682283401, + "p95": 103.39199751615524, + "p99": 107.68000036478043 + }, + "isolatedSum": { + 
"p50": 194.4960057735443, + "p90": 202.68799364566803, + "p95": 206.7839950323105, + "p99": 215.36000072956085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 97.15200215578079, + "p90": 100.8640006184578, + "p95": 103.61599922180176, + "p99": 107.35999792814255 + }, + "combine": { + "p50": 97.15200215578079, + "p90": 100.8640006184578, + "p95": 103.61599922180176, + "p99": 107.35999792814255 + }, + "roundtrip": { + "p50": 97.15200215578079, + "p90": 100.8640006184578, + "p95": 103.61599922180176, + "p99": 107.35999792814255 + }, + "isolatedSum": { + "p50": 194.30400431156158, + "p90": 201.7280012369156, + "p95": 207.23199844360352, + "p99": 214.7199958562851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 96.89600020647049, + "p90": 100.3199964761734, + "p95": 102.78400033712387, + "p99": 107.93600231409073 + }, + "combine": { + "p50": 96.89600020647049, + "p90": 100.3199964761734, + "p95": 102.78400033712387, + "p99": 107.93600231409073 + }, + "roundtrip": { + "p50": 96.89600020647049, + "p90": 100.3199964761734, + "p95": 102.78400033712387, + "p99": 107.93600231409073 + }, + "isolatedSum": { + "p50": 193.79200041294098, + "p90": 200.6399929523468, + "p95": 205.56800067424774, + "p99": 215.87200462818146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.08800166845322, + "p90": 101.79200023412704, + "p95": 104.2879968881607, + "p99": 118.43200027942657 + }, + "combine": { + "p50": 97.08800166845322, + "p90": 101.79200023412704, + "p95": 104.2879968881607, + "p99": 118.43200027942657 + }, + "roundtrip": { + "p50": 97.08800166845322, + "p90": 101.79200023412704, + "p95": 104.2879968881607, + "p99": 118.43200027942657 + }, + "isolatedSum": { + "p50": 194.17600333690643, + "p90": 203.5840004682541, + "p95": 208.5759937763214, + "p99": 236.86400055885315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.88000041246414, + "p90": 103.87200117111206, + "p95": 106.27199709415436, + "p99": 113.11999708414078 + }, + "combine": { + "p50": 98.88000041246414, + "p90": 103.87200117111206, + "p95": 106.27199709415436, + "p99": 113.11999708414078 + }, + "roundtrip": { + "p50": 98.88000041246414, + "p90": 103.87200117111206, + "p95": 106.27199709415436, + "p99": 113.11999708414078 + }, + "isolatedSum": { + "p50": 197.76000082492828, + "p90": 207.74400234222412, + "p95": 212.54399418830872, + "p99": 226.23999416828156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.76000261306763, + "p90": 122.3360002040863, + "p95": 123.99999797344208, + "p99": 127.29600071907043 + }, + "combine": { + "p50": 117.76000261306763, + "p90": 122.3360002040863, + "p95": 123.99999797344208, + "p99": 127.29600071907043 + }, + "roundtrip": { + "p50": 
117.76000261306763, + "p90": 122.3360002040863, + "p95": 123.99999797344208, + "p99": 127.29600071907043 + }, + "isolatedSum": { + "p50": 235.52000522613525, + "p90": 244.6720004081726, + "p95": 247.99999594688416, + "p99": 254.59200143814087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96d0b460", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_17199ba8", + "comparisonKey": "68dac9ce4809e33a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:58:54.189492+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 97.63199836015701, + "p90": 102.04800218343735, + "p95": 104.35199737548828, + "p99": 109.24799740314484 + }, + "combine": { + "p50": 97.63199836015701, + "p90": 102.04800218343735, + "p95": 104.35199737548828, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 97.63199836015701, + "p90": 102.04800218343735, + "p95": 104.35199737548828, + "p99": 109.24799740314484 + }, + "isolatedSum": { + "p50": 195.26399672031403, + "p90": 204.0960043668747, + "p95": 208.70399475097656, + "p99": 218.49599480628967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 97.50399738550186, + "p90": 101.6319990158081, + "p95": 104.54399883747101, + "p99": 111.87200248241425 + }, + "combine": { + "p50": 97.50399738550186, + "p90": 101.6319990158081, + "p95": 104.54399883747101, + "p99": 111.87200248241425 + }, + "roundtrip": { + "p50": 97.50399738550186, + "p90": 101.6319990158081, + "p95": 104.54399883747101, + "p99": 111.87200248241425 + }, + "isolatedSum": { + "p50": 195.00799477100372, + "p90": 203.2639980316162, + "p95": 209.08799767494202, + "p99": 223.7440049648285 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 97.21600264310837, + "p90": 106.20799660682678, + "p95": 146.7200070619583, + "p99": 156.67200088500977 + }, + "combine": { + "p50": 97.21600264310837, + "p90": 106.20799660682678, + "p95": 146.7200070619583, + "p99": 156.67200088500977 + }, + "roundtrip": { + "p50": 97.21600264310837, + "p90": 106.20799660682678, + "p95": 146.7200070619583, + "p99": 156.67200088500977 + }, + "isolatedSum": { + "p50": 194.43200528621674, + "p90": 212.41599321365356, + "p95": 293.4400141239166, + "p99": 313.34400177001953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.11200201511383, + "p90": 102.49599814414978, + "p95": 105.12000322341919, + "p99": 115.1999980211258 + }, + "combine": { + "p50": 98.11200201511383, + "p90": 102.49599814414978, + "p95": 105.12000322341919, + "p99": 115.1999980211258 + }, + "roundtrip": { + "p50": 98.11200201511383, + "p90": 102.49599814414978, + "p95": 105.12000322341919, + "p99": 115.1999980211258 + }, + "isolatedSum": { + "p50": 196.22400403022766, + "p90": 204.99199628829956, + "p95": 210.24000644683838, + "p99": 230.3999960422516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.98400104045868, + "p90": 102.52799838781357, + "p95": 105.21599650382996, + "p99": 
117.5680011510849 + }, + "combine": { + "p50": 97.98400104045868, + "p90": 102.52799838781357, + "p95": 105.21599650382996, + "p99": 117.5680011510849 + }, + "roundtrip": { + "p50": 97.98400104045868, + "p90": 102.52799838781357, + "p95": 105.21599650382996, + "p99": 117.5680011510849 + }, + "isolatedSum": { + "p50": 195.96800208091736, + "p90": 205.05599677562714, + "p95": 210.4319930076599, + "p99": 235.1360023021698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.56799787282944, + "p90": 101.05600208044052, + "p95": 104.70400005578995, + "p99": 110.23999750614166 + }, + "combine": { + "p50": 97.56799787282944, + "p90": 101.05600208044052, + "p95": 104.70400005578995, + "p99": 110.23999750614166 + }, + "roundtrip": { + "p50": 97.56799787282944, + "p90": 101.05600208044052, + "p95": 104.70400005578995, + "p99": 110.23999750614166 + }, + "isolatedSum": { + "p50": 195.13599574565887, + "p90": 202.11200416088104, + "p95": 209.4080001115799, + "p99": 220.47999501228333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.91200065612793, + "p90": 103.16800326108932, + "p95": 105.21599650382996, + "p99": 116.92799627780914 + }, + "combine": { + "p50": 98.91200065612793, + "p90": 103.16800326108932, + "p95": 105.21599650382996, + "p99": 116.92799627780914 + }, + "roundtrip": { + "p50": 98.91200065612793, + "p90": 103.16800326108932, + "p95": 105.21599650382996, + "p99": 116.92799627780914 + }, + "isolatedSum": { + "p50": 197.82400131225586, + "p90": 
206.33600652217865, + "p95": 210.4319930076599, + "p99": 233.8559925556183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.03199690580368, + "p90": 120.2239990234375, + "p95": 123.16799908876419, + "p99": 132.89600610733032 + }, + "combine": { + "p50": 116.03199690580368, + "p90": 120.2239990234375, + "p95": 123.16799908876419, + "p99": 132.89600610733032 + }, + "roundtrip": { + "p50": 116.03199690580368, + "p90": 120.2239990234375, + "p95": 123.16799908876419, + "p99": 132.89600610733032 + }, + "isolatedSum": { + "p50": 232.06399381160736, + "p90": 240.447998046875, + "p95": 246.33599817752838, + "p99": 265.79201221466064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b62cd780", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_fc142c06", + "comparisonKey": "127f0db6f5e50621", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:18.825096+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB 
physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 93.79199892282486, + "p90": 97.34400361776352, + "p95": 100.09600222110748, + "p99": 110.04800349473953 + }, + "combine": { + "p50": 93.79199892282486, + "p90": 97.34400361776352, + "p95": 100.09600222110748, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 93.79199892282486, + "p90": 97.34400361776352, + "p95": 100.09600222110748, + "p99": 110.04800349473953 + }, + "isolatedSum": { + "p50": 187.58399784564972, + "p90": 194.68800723552704, + "p95": 200.19200444221497, + "p99": 220.09600698947906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + 
"recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 93.44000369310379, + "p90": 97.56799787282944, + "p95": 100.73599964380264, + "p99": 106.36799782514572 + }, + "combine": { + "p50": 93.44000369310379, + "p90": 97.56799787282944, + "p95": 100.73599964380264, + "p99": 106.36799782514572 + }, + "roundtrip": { + "p50": 93.44000369310379, + "p90": 97.56799787282944, + "p95": 100.73599964380264, + "p99": 106.36799782514572 + }, + "isolatedSum": { + "p50": 186.88000738620758, + "p90": 195.13599574565887, + "p95": 201.47199928760529, + "p99": 212.73599565029144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 92.22400188446045, + "p90": 97.08800166845322, + "p95": 99.64799880981445, + "p99": 110.23999750614166 + }, + "combine": { + "p50": 92.22400188446045, + "p90": 97.08800166845322, + "p95": 99.64799880981445, + "p99": 110.23999750614166 + }, + "roundtrip": { + "p50": 92.22400188446045, + "p90": 97.08800166845322, + "p95": 99.64799880981445, + "p99": 110.23999750614166 + }, + "isolatedSum": { + "p50": 184.4480037689209, + "p90": 194.17600333690643, + "p95": 199.2959976196289, + "p99": 220.47999501228333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 93.88799965381622, + "p90": 97.72799909114838, + "p95": 100.3199964761734, + "p99": 104.92800176143646 + }, + "combine": { + "p50": 93.88799965381622, + "p90": 97.72799909114838, + "p95": 
100.3199964761734, + "p99": 104.92800176143646 + }, + "roundtrip": { + "p50": 93.88799965381622, + "p90": 97.72799909114838, + "p95": 100.3199964761734, + "p99": 104.92800176143646 + }, + "isolatedSum": { + "p50": 187.77599930763245, + "p90": 195.45599818229675, + "p95": 200.6399929523468, + "p99": 209.85600352287292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 93.66399794816971, + "p90": 97.15200215578079, + "p95": 99.13600236177444, + "p99": 106.175996363163 + }, + "combine": { + "p50": 93.66399794816971, + "p90": 97.15200215578079, + "p95": 99.13600236177444, + "p99": 106.175996363163 + }, + "roundtrip": { + "p50": 93.66399794816971, + "p90": 97.15200215578079, + "p95": 99.13600236177444, + "p99": 106.175996363163 + }, + "isolatedSum": { + "p50": 187.32799589633942, + "p90": 194.30400431156158, + "p95": 198.2720047235489, + "p99": 212.351992726326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.8959995508194, + "p90": 96.41599655151367, + "p95": 98.78399968147278, + "p99": 102.4319976568222 + }, + "combine": { + "p50": 92.8959995508194, + "p90": 96.41599655151367, + "p95": 98.78399968147278, + "p99": 102.4319976568222 + }, + "roundtrip": { + "p50": 92.8959995508194, + "p90": 96.41599655151367, + "p95": 98.78399968147278, + "p99": 102.4319976568222 + }, + "isolatedSum": { + "p50": 185.7919991016388, + "p90": 192.83199310302734, + "p95": 197.56799936294556, + "p99": 204.8639953136444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.91999989748001, + "p90": 99.20000284910202, + "p95": 100.89600086212158, + "p99": 108.83200168609619 + }, + "combine": { + "p50": 93.91999989748001, + "p90": 99.20000284910202, + "p95": 100.89600086212158, + "p99": 108.83200168609619 + }, + "roundtrip": { + "p50": 93.91999989748001, + "p90": 99.20000284910202, + "p95": 100.89600086212158, + "p99": 108.83200168609619 + }, + "isolatedSum": { + "p50": 187.83999979496002, + "p90": 198.40000569820404, + "p95": 201.79200172424316, + "p99": 217.66400337219238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.41599851846695, + "p90": 113.02399635314941, + "p95": 114.72000181674957, + "p99": 120.86399644613266 + }, + "combine": { + "p50": 108.41599851846695, + "p90": 113.02399635314941, + "p95": 114.72000181674957, + "p99": 120.86399644613266 + }, + "roundtrip": { + "p50": 108.41599851846695, + "p90": 113.02399635314941, + "p95": 114.72000181674957, + "p99": 120.86399644613266 + }, + "isolatedSum": { + "p50": 216.8319970369339, + "p90": 226.04799270629883, + "p95": 229.44000363349915, + "p99": 241.72799289226532 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d7f1b5ab", + "identity": 
"h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_837dee23", + "comparisonKey": "9017adb27d13c4bb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:51.475918+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.40000355243683, + "p90": 100.12800246477127, + "p95": 105.85600137710571, + "p99": 156.6080003976822 + }, + "combine": { + "p50": 94.40000355243683, + "p90": 100.12800246477127, + "p95": 105.85600137710571, + "p99": 156.6080003976822 + }, + "roundtrip": { + "p50": 94.40000355243683, + "p90": 100.12800246477127, + "p95": 105.85600137710571, + "p99": 156.6080003976822 + }, + "isolatedSum": { + "p50": 188.80000710487366, + "p90": 200.25600492954254, + "p95": 211.71200275421143, + "p99": 313.2160007953644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 94.04800087213516, + "p90": 98.43199700117111, + "p95": 100.60799866914749, + "p99": 106.49599879980087 + }, + "combine": { + "p50": 94.04800087213516, + "p90": 98.43199700117111, + "p95": 100.60799866914749, + "p99": 106.49599879980087 + }, + "roundtrip": { + "p50": 94.04800087213516, + "p90": 98.43199700117111, + "p95": 100.60799866914749, + "p99": 106.49599879980087 + }, + "isolatedSum": { + "p50": 188.09600174427032, + "p90": 196.86399400234222, + "p95": 201.21599733829498, + "p99": 212.99199759960175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 93.9520001411438, + "p90": 97.63199836015701, + "p95": 99.61599856615067, + "p99": 104.96000200510025 + }, + "combine": { + "p50": 93.9520001411438, + "p90": 97.63199836015701, + "p95": 99.61599856615067, + "p99": 104.96000200510025 + }, 
+ "roundtrip": { + "p50": 93.9520001411438, + "p90": 97.63199836015701, + "p95": 99.61599856615067, + "p99": 104.96000200510025 + }, + "isolatedSum": { + "p50": 187.9040002822876, + "p90": 195.26399672031403, + "p95": 199.23199713230133, + "p99": 209.9200040102005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 93.98400038480759, + "p90": 97.50399738550186, + "p95": 100.70399940013885, + "p99": 107.90400207042694 + }, + "combine": { + "p50": 93.98400038480759, + "p90": 97.50399738550186, + "p95": 100.70399940013885, + "p99": 107.90400207042694 + }, + "roundtrip": { + "p50": 93.98400038480759, + "p90": 97.50399738550186, + "p95": 100.70399940013885, + "p99": 107.90400207042694 + }, + "isolatedSum": { + "p50": 187.96800076961517, + "p90": 195.00799477100372, + "p95": 201.4079988002777, + "p99": 215.80800414085388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 94.04800087213516, + "p90": 98.4639972448349, + "p95": 102.01600193977356, + "p99": 129.56799566745758 + }, + "combine": { + "p50": 94.04800087213516, + "p90": 98.4639972448349, + "p95": 102.01600193977356, + "p99": 129.56799566745758 + }, + "roundtrip": { + "p50": 94.04800087213516, + "p90": 98.4639972448349, + "p95": 102.01600193977356, + "p99": 129.56799566745758 + }, + "isolatedSum": { + "p50": 188.09600174427032, + "p90": 196.9279944896698, + "p95": 204.03200387954712, + "p99": 259.13599133491516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, 
+ "fanoutMean": 4.84375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 94.11200135946274, + "p90": 97.56799787282944, + "p95": 99.71199929714203, + "p99": 105.43999820947647 + }, + "combine": { + "p50": 94.11200135946274, + "p90": 97.56799787282944, + "p95": 99.71199929714203, + "p99": 105.43999820947647 + }, + "roundtrip": { + "p50": 94.11200135946274, + "p90": 97.56799787282944, + "p95": 99.71199929714203, + "p99": 105.43999820947647 + }, + "isolatedSum": { + "p50": 188.22400271892548, + "p90": 195.13599574565887, + "p95": 199.42399859428406, + "p99": 210.87999641895294 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.76799923181534, + "p90": 102.08000242710114, + "p95": 103.71199995279312, + "p99": 108.12799632549286 + }, + "combine": { + "p50": 96.76799923181534, + "p90": 102.08000242710114, + "p95": 103.71199995279312, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 96.76799923181534, + "p90": 102.08000242710114, + "p95": 103.71199995279312, + "p99": 108.12799632549286 + }, + "isolatedSum": { + "p50": 193.53599846363068, + "p90": 204.16000485420227, + "p95": 207.42399990558624, + "p99": 216.25599265098572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.0239970088005, + "p90": 121.63200229406357, + "p95": 124.1919994354248, + "p99": 144.70399916172028 + }, + "combine": { + "p50": 
117.0239970088005, + "p90": 121.63200229406357, + "p95": 124.1919994354248, + "p99": 144.70399916172028 + }, + "roundtrip": { + "p50": 117.0239970088005, + "p90": 121.63200229406357, + "p95": 124.1919994354248, + "p99": 144.70399916172028 + }, + "isolatedSum": { + "p50": 234.047994017601, + "p90": 243.26400458812714, + "p95": 248.3839988708496, + "p99": 289.40799832344055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b0651681", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_b7296bdb", + "comparisonKey": "4dde7d8c1bf015ba", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:11.441025+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", 
+ "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.7199991941452, + "p90": 102.68799960613251, + "p95": 105.53599894046783, + "p99": 111.13599687814713 + }, + "combine": { + "p50": 98.7199991941452, + "p90": 102.68799960613251, + "p95": 105.53599894046783, + "p99": 111.13599687814713 + }, + "roundtrip": { + "p50": 98.7199991941452, + "p90": 102.68799960613251, + "p95": 105.53599894046783, + "p99": 111.13599687814713 + }, + "isolatedSum": { + "p50": 197.4399983882904, + "p90": 205.37599921226501, + "p95": 211.07199788093567, + "p99": 222.27199375629425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 100.5759984254837, + "p90": 105.47199845314026, + "p95": 107.64800012111664, + "p99": 113.82400244474411 + }, + "combine": { + "p50": 100.5759984254837, + "p90": 105.47199845314026, + "p95": 107.64800012111664, + "p99": 113.82400244474411 + }, + "roundtrip": { + "p50": 100.5759984254837, + "p90": 105.47199845314026, + "p95": 107.64800012111664, + "p99": 
113.82400244474411 + }, + "isolatedSum": { + "p50": 201.1519968509674, + "p90": 210.94399690628052, + "p95": 215.29600024223328, + "p99": 227.64800488948822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.62399846315384, + "p90": 103.74400019645691, + "p95": 106.6880002617836, + "p99": 117.0559972524643 + }, + "combine": { + "p50": 98.62399846315384, + "p90": 103.74400019645691, + "p95": 106.6880002617836, + "p99": 117.0559972524643 + }, + "roundtrip": { + "p50": 98.62399846315384, + "p90": 103.74400019645691, + "p95": 106.6880002617836, + "p99": 117.0559972524643 + }, + "isolatedSum": { + "p50": 197.24799692630768, + "p90": 207.48800039291382, + "p95": 213.3760005235672, + "p99": 234.1119945049286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 100.70399940013885, + "p90": 107.16799646615982, + "p95": 110.20799726247787, + "p99": 141.24800264835358 + }, + "combine": { + "p50": 100.70399940013885, + "p90": 107.16799646615982, + "p95": 110.20799726247787, + "p99": 141.24800264835358 + }, + "roundtrip": { + "p50": 100.70399940013885, + "p90": 107.16799646615982, + "p95": 110.20799726247787, + "p99": 141.24800264835358 + }, + "isolatedSum": { + "p50": 201.4079988002777, + "p90": 214.33599293231964, + "p95": 220.41599452495575, + "p99": 282.49600529670715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 98.62399846315384, + "p90": 103.61599922180176, + "p95": 105.76000064611435, + "p99": 112.41599917411804 + }, + "combine": { + "p50": 98.62399846315384, + "p90": 103.61599922180176, + "p95": 105.76000064611435, + "p99": 112.41599917411804 + }, + "roundtrip": { + "p50": 98.62399846315384, + "p90": 103.61599922180176, + "p95": 105.76000064611435, + "p99": 112.41599917411804 + }, + "isolatedSum": { + "p50": 197.24799692630768, + "p90": 207.23199844360352, + "p95": 211.5200012922287, + "p99": 224.83199834823608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 99.90400075912476, + "p90": 107.39199817180634, + "p95": 119.55200135707855, + "p99": 162.33600676059723 + }, + "combine": { + "p50": 99.90400075912476, + "p90": 107.39199817180634, + "p95": 119.55200135707855, + "p99": 162.33600676059723 + }, + "roundtrip": { + "p50": 99.90400075912476, + "p90": 107.39199817180634, + "p95": 119.55200135707855, + "p99": 162.33600676059723 + }, + "isolatedSum": { + "p50": 199.8080015182495, + "p90": 214.78399634361267, + "p95": 239.1040027141571, + "p99": 324.67201352119446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.71199929714203, + "p90": 105.18400371074677, + "p95": 107.55199939012527, + "p99": 112.19199746847153 + }, + "combine": { + "p50": 99.71199929714203, + "p90": 105.18400371074677, + "p95": 107.55199939012527, + "p99": 112.19199746847153 + }, + 
"roundtrip": { + "p50": 99.71199929714203, + "p90": 105.18400371074677, + "p95": 107.55199939012527, + "p99": 112.19199746847153 + }, + "isolatedSum": { + "p50": 199.42399859428406, + "p90": 210.36800742149353, + "p95": 215.10399878025055, + "p99": 224.38399493694305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 112.0000034570694, + "p90": 117.50400066375732, + "p95": 119.87199634313583, + "p99": 136.28800213336945 + }, + "combine": { + "p50": 112.0000034570694, + "p90": 117.50400066375732, + "p95": 119.87199634313583, + "p99": 136.28800213336945 + }, + "roundtrip": { + "p50": 112.0000034570694, + "p90": 117.50400066375732, + "p95": 119.87199634313583, + "p99": 136.28800213336945 + }, + "isolatedSum": { + "p50": 224.0000069141388, + "p90": 235.00800132751465, + "p95": 239.74399268627167, + "p99": 272.5760042667389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ee33dff3", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_47918098", + "comparisonKey": "f447f266bfc31922", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:34:50.845932+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.81599926948547, + "p90": 100.0640019774437, + "p95": 102.1760031580925, + "p99": 108.15999656915665 + }, + "combine": { + "p50": 94.81599926948547, + "p90": 100.0640019774437, + "p95": 102.1760031580925, + "p99": 108.15999656915665 + }, + "roundtrip": { + "p50": 94.81599926948547, + "p90": 100.0640019774437, + "p95": 102.1760031580925, + "p99": 108.15999656915665 + }, + "isolatedSum": { + "p50": 189.63199853897095, + "p90": 200.1280039548874, + "p95": 204.352006316185, + "p99": 
216.3199931383133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 96.25600278377533, + "p90": 100.70399940013885, + "p95": 103.93600165843964, + "p99": 125.44000148773193 + }, + "combine": { + "p50": 96.25600278377533, + "p90": 100.70399940013885, + "p95": 103.93600165843964, + "p99": 125.44000148773193 + }, + "roundtrip": { + "p50": 96.25600278377533, + "p90": 100.70399940013885, + "p95": 103.93600165843964, + "p99": 125.44000148773193 + }, + "isolatedSum": { + "p50": 192.51200556755066, + "p90": 201.4079988002777, + "p95": 207.87200331687927, + "p99": 250.88000297546387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 96.19200229644775, + "p90": 100.41599720716476, + "p95": 102.84800082445145, + "p99": 106.23999685049057 + }, + "combine": { + "p50": 96.19200229644775, + "p90": 100.41599720716476, + "p95": 102.84800082445145, + "p99": 106.23999685049057 + }, + "roundtrip": { + "p50": 96.19200229644775, + "p90": 100.41599720716476, + "p95": 102.84800082445145, + "p99": 106.23999685049057 + }, + "isolatedSum": { + "p50": 192.3840045928955, + "p90": 200.83199441432953, + "p95": 205.6960016489029, + "p99": 212.47999370098114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 95.96800059080124, + "p90": 
100.89600086212158, + "p95": 103.39199751615524, + "p99": 151.58399939537048 + }, + "combine": { + "p50": 95.96800059080124, + "p90": 100.89600086212158, + "p95": 103.39199751615524, + "p99": 151.58399939537048 + }, + "roundtrip": { + "p50": 95.96800059080124, + "p90": 100.89600086212158, + "p95": 103.39199751615524, + "p99": 151.58399939537048 + }, + "isolatedSum": { + "p50": 191.93600118160248, + "p90": 201.79200172424316, + "p95": 206.7839950323105, + "p99": 303.16799879074097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 95.83999961614609, + "p90": 99.64799880981445, + "p95": 101.75999999046326, + "p99": 107.19999670982361 + }, + "combine": { + "p50": 95.83999961614609, + "p90": 99.64799880981445, + "p95": 101.75999999046326, + "p99": 107.19999670982361 + }, + "roundtrip": { + "p50": 95.83999961614609, + "p90": 99.64799880981445, + "p95": 101.75999999046326, + "p99": 107.19999670982361 + }, + "isolatedSum": { + "p50": 191.67999923229218, + "p90": 199.2959976196289, + "p95": 203.5199999809265, + "p99": 214.39999341964722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 96.3200032711029, + "p90": 99.58399832248688, + "p95": 102.49599814414978, + "p99": 107.84000158309937 + }, + "combine": { + "p50": 96.3200032711029, + "p90": 99.58399832248688, + "p95": 102.49599814414978, + "p99": 107.84000158309937 + }, + "roundtrip": { + "p50": 96.3200032711029, + "p90": 99.58399832248688, + "p95": 102.49599814414978, + "p99": 107.84000158309937 + }, + 
"isolatedSum": { + "p50": 192.6400065422058, + "p90": 199.16799664497375, + "p95": 204.99199628829956, + "p99": 215.68000316619873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.37599641084671, + "p90": 102.11200267076492, + "p95": 104.38399761915207, + "p99": 108.96000266075134 + }, + "combine": { + "p50": 97.37599641084671, + "p90": 102.11200267076492, + "p95": 104.38399761915207, + "p99": 108.96000266075134 + }, + "roundtrip": { + "p50": 97.37599641084671, + "p90": 102.11200267076492, + "p95": 104.38399761915207, + "p99": 108.96000266075134 + }, + "isolatedSum": { + "p50": 194.75199282169342, + "p90": 204.22400534152985, + "p95": 208.76799523830414, + "p99": 217.92000532150269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.48000031709671, + "p90": 120.7680031657219, + "p95": 123.32800030708313, + "p99": 128.89599800109863 + }, + "combine": { + "p50": 116.48000031709671, + "p90": 120.7680031657219, + "p95": 123.32800030708313, + "p99": 128.89599800109863 + }, + "roundtrip": { + "p50": 116.48000031709671, + "p90": 120.7680031657219, + "p95": 123.32800030708313, + "p99": 128.89599800109863 + }, + "isolatedSum": { + "p50": 232.96000063419342, + "p90": 241.5360063314438, + "p95": 246.65600061416626, + "p99": 257.79199600219727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-311b5ba1", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_9b70e3f6", + "comparisonKey": "f76e2a1a591c1742", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:10.705181+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + 
"url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 91.93599969148636, + "p90": 95.20000219345093, + "p95": 97.9200005531311, + "p99": 100.54399818181992 + }, + "combine": { + "p50": 91.93599969148636, + "p90": 95.20000219345093, + "p95": 97.9200005531311, + "p99": 100.54399818181992 + }, + "roundtrip": { + "p50": 91.93599969148636, + "p90": 95.20000219345093, + "p95": 97.9200005531311, + "p99": 100.54399818181992 + }, + "isolatedSum": { + "p50": 183.87199938297272, + "p90": 190.40000438690186, + "p95": 195.8400011062622, + "p99": 201.08799636363983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 92.06400066614151, + "p90": 96.22400254011154, + "p95": 97.98400104045868, + "p99": 102.11200267076492 + }, + "combine": { + "p50": 92.06400066614151, + "p90": 96.22400254011154, + "p95": 97.98400104045868, + "p99": 102.11200267076492 + }, + "roundtrip": { + "p50": 92.06400066614151, + "p90": 96.22400254011154, + "p95": 97.98400104045868, + "p99": 102.11200267076492 + }, + "isolatedSum": { + "p50": 184.12800133228302, + "p90": 192.44800508022308, + "p95": 195.96800208091736, + "p99": 204.22400534152985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 91.61599725484848, + "p90": 95.0080007314682, + "p95": 96.63999825716019, + "p99": 100.22400319576263 + }, + "combine": 
{ + "p50": 91.61599725484848, + "p90": 95.0080007314682, + "p95": 96.63999825716019, + "p99": 100.22400319576263 + }, + "roundtrip": { + "p50": 91.61599725484848, + "p90": 95.0080007314682, + "p95": 96.63999825716019, + "p99": 100.22400319576263 + }, + "isolatedSum": { + "p50": 183.23199450969696, + "p90": 190.0160014629364, + "p95": 193.27999651432037, + "p99": 200.44800639152527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 92.12800115346909, + "p90": 95.93600034713745, + "p95": 99.5199978351593, + "p99": 108.12799632549286 + }, + "combine": { + "p50": 92.12800115346909, + "p90": 95.93600034713745, + "p95": 99.5199978351593, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 92.12800115346909, + "p90": 95.93600034713745, + "p95": 99.5199978351593, + "p99": 108.12799632549286 + }, + "isolatedSum": { + "p50": 184.25600230693817, + "p90": 191.8720006942749, + "p95": 199.0399956703186, + "p99": 216.25599265098572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 91.80799871683121, + "p90": 95.77599912881851, + "p95": 98.91200065612793, + "p99": 117.40799993276596 + }, + "combine": { + "p50": 91.80799871683121, + "p90": 95.77599912881851, + "p95": 98.91200065612793, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 91.80799871683121, + "p90": 95.77599912881851, + "p95": 98.91200065612793, + "p99": 117.40799993276596 + }, + "isolatedSum": { + "p50": 183.61599743366241, + "p90": 191.55199825763702, + "p95": 197.82400131225586, + "p99": 
234.81599986553192 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 91.36000275611877, + "p90": 95.45599669218063, + "p95": 99.0080013871193, + "p99": 103.35999727249146 + }, + "combine": { + "p50": 91.36000275611877, + "p90": 95.45599669218063, + "p95": 99.0080013871193, + "p99": 103.35999727249146 + }, + "roundtrip": { + "p50": 91.36000275611877, + "p90": 95.45599669218063, + "p95": 99.0080013871193, + "p99": 103.35999727249146 + }, + "isolatedSum": { + "p50": 182.72000551223755, + "p90": 190.91199338436127, + "p95": 198.0160027742386, + "p99": 206.7199945449829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.44000369310379, + "p90": 97.95200079679489, + "p95": 99.90400075912476, + "p99": 107.29599744081497 + }, + "combine": { + "p50": 93.44000369310379, + "p90": 97.95200079679489, + "p95": 99.90400075912476, + "p99": 107.29599744081497 + }, + "roundtrip": { + "p50": 93.44000369310379, + "p90": 97.95200079679489, + "p95": 99.90400075912476, + "p99": 107.29599744081497 + }, + "isolatedSum": { + "p50": 186.88000738620758, + "p90": 195.90400159358978, + "p95": 199.8080015182495, + "p99": 214.59199488162994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.78400099277496, + "p90": 
111.55200004577637, + "p95": 112.99200356006622, + "p99": 118.56000125408173 + }, + "combine": { + "p50": 106.78400099277496, + "p90": 111.55200004577637, + "p95": 112.99200356006622, + "p99": 118.56000125408173 + }, + "roundtrip": { + "p50": 106.78400099277496, + "p90": 111.55200004577637, + "p95": 112.99200356006622, + "p99": 118.56000125408173 + }, + "isolatedSum": { + "p50": 213.56800198554993, + "p90": 223.10400009155273, + "p95": 225.98400712013245, + "p99": 237.12000250816345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9a682c7b", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_c921954c", + "comparisonKey": "e499ddff60ec3dbc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:08.233364+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + 
"deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 98.11200201511383, + "p90": 106.49599879980087, + "p95": 112.47999966144562, + "p99": 118.43200027942657 + }, + "combine": { + "p50": 98.11200201511383, + "p90": 106.49599879980087, + "p95": 112.47999966144562, + "p99": 118.43200027942657 + }, + "roundtrip": { + "p50": 98.11200201511383, + "p90": 106.49599879980087, + "p95": 112.47999966144562, + "p99": 118.43200027942657 + }, + "isolatedSum": { + "p50": 196.22400403022766, + "p90": 212.99199759960175, + "p95": 224.95999932289124, + "p99": 236.86400055885315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 97.98400104045868, + "p90": 103.26399654150009, + "p95": 108.25599730014801, + "p99": 116.15999788045883 + }, + "combine": { + "p50": 97.98400104045868, + "p90": 103.26399654150009, + "p95": 108.25599730014801, + "p99": 116.15999788045883 + }, + "roundtrip": { + "p50": 
97.98400104045868, + "p90": 103.26399654150009, + "p95": 108.25599730014801, + "p99": 116.15999788045883 + }, + "isolatedSum": { + "p50": 195.96800208091736, + "p90": 206.52799308300018, + "p95": 216.51199460029602, + "p99": 232.31999576091766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 98.55999797582626, + "p90": 105.92000186443329, + "p95": 113.18399757146835, + "p99": 147.45600521564484 + }, + "combine": { + "p50": 98.55999797582626, + "p90": 105.92000186443329, + "p95": 113.18399757146835, + "p99": 147.45600521564484 + }, + "roundtrip": { + "p50": 98.55999797582626, + "p90": 105.92000186443329, + "p95": 113.18399757146835, + "p99": 147.45600521564484 + }, + "isolatedSum": { + "p50": 197.11999595165253, + "p90": 211.84000372886658, + "p95": 226.3679951429367, + "p99": 294.9120104312897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 98.39999675750732, + "p90": 102.68799960613251, + "p95": 107.26399719715118, + "p99": 115.84000289440155 + }, + "combine": { + "p50": 98.39999675750732, + "p90": 102.68799960613251, + "p95": 107.26399719715118, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 98.39999675750732, + "p90": 102.68799960613251, + "p95": 107.26399719715118, + "p99": 115.84000289440155 + }, + "isolatedSum": { + "p50": 196.79999351501465, + "p90": 205.37599921226501, + "p95": 214.52799439430237, + "p99": 231.6800057888031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 
5.046875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 97.24800288677216, + "p90": 105.82400113344193, + "p95": 111.48799955844879, + "p99": 117.11999773979187 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 105.82400113344193, + "p95": 111.48799955844879, + "p99": 117.11999773979187 + }, + "roundtrip": { + "p50": 97.24800288677216, + "p90": 105.82400113344193, + "p95": 111.48799955844879, + "p99": 117.11999773979187 + }, + "isolatedSum": { + "p50": 194.4960057735443, + "p90": 211.64800226688385, + "p95": 222.97599911689758, + "p99": 234.23999547958374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 97.9200005531311, + "p90": 102.52799838781357, + "p95": 107.10400342941284, + "p99": 115.26399850845337 + }, + "combine": { + "p50": 97.9200005531311, + "p90": 102.52799838781357, + "p95": 107.10400342941284, + "p99": 115.26399850845337 + }, + "roundtrip": { + "p50": 97.9200005531311, + "p90": 102.52799838781357, + "p95": 107.10400342941284, + "p99": 115.26399850845337 + }, + "isolatedSum": { + "p50": 195.8400011062622, + "p90": 205.05599677562714, + "p95": 214.20800685882568, + "p99": 230.52799701690674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.55199807882309, + "p90": 107.4879989027977, + "p95": 113.21599781513214, + "p99": 118.52800101041794 + }, + "combine": { + "p50": 99.55199807882309, + "p90": 
107.4879989027977, + "p95": 113.21599781513214, + "p99": 118.52800101041794 + }, + "roundtrip": { + "p50": 99.55199807882309, + "p90": 107.4879989027977, + "p95": 113.21599781513214, + "p99": 118.52800101041794 + }, + "isolatedSum": { + "p50": 199.10399615764618, + "p90": 214.9759978055954, + "p95": 226.43199563026428, + "p99": 237.05600202083588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 110.944002866745, + "p90": 117.0239970088005, + "p95": 120.57600170373917, + "p99": 127.68000364303589 + }, + "combine": { + "p50": 110.944002866745, + "p90": 117.0239970088005, + "p95": 120.57600170373917, + "p99": 127.68000364303589 + }, + "roundtrip": { + "p50": 110.944002866745, + "p90": 117.0239970088005, + "p95": 120.57600170373917, + "p99": 127.68000364303589 + }, + "isolatedSum": { + "p50": 221.88800573349, + "p90": 234.047994017601, + "p95": 241.15200340747833, + "p99": 255.36000728607178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-af637038", + "identity": "h100|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_5ba8257e", + "comparisonKey": "63f2ed34d1d8c7db", + "schemaVersion": 3, + "generatedAt": "2026-07-02T12:42:58.892121+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.60800063610077, + "p90": 116.48000031709671, + "p95": 119.77600306272507, + "p99": 128.57599556446075 + }, + "combine": { + "p50": 112.60800063610077, + "p90": 116.48000031709671, + "p95": 119.77600306272507, + "p99": 128.57599556446075 + }, + "roundtrip": { + "p50": 112.60800063610077, + "p90": 116.48000031709671, + "p95": 119.77600306272507, + "p99": 128.57599556446075 + }, + "isolatedSum": { + "p50": 225.21600127220154, + "p90": 
232.96000063419342, + "p95": 239.55200612545013, + "p99": 257.1519911289215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 113.0559965968132, + "p90": 116.48000031709671, + "p95": 119.32799965143204, + "p99": 122.91199713945389 + }, + "combine": { + "p50": 113.0559965968132, + "p90": 116.48000031709671, + "p95": 119.32799965143204, + "p99": 122.91199713945389 + }, + "roundtrip": { + "p50": 113.0559965968132, + "p90": 116.48000031709671, + "p95": 119.32799965143204, + "p99": 122.91199713945389 + }, + "isolatedSum": { + "p50": 226.1119931936264, + "p90": 232.96000063419342, + "p95": 238.65599930286407, + "p99": 245.82399427890778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 112.31999844312668, + "p90": 117.76000261306763, + "p95": 121.8239963054657, + "p99": 128.1919926404953 + }, + "combine": { + "p50": 112.31999844312668, + "p90": 117.76000261306763, + "p95": 121.8239963054657, + "p99": 128.1919926404953 + }, + "roundtrip": { + "p50": 112.31999844312668, + "p90": 117.76000261306763, + "p95": 121.8239963054657, + "p99": 128.1919926404953 + }, + "isolatedSum": { + "p50": 224.63999688625336, + "p90": 235.52000522613525, + "p95": 243.6479926109314, + "p99": 256.3839852809906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": 
{ + "p50": 112.8000020980835, + "p90": 117.3119992017746, + "p95": 119.93599683046341, + "p99": 123.90399724245071 + }, + "combine": { + "p50": 112.8000020980835, + "p90": 117.3119992017746, + "p95": 119.93599683046341, + "p99": 123.90399724245071 + }, + "roundtrip": { + "p50": 112.8000020980835, + "p90": 117.3119992017746, + "p95": 119.93599683046341, + "p99": 123.90399724245071 + }, + "isolatedSum": { + "p50": 225.600004196167, + "p90": 234.6239984035492, + "p95": 239.87199366092682, + "p99": 247.80799448490143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 112.03200370073318, + "p90": 116.70400202274323, + "p95": 118.97599697113037, + "p99": 124.83199685811996 + }, + "combine": { + "p50": 112.03200370073318, + "p90": 116.70400202274323, + "p95": 118.97599697113037, + "p99": 124.83199685811996 + }, + "roundtrip": { + "p50": 112.03200370073318, + "p90": 116.70400202274323, + "p95": 118.97599697113037, + "p99": 124.83199685811996 + }, + "isolatedSum": { + "p50": 224.06400740146637, + "p90": 233.40800404548645, + "p95": 237.95199394226074, + "p99": 249.66399371623993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 112.28799819946289, + "p90": 117.11999773979187, + "p95": 119.23199892044067, + "p99": 126.91199779510498 + }, + "combine": { + "p50": 112.28799819946289, + "p90": 117.11999773979187, + "p95": 119.23199892044067, + "p99": 126.91199779510498 + }, + "roundtrip": { + "p50": 112.28799819946289, + "p90": 117.11999773979187, + "p95": 
119.23199892044067, + "p99": 126.91199779510498 + }, + "isolatedSum": { + "p50": 224.57599639892578, + "p90": 234.23999547958374, + "p95": 238.46399784088135, + "p99": 253.82399559020996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 113.82400244474411, + "p90": 119.00799721479416, + "p95": 121.60000205039978, + "p99": 129.50399518013 + }, + "combine": { + "p50": 113.82400244474411, + "p90": 119.00799721479416, + "p95": 121.60000205039978, + "p99": 129.50399518013 + }, + "roundtrip": { + "p50": 113.82400244474411, + "p90": 119.00799721479416, + "p95": 121.60000205039978, + "p99": 129.50399518013 + }, + "isolatedSum": { + "p50": 227.64800488948822, + "p90": 238.01599442958832, + "p95": 243.20000410079956, + "p99": 259.00799036026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.59999525547028, + "p90": 131.00799918174744, + "p95": 132.6719969511032, + "p99": 137.53600418567657 + }, + "combine": { + "p50": 125.59999525547028, + "p90": 131.00799918174744, + "p95": 132.6719969511032, + "p99": 137.53600418567657 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 131.00799918174744, + "p95": 132.6719969511032, + "p99": 137.53600418567657 + }, + "isolatedSum": { + "p50": 251.19999051094055, + "p90": 262.0159983634949, + "p95": 265.3439939022064, + "p99": 275.07200837135315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-df16ae2a", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_4d19e0a7", + "comparisonKey": "ab2d6ab146526e25", + "schemaVersion": 3, + "generatedAt": "2026-07-02T12:42:39.019903+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.57599645853043, + "p90": 91.87199920415878, + "p95": 93.88799965381622, + "p99": 96.47999703884125 + }, + "combine": { + "p50": 88.57599645853043, + "p90": 91.87199920415878, + "p95": 93.88799965381622, + "p99": 96.47999703884125 + }, + "roundtrip": { + "p50": 88.57599645853043, + "p90": 91.87199920415878, + "p95": 93.88799965381622, + "p99": 96.47999703884125 + }, + "isolatedSum": { + "p50": 177.15199291706085, + "p90": 183.74399840831757, + "p95": 187.77599930763245, + "p99": 192.9599940776825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.4400054216385, + "p90": 152.28800475597382, + "p95": 153.47200632095337, + "p99": 158.27199816703796 + }, + "combine": { + "p50": 149.4400054216385, + "p90": 152.28800475597382, + "p95": 153.47200632095337, + "p99": 158.27199816703796 + }, + "roundtrip": { + "p50": 149.4400054216385, + "p90": 152.28800475597382, + "p95": 153.47200632095337, + "p99": 158.27199816703796 + }, + "isolatedSum": { + "p50": 298.880010843277, + "p90": 304.57600951194763, + "p95": 306.94401264190674, + "p99": 316.5439963340759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 263.45598697662354, + "p90": 267.2640085220337, + "p95": 269.1519856452942, + "p99": 
273.3759880065918 + }, + "combine": { + "p50": 263.45598697662354, + "p90": 267.2640085220337, + "p95": 269.1519856452942, + "p99": 273.3759880065918 + }, + "roundtrip": { + "p50": 263.45598697662354, + "p90": 267.2640085220337, + "p95": 269.1519856452942, + "p99": 273.3759880065918 + }, + "isolatedSum": { + "p50": 526.9119739532471, + "p90": 534.5280170440674, + "p95": 538.3039712905884, + "p99": 546.7519760131836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 498.6560046672821, + "p90": 502.49600410461426, + "p95": 503.52001190185547, + "p99": 506.52801990509033 + }, + "combine": { + "p50": 498.6560046672821, + "p90": 502.49600410461426, + "p95": 503.52001190185547, + "p99": 506.52801990509033 + }, + "roundtrip": { + "p50": 498.6560046672821, + "p90": 502.49600410461426, + "p95": 503.52001190185547, + "p99": 506.52801990509033 + }, + "isolatedSum": { + "p50": 997.3120093345642, + "p90": 1004.9920082092285, + "p95": 1007.0400238037109, + "p99": 1013.0560398101807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 951.0719776153564, + "p90": 955.1680088043213, + "p95": 956.1920166015625, + "p99": 958.7519764900208 + }, + "combine": { + "p50": 951.0719776153564, + "p90": 955.1680088043213, + "p95": 956.1920166015625, + "p99": 958.7519764900208 + }, + "roundtrip": { + "p50": 951.0719776153564, + "p90": 955.1680088043213, + "p95": 956.1920166015625, + "p99": 958.7519764900208 + }, + "isolatedSum": { + "p50": 1902.143955230713, + 
"p90": 1910.3360176086426, + "p95": 1912.384033203125, + "p99": 1917.5039529800415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1855.6480407714844, + "p90": 1862.239956855774, + "p95": 1865.18394947052, + "p99": 1874.7520446777344 + }, + "combine": { + "p50": 1855.6480407714844, + "p90": 1862.239956855774, + "p95": 1865.18394947052, + "p99": 1874.7520446777344 + }, + "roundtrip": { + "p50": 1855.6480407714844, + "p90": 1862.239956855774, + "p95": 1865.18394947052, + "p99": 1874.7520446777344 + }, + "isolatedSum": { + "p50": 3711.2960815429688, + "p90": 3724.479913711548, + "p95": 3730.36789894104, + "p99": 3749.5040893554688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-96d1fd61", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_6e3f35b1", + "comparisonKey": "82f91b3ff756abff", + "schemaVersion": 3, + "generatedAt": "2026-07-02T13:05:06.756993+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · balanced", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.72800302505493, + "p90": 124.4800016283989, + "p95": 127.55200266838074, + "p99": 137.9839926958084 + }, + "combine": { + "p50": 121.72800302505493, + "p90": 124.4800016283989, + "p95": 127.55200266838074, + "p99": 137.9839926958084 + }, + "roundtrip": { + "p50": 121.72800302505493, + "p90": 124.4800016283989, + "p95": 127.55200266838074, + "p99": 137.9839926958084 + }, + "isolatedSum": { + "p50": 243.45600605010986, + "p90": 248.9600032567978, + "p95": 255.10400533676147, + "p99": 275.9679853916168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 8, + 
"stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 208.44799280166626, + "p90": 214.9759978055954, + "p95": 215.7759964466095, + "p99": 217.53600239753723 + }, + "combine": { + "p50": 208.44799280166626, + "p90": 214.9759978055954, + "p95": 215.7759964466095, + "p99": 217.53600239753723 + }, + "roundtrip": { + "p50": 208.44799280166626, + "p90": 214.9759978055954, + "p95": 215.7759964466095, + "p99": 217.53600239753723 + }, + "isolatedSum": { + "p50": 416.8959856033325, + "p90": 429.9519956111908, + "p95": 431.551992893219, + "p99": 435.07200479507446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 383.13600420951843, + "p90": 389.8240029811859, + "p95": 391.2000060081482, + "p99": 394.20801401138306 + }, + "combine": { + "p50": 383.13600420951843, + "p90": 389.8240029811859, + "p95": 391.2000060081482, + "p99": 394.20801401138306 + }, + "roundtrip": { + "p50": 383.13600420951843, + "p90": 389.8240029811859, + "p95": 391.2000060081482, + "p99": 394.20801401138306 + }, + "isolatedSum": { + "p50": 766.2720084190369, + "p90": 779.6480059623718, + "p95": 782.4000120162964, + "p99": 788.4160280227661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 731.6480278968811, + "p90": 738.3679747581482, + "p95": 740.9600019454956, + "p99": 744.3519830703735 + }, + "combine": { + "p50": 731.6480278968811, + "p90": 738.3679747581482, + "p95": 740.9600019454956, + 
"p99": 744.3519830703735 + }, + "roundtrip": { + "p50": 731.6480278968811, + "p90": 738.3679747581482, + "p95": 740.9600019454956, + "p99": 744.3519830703735 + }, + "isolatedSum": { + "p50": 1463.2960557937622, + "p90": 1476.7359495162964, + "p95": 1481.9200038909912, + "p99": 1488.703966140747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1418.720006942749, + "p90": 1426.6879558563232, + "p95": 1428.9920330047607, + "p99": 1437.3120069503784 + }, + "combine": { + "p50": 1418.720006942749, + "p90": 1426.6879558563232, + "p95": 1428.9920330047607, + "p99": 1437.3120069503784 + }, + "roundtrip": { + "p50": 1418.720006942749, + "p90": 1426.6879558563232, + "p95": 1428.9920330047607, + "p99": 1437.3120069503784 + }, + "isolatedSum": { + "p50": 2837.440013885498, + "p90": 2853.3759117126465, + "p95": 2857.9840660095215, + "p99": 2874.624013900757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2793.9839363098145, + "p90": 2804.192066192627, + "p95": 2806.1439990997314, + "p99": 2814.879894256592 + }, + "combine": { + "p50": 2793.9839363098145, + "p90": 2804.192066192627, + "p95": 2806.1439990997314, + "p99": 2814.879894256592 + }, + "roundtrip": { + "p50": 2793.9839363098145, + "p90": 2804.192066192627, + "p95": 2806.1439990997314, + "p99": 2814.879894256592 + }, + "isolatedSum": { + "p50": 5587.967872619629, + "p90": 5608.384132385254, + "p95": 5612.287998199463, + "p99": 5629.759788513184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6068ee65", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_eccd5c68", + "comparisonKey": "c3beb174de105c4b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T08:57:49.171420+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_05", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 101.43999755382538, + "p90": 105.50399869680405, + "p95": 108.73600095510483, + "p99": 113.8560026884079 + }, + "combine": { + "p50": 101.43999755382538, + "p90": 105.50399869680405, + "p95": 108.73600095510483, + "p99": 113.8560026884079 + }, + "roundtrip": { + "p50": 101.43999755382538, + "p90": 105.50399869680405, + "p95": 108.73600095510483, + "p99": 113.8560026884079 + }, + "isolatedSum": { + "p50": 202.87999510765076, + "p90": 211.0079973936081, + "p95": 217.47200191020966, + "p99": 227.7120053768158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 107.71200060844421, + "p90": 112.15999722480774, + "p95": 114.81600254774094, + "p99": 119.07199770212173 + }, + "combine": { + "p50": 107.71200060844421, + "p90": 112.15999722480774, + "p95": 114.81600254774094, + "p99": 119.07199770212173 + }, + "roundtrip": { + "p50": 107.71200060844421, + "p90": 112.15999722480774, + "p95": 114.81600254774094, + "p99": 119.07199770212173 + }, + "isolatedSum": { + "p50": 215.42400121688843, + "p90": 224.31999444961548, + "p95": 229.63200509548187, + "p99": 238.14399540424347 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 222.4320024251938, + "p90": 225.600004196167, + "p95": 227.00800001621246, + "p99": 230.97600042819977 + }, + "combine": { + "p50": 222.4320024251938, + "p90": 225.600004196167, + "p95": 227.00800001621246, + "p99": 230.97600042819977 + }, + "roundtrip": { + "p50": 222.4320024251938, + "p90": 225.600004196167, + "p95": 227.00800001621246, + "p99": 230.97600042819977 + }, + "isolatedSum": { + "p50": 444.8640048503876, + "p90": 451.200008392334, + "p95": 454.0160000324249, + "p99": 461.95200085639954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f95e2664", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_324b2efd", + "comparisonKey": "6e43c05ea7cecda4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:47.996311+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 89.88799899816513, + "p90": 93.53599697351456, + "p95": 96.25600278377533, + "p99": 102.7199998497963 + }, + "combine": { + "p50": 89.88799899816513, + "p90": 93.53599697351456, + "p95": 96.25600278377533, + "p99": 102.7199998497963 + }, + "roundtrip": { + "p50": 89.88799899816513, + "p90": 93.53599697351456, + "p95": 96.25600278377533, + "p99": 102.7199998497963 + }, + "isolatedSum": { + "p50": 179.77599799633026, + "p90": 187.0719939470291, + "p95": 192.51200556755066, + "p99": 205.4399996995926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 105.21599650382996, + "p90": 110.27199774980545, + "p95": 113.40799927711487, + "p99": 118.97599697113037 + }, + "combine": { + "p50": 105.21599650382996, + "p90": 110.27199774980545, + 
"p95": 113.40799927711487, + "p99": 118.97599697113037 + }, + "roundtrip": { + "p50": 105.21599650382996, + "p90": 110.27199774980545, + "p95": 113.40799927711487, + "p99": 118.97599697113037 + }, + "isolatedSum": { + "p50": 210.4319930076599, + "p90": 220.5439954996109, + "p95": 226.81599855422974, + "p99": 237.95199394226074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 158.720001578331, + "p90": 161.76000237464905, + "p95": 162.6559942960739, + "p99": 165.27999937534332 + }, + "combine": { + "p50": 158.720001578331, + "p90": 161.76000237464905, + "p95": 162.6559942960739, + "p99": 165.27999937534332 + }, + "roundtrip": { + "p50": 158.720001578331, + "p90": 161.76000237464905, + "p95": 162.6559942960739, + "p99": 165.27999937534332 + }, + "isolatedSum": { + "p50": 317.440003156662, + "p90": 323.5200047492981, + "p95": 325.3119885921478, + "p99": 330.55999875068665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 284.06399488449097, + "p90": 287.4239981174469, + "p95": 288.5439991950989, + "p99": 291.456013917923 + }, + "combine": { + "p50": 284.06399488449097, + "p90": 287.4239981174469, + "p95": 288.5439991950989, + "p99": 291.456013917923 + }, + "roundtrip": { + "p50": 284.06399488449097, + "p90": 287.4239981174469, + "p95": 288.5439991950989, + "p99": 291.456013917923 + }, + "isolatedSum": { + "p50": 568.1279897689819, + "p90": 574.8479962348938, + "p95": 577.0879983901978, + "p99": 582.912027835846 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 529.7279953956604, + "p90": 533.2480072975159, + "p95": 534.1759920120239, + "p99": 536.5440249443054 + }, + "combine": { + "p50": 529.7279953956604, + "p90": 533.2480072975159, + "p95": 534.1759920120239, + "p99": 536.5440249443054 + }, + "roundtrip": { + "p50": 529.7279953956604, + "p90": 533.2480072975159, + "p95": 534.1759920120239, + "p99": 536.5440249443054 + }, + "isolatedSum": { + "p50": 1059.4559907913208, + "p90": 1066.4960145950317, + "p95": 1068.3519840240479, + "p99": 1073.0880498886108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1018.3680057525635, + "p90": 1021.8559503555298, + "p95": 1023.0079889297485, + "p99": 1025.5039930343628 + }, + "combine": { + "p50": 1018.3680057525635, + "p90": 1021.8559503555298, + "p95": 1023.0079889297485, + "p99": 1025.5039930343628 + }, + "roundtrip": { + "p50": 1018.3680057525635, + "p90": 1021.8559503555298, + "p95": 1023.0079889297485, + "p99": 1025.5039930343628 + }, + "isolatedSum": { + "p50": 2036.736011505127, + "p90": 2043.7119007110596, + "p95": 2046.015977859497, + "p99": 2051.0079860687256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ade9851d", + "identity": 
"h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h100_f8534427", + "comparisonKey": "9f3e2a573f085d6c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:22.896915+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.83200168609619, + "p90": 110.36799848079681, + "p95": 111.26399785280228, + "p99": 114.81600254774094 + }, + "combine": { + "p50": 108.83200168609619, + "p90": 110.36799848079681, + "p95": 111.26399785280228, + "p99": 114.81600254774094 + }, + "roundtrip": { + "p50": 108.83200168609619, + "p90": 110.36799848079681, + "p95": 111.26399785280228, + "p99": 114.81600254774094 + }, + "isolatedSum": { + "p50": 217.66400337219238, + "p90": 220.73599696159363, + "p95": 222.52799570560455, + "p99": 229.63200509548187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 192.47999787330627, + "p90": 194.72000002861023, + "p95": 195.8400011062622, + "p99": 199.0399956703186 + }, + "combine": { + "p50": 192.47999787330627, + "p90": 194.72000002861023, + "p95": 195.8400011062622, + "p99": 199.0399956703186 + }, + "roundtrip": { + "p50": 192.47999787330627, + "p90": 194.72000002861023, + "p95": 195.8400011062622, + "p99": 199.0399956703186 + }, + "isolatedSum": { + "p50": 384.95999574661255, + "p90": 389.44000005722046, + "p95": 391.6800022125244, + "p99": 398.0799913406372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 362.08000779151917, + "p90": 365.05600810050964, + "p95": 365.88799953460693, + "p99": 368.9599931240082 + }, + "combine": { + "p50": 362.08000779151917, + "p90": 365.05600810050964, + "p95": 
365.88799953460693, + "p99": 368.9599931240082 + }, + "roundtrip": { + "p50": 362.08000779151917, + "p90": 365.05600810050964, + "p95": 365.88799953460693, + "p99": 368.9599931240082 + }, + "isolatedSum": { + "p50": 724.1600155830383, + "p90": 730.1120162010193, + "p95": 731.7759990692139, + "p99": 737.9199862480164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 737.824022769928, + "p90": 741.0240173339844, + "p95": 742.5600290298462, + "p99": 746.3359832763672 + }, + "combine": { + "p50": 737.824022769928, + "p90": 741.0240173339844, + "p95": 742.5600290298462, + "p99": 746.3359832763672 + }, + "roundtrip": { + "p50": 737.824022769928, + "p90": 741.0240173339844, + "p95": 742.5600290298462, + "p99": 746.3359832763672 + }, + "isolatedSum": { + "p50": 1475.648045539856, + "p90": 1482.0480346679688, + "p95": 1485.1200580596924, + "p99": 1492.6719665527344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1480.9600114822388, + "p90": 1485.2479696273804, + "p95": 1487.0719909667969, + "p99": 1489.7279739379883 + }, + "combine": { + "p50": 1480.9600114822388, + "p90": 1485.2479696273804, + "p95": 1487.0719909667969, + "p99": 1489.7279739379883 + }, + "roundtrip": { + "p50": 1480.9600114822388, + "p90": 1485.2479696273804, + "p95": 1487.0719909667969, + "p99": 1489.7279739379883 + }, + "isolatedSum": { + "p50": 2961.9200229644775, + "p90": 2970.4959392547607, + "p95": 2974.1439819335938, + "p99": 2979.4559478759766 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2973.6640453338623, + "p90": 2978.816032409668, + "p95": 2980.4160594940186, + "p99": 2983.07204246521 + }, + "combine": { + "p50": 2973.6640453338623, + "p90": 2978.816032409668, + "p95": 2980.4160594940186, + "p99": 2983.07204246521 + }, + "roundtrip": { + "p50": 2973.6640453338623, + "p90": 2978.816032409668, + "p95": 2980.4160594940186, + "p99": 2983.07204246521 + }, + "isolatedSum": { + "p50": 5947.328090667725, + "p90": 5957.632064819336, + "p95": 5960.832118988037, + "p99": 5966.14408493042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fb36d8bc", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_d7c1fbf7", + "comparisonKey": "8330fddd9c0d9284", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:26.345077+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 90.01599997282028, + "p90": 93.34400296211243, + "p95": 95.67999839782715, + "p99": 102.84800082445145 + }, + "combine": { + "p50": 90.01599997282028, + "p90": 93.34400296211243, + "p95": 95.67999839782715, + "p99": 102.84800082445145 + }, + "roundtrip": { + "p50": 90.01599997282028, + "p90": 93.34400296211243, + "p95": 95.67999839782715, + "p99": 102.84800082445145 + }, + "isolatedSum": { + "p50": 180.03199994564056, + "p90": 186.68800592422485, + "p95": 191.3599967956543, + "p99": 205.6960016489029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 8, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 150.94399452209473, + "p90": 153.18399667739868, + "p95": 154.2080044746399, + "p99": 158.65600109100342 + }, + "combine": { + "p50": 150.94399452209473, + "p90": 153.18399667739868, + "p95": 154.2080044746399, + "p99": 158.65600109100342 + }, + "roundtrip": { + "p50": 150.94399452209473, + "p90": 153.18399667739868, + "p95": 154.2080044746399, + "p99": 158.65600109100342 + }, + "isolatedSum": { + "p50": 301.88798904418945, + "p90": 306.36799335479736, + "p95": 308.4160089492798, + "p99": 317.31200218200684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 264.44798707962036, + "p90": 267.39200949668884, + "p95": 268.5759961605072, + "p99": 272.41599559783936 + }, + "combine": { + "p50": 264.44798707962036, + "p90": 267.39200949668884, + "p95": 268.5759961605072, + "p99": 272.41599559783936 + }, + "roundtrip": { + "p50": 264.44798707962036, + "p90": 267.39200949668884, + "p95": 268.5759961605072, + "p99": 272.41599559783936 + }, + "isolatedSum": { + "p50": 528.8959741592407, + "p90": 534.7840189933777, + "p95": 537.1519923210144, + "p99": 544.8319911956787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 498.4000027179718, + "p90": 503.10397148132324, + "p95": 504.2880177497864, + "p99": 508.67199897766113 + }, + "combine": { + "p50": 498.4000027179718, + "p90": 503.10397148132324, + "p95": 
504.2880177497864, + "p99": 508.67199897766113 + }, + "roundtrip": { + "p50": 498.4000027179718, + "p90": 503.10397148132324, + "p95": 504.2880177497864, + "p99": 508.67199897766113 + }, + "isolatedSum": { + "p50": 996.8000054359436, + "p90": 1006.2079429626465, + "p95": 1008.5760354995728, + "p99": 1017.3439979553223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 944.8959827423096, + "p90": 950.5919814109802, + "p95": 952.8639912605286, + "p99": 957.472026348114 + }, + "combine": { + "p50": 944.8959827423096, + "p90": 950.5919814109802, + "p95": 952.8639912605286, + "p99": 957.472026348114 + }, + "roundtrip": { + "p50": 944.8959827423096, + "p90": 950.5919814109802, + "p95": 952.8639912605286, + "p99": 957.472026348114 + }, + "isolatedSum": { + "p50": 1889.7919654846191, + "p90": 1901.1839628219604, + "p95": 1905.7279825210571, + "p99": 1914.944052696228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1846.2400436401367, + "p90": 1853.5360097885132, + "p95": 1856.0960292816162, + "p99": 1860.543966293335 + }, + "combine": { + "p50": 1846.2400436401367, + "p90": 1853.5360097885132, + "p95": 1856.0960292816162, + "p99": 1860.543966293335 + }, + "roundtrip": { + "p50": 1846.2400436401367, + "p90": 1853.5360097885132, + "p95": 1856.0960292816162, + "p99": 1860.543966293335 + }, + "isolatedSum": { + "p50": 3692.4800872802734, + "p90": 3707.0720195770264, + "p95": 3712.1920585632324, + "p99": 3721.08793258667 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9db83c21", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_97f2b177", + "comparisonKey": "95c60dd6255cacc8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:06.638623+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.25600147247314, + "p90": 91.64799749851227, + "p95": 93.66399794816971, + "p99": 98.84800016880035 + }, + "combine": { + "p50": 88.25600147247314, + "p90": 91.64799749851227, + "p95": 93.66399794816971, + "p99": 98.84800016880035 + }, + "roundtrip": { + "p50": 88.25600147247314, + "p90": 91.64799749851227, + "p95": 93.66399794816971, + "p99": 98.84800016880035 + }, + "isolatedSum": { + "p50": 176.5120029449463, + "p90": 183.29599499702454, + "p95": 187.32799589633942, + "p99": 197.6960003376007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.4079977273941, + "p90": 151.93599462509155, + "p95": 153.21600437164307, + "p99": 155.8080017566681 + }, + "combine": { + "p50": 149.4079977273941, + "p90": 151.93599462509155, + "p95": 153.21600437164307, + "p99": 155.8080017566681 + }, + "roundtrip": { + "p50": 149.4079977273941, + "p90": 151.93599462509155, + "p95": 153.21600437164307, + "p99": 155.8080017566681 + }, + "isolatedSum": { + "p50": 298.8159954547882, + "p90": 303.8719892501831, + "p95": 306.43200874328613, + "p99": 311.6160035133362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 262.87999749183655, + "p90": 266.30398631095886, + "p95": 267.7440047264099, + "p99": 272.44800329208374 + }, + "combine": { + "p50": 262.87999749183655, + "p90": 266.30398631095886, + "p95": 267.7440047264099, + "p99": 272.44800329208374 + }, + "roundtrip": { + "p50": 262.87999749183655, + "p90": 266.30398631095886, + "p95": 267.7440047264099, + "p99": 272.44800329208374 + }, + "isolatedSum": { + "p50": 525.7599949836731, + "p90": 532.6079726219177, + "p95": 535.4880094528198, + "p99": 544.8960065841675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 496.12799286842346, + "p90": 500.7359981536865, + "p95": 501.8240213394165, + "p99": 506.72000646591187 + }, + "combine": { + "p50": 496.12799286842346, + "p90": 500.7359981536865, + "p95": 501.8240213394165, + "p99": 506.72000646591187 + }, + "roundtrip": { + "p50": 496.12799286842346, + "p90": 500.7359981536865, + "p95": 501.8240213394165, + "p99": 506.72000646591187 + }, + "isolatedSum": { + "p50": 992.2559857368469, + "p90": 1001.471996307373, + "p95": 1003.648042678833, + "p99": 1013.4400129318237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 943.8400268554688, + "p90": 948.7360119819641, + "p95": 950.9760141372681, + "p99": 995.5840110778809 + }, + "combine": { + "p50": 943.8400268554688, + "p90": 948.7360119819641, + "p95": 950.9760141372681, + "p99": 
995.5840110778809 + }, + "roundtrip": { + "p50": 943.8400268554688, + "p90": 948.7360119819641, + "p95": 950.9760141372681, + "p99": 995.5840110778809 + }, + "isolatedSum": { + "p50": 1887.6800537109375, + "p90": 1897.4720239639282, + "p95": 1901.9520282745361, + "p99": 1991.1680221557617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1843.7440395355225, + "p90": 1849.8560190200806, + "p95": 1852.8319597244263, + "p99": 1860.159993171692 + }, + "combine": { + "p50": 1843.7440395355225, + "p90": 1849.8560190200806, + "p95": 1852.8319597244263, + "p99": 1860.159993171692 + }, + "roundtrip": { + "p50": 1843.7440395355225, + "p90": 1849.8560190200806, + "p95": 1852.8319597244263, + "p99": 1860.159993171692 + }, + "isolatedSum": { + "p50": 3687.488079071045, + "p90": 3699.712038040161, + "p95": 3705.6639194488525, + "p99": 3720.319986343384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-01479d6a", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_1e84ea82", + "comparisonKey": "e26af12f9a8f0dae", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:09:35.223524+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.56799918413162, + "p90": 106.78400099277496, + "p95": 107.26399719715118, + "p99": 109.24799740314484 + }, + "combine": { + "p50": 105.56799918413162, + "p90": 106.78400099277496, + "p95": 107.26399719715118, + "p99": 109.24799740314484 + }, + "roundtrip": { + "p50": 105.56799918413162, + "p90": 106.78400099277496, + "p95": 107.26399719715118, + "p99": 109.24799740314484 + }, + "isolatedSum": { + "p50": 211.13599836826324, + "p90": 213.56800198554993, + 
"p95": 214.52799439430237, + "p99": 218.49599480628967 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.88000738620758, + "p90": 188.1919950246811, + "p95": 188.92799317836761, + "p99": 191.29599630832672 + }, + "combine": { + "p50": 186.88000738620758, + "p90": 188.1919950246811, + "p95": 188.92799317836761, + "p99": 191.29599630832672 + }, + "roundtrip": { + "p50": 186.88000738620758, + "p90": 188.1919950246811, + "p95": 188.92799317836761, + "p99": 191.29599630832672 + }, + "isolatedSum": { + "p50": 373.76001477241516, + "p90": 376.3839900493622, + "p95": 377.85598635673523, + "p99": 382.59199261665344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 386.8800103664398, + "p90": 391.2000060081482, + "p95": 392.4799859523773, + "p99": 395.77600359916687 + }, + "combine": { + "p50": 386.8800103664398, + "p90": 391.2000060081482, + "p95": 392.4799859523773, + "p99": 395.77600359916687 + }, + "roundtrip": { + "p50": 386.8800103664398, + "p90": 391.2000060081482, + "p95": 392.4799859523773, + "p99": 395.77600359916687 + }, + "isolatedSum": { + "p50": 773.7600207328796, + "p90": 782.4000120162964, + "p95": 784.9599719047546, + "p99": 791.5520071983337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 
8192, + "dispatch": { + "p50": 764.9279832839966, + "p90": 767.6159739494324, + "p95": 768.3839797973633, + "p99": 776.3519883155823 + }, + "combine": { + "p50": 764.9279832839966, + "p90": 767.6159739494324, + "p95": 768.3839797973633, + "p99": 776.3519883155823 + }, + "roundtrip": { + "p50": 764.9279832839966, + "p90": 767.6159739494324, + "p95": 768.3839797973633, + "p99": 776.3519883155823 + }, + "isolatedSum": { + "p50": 1529.8559665679932, + "p90": 1535.2319478988647, + "p95": 1536.7679595947266, + "p99": 1552.7039766311646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1495.6480264663696, + "p90": 1499.135971069336, + "p95": 1500.9599924087524, + "p99": 1509.4079971313477 + }, + "combine": { + "p50": 1495.6480264663696, + "p90": 1499.135971069336, + "p95": 1500.9599924087524, + "p99": 1509.4079971313477 + }, + "roundtrip": { + "p50": 1495.6480264663696, + "p90": 1499.135971069336, + "p95": 1500.9599924087524, + "p99": 1509.4079971313477 + }, + "isolatedSum": { + "p50": 2991.2960529327393, + "p90": 2998.271942138672, + "p95": 3001.919984817505, + "p99": 3018.8159942626953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2956.6080570220947, + "p90": 2959.5839977264404, + "p95": 2960.9599113464355, + "p99": 2962.5279903411865 + }, + "combine": { + "p50": 2956.6080570220947, + "p90": 2959.5839977264404, + "p95": 2960.9599113464355, + "p99": 2962.5279903411865 + }, + "roundtrip": { + "p50": 2956.6080570220947, + "p90": 
2959.5839977264404, + "p95": 2960.9599113464355, + "p99": 2962.5279903411865 + }, + "isolatedSum": { + "p50": 5913.216114044189, + "p90": 5919.167995452881, + "p95": 5921.919822692871, + "p99": 5925.055980682373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e476f450", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_17199ba8", + "comparisonKey": "2fde9cf0cfe9bd99", + "schemaVersion": 3, + "generatedAt": "2026-07-02T14:42:00.189842+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": 
true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 102.33599692583084, + "p90": 103.32799702882767, + "p95": 103.7760004401207, + "p99": 105.3759977221489 + }, + "combine": { + "p50": 102.33599692583084, + "p90": 103.32799702882767, + "p95": 103.7760004401207, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 102.33599692583084, + "p90": 103.32799702882767, + "p95": 103.7760004401207, + "p99": 105.3759977221489 + }, + "isolatedSum": { + "p50": 204.67199385166168, + "p90": 206.65599405765533, + "p95": 207.5520008802414, + "p99": 210.7519954442978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 188.22400271892548, + "p90": 189.56799805164337, + "p95": 189.98399376869202, + "p99": 190.7840073108673 + }, + "combine": { + "p50": 188.22400271892548, + "p90": 189.56799805164337, + "p95": 189.98399376869202, + "p99": 190.7840073108673 + }, + "roundtrip": { + "p50": 188.22400271892548, + "p90": 189.56799805164337, + "p95": 189.98399376869202, + "p99": 190.7840073108673 + }, + "isolatedSum": { + "p50": 376.44800543785095, + "p90": 379.13599610328674, + "p95": 379.96798753738403, + "p99": 381.5680146217346 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 353.66401076316833, + "p90": 355.103999376297, + "p95": 355.6160032749176, + "p99": 357.695996761322 + }, + "combine": { + "p50": 353.66401076316833, + "p90": 355.103999376297, + "p95": 355.6160032749176, + "p99": 357.695996761322 + }, + "roundtrip": { + "p50": 353.66401076316833, + "p90": 355.103999376297, + "p95": 355.6160032749176, + "p99": 357.695996761322 + }, + "isolatedSum": { + "p50": 707.3280215263367, + "p90": 710.207998752594, + "p95": 711.2320065498352, + "p99": 715.391993522644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 682.0799708366394, + "p90": 683.7760210037231, + "p95": 684.3519806861877, + "p99": 685.9520077705383 + }, + "combine": { + "p50": 682.0799708366394, + "p90": 683.7760210037231, + "p95": 684.3519806861877, + "p99": 685.9520077705383 + }, + "roundtrip": { + "p50": 682.0799708366394, + "p90": 683.7760210037231, + "p95": 684.3519806861877, + "p99": 685.9520077705383 + }, + "isolatedSum": { + "p50": 1364.1599416732788, + "p90": 1367.5520420074463, + "p95": 1368.7039613723755, + "p99": 1371.9040155410767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1331.936001777649, + "p90": 1334.0480327606201, + "p95": 
1335.1680040359497, + "p99": 1340.0640487670898 + }, + "combine": { + "p50": 1331.936001777649, + "p90": 1334.0480327606201, + "p95": 1335.1680040359497, + "p99": 1340.0640487670898 + }, + "roundtrip": { + "p50": 1331.936001777649, + "p90": 1334.0480327606201, + "p95": 1335.1680040359497, + "p99": 1340.0640487670898 + }, + "isolatedSum": { + "p50": 2663.872003555298, + "p90": 2668.0960655212402, + "p95": 2670.3360080718994, + "p99": 2680.1280975341797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2630.9759616851807, + "p90": 2633.1839561462402, + "p95": 2633.824110031128, + "p99": 2636.1279487609863 + }, + "combine": { + "p50": 2630.9759616851807, + "p90": 2633.1839561462402, + "p95": 2633.824110031128, + "p99": 2636.1279487609863 + }, + "roundtrip": { + "p50": 2630.9759616851807, + "p90": 2633.1839561462402, + "p95": 2633.824110031128, + "p99": 2636.1279487609863 + }, + "isolatedSum": { + "p50": 5261.951923370361, + "p90": 5266.3679122924805, + "p95": 5267.648220062256, + "p99": 5272.255897521973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ace48cb3", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_fc142c06", + "comparisonKey": "8322ad702f7ab87f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:42.028946+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + 
"backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 89.56799656152725, + "p90": 92.6079973578453, + "p95": 96.41599655151367, + "p99": 133.88800621032715 + }, + "combine": { + "p50": 89.56799656152725, + "p90": 92.6079973578453, + "p95": 96.41599655151367, + "p99": 133.88800621032715 
+ }, + "roundtrip": { + "p50": 89.56799656152725, + "p90": 92.6079973578453, + "p95": 96.41599655151367, + "p99": 133.88800621032715 + }, + "isolatedSum": { + "p50": 179.1359931230545, + "p90": 185.2159947156906, + "p95": 192.83199310302734, + "p99": 267.7760124206543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 153.53600680828094, + "p90": 156.41599893569946, + "p95": 158.4320068359375, + "p99": 160.76800227165222 + }, + "combine": { + "p50": 153.53600680828094, + "p90": 156.41599893569946, + "p95": 158.4320068359375, + "p99": 160.76800227165222 + }, + "roundtrip": { + "p50": 153.53600680828094, + "p90": 156.41599893569946, + "p95": 158.4320068359375, + "p99": 160.76800227165222 + }, + "isolatedSum": { + "p50": 307.0720136165619, + "p90": 312.8319978713989, + "p95": 316.864013671875, + "p99": 321.53600454330444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 272.3200023174286, + "p90": 275.87199211120605, + "p95": 277.47198939323425, + "p99": 283.1360101699829 + }, + "combine": { + "p50": 272.3200023174286, + "p90": 275.87199211120605, + "p95": 277.47198939323425, + "p99": 283.1360101699829 + }, + "roundtrip": { + "p50": 272.3200023174286, + "p90": 275.87199211120605, + "p95": 277.47198939323425, + "p99": 283.1360101699829 + }, + "isolatedSum": { + "p50": 544.6400046348572, + "p90": 551.7439842224121, + "p95": 554.9439787864685, + "p99": 566.2720203399658 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + 
"combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 515.4560208320618, + "p90": 519.9360251426697, + "p95": 521.4080214500427, + "p99": 532.4159860610962 + }, + "combine": { + "p50": 515.4560208320618, + "p90": 519.9360251426697, + "p95": 521.4080214500427, + "p99": 532.4159860610962 + }, + "roundtrip": { + "p50": 515.4560208320618, + "p90": 519.9360251426697, + "p95": 521.4080214500427, + "p99": 532.4159860610962 + }, + "isolatedSum": { + "p50": 1030.9120416641235, + "p90": 1039.8720502853394, + "p95": 1042.8160429000854, + "p99": 1064.8319721221924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 987.0079755783081, + "p90": 990.880012512207, + "p95": 992.0960068702698, + "p99": 996.3520169258118 + }, + "combine": { + "p50": 987.0079755783081, + "p90": 990.880012512207, + "p95": 992.0960068702698, + "p99": 996.3520169258118 + }, + "roundtrip": { + "p50": 987.0079755783081, + "p90": 990.880012512207, + "p95": 992.0960068702698, + "p99": 996.3520169258118 + }, + "isolatedSum": { + "p50": 1974.0159511566162, + "p90": 1981.760025024414, + "p95": 1984.1920137405396, + "p99": 1992.7040338516235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1926.5600442886353, + "p90": 1931.3600063323975, + "p95": 1933.0240488052368, + "p99": 
1956.2560319900513 + }, + "combine": { + "p50": 1926.5600442886353, + "p90": 1931.3600063323975, + "p95": 1933.0240488052368, + "p99": 1956.2560319900513 + }, + "roundtrip": { + "p50": 1926.5600442886353, + "p90": 1931.3600063323975, + "p95": 1933.0240488052368, + "p99": 1956.2560319900513 + }, + "isolatedSum": { + "p50": 3853.1200885772705, + "p90": 3862.720012664795, + "p95": 3866.0480976104736, + "p99": 3912.5120639801025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-59a47910", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_837dee23", + "comparisonKey": "ba092702bdaf404e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:36.473976+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", 
+ "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.59999942779541, + "p90": 107.10400342941284, + "p95": 108.41599851846695, + "p99": 138.49599659442902 + }, + "combine": { + "p50": 105.59999942779541, + "p90": 107.10400342941284, + "p95": 108.41599851846695, + "p99": 138.49599659442902 + }, + "roundtrip": { + "p50": 105.59999942779541, + "p90": 107.10400342941284, + "p95": 108.41599851846695, + "p99": 138.49599659442902 + }, + "isolatedSum": { + "p50": 211.19999885559082, + "p90": 214.20800685882568, + "p95": 216.8319970369339, + "p99": 276.99199318885803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.08799374103546, + "p90": 187.1359944343567, + "p95": 188.06399405002594, + "p99": 192.4159973859787 + }, + "combine": { + "p50": 185.08799374103546, + "p90": 187.1359944343567, + "p95": 188.06399405002594, + "p99": 192.4159973859787 + }, + "roundtrip": { + "p50": 185.08799374103546, + "p90": 187.1359944343567, + 
"p95": 188.06399405002594, + "p99": 192.4159973859787 + }, + "isolatedSum": { + "p50": 370.1759874820709, + "p90": 374.2719888687134, + "p95": 376.1279881000519, + "p99": 384.8319947719574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 344.89598870277405, + "p90": 348.7040102481842, + "p95": 350.6560027599335, + "p99": 358.62401127815247 + }, + "combine": { + "p50": 344.89598870277405, + "p90": 348.7040102481842, + "p95": 350.6560027599335, + "p99": 358.62401127815247 + }, + "roundtrip": { + "p50": 344.89598870277405, + "p90": 348.7040102481842, + "p95": 350.6560027599335, + "p99": 358.62401127815247 + }, + "isolatedSum": { + "p50": 689.7919774055481, + "p90": 697.4080204963684, + "p95": 701.312005519867, + "p99": 717.2480225563049 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 706.496000289917, + "p90": 711.1679911613464, + "p95": 713.0240201950073, + "p99": 718.3039784431458 + }, + "combine": { + "p50": 706.496000289917, + "p90": 711.1679911613464, + "p95": 713.0240201950073, + "p99": 718.3039784431458 + }, + "roundtrip": { + "p50": 706.496000289917, + "p90": 711.1679911613464, + "p95": 713.0240201950073, + "p99": 718.3039784431458 + }, + "isolatedSum": { + "p50": 1412.992000579834, + "p90": 1422.3359823226929, + "p95": 1426.0480403900146, + "p99": 1436.6079568862915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 
8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1424.191951751709, + "p90": 1429.7280311584473, + "p95": 1431.712031364441, + "p99": 1437.8559589385986 + }, + "combine": { + "p50": 1424.191951751709, + "p90": 1429.7280311584473, + "p95": 1431.712031364441, + "p99": 1437.8559589385986 + }, + "roundtrip": { + "p50": 1424.191951751709, + "p90": 1429.7280311584473, + "p95": 1431.712031364441, + "p99": 1437.8559589385986 + }, + "isolatedSum": { + "p50": 2848.383903503418, + "p90": 2859.4560623168945, + "p95": 2863.424062728882, + "p99": 2875.7119178771973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2870.2399730682373, + "p90": 2877.72798538208, + "p95": 2880.8000087738037, + "p99": 2885.215997695923 + }, + "combine": { + "p50": 2870.2399730682373, + "p90": 2877.72798538208, + "p95": 2880.8000087738037, + "p99": 2885.215997695923 + }, + "roundtrip": { + "p50": 2870.2399730682373, + "p90": 2877.72798538208, + "p95": 2880.8000087738037, + "p99": 2885.215997695923 + }, + "isolatedSum": { + "p50": 5740.479946136475, + "p90": 5755.45597076416, + "p95": 5761.600017547607, + "p99": 5770.431995391846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-27c6d780", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_b7296bdb", + "comparisonKey": 
"530e4bee13b53424", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:00.652872+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
90.33600240945816, + "p90": 93.05600076913834, + "p95": 95.77599912881851, + "p99": 99.71199929714203 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 93.05600076913834, + "p95": 95.77599912881851, + "p99": 99.71199929714203 + }, + "roundtrip": { + "p50": 90.33600240945816, + "p90": 93.05600076913834, + "p95": 95.77599912881851, + "p99": 99.71199929714203 + }, + "isolatedSum": { + "p50": 180.67200481891632, + "p90": 186.11200153827667, + "p95": 191.55199825763702, + "p99": 199.42399859428406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 150.36800503730774, + "p90": 153.6320000886917, + "p95": 155.39200603961945, + "p99": 159.87199544906616 + }, + "combine": { + "p50": 150.36800503730774, + "p90": 153.6320000886917, + "p95": 155.39200603961945, + "p99": 159.87199544906616 + }, + "roundtrip": { + "p50": 150.36800503730774, + "p90": 153.6320000886917, + "p95": 155.39200603961945, + "p99": 159.87199544906616 + }, + "isolatedSum": { + "p50": 300.7360100746155, + "p90": 307.2640001773834, + "p95": 310.7840120792389, + "p99": 319.7439908981323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 265.9200131893158, + "p90": 268.8640058040619, + "p95": 269.98400688171387, + "p99": 277.18400955200195 + }, + "combine": { + "p50": 265.9200131893158, + "p90": 268.8640058040619, + "p95": 269.98400688171387, + "p99": 277.18400955200195 + }, + "roundtrip": { + "p50": 265.9200131893158, + "p90": 268.8640058040619, + "p95": 269.98400688171387, + 
"p99": 277.18400955200195 + }, + "isolatedSum": { + "p50": 531.8400263786316, + "p90": 537.7280116081238, + "p95": 539.9680137634277, + "p99": 554.3680191040039 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 497.5680112838745, + "p90": 501.8240213394165, + "p95": 503.4239888191223, + "p99": 506.816029548645 + }, + "combine": { + "p50": 497.5680112838745, + "p90": 501.8240213394165, + "p95": 503.4239888191223, + "p99": 506.816029548645 + }, + "roundtrip": { + "p50": 497.5680112838745, + "p90": 501.8240213394165, + "p95": 503.4239888191223, + "p99": 506.816029548645 + }, + "isolatedSum": { + "p50": 995.136022567749, + "p90": 1003.648042678833, + "p95": 1006.8479776382446, + "p99": 1013.63205909729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 951.0719776153564, + "p90": 956.3199877738953, + "p95": 958.2080245018005, + "p99": 962.5279903411865 + }, + "combine": { + "p50": 951.0719776153564, + "p90": 956.3199877738953, + "p95": 958.2080245018005, + "p99": 962.5279903411865 + }, + "roundtrip": { + "p50": 951.0719776153564, + "p90": 956.3199877738953, + "p95": 958.2080245018005, + "p99": 962.5279903411865 + }, + "isolatedSum": { + "p50": 1902.143955230713, + "p90": 1912.6399755477905, + "p95": 1916.416049003601, + "p99": 1925.055980682373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 8, + "stragglerRank": 7, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1857.5040102005005, + "p90": 1863.103985786438, + "p95": 1865.440011024475, + "p99": 1868.831992149353 + }, + "combine": { + "p50": 1857.5040102005005, + "p90": 1863.103985786438, + "p95": 1865.440011024475, + "p99": 1868.831992149353 + }, + "roundtrip": { + "p50": 1857.5040102005005, + "p90": 1863.103985786438, + "p95": 1865.440011024475, + "p99": 1868.831992149353 + }, + "isolatedSum": { + "p50": 3715.008020401001, + "p90": 3726.207971572876, + "p95": 3730.88002204895, + "p99": 3737.663984298706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2dcf860a", + "identity": "h100|flashinfer|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_47918098", + "comparisonKey": "82b5373c4bb3be0e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:35.581132+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.92800176143646, + "p90": 105.92000186443329, + "p95": 106.23999685049057, + "p99": 109.15199667215347 + }, + "combine": { + "p50": 104.92800176143646, + "p90": 105.92000186443329, + "p95": 106.23999685049057, + "p99": 109.15199667215347 + }, + "roundtrip": { + "p50": 104.92800176143646, + "p90": 105.92000186443329, + "p95": 106.23999685049057, + "p99": 109.15199667215347 + }, + "isolatedSum": { + "p50": 209.85600352287292, + "p90": 211.84000372886658, + "p95": 212.47999370098114, + "p99": 218.30399334430695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.65599822998047, + "p90": 187.8719925880432, + "p95": 188.25599551200867, + "p99": 
189.4720047712326 + }, + "combine": { + "p50": 186.65599822998047, + "p90": 187.8719925880432, + "p95": 188.25599551200867, + "p99": 189.4720047712326 + }, + "roundtrip": { + "p50": 186.65599822998047, + "p90": 187.8719925880432, + "p95": 188.25599551200867, + "p99": 189.4720047712326 + }, + "isolatedSum": { + "p50": 373.31199645996094, + "p90": 375.7439851760864, + "p95": 376.51199102401733, + "p99": 378.9440095424652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 385.50400733947754, + "p90": 389.69600200653076, + "p95": 390.8799886703491, + "p99": 394.7199881076813 + }, + "combine": { + "p50": 385.50400733947754, + "p90": 389.69600200653076, + "p95": 390.8799886703491, + "p99": 394.7199881076813 + }, + "roundtrip": { + "p50": 385.50400733947754, + "p90": 389.69600200653076, + "p95": 390.8799886703491, + "p99": 394.7199881076813 + }, + "isolatedSum": { + "p50": 771.0080146789551, + "p90": 779.3920040130615, + "p95": 781.7599773406982, + "p99": 789.4399762153625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 765.1519775390625, + "p90": 767.8080201148987, + "p95": 769.4399952888489, + "p99": 774.0160226821899 + }, + "combine": { + "p50": 765.1519775390625, + "p90": 767.8080201148987, + "p95": 769.4399952888489, + "p99": 774.0160226821899 + }, + "roundtrip": { + "p50": 765.1519775390625, + "p90": 767.8080201148987, + "p95": 769.4399952888489, + "p99": 774.0160226821899 + }, + "isolatedSum": { + "p50": 1530.303955078125, + "p90": 
1535.6160402297974, + "p95": 1538.8799905776978, + "p99": 1548.0320453643799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1495.360016822815, + "p90": 1498.5599517822266, + "p95": 1499.9040365219116, + "p99": 1502.6559829711914 + }, + "combine": { + "p50": 1495.360016822815, + "p90": 1498.5599517822266, + "p95": 1499.9040365219116, + "p99": 1502.6559829711914 + }, + "roundtrip": { + "p50": 1495.360016822815, + "p90": 1498.5599517822266, + "p95": 1499.9040365219116, + "p99": 1502.6559829711914 + }, + "isolatedSum": { + "p50": 2990.72003364563, + "p90": 2997.119903564453, + "p95": 2999.8080730438232, + "p99": 3005.311965942383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2956.3839435577393, + "p90": 2959.520101547241, + "p95": 2960.767984390259, + "p99": 2966.3679599761963 + }, + "combine": { + "p50": 2956.3839435577393, + "p90": 2959.520101547241, + "p95": 2960.767984390259, + "p99": 2966.3679599761963 + }, + "roundtrip": { + "p50": 2956.3839435577393, + "p90": 2959.520101547241, + "p95": 2960.767984390259, + "p99": 2966.3679599761963 + }, + "isolatedSum": { + "p50": 5912.7678871154785, + "p90": 5919.040203094482, + "p95": 5921.535968780518, + "p99": 5932.735919952393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + 
}, + { + "id": "cx-9325cc40", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_9b70e3f6", + "comparisonKey": "8e4bf60b4fd9cfd3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:59.082606+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_13", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 89.1840010881424, + "p90": 92.83199906349182, + "p95": 94.46399658918381, + "p99": 98.78399968147278 + }, + "combine": { + "p50": 89.1840010881424, + "p90": 92.83199906349182, + "p95": 94.46399658918381, + "p99": 98.78399968147278 + }, + "roundtrip": { + "p50": 89.1840010881424, + "p90": 92.83199906349182, + "p95": 94.46399658918381, + "p99": 98.78399968147278 + }, + "isolatedSum": { + "p50": 178.3680021762848, + "p90": 185.66399812698364, + "p95": 188.92799317836761, + "p99": 197.56799936294556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 150.11200308799744, + "p90": 152.5759994983673, + "p95": 153.888002038002, + "p99": 159.67999398708344 + }, + "combine": { + "p50": 150.11200308799744, + "p90": 152.5759994983673, + "p95": 153.888002038002, + "p99": 159.67999398708344 + }, + "roundtrip": { + "p50": 150.11200308799744, + "p90": 152.5759994983673, + "p95": 153.888002038002, + "p99": 159.67999398708344 + }, + "isolatedSum": { + "p50": 300.2240061759949, + "p90": 305.1519989967346, + "p95": 307.776004076004, + "p99": 319.35998797416687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 264.51200246810913, + "p90": 268.3199942111969, + "p95": 269.98400688171387, + "p99": 
275.10398626327515 + }, + "combine": { + "p50": 264.51200246810913, + "p90": 268.3199942111969, + "p95": 269.98400688171387, + "p99": 275.10398626327515 + }, + "roundtrip": { + "p50": 264.51200246810913, + "p90": 268.3199942111969, + "p95": 269.98400688171387, + "p99": 275.10398626327515 + }, + "isolatedSum": { + "p50": 529.0240049362183, + "p90": 536.6399884223938, + "p95": 539.9680137634277, + "p99": 550.2079725265503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 496.73599004745483, + "p90": 500.8959770202637, + "p95": 502.1439790725708, + "p99": 509.7600221633911 + }, + "combine": { + "p50": 496.73599004745483, + "p90": 500.8959770202637, + "p95": 502.1439790725708, + "p99": 509.7600221633911 + }, + "roundtrip": { + "p50": 496.73599004745483, + "p90": 500.8959770202637, + "p95": 502.1439790725708, + "p99": 509.7600221633911 + }, + "isolatedSum": { + "p50": 993.4719800949097, + "p90": 1001.7919540405273, + "p95": 1004.2879581451416, + "p99": 1019.5200443267822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 954.0159702301025, + "p90": 958.2399725914001, + "p95": 959.551990032196, + "p99": 965.2479887008667 + }, + "combine": { + "p50": 954.0159702301025, + "p90": 958.2399725914001, + "p95": 959.551990032196, + "p99": 965.2479887008667 + }, + "roundtrip": { + "p50": 954.0159702301025, + "p90": 958.2399725914001, + "p95": 959.551990032196, + "p99": 965.2479887008667 + }, + "isolatedSum": { + "p50": 1908.031940460205, + 
"p90": 1916.4799451828003, + "p95": 1919.103980064392, + "p99": 1930.4959774017334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1870.303988456726, + "p90": 1875.9679794311523, + "p95": 1878.81600856781, + "p99": 1888.543963432312 + }, + "combine": { + "p50": 1870.303988456726, + "p90": 1875.9679794311523, + "p95": 1878.81600856781, + "p99": 1888.543963432312 + }, + "roundtrip": { + "p50": 1870.303988456726, + "p90": 1875.9679794311523, + "p95": 1878.81600856781, + "p99": 1888.543963432312 + }, + "isolatedSum": { + "p50": 3740.607976913452, + "p90": 3751.9359588623047, + "p95": 3757.63201713562, + "p99": 3777.087926864624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da590ad6", + "identity": "h100|flashinfer|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_c921954c", + "comparisonKey": "1edb556fde9aa40b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:31.883935+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_01", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · bf16 · zipf+eplb", + "model": "DeepSeek-V3 
(EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.19200098514557, + "p90": 92.79999881982803, + "p95": 94.65599805116653, + "p99": 99.16800260543823 + }, + "combine": { + "p50": 88.19200098514557, + "p90": 92.79999881982803, + "p95": 94.65599805116653, + "p99": 99.16800260543823 + }, + "roundtrip": { + "p50": 88.19200098514557, + "p90": 92.79999881982803, + "p95": 94.65599805116653, + "p99": 99.16800260543823 + }, + "isolatedSum": { + "p50": 176.38400197029114, + "p90": 185.59999763965607, + "p95": 189.31199610233307, + "p99": 198.33600521087646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + 
"recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.88799393177032, + "p90": 153.08800339698792, + "p95": 154.52800691127777, + "p99": 196.03200256824493 + }, + "combine": { + "p50": 149.88799393177032, + "p90": 153.08800339698792, + "p95": 154.52800691127777, + "p99": 196.03200256824493 + }, + "roundtrip": { + "p50": 149.88799393177032, + "p90": 153.08800339698792, + "p95": 154.52800691127777, + "p99": 196.03200256824493 + }, + "isolatedSum": { + "p50": 299.77598786354065, + "p90": 306.17600679397583, + "p95": 309.05601382255554, + "p99": 392.06400513648987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 264.16000723838806, + "p90": 267.2320008277893, + "p95": 268.92799139022827, + "p99": 271.90399169921875 + }, + "combine": { + "p50": 264.16000723838806, + "p90": 267.2320008277893, + "p95": 268.92799139022827, + "p99": 271.90399169921875 + }, + "roundtrip": { + "p50": 264.16000723838806, + "p90": 267.2320008277893, + "p95": 268.92799139022827, + "p99": 271.90399169921875 + }, + "isolatedSum": { + "p50": 528.3200144767761, + "p90": 534.4640016555786, + "p95": 537.8559827804565, + "p99": 543.8079833984375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 496.288001537323, + "p90": 499.80801343917847, + "p95": 500.95999240875244, + "p99": 505.43999671936035 + }, + "combine": { + "p50": 496.288001537323, 
+ "p90": 499.80801343917847, + "p95": 500.95999240875244, + "p99": 505.43999671936035 + }, + "roundtrip": { + "p50": 496.288001537323, + "p90": 499.80801343917847, + "p95": 500.95999240875244, + "p99": 505.43999671936035 + }, + "isolatedSum": { + "p50": 992.576003074646, + "p90": 999.6160268783569, + "p95": 1001.9199848175049, + "p99": 1010.8799934387207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 954.2080163955688, + "p90": 958.9440226554871, + "p95": 960.3840112686157, + "p99": 967.7760004997253 + }, + "combine": { + "p50": 954.2080163955688, + "p90": 958.9440226554871, + "p95": 960.3840112686157, + "p99": 967.7760004997253 + }, + "roundtrip": { + "p50": 954.2080163955688, + "p90": 958.9440226554871, + "p95": 960.3840112686157, + "p99": 967.7760004997253 + }, + "isolatedSum": { + "p50": 1908.4160327911377, + "p90": 1917.8880453109741, + "p95": 1920.7680225372314, + "p99": 1935.5520009994507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1872.0639944076538, + "p90": 1877.951979637146, + "p95": 1879.6800374984741, + "p99": 1882.8799724578857 + }, + "combine": { + "p50": 1872.0639944076538, + "p90": 1877.951979637146, + "p95": 1879.6800374984741, + "p99": 1882.8799724578857 + }, + "roundtrip": { + "p50": 1872.0639944076538, + "p90": 1877.951979637146, + "p95": 1879.6800374984741, + "p99": 1882.8799724578857 + }, + "isolatedSum": { + "p50": 3744.1279888153076, + "p90": 3755.903959274292, + "p95": 3759.3600749969482, 
+ "p99": 3765.7599449157715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6c692445", + "identity": "h100|flashinfer|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_5ba8257e", + "comparisonKey": "7ac85b4ec0b69909", + "schemaVersion": 3, + "generatedAt": "2026-07-02T12:43:21.862645+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_03", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · flashinfer · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.38399630784988, + "p90": 103.00800204277039, + "p95": 105.6319996714592, + "p99": 108.38399827480316 + }, + "combine": { + "p50": 96.38399630784988, + "p90": 103.00800204277039, + "p95": 105.6319996714592, + "p99": 108.38399827480316 + }, + "roundtrip": { + "p50": 96.38399630784988, + "p90": 103.00800204277039, + "p95": 105.6319996714592, + "p99": 108.38399827480316 + }, + "isolatedSum": { + "p50": 192.76799261569977, + "p90": 206.01600408554077, + "p95": 211.2639993429184, + "p99": 216.76799654960632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 135.42400300502777, + "p90": 142.11200177669525, + "p95": 144.83200013637543, + "p99": 147.2959965467453 + }, + "combine": { + "p50": 135.42400300502777, + "p90": 142.11200177669525, + "p95": 144.83200013637543, + "p99": 147.2959965467453 + }, + "roundtrip": { + "p50": 135.42400300502777, + "p90": 142.11200177669525, + "p95": 144.83200013637543, + "p99": 147.2959965467453 + }, + "isolatedSum": { + "p50": 270.84800601005554, + "p90": 284.2240035533905, + "p95": 289.66400027275085, + "p99": 294.5919930934906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 220.32000124454498, + "p90": 223.90399873256683, + "p95": 225.66400468349457, + "p99": 228.7999987602234 + }, + "combine": { + "p50": 220.32000124454498, + "p90": 223.90399873256683, + "p95": 225.66400468349457, + "p99": 228.7999987602234 + }, + "roundtrip": { + "p50": 220.32000124454498, + "p90": 223.90399873256683, + "p95": 225.66400468349457, + "p99": 228.7999987602234 + }, + "isolatedSum": { + "p50": 440.64000248908997, + "p90": 447.80799746513367, + "p95": 451.32800936698914, + "p99": 457.5999975204468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 404.9600064754486, + "p90": 410.2720022201538, + "p95": 413.08799386024475, + "p99": 482.2399914264679 + }, + "combine": { + "p50": 404.9600064754486, + "p90": 410.2720022201538, + "p95": 413.08799386024475, + "p99": 482.2399914264679 + }, + "roundtrip": { + "p50": 404.9600064754486, + "p90": 410.2720022201538, + "p95": 413.08799386024475, + "p99": 482.2399914264679 + }, + "isolatedSum": { + "p50": 809.9200129508972, + "p90": 820.5440044403076, + "p95": 826.1759877204895, + "p99": 964.4799828529358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 761.8560194969177, + "p90": 766.4639949798584, + "p95": 768.064022064209, + "p99": 770.8799839019775 + }, + "combine": { + "p50": 761.8560194969177, + "p90": 766.4639949798584, + "p95": 768.064022064209, + "p99": 770.8799839019775 + }, + "roundtrip": { + 
"p50": 761.8560194969177, + "p90": 766.4639949798584, + "p95": 768.064022064209, + "p99": 770.8799839019775 + }, + "isolatedSum": { + "p50": 1523.7120389938354, + "p90": 1532.9279899597168, + "p95": 1536.128044128418, + "p99": 1541.759967803955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1477.120041847229, + "p90": 1483.9040040969849, + "p95": 1485.7920408248901, + "p99": 1489.408016204834 + }, + "combine": { + "p50": 1477.120041847229, + "p90": 1483.9040040969849, + "p95": 1485.7920408248901, + "p99": 1489.408016204834 + }, + "roundtrip": { + "p50": 1477.120041847229, + "p90": 1483.9040040969849, + "p95": 1485.7920408248901, + "p99": 1489.408016204834 + }, + "isolatedSum": { + "p50": 2954.240083694458, + "p90": 2967.8080081939697, + "p95": 2971.5840816497803, + "p99": 2978.816032409668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5b2b72d", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_2a048167", + "comparisonKey": "94d694b69e8acccb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:34:58.588443+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 385.343998670578, + "p90": 404.4159948825836, + "p95": 4127.967834472656, + "p99": 4940.000057220459 + }, + "combine": { + "p50": 158.52800011634827, + "p90": 166.30400717258453, + "p95": 212.19199895858765, + "p99": 5270.143985748291 + }, + "roundtrip": { + "p50": 454.367995262146, + "p90": 610.2719902992249, + "p95": 4650.688171386719, + "p99": 5620.255947113037 + }, + "isolatedSum": { + "p50": 543.8719987869263, + "p90": 570.7200020551682, + "p95": 4340.159833431244, + "p99": 10210.14404296875 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 308.6079955101013, + "p90": 399.7119963169098, + "p95": 488.2560074329376, + "p99": 5141.664028167725 + }, + "combine": { + "p50": 115.84000289440155, + "p90": 163.00800442695618, + "p95": 170.1440066099167, + "p99": 4271.071910858154 + }, + "roundtrip": { + "p50": 455.9679925441742, + "p90": 594.4960117340088, + "p95": 3869.503974914551, + "p99": 5629.024028778076 + }, + "isolatedSum": { + "p50": 424.44799840450287, + "p90": 562.720000743866, + "p95": 658.4000140428543, + "p99": 9412.735939025879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 324.5759904384613, + "p90": 450.5920112133026, + "p95": 2074.5599269866943, + "p99": 5241.663932800293 + }, + "combine": { + "p50": 118.14399808645248, + "p90": 170.01600563526154, + "p95": 176.38400197029114, + "p99": 4612.895965576172 + }, + "roundtrip": { + "p50": 468.1920111179352, + "p90": 623.1039762496948, + "p95": 1825.760006904602, + "p99": 5668.032169342041 + }, + "isolatedSum": { + "p50": 442.7199885249138, + "p90": 620.6080168485641, + "p95": 2250.9439289569855, + "p99": 9854.559898376465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 320.0959861278534, + "p90": 410.8479917049408, + "p95": 502.1759867668152, + "p99": 4714.975833892822 + }, + 
"combine": { + "p50": 118.9119964838028, + "p90": 165.3120070695877, + "p95": 208.51199328899384, + "p99": 5202.0158767700195 + }, + "roundtrip": { + "p50": 468.1279957294464, + "p90": 601.9840240478516, + "p95": 4153.279781341553, + "p99": 5699.711799621582 + }, + "isolatedSum": { + "p50": 439.0079826116562, + "p90": 576.1599987745285, + "p95": 710.687980055809, + "p99": 9916.991710662842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 321.9839930534363, + "p90": 415.0719940662384, + "p95": 521.120011806488, + "p99": 5289.216041564941 + }, + "combine": { + "p50": 118.40000003576279, + "p90": 164.89599645137787, + "p95": 170.43200135231018, + "p99": 4124.1278648376465 + }, + "roundtrip": { + "p50": 470.0480103492737, + "p90": 603.5839915275574, + "p95": 4008.8958740234375, + "p99": 5698.016166687012 + }, + "isolatedSum": { + "p50": 440.38399308919907, + "p90": 579.9679905176163, + "p95": 691.5520131587982, + "p99": 9413.343906402588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 338.0480110645294, + "p90": 427.264004945755, + "p95": 1314.303994178772, + "p99": 4923.903942108154 + }, + "combine": { + "p50": 119.13599818944931, + "p90": 163.68000209331512, + "p95": 167.39200055599213, + "p99": 3690.8481121063232 + }, + "roundtrip": { + "p50": 489.56799507141113, + "p90": 614.9439811706543, + "p95": 3822.07989692688, + "p99": 5404.575824737549 + }, + "isolatedSum": { + "p50": 457.18400925397873, + "p90": 590.9440070390701, + "p95": 1481.695994734764, 
+ "p99": 8614.752054214478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 339.26400542259216, + "p90": 445.82399725914, + "p95": 510.3679895401001, + "p99": 4646.880149841309 + }, + "combine": { + "p50": 149.34399724006653, + "p90": 173.43999445438385, + "p95": 183.3599954843521, + "p99": 4800.928115844727 + }, + "roundtrip": { + "p50": 483.5520088672638, + "p90": 645.7599997520447, + "p95": 964.5119905471802, + "p99": 5480.576038360596 + }, + "isolatedSum": { + "p50": 488.6080026626587, + "p90": 619.2639917135239, + "p95": 693.7279850244522, + "p99": 9447.808265686035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 369.4719970226288, + "p90": 444.95999813079834, + "p95": 577.9839754104614, + "p99": 4763.743877410889 + }, + "combine": { + "p50": 248.6719936132431, + "p90": 262.5280022621155, + "p95": 350.6560027599335, + "p99": 3828.7360668182373 + }, + "roundtrip": { + "p50": 604.7679781913757, + "p90": 680.4479956626892, + "p95": 3098.047971725464, + "p99": 4766.143798828125 + }, + "isolatedSum": { + "p50": 618.1439906358719, + "p90": 707.4880003929138, + "p95": 928.6399781703949, + "p99": 8592.479944229126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-519f4961", + "identity": 
"h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h100_7e3a534d", + "comparisonKey": "8a3860dcd8694af6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:54.173932+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + 
"sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 323.743999004364, + "p90": 458.49600434303284, + "p95": 4237.023830413818, + "p99": 5125.664234161377 + }, + "combine": { + "p50": 123.03999811410904, + "p90": 129.50399518013, + "p95": 148.47999811172485, + "p99": 4658.912181854248 + }, + "roundtrip": { + "p50": 461.88798546791077, + "p90": 544.9280142784119, + "p95": 4433.152198791504, + "p99": 5592.927932739258 + }, + "isolatedSum": { + "p50": 446.78399711847305, + "p90": 587.9999995231628, + "p95": 4385.503828525543, + "p99": 9784.576416015625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 346.6239869594574, + "p90": 485.24799942970276, + "p95": 3717.3120975494385, + "p99": 5331.90393447876 + }, + "combine": { + "p50": 127.93600559234619, + "p90": 190.72000682353973, + "p95": 233.43999683856964, + "p99": 4505.856037139893 + }, + "roundtrip": { + "p50": 487.3279929161072, + "p90": 768.8959836959839, + "p95": 4350.848197937012, + "p99": 5407.616138458252 + }, + "isolatedSum": { + "p50": 474.5599925518036, + "p90": 675.9680062532425, + "p95": 3950.752094388008, + "p99": 9837.759971618652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 414.46399688720703, + "p90": 521.1520195007324, + "p95": 3828.416109085083, + "p99": 4676.640033721924 + }, + "combine": { + "p50": 281.3439965248108, + "p90": 301.2480139732361, + "p95": 2287.3599529266357, + "p99": 
3716.7680263519287 + }, + "roundtrip": { + "p50": 663.8720035552979, + "p90": 759.4559788703918, + "p95": 3950.2720832824707, + "p99": 4427.296161651611 + }, + "isolatedSum": { + "p50": 695.8079934120178, + "p90": 822.4000334739685, + "p95": 6115.776062011719, + "p99": 8393.408060073853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b3df04e2", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h100_a4b9bbc7", + "comparisonKey": "d507c5efd821b0bd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:11.206938+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 311.8720054626465, + "p90": 434.7200095653534, + "p95": 3922.015905380249, + "p99": 5388.319969177246 + }, + "combine": { + "p50": 116.5120005607605, + "p90": 168.86399686336517, + "p95": 191.39200448989868, + "p99": 4998.271942138672 + }, + "roundtrip": { + "p50": 452.09598541259766, + "p90": 630.1119923591614, + "p95": 4354.464054107666, + "p99": 5782.112121582031 + }, + "isolatedSum": { + "p50": 428.384006023407, + "p90": 603.5840064287186, + "p95": 4113.407909870148, + "p99": 10386.591911315918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 335.9679877758026, + "p90": 475.42399168014526, + "p95": 2349.0240573883057, + "p99": 4965.184211730957 + }, + "combine": { + "p50": 116.73600226640701, + "p90": 173.8560050725937, + "p95": 208.73600244522095, + "p99": 4158.336162567139 + }, + "roundtrip": { + "p50": 468.51199865341187, + "p90": 668.2239770889282, + "p95": 3820.1279640197754, + "p99": 5703.104019165039 + }, + "isolatedSum": { + "p50": 452.7039900422096, + "p90": 649.279996752739, + 
"p95": 2557.7600598335266, + "p99": 9123.520374298096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 390.81600308418274, + "p90": 480.73598742485046, + "p95": 3717.7278995513916, + "p99": 5449.088096618652 + }, + "combine": { + "p50": 278.30401062965393, + "p90": 301.2799918651581, + "p95": 2962.1760845184326, + "p99": 4059.807777404785 + }, + "roundtrip": { + "p50": 653.1199812889099, + "p90": 771.8080282211304, + "p95": 3662.4319553375244, + "p99": 4593.599796295166 + }, + "isolatedSum": { + "p50": 669.1200137138367, + "p90": 782.0159792900085, + "p95": 6679.903984069824, + "p99": 9508.895874023438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-726d5f58", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h100_a5b9bd5a", + "comparisonKey": "0650d0c9bccbf029", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:27.762457+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 313.4720027446747, + "p90": 423.8080084323883, + "p95": 463.6799991130829, + "p99": 5357.183933258057 + }, + "combine": { + "p50": 118.6240017414093, + "p90": 124.60800260305405, + "p95": 127.93600559234619, + "p99": 4708.064079284668 + }, + "roundtrip": { + "p50": 449.6000111103058, + "p90": 474.7520089149475, + "p95": 574.176013469696, + "p99": 5630.720138549805 + }, + "isolatedSum": { + "p50": 432.096004486084, + "p90": 548.4160110354424, + "p95": 591.6160047054291, + "p99": 10065.248012542725 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 340.1919901371002, + "p90": 464.28799629211426, + "p95": 592.6719903945923, + "p99": 5176.735877990723 + }, + "combine": { + "p50": 122.94399738311768, + "p90": 177.98399925231934, + "p95": 183.74399840831757, + "p99": 4767.136096954346 + }, + "roundtrip": { + "p50": 474.14401173591614, + "p90": 649.1199731826782, + "p95": 668.0960059165955, + "p99": 5054.912090301514 + }, + "isolatedSum": { + "p50": 463.1359875202179, + "p90": 642.2719955444336, + "p95": 776.4159888029099, + "p99": 9943.871974945068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 405.2799940109253, + "p90": 483.7439954280853, + "p95": 589.6639823913574, + "p99": 4780.159950256348 + }, + "combine": { + "p50": 281.2800109386444, + "p90": 301.82400345802307, + "p95": 313.85600566864014, + "p99": 3638.943910598755 + }, + "roundtrip": { + "p50": 660.256028175354, + "p90": 738.5920286178589, + "p95": 1040.3519868850708, + "p99": 4230.8478355407715 + }, + "isolatedSum": { + "p50": 686.5600049495697, + "p90": 785.5679988861084, + "p95": 903.5199880599976, + "p99": 8419.103860855103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a866faa", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_a6b9beed", + "comparisonKey": "0ff712663be38eec", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T10:53:45.085132+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 318.87999176979065, + "p90": 438.9120042324066, + "p95": 3513.63205909729, + "p99": 
5940.735816955566 + }, + "combine": { + "p50": 121.88799679279327, + "p90": 171.9679981470108, + "p95": 178.1120002269745, + "p99": 4461.5678787231445 + }, + "roundtrip": { + "p50": 461.7280066013336, + "p90": 617.1200275421143, + "p95": 751.2959837913513, + "p99": 5724.512100219727 + }, + "isolatedSum": { + "p50": 440.7679885625839, + "p90": 610.8800023794174, + "p95": 3691.7440593242645, + "p99": 10402.303695678711 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 428.41601371765137, + "p90": 465.1840031147003, + "p95": 2386.6560459136963, + "p99": 4815.552234649658 + }, + "combine": { + "p50": 167.04000532627106, + "p90": 186.17600202560425, + "p95": 190.5599981546402, + "p99": 365.31201004981995 + }, + "roundtrip": { + "p50": 478.36801409721375, + "p90": 656.7040085792542, + "p95": 785.6960296630859, + "p99": 5255.743980407715 + }, + "isolatedSum": { + "p50": 595.4560190439224, + "p90": 651.3600051403046, + "p95": 2577.2160440683365, + "p99": 5180.864244699478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 401.88801288604736, + "p90": 488.6400103569031, + "p95": 4026.144027709961, + "p99": 4965.919971466064 + }, + "combine": { + "p50": 282.71999955177307, + "p90": 293.1840121746063, + "p95": 328.6080062389374, + "p99": 3387.615919113159 + }, + "roundtrip": { + "p50": 657.8559875488281, + "p90": 740.4159903526306, + "p95": 3349.8239517211914, + "p99": 4066.0481452941895 + }, + "isolatedSum": { + "p50": 684.6080124378204, + "p90": 
781.8240225315094, + "p95": 4354.752033948898, + "p99": 8353.535890579224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1b7010f4", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_ad30ea71", + "comparisonKey": "12fafe17cd7453c1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:00.697385+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": 
null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 305.6960105895996, + "p90": 362.91199922561646, + "p95": 511.7120146751404, + "p99": 5352.479934692383 + }, + "combine": { + "p50": 115.9679964184761, + "p90": 124.35200065374374, + "p95": 132.60799646377563, + "p99": 4885.4079246521 + }, + "roundtrip": { + "p50": 439.58398699760437, + "p90": 545.3760027885437, + "p95": 4070.6558227539062, + "p99": 5560.60791015625 + }, + "isolatedSum": { + "p50": 421.6640070080757, + "p90": 487.2639998793602, + "p95": 644.320011138916, + "p99": 10237.887859344482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 306.71998858451843, + "p90": 341.8239951133728, + "p95": 600.1279950141907, + "p99": 5037.280082702637 + }, + "combine": { + "p50": 116.57600104808807, + "p90": 125.02400577068329, + "p95": 202.30400562286377, + "p99": 4859.4560623168945 + }, + "roundtrip": { + "p50": 441.567987203598, + "p90": 521.9839811325073, + "p95": 4169.472217559814, + "p99": 5444.320201873779 + }, + "isolatedSum": { + "p50": 423.2959896326065, + "p90": 466.8480008840561, + "p95": 802.4320006370544, + "p99": 9896.736145019531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 322.4320113658905, + "p90": 452.9600143432617, + "p95": 2732.2239875793457, + "p99": 5020.671844482422 + }, + "combine": { + "p50": 119.80800330638885, + "p90": 178.20799350738525, + "p95": 185.12000143527985, + "p99": 5078.847885131836 + }, + "roundtrip": { + "p50": 460.25601029396057, + "p90": 640.7679915428162, + "p95": 3785.151958465576, + "p99": 5522.272109985352 + }, + "isolatedSum": { + "p50": 442.24001467227936, + "p90": 631.168007850647, + "p95": 2917.3439890146255, + "p99": 10099.519729614258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 315.67999720573425, + "p90": 342.78398752212524, + "p95": 3365.504026412964, + "p99": 5048.704147338867 + }, + "combine": { + "p50": 116.7680025100708, + "p90": 125.34399330615997, + "p95": 246.75199389457703, + "p99": 4688.831806182861 + }, + "roundtrip": { + "p50": 454.49599623680115, + "p90": 519.1360116004944, + "p95": 607.0719957351685, + "p99": 5208.4479331970215 + }, + "isolatedSum": { + "p50": 432.44799971580505, + "p90": 468.1279808282852, + "p95": 3612.256020307541, + "p99": 9737.535953521729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 315.744012594223, + "p90": 337.8239870071411, + "p95": 465.66399931907654, + "p99": 5120.672225952148 + }, + "combine": { + "p50": 116.7680025100708, + "p90": 122.78400361537933, + "p95": 126.43200159072876, + "p99": 184.7040057182312 + }, + "roundtrip": { + "p50": 453.7599980831146, + "p90": 
572.6079940795898, + "p95": 3406.8799018859863, + "p99": 5646.592140197754 + }, + "isolatedSum": { + "p50": 432.5120151042938, + "p90": 460.60799062252045, + "p95": 592.0960009098053, + "p99": 5305.37623167038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 333.3759903907776, + "p90": 359.20000076293945, + "p95": 523.8080024719238, + "p99": 4955.359935760498 + }, + "combine": { + "p50": 117.88800358772278, + "p90": 125.72799623012543, + "p95": 156.73600137233734, + "p99": 4968.319892883301 + }, + "roundtrip": { + "p50": 469.8239862918854, + "p90": 660.8319878578186, + "p95": 4321.343898773193, + "p99": 5363.103866577148 + }, + "isolatedSum": { + "p50": 451.26399397850037, + "p90": 484.9279969930649, + "p95": 680.5440038442612, + "p99": 9923.679828643799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 340.86400270462036, + "p90": 457.40801095962524, + "p95": 526.3360142707825, + "p99": 4598.400115966797 + }, + "combine": { + "p50": 147.5519984960556, + "p90": 177.76000499725342, + "p95": 183.26400220394135, + "p99": 4573.056221008301 + }, + "roundtrip": { + "p50": 485.1199984550476, + "p90": 661.728024482727, + "p95": 4035.520076751709, + "p99": 5189.311981201172 + }, + "isolatedSum": { + "p50": 488.41600120067596, + "p90": 635.1680159568787, + "p95": 709.6000164747238, + "p99": 9171.456336975098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 360.1920008659363, + "p90": 385.18399000167847, + "p95": 578.5279870033264, + "p99": 5051.392078399658 + }, + "combine": { + "p50": 241.63199961185455, + "p90": 250.75200200080872, + "p95": 3247.584104537964, + "p99": 4023.615837097168 + }, + "roundtrip": { + "p50": 580.7999968528748, + "p90": 607.3600053787231, + "p95": 3889.5039558410645, + "p99": 4671.328067779541 + }, + "isolatedSum": { + "p50": 601.8240004777908, + "p90": 635.9359920024872, + "p95": 3826.1120915412903, + "p99": 9075.007915496826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-79ce1775", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_3addc428", + "comparisonKey": "4a6cf46164e7262e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:56.835453+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 306.4959943294525, + "p90": 426.91200971603394, + "p95": 3354.720115661621, + "p99": 6676.640033721924 + }, + "combine": { + "p50": 117.37599968910217, + "p90": 124.25599992275238, + "p95": 128.35200130939484, + "p99": 4899.392127990723 + }, + "roundtrip": { + "p50": 436.92800402641296, + "p90": 464.1920030117035, + "p95": 5026.080131530762, + "p99": 6390.463829040527 + }, + "isolatedSum": { + "p50": 423.8719940185547, + "p90": 551.1680096387863, + "p95": 3483.072116971016, + "p99": 11576.032161712646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 320.8320140838623, + "p90": 442.1760141849518, + "p95": 456.2560021877289, + "p99": 
5826.848030090332 + }, + "combine": { + "p50": 125.02400577068329, + "p90": 216.8319970369339, + "p95": 4340.256214141846, + "p99": 5263.775825500488 + }, + "roundtrip": { + "p50": 457.95199275016785, + "p90": 640.3520107269287, + "p95": 4840.576171875, + "p99": 6670.9442138671875 + }, + "isolatedSum": { + "p50": 445.8560198545456, + "p90": 659.0080112218857, + "p95": 4796.512216329575, + "p99": 11090.62385559082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 338.17601203918457, + "p90": 457.15200901031494, + "p95": 500.92798471450806, + "p99": 5393.439769744873 + }, + "combine": { + "p50": 121.66400253772736, + "p90": 179.55200374126434, + "p95": 188.28800320625305, + "p99": 4314.591884613037 + }, + "roundtrip": { + "p50": 480.51199316978455, + "p90": 648.2239961624146, + "p95": 717.5679802894592, + "p99": 5360.640048980713 + }, + "isolatedSum": { + "p50": 459.8400145769119, + "p90": 636.7040127515793, + "p95": 689.2159879207611, + "p99": 9708.03165435791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 366.6880130767822, + "p90": 463.20000290870667, + "p95": 479.96801137924194, + "p99": 4776.512145996094 + }, + "combine": { + "p50": 244.4480061531067, + "p90": 261.82401180267334, + "p95": 3026.0798931121826, + "p99": 4164.480209350586 + }, + "roundtrip": { + "p50": 589.8879766464233, + "p90": 692.9280161857605, + "p95": 2628.6399364471436, + "p99": 4801.983833312988 + }, + "isolatedSum": { + "p50": 611.1360192298889, + "p90": 725.02401471138, + "p95": 
3506.0479044914246, + "p99": 8940.99235534668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2651eab0", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_a180493d", + "comparisonKey": "eb192e4ed975dabe", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:56.263826+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 316.22400879859924, + "p90": 334.1439962387085, + "p95": 3602.816104888916, + "p99": 6383.840084075928 + }, + "combine": { + "p50": 123.9359974861145, + "p90": 130.5599957704544, + "p95": 132.57600367069244, + "p99": 4573.919773101807 + }, + "roundtrip": { + "p50": 453.37599515914917, + "p90": 499.90400671958923, + "p95": 5193.280220031738, + "p99": 7319.392204284668 + }, + "isolatedSum": { + "p50": 440.16000628471375, + "p90": 464.7039920091629, + "p95": 3735.3921085596085, + "p99": 10957.759857177734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 9, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 316.895991563797, + "p90": 335.04000306129456, + "p95": 4137.856006622314, + "p99": 5229.6319007873535 + }, + "combine": { + "p50": 125.91999769210815, + "p90": 134.36800241470337, + "p95": 142.43200421333313, + "p99": 4935.391902923584 + }, + "roundtrip": { + "p50": 454.75199818611145, + "p90": 560.3200197219849, + "p95": 4483.67977142334, + "p99": 5894.591808319092 + }, + "isolatedSum": { + "p50": 442.81598925590515, + "p90": 469.4080054759979, + "p95": 4280.288010835648, + "p99": 10165.023803710938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 18, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 333.47201347351074, + "p90": 464.7040069103241, + "p95": 3509.82403755188, + "p99": 5274.847984313965 + }, + "combine": { + "p50": 128.63999605178833, + "p90": 191.8720006942749, + "p95": 200.57600736618042, + "p99": 4434.5598220825195 + }, + "roundtrip": { + "p50": 474.3039906024933, + "p90": 660.5119705200195, + "p95": 1691.2959814071655, + "p99": 5996.640205383301 + }, + "isolatedSum": { + "p50": 462.1120095252991, + "p90": 656.576007604599, + "p95": 3710.4000449180603, + "p99": 9709.407806396484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 36, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 325.82399249076843, + "p90": 341.69599413871765, + "p95": 419.295996427536, + "p99": 4804.63981628418 + }, + "combine": { + "p50": 124.60800260305405, + "p90": 130.5599957704544, + "p95": 133.05599987506866, + "p99": 328.8959860801697 + }, + "roundtrip": { + "p50": 464.7040069103241, + "p90": 494.3679869174957, + "p95": 859.8399758338928, + "p99": 5119.103908538818 + }, + "isolatedSum": { + "p50": 450.4319950938225, + "p90": 472.25598990917206, + "p95": 552.3519963026047, + "p99": 5133.535802364349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 72, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 327.0080089569092, + "p90": 341.18399024009705, + "p95": 463.9360010623932, + "p99": 4985.087871551514 + }, + "combine": { + "p50": 125.63200294971466, + "p90": 132.54399597644806, + "p95": 136.7039978504181, + "p99": 4470.816135406494 + }, + "roundtrip": { + "p50": 466.7840003967285, + "p90": 519.4240212440491, + 
"p95": 4044.703960418701, + "p99": 5502.111911773682 + }, + "isolatedSum": { + "p50": 452.64001190662384, + "p90": 473.7279862165451, + "p95": 600.6399989128113, + "p99": 9455.904006958008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 343.7440097332001, + "p90": 359.48801040649414, + "p95": 559.4559907913208, + "p99": 5085.343837738037 + }, + "combine": { + "p50": 125.69600343704224, + "p90": 134.39999520778656, + "p95": 372.8640079498291, + "p99": 4955.999851226807 + }, + "roundtrip": { + "p50": 483.68000984191895, + "p90": 519.648015499115, + "p95": 4242.208003997803, + "p99": 5226.175785064697 + }, + "isolatedSum": { + "p50": 469.4400131702423, + "p90": 493.8880056142807, + "p95": 932.3199987411499, + "p99": 10041.343688964844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 350.43200850486755, + "p90": 475.96800327301025, + "p95": 581.2159776687622, + "p99": 4577.792167663574 + }, + "combine": { + "p50": 149.02399480342865, + "p90": 188.9919936656952, + "p95": 193.79200041294098, + "p99": 4211.999893188477 + }, + "roundtrip": { + "p50": 490.04799127578735, + "p90": 670.0800061225891, + "p95": 729.3760180473328, + "p99": 5104.608058929443 + }, + "isolatedSum": { + "p50": 499.4560033082962, + "p90": 664.9599969387054, + "p95": 775.0079780817032, + "p99": 8789.79206085205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 576, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 364.6399974822998, + "p90": 380.511999130249, + "p95": 529.4399857521057, + "p99": 4628.096103668213 + }, + "combine": { + "p50": 247.5840002298355, + "p90": 253.6959946155548, + "p95": 258.9440047740936, + "p99": 4128.479957580566 + }, + "roundtrip": { + "p50": 599.5519757270813, + "p90": 621.4720010757446, + "p95": 3617.0239448547363, + "p99": 4555.520057678223 + }, + "isolatedSum": { + "p50": 612.2239977121353, + "p90": 634.2079937458038, + "p95": 788.3839905261993, + "p99": 8756.57606124878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5141b85c", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_dc372391", + "comparisonKey": "d0b5854acc848bc4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:44.739772+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 313.4079873561859, + "p90": 435.2000057697296, + "p95": 2776.3519287109375, + "p99": 5915.071964263916 + }, + "combine": { + "p50": 115.35999923944473, + "p90": 168.44800114631653, + "p95": 173.43999445438385, + "p99": 4327.775955200195 + }, + "roundtrip": { + "p50": 460.54399013519287, + "p90": 614.7199869155884, + "p95": 685.375988483429, + "p99": 5665.440082550049 + }, + "isolatedSum": { + "p50": 428.76798659563065, + "p90": 603.6480069160461, + "p95": 2949.7919231653214, + "p99": 10242.847919464111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 332.15999603271484, + "p90": 451.7439901828766, + "p95": 498.56001138687134, + "p99": 4977.695941925049 + }, + "combine": { + "p50": 
117.11999773979187, + "p90": 171.29600048065186, + "p95": 175.9680062532425, + "p99": 4266.560077667236 + }, + "roundtrip": { + "p50": 476.0960042476654, + "p90": 648.2560038566589, + "p95": 763.7760043144226, + "p99": 5355.008125305176 + }, + "isolatedSum": { + "p50": 449.2799937725067, + "p90": 623.0399906635284, + "p95": 674.5280176401138, + "p99": 9244.256019592285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 387.90398836135864, + "p90": 456.7680060863495, + "p95": 506.84797763824463, + "p99": 5162.367820739746 + }, + "combine": { + "p50": 261.56800985336304, + "p90": 268.41598749160767, + "p95": 307.42400884628296, + "p99": 4037.1198654174805 + }, + "roundtrip": { + "p50": 636.9919776916504, + "p90": 723.4240174293518, + "p95": 2704.0319442749023, + "p99": 4769.599914550781 + }, + "isolatedSum": { + "p50": 649.4719982147217, + "p90": 725.1839935779572, + "p95": 814.2719864845276, + "p99": 9199.487686157227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b99b4ba0", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h100_1e06dab3", + "comparisonKey": "3053320720fb0743", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:01.699656+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 321.79200649261475, + "p90": 453.21598649024963, + "p95": 467.5840139389038, + "p99": 4753.24821472168 + }, + "combine": { + "p50": 121.11999839544296, + "p90": 127.51999497413635, + "p95": 134.36800241470337, + "p99": 4476.54390335083 + }, + "roundtrip": { + "p50": 456.06398582458496, + "p90": 499.5200037956238, + "p95": 2422.368049621582, + "p99": 5358.943939208984 + }, + 
"isolatedSum": { + "p50": 442.9120048880577, + "p90": 580.735981464386, + "p95": 601.9520163536072, + "p99": 9229.79211807251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 343.32799911499023, + "p90": 472.4160134792328, + "p95": 1602.3679971694946, + "p99": 4786.303997039795 + }, + "combine": { + "p50": 124.35200065374374, + "p90": 186.65599822998047, + "p95": 194.07999515533447, + "p99": 4582.143783569336 + }, + "roundtrip": { + "p50": 490.04799127578735, + "p90": 670.4639792442322, + "p95": 3641.1519050598145, + "p99": 5110.400199890137 + }, + "isolatedSum": { + "p50": 467.679999768734, + "p90": 659.0720117092133, + "p95": 1796.447992324829, + "p99": 9368.44778060913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 384.768009185791, + "p90": 475.2959907054901, + "p95": 3051.2640476226807, + "p99": 4614.975929260254 + }, + "combine": { + "p50": 261.59998774528503, + "p90": 268.38400959968567, + "p95": 375.7759928703308, + "p99": 3706.496000289917 + }, + "roundtrip": { + "p50": 642.7199840545654, + "p90": 739.5520210266113, + "p95": 3389.2478942871094, + "p99": 4625.02384185791 + }, + "isolatedSum": { + "p50": 646.367996931076, + "p90": 743.6800003051758, + "p95": 3427.0400404930115, + "p99": 8321.47192955017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ } + ] + }, + { + "id": "cx-b6698291", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_1f06dc46", + "comparisonKey": "06c3ba4ce28a006c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:19.559170+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", 
+ "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 324.0959942340851, + "p90": 411.3599956035614, + "p95": 463.71200680732727, + "p99": 4892.1918869018555 + }, + "combine": { + "p50": 123.80799651145935, + "p90": 130.49599528312683, + "p95": 134.11200046539307, + "p99": 4931.23197555542 + }, + "roundtrip": { + "p50": 472.9599952697754, + "p90": 623.8399744033813, + "p95": 3891.1681175231934, + "p99": 5267.871856689453 + }, + "isolatedSum": { + "p50": 447.90399074554443, + "p90": 541.8559908866882, + "p95": 597.8240072727203, + "p99": 9823.423862457275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 348.7040102481842, + "p90": 472.896009683609, + "p95": 603.3599972724915, + "p99": 4493.216037750244 + }, + "combine": { + "p50": 127.77599692344666, + "p90": 191.20000302791595, + "p95": 1751.8399953842163, + "p99": 4750.400066375732 + }, + "roundtrip": { + "p50": 494.7200119495392, + "p90": 685.9840154647827, + "p95": 4184.415817260742, + "p99": 5472.511768341064 + }, + "isolatedSum": { + "p50": 476.48000717163086, + "p90": 664.096012711525, + "p95": 2355.1999926567078, + "p99": 9243.616104125977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 381.5999925136566, + "p90": 473.9519953727722, + "p95": 508.09597969055176, + "p99": 4470.975875854492 + }, + "combine": { + "p50": 263.68001103401184, + "p90": 
282.6240062713623, + "p95": 1932.703971862793, + "p99": 3921.056032180786 + }, + "roundtrip": { + "p50": 642.1120166778564, + "p90": 732.2880029678345, + "p95": 3871.648073196411, + "p99": 4580.031871795654 + }, + "isolatedSum": { + "p50": 645.2800035476685, + "p90": 756.5760016441345, + "p95": 2440.7999515533447, + "p99": 8392.031908035278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-69e153dc", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_2006ddd9", + "comparisonKey": "9639bbd63a2d4285", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:36.686157+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 418.5279905796051, + "p90": 461.5359902381897, + "p95": 3632.960081100464, + "p99": 4673.439979553223 + }, + "combine": { + "p50": 166.20799899101257, + "p90": 185.40799617767334, + "p95": 191.6159987449646, + "p99": 4292.895793914795 + }, + "roundtrip": { + "p50": 469.8239862918854, + "p90": 652.3839831352234, + "p95": 3627.7759075164795, + "p99": 5491.648197174072 + }, + "isolatedSum": { + "p50": 584.7359895706177, + "p90": 646.943986415863, + "p95": 3824.5760798454285, + "p99": 8966.335773468018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 454.5919895172119, + "p90": 510.17600297927856, + "p95": 2775.167942047119, + "p99": 4555.840015411377 + }, + "combine": { + "p50": 171.7119961977005, + "p90": 195.6160068511963, + "p95": 555.4239749908447, + "p99": 4351.168155670166 + }, + "roundtrip": { + "p50": 619.7119951248169, + "p90": 697.6320147514343, + "p95": 3797.5680828094482, + "p99": 4994.048118591309 + }, + "isolatedSum": { + "p50": 
626.3039857149124, + "p90": 705.7920098304749, + "p95": 3330.591917037964, + "p99": 8907.008171081543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 391.07200503349304, + "p90": 477.3760139942169, + "p95": 2583.5840702056885, + "p99": 4596.447944641113 + }, + "combine": { + "p50": 262.7519965171814, + "p90": 270.6240117549896, + "p95": 1637.0559930801392, + "p99": 3948.9920139312744 + }, + "roundtrip": { + "p50": 644.5440053939819, + "p90": 747.9360103607178, + "p95": 3760.7359886169434, + "p99": 4386.015892028809 + }, + "isolatedSum": { + "p50": 653.8240015506744, + "p90": 748.0000257492065, + "p95": 4220.640063285828, + "p99": 8545.439958572388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-92875ac3", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_9a966a67", + "comparisonKey": "a7e36f9169fa5b70", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:48.737504+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-single", + "model": 
"DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 315.36000967025757, + "p90": 337.0240032672882, + "p95": 4355.743885040283, + "p99": 5468.063831329346 + }, + "combine": { + "p50": 125.50400197505951, + "p90": 132.35199451446533, + "p95": 141.40799641609192, + "p99": 4702.911853790283 + }, + "roundtrip": { + "p50": 454.49599623680115, + "p90": 521.9519734382629, + "p95": 4027.4558067321777, + "p99": 5638.175964355469 + }, + "isolatedSum": { + "p50": 440.8640116453171, + "p90": 469.37599778175354, + "p95": 4497.151881456375, + "p99": 10170.975685119629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 13, + 
"stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 316.5439963340759, + "p90": 348.09601306915283, + "p95": 4003.0078887939453, + "p99": 5421.120166778564 + }, + "combine": { + "p50": 125.82400441169739, + "p90": 131.3920021057129, + "p95": 134.65599715709686, + "p99": 4966.368198394775 + }, + "roundtrip": { + "p50": 454.6239972114563, + "p90": 578.1760215759277, + "p95": 4412.415981292725, + "p99": 5710.591793060303 + }, + "isolatedSum": { + "p50": 442.3680007457733, + "p90": 479.4880151748657, + "p95": 4137.663885951042, + "p99": 10387.48836517334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 23, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 332.09601044654846, + "p90": 426.68798565864563, + "p95": 464.80000019073486, + "p99": 5118.015766143799 + }, + "combine": { + "p50": 127.74400413036346, + "p90": 168.83200407028198, + "p95": 174.55999553203583, + "p99": 4839.072227478027 + }, + "roundtrip": { + "p50": 478.303998708725, + "p90": 601.7280220985413, + "p95": 691.2639737129211, + "p99": 5458.432197570801 + }, + "isolatedSum": { + "p50": 459.8400145769119, + "p90": 595.5199897289276, + "p95": 639.3599957227707, + "p99": 9957.087993621826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 50, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 326.07999444007874, + "p90": 352.54400968551636, + "p95": 563.9680027961731, + "p99": 4899.839878082275 + }, + "combine": { + "p50": 126.17599964141846, + "p90": 133.15199315547943, + "p95": 194.72000002861023, + 
"p99": 4368.127822875977 + }, + "roundtrip": { + "p50": 465.2160108089447, + "p90": 497.8879988193512, + "p95": 738.9439940452576, + "p99": 5379.519939422607 + }, + "isolatedSum": { + "p50": 452.2559940814972, + "p90": 485.6960028409958, + "p95": 758.6880028247833, + "p99": 9267.967700958252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 326.81599259376526, + "p90": 345.44000029563904, + "p95": 434.7519874572754, + "p99": 4897.280216217041 + }, + "combine": { + "p50": 127.29600071907043, + "p90": 134.94400680065155, + "p95": 208.8959962129593, + "p99": 4969.3121910095215 + }, + "roundtrip": { + "p50": 466.14399552345276, + "p90": 500.92798471450806, + "p95": 640.3200030326843, + "p99": 5529.53577041626 + }, + "isolatedSum": { + "p50": 454.1119933128357, + "p90": 480.3840070962906, + "p95": 643.6479836702347, + "p99": 9866.592407226562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 224, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 344.38401460647583, + "p90": 359.1359853744507, + "p95": 428.5440146923065, + "p99": 5001.984119415283 + }, + "combine": { + "p50": 127.93600559234619, + "p90": 136.60800457000732, + "p95": 293.503999710083, + "p99": 5089.087963104248 + }, + "roundtrip": { + "p50": 486.7199957370758, + "p90": 530.239999294281, + "p95": 2334.0160846710205, + "p99": 5472.383975982666 + }, + "isolatedSum": { + "p50": 472.320020198822, + "p90": 495.743989944458, + "p95": 722.0480144023895, + "p99": 10091.072082519531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + 
"combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 354.559987783432, + "p90": 475.13601183891296, + "p95": 569.1519975662231, + "p99": 4991.615772247314 + }, + "combine": { + "p50": 161.53599321842194, + "p90": 193.34399700164795, + "p95": 354.91201281547546, + "p99": 4545.7282066345215 + }, + "roundtrip": { + "p50": 494.30400133132935, + "p90": 673.1200218200684, + "p95": 815.455973148346, + "p99": 5366.464138031006 + }, + "isolatedSum": { + "p50": 516.0959810018539, + "p90": 668.4800088405609, + "p95": 924.0640103816986, + "p99": 9537.343978881836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 925, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 400.736004114151, + "p90": 439.2000138759613, + "p95": 586.9119763374329, + "p99": 4775.455951690674 + }, + "combine": { + "p50": 265.8880054950714, + "p90": 271.263986825943, + "p95": 275.29600262641907, + "p99": 3857.599973678589 + }, + "roundtrip": { + "p50": 634.656012058258, + "p90": 697.8560090065002, + "p95": 3157.536029815674, + "p99": 4578.368186950684 + }, + "isolatedSum": { + "p50": 666.6240096092224, + "p90": 710.4640007019043, + "p95": 862.2079789638519, + "p99": 8633.055925369263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f1defb98", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + 
"colorKey": "h100_a847bfb7", + "comparisonKey": "5d00b49b39f4513b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:46.806314+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 
8, + "dispatch": { + "p50": 301.1839985847473, + "p90": 316.70400500297546, + "p95": 422.4640130996704, + "p99": 5476.352214813232 + }, + "combine": { + "p50": 120.19199877977371, + "p90": 127.58399546146393, + "p95": 156.99200332164764, + "p99": 4513.919830322266 + }, + "roundtrip": { + "p50": 432.5760006904602, + "p90": 465.7280147075653, + "p95": 647.487998008728, + "p99": 5743.3600425720215 + }, + "isolatedSum": { + "p50": 421.375997364521, + "p90": 444.2880004644394, + "p95": 579.456016421318, + "p99": 9990.272045135498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 301.15199089050293, + "p90": 316.8320059776306, + "p95": 393.18400621414185, + "p99": 7328.639984130859 + }, + "combine": { + "p50": 120.25599926710129, + "p90": 144.44799721240997, + "p95": 4140.704154968262, + "p99": 4704.127788543701 + }, + "roundtrip": { + "p50": 433.50398540496826, + "p90": 460.54399013519287, + "p95": 633.72802734375, + "p99": 6008.639812469482 + }, + "isolatedSum": { + "p50": 421.4079901576042, + "p90": 461.2800031900406, + "p95": 4533.888161182404, + "p99": 12032.76777267456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 22, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 398.24000000953674, + "p90": 423.48799109458923, + "p95": 760.5760097503662, + "p99": 4894.847869873047 + }, + "combine": { + "p50": 159.96800363063812, + "p90": 174.9120056629181, + "p95": 316.3520097732544, + "p99": 4669.119834899902 + }, + "roundtrip": { + "p50": 570.6239938735962, + "p90": 605.0879955291748, + "p95": 2589.7281169891357, + "p99": 
5117.087841033936 + }, + "isolatedSum": { + "p50": 558.2080036401749, + "p90": 598.3999967575073, + "p95": 1076.9280195236206, + "p99": 9563.96770477295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 311.45599484443665, + "p90": 328.0960023403168, + "p95": 397.024005651474, + "p99": 5219.0399169921875 + }, + "combine": { + "p50": 119.71200257539749, + "p90": 125.56800246238708, + "p95": 131.42399489879608, + "p99": 5085.279941558838 + }, + "roundtrip": { + "p50": 446.3360011577606, + "p90": 478.91199588775635, + "p95": 602.1119952201843, + "p99": 5949.344158172607 + }, + "isolatedSum": { + "p50": 431.16799741983414, + "p90": 453.66400480270386, + "p95": 528.4480005502701, + "p99": 10304.319858551025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 79, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 318.1760013103485, + "p90": 408.54400396347046, + "p95": 426.7840087413788, + "p99": 5223.711967468262 + }, + "combine": { + "p50": 123.87199699878693, + "p90": 166.36799275875092, + "p95": 227.90400683879852, + "p99": 4994.719982147217 + }, + "roundtrip": { + "p50": 449.0239918231964, + "p90": 477.2160053253174, + "p95": 1047.2320318222046, + "p99": 5727.200031280518 + }, + "isolatedSum": { + "p50": 442.04799830913544, + "p90": 574.9119967222214, + "p95": 654.6880155801773, + "p99": 10218.431949615479 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 134, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 330.9119939804077, + "p90": 414.40001130104065, + "p95": 427.7760088443756, + "p99": 5028.512001037598 + }, + "combine": { + "p50": 121.44000083208084, + "p90": 132.9279989004135, + "p95": 3289.760112762451, + "p99": 4648.51188659668 + }, + "roundtrip": { + "p50": 467.6479995250702, + "p90": 501.1199712753296, + "p95": 651.199996471405, + "p99": 5358.496189117432 + }, + "isolatedSum": { + "p50": 452.35199481248856, + "p90": 547.3280102014542, + "p95": 3717.536121606827, + "p99": 9677.023887634277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 268, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 338.01600337028503, + "p90": 439.13599848747253, + "p95": 473.9519953727722, + "p99": 5653.791904449463 + }, + "combine": { + "p50": 148.51200580596924, + "p90": 173.8239973783493, + "p95": 178.78399789333344, + "p99": 4229.311943054199 + }, + "roundtrip": { + "p50": 592.1919941902161, + "p90": 646.7199921607971, + "p95": 3618.2079315185547, + "p99": 5315.936088562012 + }, + "isolatedSum": { + "p50": 486.5280091762543, + "p90": 612.9599958658218, + "p95": 652.7359932661057, + "p99": 9883.103847503662 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 533, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 417.5359904766083, + "p90": 434.1759979724884, + "p95": 547.8079915046692, + "p99": 4618.175983428955 + }, + "combine": { + "p50": 250.2720057964325, + "p90": 257.6960027217865, + "p95": 366.5600121021271, + "p99": 3907.4559211730957 + }, + "roundtrip": { + 
"p50": 647.711992263794, + "p90": 675.167977809906, + "p95": 3538.7840270996094, + "p99": 4625.4401206970215 + }, + "isolatedSum": { + "p50": 667.8079962730408, + "p90": 691.8720006942749, + "p95": 914.3680036067963, + "p99": 8525.63190460205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 1027, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c31a7367", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h100_5d6476ba", + "comparisonKey": "d134443388739b00", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:19.723208+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 311.16798520088196, + "p90": 377.53599882125854, + "p95": 3805.4399490356445, + "p99": 5101.280212402344 + }, + "combine": { + "p50": 115.52000045776367, + "p90": 136.3839954137802, + "p95": 190.94400107860565, + "p99": 4756.576061248779 + }, + "roundtrip": { + "p50": 452.60798931121826, + "p90": 565.0240182876587, + "p95": 4577.47220993042, + "p99": 5411.935806274414 + }, + "isolatedSum": { + "p50": 426.68798565864563, + "p90": 513.9199942350388, + "p95": 3996.38395011425, + "p99": 9857.856273651123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 340.831995010376, + "p90": 451.775997877121, + "p95": 1816.0959482192993, + "p99": 4925.1837730407715 + }, + "combine": { + "p50": 125.08800625801086, + "p90": 176.83200538158417, + "p95": 210.14399826526642, + "p99": 4315.104007720947 + }, + "roundtrip": { + "p50": 490.55999517440796, + "p90": 668.4799790382385, + "p95": 4335.328102111816, + "p99": 6043.360233306885 + }, + "isolatedSum": { + "p50": 465.92000126838684, + "p90": 628.6080032587051, + "p95": 2026.2399464845657, + "p99": 
9240.287780761719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 285, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 375.61601400375366, + "p90": 461.66399121284485, + "p95": 3496.448040008545, + "p99": 4920.544147491455 + }, + "combine": { + "p50": 275.4240036010742, + "p90": 320.67200541496277, + "p95": 3246.4001178741455, + "p99": 3950.6239891052246 + }, + "roundtrip": { + "p50": 625.6960034370422, + "p90": 721.2479710578918, + "p95": 3946.8159675598145, + "p99": 4654.751777648926 + }, + "isolatedSum": { + "p50": 651.0400176048279, + "p90": 782.3359966278076, + "p95": 6742.84815788269, + "p99": 8871.16813659668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 1101, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a815989", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h100_7fe352d9", + "comparisonKey": "385578ee0ac74e35", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:02.362927+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 
256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 307.2960078716278, + "p90": 326.78401470184326, + "p95": 384.5439851284027, + "p99": 4872.416019439697 + }, + "combine": { + "p50": 114.23999816179276, + "p90": 125.95200538635254, + "p95": 445.279985666275, + "p99": 5030.01594543457 + }, + "roundtrip": { + "p50": 442.23999977111816, + "p90": 473.37600588798523, + "p95": 606.9759726524353, + "p99": 5830.656051635742 + }, + "isolatedSum": { + "p50": 421.53600603342056, + "p90": 452.7360200881958, + "p95": 829.8239707946777, + "p99": 9902.431964874268 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 333.3440124988556, + "p90": 437.75999546051025, + "p95": 572.704017162323, + "p99": 5388.768196105957 + }, + "combine": { + "p50": 122.30399996042252, + "p90": 170.33599317073822, + "p95": 177.88800597190857, + "p99": 5082.208156585693 + }, + "roundtrip": { + "p50": 472.9599952697754, + "p90": 628.6399960517883, + "p95": 1051.4240264892578, + "p99": 5340.511798858643 + }, + "isolatedSum": { + "p50": 455.6480124592781, + "p90": 608.0959886312485, + "p95": 750.5920231342316, + "p99": 10470.97635269165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 368.6079978942871, + "p90": 454.367995262146, + "p95": 484.25599932670593, + "p99": 4770.624160766602 + }, + "combine": { + "p50": 305.2160143852234, + "p90": 318.36798787117004, + "p95": 2339.2961025238037, + "p99": 3907.8080654144287 + }, + "roundtrip": { + "p50": 665.2160286903381, + "p90": 754.0159821510315, + "p95": 2914.4959449768066, + "p99": 4552.224159240723 + }, + "isolatedSum": { + "p50": 673.8240122795105, + "p90": 772.735983133316, + "p95": 2823.5521018505096, + "p99": 8678.43222618103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-13a60aa1", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_728444b7", + "comparisonKey": "68c34ceb98dbc254", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:54.261770+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 298.46400022506714, + "p90": 322.2399950027466, + "p95": 3995.0718879699707, + "p99": 6918.079853057861 + }, + "combine": { + "p50": 113.79200220108032, + "p90": 
120.7360029220581, + "p95": 126.30400061607361, + "p99": 4699.808120727539 + }, + "roundtrip": { + "p50": 432.5760006904602, + "p90": 473.1839895248413, + "p95": 1855.3919792175293, + "p99": 5563.680171966553 + }, + "isolatedSum": { + "p50": 412.25600242614746, + "p90": 442.9759979248047, + "p95": 4121.375888586044, + "p99": 11617.8879737854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 15, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 297.66398668289185, + "p90": 314.6879971027374, + "p95": 376.0319948196411, + "p99": 5868.832111358643 + }, + "combine": { + "p50": 114.30399864912033, + "p90": 127.61600315570831, + "p95": 4249.855995178223, + "p99": 5144.927978515625 + }, + "roundtrip": { + "p50": 431.8079948425293, + "p90": 462.3680114746094, + "p95": 621.4079856872559, + "p99": 5290.527820587158 + }, + "isolatedSum": { + "p50": 411.9679853320122, + "p90": 442.30400025844574, + "p95": 4625.887989997864, + "p99": 11013.760089874268 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 24, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 319.5199966430664, + "p90": 433.1200122833252, + "p95": 584.8640203475952, + "p99": 5468.192100524902 + }, + "combine": { + "p50": 117.24799871444702, + "p90": 172.63999581336975, + "p95": 177.91999876499176, + "p99": 3545.5360412597656 + }, + "roundtrip": { + "p50": 467.00799465179443, + "p90": 635.2319717407227, + "p95": 2269.695997238159, + "p99": 5203.616142272949 + }, + "isolatedSum": { + "p50": 436.7679953575134, + "p90": 605.760008096695, + "p95": 762.784019112587, + "p99": 9013.728141784668 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 43, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 306.94401264190674, + "p90": 333.0880105495453, + "p95": 422.33601212501526, + "p99": 5024.447917938232 + }, + "combine": { + "p50": 114.27199840545654, + "p90": 121.18399888277054, + "p95": 125.11999905109406, + "p99": 4273.119926452637 + }, + "roundtrip": { + "p50": 443.807989358902, + "p90": 479.13599014282227, + "p95": 589.9199843406677, + "p99": 5317.279815673828 + }, + "isolatedSum": { + "p50": 421.2160110473633, + "p90": 454.2720094323158, + "p95": 547.4560111761093, + "p99": 9297.56784439087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 73, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 306.8479895591736, + "p90": 325.79201459884644, + "p95": 391.07200503349304, + "p99": 5193.535804748535 + }, + "combine": { + "p50": 114.56000059843063, + "p90": 121.08799815177917, + "p95": 123.3920007944107, + "p99": 323.90400767326355 + }, + "roundtrip": { + "p50": 443.4239864349365, + "p90": 521.727979183197, + "p95": 879.5520067214966, + "p99": 5500.736236572266 + }, + "isolatedSum": { + "p50": 421.4079901576042, + "p90": 446.8800127506256, + "p95": 514.4640058279037, + "p99": 5517.439812421799 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 142, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 326.78401470184326, + "p90": 346.75198793411255, + "p95": 383.67998600006104, + "p99": 
4847.936153411865 + }, + "combine": { + "p50": 115.74400216341019, + "p90": 124.89599734544754, + "p95": 276.0320007801056, + "p99": 4324.5439529418945 + }, + "roundtrip": { + "p50": 463.0720019340515, + "p90": 498.6880123615265, + "p95": 602.9760241508484, + "p99": 5327.775955200195 + }, + "isolatedSum": { + "p50": 442.52801686525345, + "p90": 471.6479852795601, + "p95": 659.7119867801666, + "p99": 9172.48010635376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 274, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 338.0480110645294, + "p90": 452.57601141929626, + "p95": 576.2879848480225, + "p99": 4782.144069671631 + }, + "combine": { + "p50": 148.25600385665894, + "p90": 176.09600722789764, + "p95": 184.7040057182312, + "p99": 4342.239856719971 + }, + "roundtrip": { + "p50": 480.44800758361816, + "p90": 644.5440053939819, + "p95": 3479.6481132507324, + "p99": 5439.36014175415 + }, + "isolatedSum": { + "p50": 486.30401492118835, + "p90": 628.6720186471939, + "p95": 760.9919905662537, + "p99": 9124.383926391602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 367.5200045108795, + "p90": 460.7360064983368, + "p95": 528.8640260696411, + "p99": 4686.240196228027 + }, + "combine": { + "p50": 246.848002076149, + "p90": 260.8320116996765, + "p95": 278.2079875469208, + "p99": 4174.2401123046875 + }, + "roundtrip": { + "p50": 586.9119763374329, + "p90": 689.6640062332153, + "p95": 3035.871982574463, + "p99": 4220.448017120361 + }, + "isolatedSum": { + "p50": 614.3680065870285, + "p90": 
721.5680181980133, + "p95": 807.0720136165619, + "p99": 8860.480308532715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 1042, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1ffd6b93", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_33cbd342", + "comparisonKey": "9388986ed7ccfaa2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:42.807637+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 314.11200761795044, + "p90": 339.55198526382446, + "p95": 3943.5200691223145, + "p99": 5412.0001792907715 + }, + "combine": { + "p50": 122.079998254776, + "p90": 138.7840062379837, + "p95": 820.2559947967529, + "p99": 4883.008003234863 + }, + "roundtrip": { + "p50": 454.912006855011, + "p90": 512.3519897460938, + "p95": 4153.439998626709, + "p99": 5878.24010848999 + }, + "isolatedSum": { + "p50": 436.19200587272644, + "p90": 478.33599150180817, + "p95": 4763.776063919067, + "p99": 10295.008182525635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 314.2719864845276, + "p90": 338.1440043449402, + "p95": 3188.1918907165527, + "p99": 6009.920120239258 + }, + "combine": { + "p50": 121.79200351238251, + "p90": 128.80000472068787, + "p95": 131.96800649166107, + "p99": 4633.3441734313965 + }, + "roundtrip": { + "p50": 456.28800988197327, + "p90": 511.58398389816284, + "p95": 4082.943916320801, + "p99": 5895.328044891357 + }, + "isolatedSum": { + "p50": 436.0639899969101, + "p90": 466.94400906562805, + "p95": 3320.159897208214, + "p99": 10643.264293670654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 332.60801434516907, + "p90": 475.0399887561798, + "p95": 4091.5842056274414, + "p99": 4858.528137207031 + }, + "combine": { + "p50": 124.64000284671783, + "p90": 185.72799861431122, + "p95": 198.30399751663208, + "p99": 4654.272079467773 + }, + "roundtrip": { + "p50": 476.25601291656494, + "p90": 668.3200001716614, + "p95": 3910.559892654419, + "p99": 5175.360202789307 + }, + "isolatedSum": { + "p50": 457.2480171918869, + "p90": 660.767987370491, + "p95": 4289.8882031440735, + "p99": 9512.800216674805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 326.4319896697998, + "p90": 353.11999917030334, + "p95": 452.7359902858734, + "p99": 5091.072082519531 + }, + "combine": { + "p50": 122.65600264072418, + "p90": 135.71199774742126, + "p95": 150.33599734306335, + "p99": 4749.695777893066 + }, + "roundtrip": { + "p50": 469.4080054759979, + "p90": 499.04000759124756, + "p95": 677.7600049972534, + "p99": 5345.407962799072 + }, + "isolatedSum": { + "p50": 449.087992310524, + "p90": 488.8319969177246, + "p95": 603.0719876289368, + "p99": 9840.767860412598 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 327.67999172210693, + "p90": 349.0560054779053, + "p95": 528.8000106811523, + "p99": 4950.592041015625 + }, + "combine": { + "p50": 123.48800152540207, + "p90": 131.32800161838531, + "p95": 148.83199334144592, + "p99": 4696.095943450928 + }, + "roundtrip": { + "p50": 
470.4959988594055, + "p90": 525.7279872894287, + "p95": 706.2720060348511, + "p99": 5633.984088897705 + }, + "isolatedSum": { + "p50": 451.167993247509, + "p90": 480.3840070962906, + "p95": 677.6320040225983, + "p99": 9646.687984466553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 365.56801199913025, + "p90": 380.6400001049042, + "p95": 548.0639934539795, + "p99": 5187.327861785889 + }, + "combine": { + "p50": 144.80000734329224, + "p90": 149.63200688362122, + "p95": 153.6960005760193, + "p99": 4721.05598449707 + }, + "roundtrip": { + "p50": 487.36000061035156, + "p90": 532.2880148887634, + "p95": 696.9919800758362, + "p99": 5445.119857788086 + }, + "isolatedSum": { + "p50": 510.3680193424225, + "p90": 530.2720069885254, + "p95": 701.7599940299988, + "p99": 9908.383846282959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 427.7440011501312, + "p90": 679.423987865448, + "p95": 3208.4479331970215, + "p99": 4533.696174621582 + }, + "combine": { + "p50": 221.15199267864227, + "p90": 242.08000302314758, + "p95": 250.43201446533203, + "p99": 3800.960063934326 + }, + "roundtrip": { + "p50": 621.4720010757446, + "p90": 709.6959948539734, + "p95": 3982.880115509033, + "p99": 4595.168113708496 + }, + "isolatedSum": { + "p50": 648.8959938287735, + "p90": 921.5039908885956, + "p95": 3458.8799476623535, + "p99": 8334.656238555908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 
3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 512.1279954910278, + "p90": 601.0559797286987, + "p95": 3280.0960540771484, + "p99": 4370.944023132324 + }, + "combine": { + "p50": 385.15201210975647, + "p90": 400.89601278305054, + "p95": 2385.2479457855225, + "p99": 3082.751989364624 + }, + "roundtrip": { + "p50": 860.5120182037354, + "p90": 912.1919870376587, + "p95": 3504.2879581451416, + "p99": 4185.887813568115 + }, + "isolatedSum": { + "p50": 897.2800076007843, + "p90": 1001.9519925117493, + "p95": 5665.343999862671, + "p99": 7453.696012496948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e9a6f1a0", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_ab04d1e8", + "comparisonKey": "72b0ca57fc7b6ef9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:09.602994+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", 
+ "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 304.064005613327, + "p90": 325.56799054145813, + "p95": 3914.367914199829, + "p99": 5518.400192260742 + }, + "combine": { + "p50": 115.00799655914307, + "p90": 125.37600100040436, + "p95": 200.19200444221497, + "p99": 4760.384082794189 + }, + "roundtrip": { + "p50": 439.4879937171936, + "p90": 518.9120173454285, + "p95": 4687.7760887146, + "p99": 5956.352233886719 + }, + "isolatedSum": { + "p50": 419.0720021724701, + "p90": 450.9439915418625, + "p95": 4114.559918642044, + "p99": 10278.784275054932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 59, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 303.42400074005127, + "p90": 324.0639865398407, + "p95": 4104.671955108643, + "p99": 
5517.5042152404785 + }, + "combine": { + "p50": 115.07199704647064, + "p90": 129.92000579833984, + "p95": 3409.5358848571777, + "p99": 4988.255977630615 + }, + "roundtrip": { + "p50": 438.0800127983093, + "p90": 555.2319884300232, + "p95": 4484.19189453125, + "p99": 6055.647850036621 + }, + "isolatedSum": { + "p50": 418.4959977865219, + "p90": 453.98399233818054, + "p95": 7514.20783996582, + "p99": 10505.760192871094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 121, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 321.5680122375488, + "p90": 452.1600008010864, + "p95": 4070.8160400390625, + "p99": 5387.455940246582 + }, + "combine": { + "p50": 122.49600142240524, + "p90": 178.14399302005768, + "p95": 3567.647933959961, + "p99": 5574.272155761719 + }, + "roundtrip": { + "p50": 448.60801100730896, + "p90": 522.1760272979736, + "p95": 3713.184118270874, + "p99": 5649.3120193481445 + }, + "isolatedSum": { + "p50": 444.06401365995407, + "p90": 630.3039938211441, + "p95": 7638.463973999023, + "p99": 10961.7280960083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 244, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 313.60000371932983, + "p90": 344.31999921798706, + "p95": 4445.504188537598, + "p99": 6948.927879333496 + }, + "combine": { + "p50": 116.70400202274323, + "p90": 126.78399682044983, + "p95": 4026.2398719787598, + "p99": 5172.607898712158 + }, + "roundtrip": { + "p50": 451.84001326560974, + "p90": 508.1599950790405, + "p95": 4451.327800750732, + "p99": 5967.872142791748 + }, + "isolatedSum": { + "p50": 430.30400574207306, + "p90": 471.1039960384369, + 
"p95": 8471.744060516357, + "p99": 12121.535778045654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 478, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 317.984014749527, + "p90": 340.1600122451782, + "p95": 3588.063955307007, + "p99": 5086.368083953857 + }, + "combine": { + "p50": 117.79200285673141, + "p90": 125.95200538635254, + "p95": 132.25600123405457, + "p99": 4958.911895751953 + }, + "roundtrip": { + "p50": 452.83201336860657, + "p90": 508.92800092697144, + "p95": 4454.847812652588, + "p99": 5809.823989868164 + }, + "isolatedSum": { + "p50": 435.7760176062584, + "p90": 466.11201763153076, + "p95": 3720.3199565410614, + "p99": 10045.27997970581 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 953, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 380.0959885120392, + "p90": 408.86399149894714, + "p95": 3754.848003387451, + "p99": 5067.71183013916 + }, + "combine": { + "p50": 161.3759994506836, + "p90": 166.36799275875092, + "p95": 219.39200162887573, + "p99": 4395.936012268066 + }, + "roundtrip": { + "p50": 523.6480236053467, + "p90": 554.4000267982483, + "p95": 4075.3278732299805, + "p99": 5263.423919677734 + }, + "isolatedSum": { + "p50": 541.4719879627228, + "p90": 575.2319842576981, + "p95": 3974.240005016327, + "p99": 9463.647842407227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 1908, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
457.4719965457916, + "p90": 536.9920134544373, + "p95": 3165.1198863983154, + "p99": 4361.631870269775 + }, + "combine": { + "p50": 265.4719948768616, + "p90": 285.47200560569763, + "p95": 2997.119903564453, + "p99": 3823.199987411499 + }, + "roundtrip": { + "p50": 701.1839747428894, + "p90": 788.2879972457886, + "p95": 3842.24009513855, + "p99": 4422.4958419799805 + }, + "isolatedSum": { + "p50": 722.9439914226532, + "p90": 822.4640190601349, + "p95": 6162.239789962769, + "p99": 8184.831857681274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 3804, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 576.0639905929565, + "p90": 671.4879870414734, + "p95": 3402.656078338623, + "p99": 4456.92777633667 + }, + "combine": { + "p50": 479.2959988117218, + "p90": 491.32800102233887, + "p95": 2214.400053024292, + "p99": 2801.7919063568115 + }, + "roundtrip": { + "p50": 1021.9520330429077, + "p90": 1348.960041999817, + "p95": 3203.808069229126, + "p99": 3707.7438831329346 + }, + "isolatedSum": { + "p50": 1055.3599894046783, + "p90": 1162.8159880638123, + "p95": 5617.056131362915, + "p99": 7258.719682693481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ad3657c0", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_a407f0c6", + "comparisonKey": "dd575150dd1d49b5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:44.768091+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 309.28000807762146, + "p90": 345.40799260139465, + "p95": 512.9280090332031, + "p99": 5466.239929199219 + }, + "combine": { + "p50": 120.95999717712402, + "p90": 130.14400005340576, + "p95": 136.73600554466248, + 
"p99": 4410.304069519043 + }, + "roundtrip": { + "p50": 445.248007774353, + "p90": 510.55997610092163, + "p95": 671.3280081748962, + "p99": 6287.136077880859 + }, + "isolatedSum": { + "p50": 430.2400052547455, + "p90": 475.5519926548004, + "p95": 649.6640145778656, + "p99": 9876.543998718262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 308.83198976516724, + "p90": 352.3840010166168, + "p95": 497.8559911251068, + "p99": 5218.463897705078 + }, + "combine": { + "p50": 119.6800023317337, + "p90": 126.20800733566284, + "p95": 129.40800189971924, + "p99": 3972.991943359375 + }, + "roundtrip": { + "p50": 448.2559859752655, + "p90": 527.616024017334, + "p95": 2380.4800510406494, + "p99": 5826.335906982422 + }, + "isolatedSum": { + "p50": 428.51199209690094, + "p90": 478.59200835227966, + "p95": 627.263993024826, + "p99": 9191.455841064453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 326.24000310897827, + "p90": 454.8160135746002, + "p95": 522.816002368927, + "p99": 5406.688213348389 + }, + "combine": { + "p50": 123.4240010380745, + "p90": 176.92799866199493, + "p95": 208.8319957256317, + "p99": 4398.335933685303 + }, + "roundtrip": { + "p50": 472.4479913711548, + "p90": 618.7199950218201, + "p95": 798.367977142334, + "p99": 5440.896034240723 + }, + "isolatedSum": { + "p50": 449.66400414705276, + "p90": 631.7440122365952, + "p95": 731.6479980945587, + "p99": 9805.024147033691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + 
"combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 40, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 320.22398710250854, + "p90": 359.360009431839, + "p95": 385.0240111351013, + "p99": 5167.232036590576 + }, + "combine": { + "p50": 121.05599790811539, + "p90": 128.4160017967224, + "p95": 151.7760008573532, + "p99": 4673.567771911621 + }, + "roundtrip": { + "p50": 460.671991109848, + "p90": 536.0640287399292, + "p95": 625.760018825531, + "p99": 5661.471843719482 + }, + "isolatedSum": { + "p50": 441.27998501062393, + "p90": 487.7760112285614, + "p95": 536.8000119924545, + "p99": 9840.799808502197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 71, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 324.864000082016, + "p90": 407.45601058006287, + "p95": 416.703999042511, + "p99": 5420.3200340271 + }, + "combine": { + "p50": 125.69600343704224, + "p90": 163.32800686359406, + "p95": 167.35999286174774, + "p99": 4559.487819671631 + }, + "roundtrip": { + "p50": 471.42401337623596, + "p90": 597.1199870109558, + "p95": 663.9360189437866, + "p99": 5835.008144378662 + }, + "isolatedSum": { + "p50": 450.5600035190582, + "p90": 570.7840174436569, + "p95": 584.0639919042587, + "p99": 9979.80785369873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 143, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 345.0239896774292, + "p90": 421.85598611831665, + "p95": 439.9360120296478, + "p99": 4552.000045776367 + }, + "combine": { + "p50": 
125.2480000257492, + "p90": 164.38399255275726, + "p95": 169.3439930677414, + "p99": 4764.1282081604 + }, + "roundtrip": { + "p50": 479.8719882965088, + "p90": 573.5679864883423, + "p95": 1432.8639507293701, + "p99": 5549.824237823486 + }, + "isolatedSum": { + "p50": 470.2719897031784, + "p90": 586.2399786710739, + "p95": 609.2800050973892, + "p99": 9316.128253936768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 266, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 344.06399726867676, + "p90": 471.74400091171265, + "p95": 544.0319776535034, + "p99": 5057.663917541504 + }, + "combine": { + "p50": 149.6639996767044, + "p90": 185.05600094795227, + "p95": 285.5679988861084, + "p99": 4639.039993286133 + }, + "roundtrip": { + "p50": 491.58400297164917, + "p90": 667.1680212020874, + "p95": 932.9919815063477, + "p99": 5026.72004699707 + }, + "isolatedSum": { + "p50": 493.72799694538116, + "p90": 656.8000018596649, + "p95": 829.5999765396118, + "p99": 9696.703910827637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 534, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 369.56799030303955, + "p90": 446.3360011577606, + "p95": 476.7040014266968, + "p99": 4885.791778564453 + }, + "combine": { + "p50": 247.0719963312149, + "p90": 265.1199996471405, + "p95": 369.951993227005, + "p99": 3831.9039344787598 + }, + "roundtrip": { + "p50": 596.2560176849365, + "p90": 686.303973197937, + "p95": 3187.6161098480225, + "p99": 4616.864204406738 + }, + "isolatedSum": { + "p50": 616.6399866342545, + "p90": 711.4560008049011, + "p95": 846.6559946537018, + "p99": 
8717.695713043213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1044, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5d19e6eb", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_d9d7f063", + "comparisonKey": "cdfef6ae80b59fe3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:01.456048+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 313.85600566864014, + "p90": 328.99200916290283, + "p95": 444.5759952068329, + "p99": 5520.959854125977 + }, + "combine": { + "p50": 124.03199821710587, + "p90": 132.03200697898865, + "p95": 141.59999787807465, + "p99": 4939.680099487305 + }, + "roundtrip": { + "p50": 452.4799883365631, + "p90": 483.10399055480957, + "p95": 631.0080289840698, + "p99": 5628.511905670166 + }, + "isolatedSum": { + "p50": 437.888003885746, + "p90": 461.0240161418915, + "p95": 586.1759930849075, + "p99": 10460.639953613281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 314.07999992370605, + "p90": 328.0960023403168, + "p95": 3488.192081451416, + "p99": 5734.335899353027 + }, + "combine": { + "p50": 123.55200201272964, + "p90": 133.88800621032715, + "p95": 333.7920010089874, + "p99": 4756.159782409668 + }, + "roundtrip": { + "p50": 452.67200469970703, + "p90": 484.3519926071167, + "p95": 676.6719818115234, + "p99": 5343.520164489746 + }, + "isolatedSum": { + "p50": 437.6320019364357, + "p90": 461.9840085506439, + "p95": 3821.9840824604034, + "p99": 10490.495681762695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 40, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 330.6559920310974, + "p90": 456.959992647171, + "p95": 468.8960015773773, + "p99": 5250.016212463379 + }, + "combine": { + "p50": 126.39999389648438, + "p90": 188.51199746131897, + "p95": 203.45599949359894, + "p99": 4921.951770782471 + }, + "roundtrip": { + "p50": 477.85601019859314, + "p90": 660.5759859085083, + "p95": 984.0319752693176, + "p99": 5149.3120193481445 + }, + "isolatedSum": { + "p50": 457.0559859275818, + "p90": 645.47199010849, + "p95": 672.3520010709763, + "p99": 10171.96798324585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 81, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 324.38400387763977, + "p90": 337.8880023956299, + "p95": 362.62398958206177, + "p99": 4794.6882247924805 + }, + "combine": { + "p50": 123.52000176906586, + "p90": 130.2720010280609, + "p95": 133.40799510478973, + "p99": 4808.127880096436 + }, + "roundtrip": { + "p50": 463.3919894695282, + "p90": 488.19199204444885, + "p95": 604.9919724464417, + "p99": 5423.423767089844 + }, + "isolatedSum": { + "p50": 447.9040056467056, + "p90": 468.1600034236908, + "p95": 496.0319846868515, + "p99": 9602.816104888916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 325.24800300598145, + "p90": 340.86400270462036, + "p95": 399.4559943675995, + "p99": 4824.128150939941 + }, + "combine": { + "p50": 124.15999919176102, + "p90": 132.57600367069244, + "p95": 136.25599443912506, + "p99": 4641.280174255371 + }, + "roundtrip": { + "p50": 465.92000126838684, + "p90": 529.6319723129272, + "p95": 
637.3119950294495, + "p99": 5616.096019744873 + }, + "isolatedSum": { + "p50": 449.40800219774246, + "p90": 473.4400063753128, + "p95": 535.7119888067245, + "p99": 9465.408325195312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 339, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 344.0000116825104, + "p90": 357.5040102005005, + "p95": 379.10398840904236, + "p99": 4968.063831329346 + }, + "combine": { + "p50": 125.05599856376648, + "p90": 131.29599392414093, + "p95": 136.3839954137802, + "p99": 4771.8400955200195 + }, + "roundtrip": { + "p50": 484.3840003013611, + "p90": 508.1599950790405, + "p95": 623.0080127716064, + "p99": 5384.86385345459 + }, + "isolatedSum": { + "p50": 469.05601024627686, + "p90": 488.8000041246414, + "p95": 515.4879838228226, + "p99": 9739.903926849365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 676, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 363.103985786438, + "p90": 471.0400104522705, + "p95": 514.0799880027771, + "p99": 4609.824180603027 + }, + "combine": { + "p50": 171.39199376106262, + "p90": 189.56799805164337, + "p95": 386.30399107933044, + "p99": 4361.728191375732 + }, + "roundtrip": { + "p50": 533.407986164093, + "p90": 674.8160123825073, + "p95": 1935.5520009994507, + "p99": 4980.447769165039 + }, + "isolatedSum": { + "p50": 534.4959795475006, + "p90": 660.6080085039139, + "p95": 900.3839790821075, + "p99": 8971.55237197876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 1328, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 417.05599427223206, + "p90": 488.2560074329376, + "p95": 588.9919996261597, + "p99": 5258.272171020508 + }, + "combine": { + "p50": 295.199990272522, + "p90": 302.047997713089, + "p95": 432.3199987411499, + "p99": 3684.2238903045654 + }, + "roundtrip": { + "p50": 700.4160284996033, + "p90": 719.1680073738098, + "p95": 3070.3999996185303, + "p99": 4298.208236694336 + }, + "isolatedSum": { + "p50": 712.255984544754, + "p90": 790.3040051460266, + "p95": 1021.3119983673096, + "p99": 8942.496061325073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fff03641", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_4816949b", + "comparisonKey": "987c221cd3671a31", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:23.798767+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 315.8720135688782, + "p90": 330.6879997253418, + "p95": 3867.327928543091, + "p99": 5470.46422958374 + }, + "combine": { + "p50": 126.0479986667633, + "p90": 133.82400572299957, + "p95": 137.7280056476593, + "p99": 179.1359931230545 + }, + "roundtrip": { + "p50": 458.8159918785095, + "p90": 491.8400049209595, + "p95": 4276.480197906494, + "p99": 5959.807872772217 + }, + "isolatedSum": { + "p50": 441.9200122356415, + "p90": 464.51200544834137, + "p95": 4005.05593419075, + "p99": 5649.600222706795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 316.5760040283203, + "p90": 332.96000957489014, + "p95": 1319.6799755096436, + "p99": 5478.240013122559 + }, 
+ "combine": { + "p50": 124.83199685811996, + "p90": 131.74399733543396, + "p95": 190.36799669265747, + "p99": 4913.343906402588 + }, + "roundtrip": { + "p50": 459.6799910068512, + "p90": 491.2639856338501, + "p95": 599.0399718284607, + "p99": 5516.992092132568 + }, + "isolatedSum": { + "p50": 441.4080008864403, + "p90": 464.7040069103241, + "p95": 1510.047972202301, + "p99": 10391.583919525146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 22, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 332.832008600235, + "p90": 431.8079948425293, + "p95": 550.6240129470825, + "p99": 4702.4641036987305 + }, + "combine": { + "p50": 126.97599828243256, + "p90": 169.69600319862366, + "p95": 173.8239973783493, + "p99": 4379.487991333008 + }, + "roundtrip": { + "p50": 481.3440144062042, + "p90": 618.6559796333313, + "p95": 3374.592065811157, + "p99": 5374.688148498535 + }, + "isolatedSum": { + "p50": 459.80800688266754, + "p90": 601.503998041153, + "p95": 724.4480103254318, + "p99": 9081.952095031738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 326.4960050582886, + "p90": 341.7919874191284, + "p95": 545.4400181770325, + "p99": 5980.35192489624 + }, + "combine": { + "p50": 125.05599856376648, + "p90": 130.97600638866425, + "p95": 134.17600095272064, + "p99": 4701.632022857666 + }, + "roundtrip": { + "p50": 469.88800168037415, + "p90": 502.4319887161255, + "p95": 718.3039784431458, + "p99": 5746.592044830322 + }, + "isolatedSum": { + "p50": 451.55200362205505, + "p90": 472.76799380779266, + "p95": 679.6160191297531, + 
"p99": 10681.983947753906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 73, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 326.33599638938904, + "p90": 342.5599932670593, + "p95": 430.88001012802124, + "p99": 4824.895858764648 + }, + "combine": { + "p50": 125.47199428081512, + "p90": 131.42399489879608, + "p95": 134.43200290203094, + "p99": 137.66400516033173 + }, + "roundtrip": { + "p50": 470.17601132392883, + "p90": 498.78400564193726, + "p95": 3699.80788230896, + "p99": 5601.791858673096 + }, + "isolatedSum": { + "p50": 451.80799067020416, + "p90": 473.9839881658554, + "p95": 565.3120130300522, + "p99": 4962.55986392498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 138, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 344.86401081085205, + "p90": 359.48801040649414, + "p95": 480.8639883995056, + "p99": 5286.272048950195 + }, + "combine": { + "p50": 126.39999389648438, + "p90": 132.1599930524826, + "p95": 135.6160044670105, + "p99": 4135.9357833862305 + }, + "roundtrip": { + "p50": 492.7999973297119, + "p90": 587.4239802360535, + "p95": 3397.631883621216, + "p99": 5577.824115753174 + }, + "isolatedSum": { + "p50": 471.2640047073364, + "p90": 491.64800345897675, + "p95": 616.4799928665161, + "p99": 9422.207832336426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 273, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 351.39200091362, + 
"p90": 465.8240079879761, + "p95": 511.3599896430969, + "p99": 4848.22416305542 + }, + "combine": { + "p50": 149.3760049343109, + "p90": 171.64799571037292, + "p95": 175.1679927110672, + "p99": 247.5840002298355 + }, + "roundtrip": { + "p50": 499.7119903564453, + "p90": 642.0800089836121, + "p95": 3931.0081005096436, + "p99": 5114.2401695251465 + }, + "isolatedSum": { + "p50": 500.7680058479309, + "p90": 637.472003698349, + "p95": 686.5279823541641, + "p99": 5095.808163285255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 532, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 376.99198722839355, + "p90": 441.47199392318726, + "p95": 535.1679921150208, + "p99": 4778.91206741333 + }, + "combine": { + "p50": 248.73599410057068, + "p90": 259.16799902915955, + "p95": 3023.7441062927246, + "p99": 4029.888153076172 + }, + "roundtrip": { + "p50": 592.5440192222595, + "p90": 612.2559905052185, + "p95": 4130.943775177002, + "p99": 4731.040000915527 + }, + "isolatedSum": { + "p50": 625.7279813289642, + "p90": 700.6399929523468, + "p95": 3558.9120984077454, + "p99": 8808.800220489502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 1041, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4e491db3", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_955629d8", + "comparisonKey": "6972a1230c52d225", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:17.201811+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + 
"backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 316.0640001296997, + "p90": 338.20798993110657, + "p95": 4657.375812530518, + "p99": 6944.9920654296875 + }, + "combine": { + "p50": 126.20800733566284, + "p90": 196.48000597953796, + "p95": 3751.0719299316406, + "p99": 4892.416000366211 + }, + "roundtrip": { + "p50": 
453.11999320983887, + "p90": 522.6240158081055, + "p95": 4309.375762939453, + "p99": 5645.472049713135 + }, + "isolatedSum": { + "p50": 442.27200746536255, + "p90": 534.6879959106445, + "p95": 8408.447742462158, + "p99": 11837.408065795898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 318.1439936161041, + "p90": 344.7999954223633, + "p95": 3928.4160137176514, + "p99": 5554.495811462402 + }, + "combine": { + "p50": 126.52799487113953, + "p90": 135.3600025177002, + "p95": 153.82400155067444, + "p99": 4928.415775299072 + }, + "roundtrip": { + "p50": 454.5919895172119, + "p90": 505.2480101585388, + "p95": 4149.248123168945, + "p99": 6071.167945861816 + }, + "isolatedSum": { + "p50": 444.67198848724365, + "p90": 480.1599979400635, + "p95": 4082.240015268326, + "p99": 10482.911586761475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 333.0560028553009, + "p90": 457.63200521469116, + "p95": 3883.903980255127, + "p99": 5010.528087615967 + }, + "combine": { + "p50": 128.35200130939484, + "p90": 173.92000555992126, + "p95": 181.88799917697906, + "p99": 4636.47985458374 + }, + "roundtrip": { + "p50": 481.02399706840515, + "p90": 616.3520216941833, + "p95": 926.9440174102783, + "p99": 5205.599784851074 + }, + "isolatedSum": { + "p50": 461.40800416469574, + "p90": 631.5520107746124, + "p95": 4065.791979432106, + "p99": 9647.007942199707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + 
"recvTokensMax": 154, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 334.30400490760803, + "p90": 427.264004945755, + "p95": 516.8319940567017, + "p99": 5001.791954040527 + }, + "combine": { + "p50": 128.80000472068787, + "p90": 171.74400389194489, + "p95": 192.99200177192688, + "p99": 4904.416084289551 + }, + "roundtrip": { + "p50": 467.3599898815155, + "p90": 519.9999809265137, + "p95": 670.527994632721, + "p99": 5485.824108123779 + }, + "isolatedSum": { + "p50": 463.1040096282959, + "p90": 599.0080088376999, + "p95": 709.8239958286285, + "p99": 9906.208038330078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 335.00799536705017, + "p90": 426.56001448631287, + "p95": 517.952024936676, + "p99": 4681.280136108398 + }, + "combine": { + "p50": 128.03199887275696, + "p90": 170.8800047636032, + "p95": 175.77600479125977, + "p99": 4340.703964233398 + }, + "roundtrip": { + "p50": 481.3440144062042, + "p90": 629.9200057983398, + "p95": 4130.176067352295, + "p99": 5587.87202835083 + }, + "isolatedSum": { + "p50": 463.03999423980713, + "p90": 597.4400192499161, + "p95": 693.7280297279358, + "p99": 9021.984100341797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 364.76799845695496, + "p90": 438.6880099773407, + "p95": 463.45600485801697, + "p99": 4455.647945404053 + }, + "combine": { + "p50": 145.88800072669983, + "p90": 171.51999473571777, + "p95": 
179.77599799633026, + "p99": 4658.783912658691 + }, + "roundtrip": { + "p50": 495.712012052536, + "p90": 628.8639903068542, + "p95": 2386.6240978240967, + "p99": 5213.40799331665 + }, + "isolatedSum": { + "p50": 510.6559991836548, + "p90": 610.2080047130585, + "p95": 643.2320028543472, + "p99": 9114.431858062744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 418.08000206947327, + "p90": 489.0559911727905, + "p95": 1663.0079746246338, + "p99": 4502.592086791992 + }, + "combine": { + "p50": 216.38399362564087, + "p90": 221.88800573349, + "p95": 226.23999416828156, + "p99": 4120.639801025391 + }, + "roundtrip": { + "p50": 627.9039978981018, + "p90": 710.0480198860168, + "p95": 3292.9599285125732, + "p99": 4578.432083129883 + }, + "isolatedSum": { + "p50": 634.4639956951141, + "p90": 710.9439969062805, + "p95": 1889.2479687929153, + "p99": 8623.231887817383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 505.5360198020935, + "p90": 556.5440058708191, + "p95": 3381.983995437622, + "p99": 4427.807807922363 + }, + "combine": { + "p50": 381.72799348831177, + "p90": 389.72800970077515, + "p95": 1965.3760194778442, + "p99": 3096.3521003723145 + }, + "roundtrip": { + "p50": 877.8560161590576, + "p90": 946.5919733047485, + "p95": 3198.5599994659424, + "p99": 3992.255926132202 + }, + "isolatedSum": { + "p50": 887.2640132904053, + "p90": 946.2720155715942, + "p95": 5347.360014915466, + "p99": 7524.159908294678 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f1f0b361", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_56a428b6", + "comparisonKey": "077b263ef57cb63c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:39.813455+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 315.3280019760132, + "p90": 335.6800079345703, + "p95": 4159.3918800354, + "p99": 5041.344165802002 + }, + "combine": { + "p50": 125.37600100040436, + "p90": 135.74400544166565, + "p95": 197.53600656986237, + "p99": 4986.271858215332 + }, + "roundtrip": { + "p50": 455.1039934158325, + "p90": 576.7359733581543, + "p95": 4479.648113250732, + "p99": 5515.488147735596 + }, + "isolatedSum": { + "p50": 440.70400297641754, + "p90": 471.42401337623596, + "p95": 4356.927886605263, + "p99": 10027.616024017334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 316.25598669052124, + "p90": 340.1919901371002, + "p95": 4104.383945465088, + "p99": 5472.320079803467 + }, + "combine": { + "p50": 124.41600114107132, + "p90": 132.9279989004135, + "p95": 194.36800479888916, + "p99": 4726.687908172607 + }, + "roundtrip": { + "p50": 455.1999866962433, + "p90": 504.89598512649536, + "p95": 4324.927806854248, + "p99": 5565.728187561035 + }, + "isolatedSum": { + "p50": 440.67198783159256, + "p90": 473.11998903751373, + "p95": 4298.751950263977, + "p99": 10199.007987976074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 333.0239951610565, + "p90": 451.07200741767883, + "p95": 576.8319964408875, + "p99": 4878.111839294434 + }, + "combine": { + "p50": 126.0479986667633, + "p90": 182.8799992799759, + "p95": 204.92799580097198, + "p99": 4605.1201820373535 + }, + "roundtrip": { + "p50": 478.4959852695465, + "p90": 659.0719819068909, + "p95": 1213.7600183486938, + "p99": 5382.368087768555 + }, + "isolatedSum": { + "p50": 459.0719938278198, + "p90": 633.9520066976547, + "p95": 781.7599922418594, + "p99": 9483.232021331787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 325.1839876174927, + "p90": 350.5280017852783, + "p95": 526.3040065765381, + "p99": 5047.776222229004 + }, + "combine": { + "p50": 124.89599734544754, + "p90": 132.7040046453476, + "p95": 140.9280002117157, + "p99": 4444.863796234131 + }, + "roundtrip": { + "p50": 467.20001101493835, + "p90": 558.3680272102356, + "p95": 4176.064014434814, + "p99": 5440.959930419922 + }, + "isolatedSum": { + "p50": 450.0799849629402, + "p90": 483.2320064306259, + "p95": 667.2320067882538, + "p99": 9492.640018463135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 330.7200074195862, + "p90": 436.6399943828583, + "p95": 1342.3680067062378, + "p99": 4962.33606338501 + }, + "combine": { + "p50": 125.59999525547028, + "p90": 133.05599987506866, + "p95": 145.02400159835815, + "p99": 4719.6478843688965 + }, + "roundtrip": { + "p50": 467.8719937801361, + "p90": 542.1760082244873, + "p95": 
4305.11999130249, + "p99": 5472.064018249512 + }, + "isolatedSum": { + "p50": 456.32000267505646, + "p90": 569.6959942579269, + "p95": 1487.392008304596, + "p99": 9681.983947753906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 342.72000193595886, + "p90": 364.9919927120209, + "p95": 545.8559989929199, + "p99": 4980.5121421813965 + }, + "combine": { + "p50": 125.47199428081512, + "p90": 133.95200669765472, + "p95": 153.31199765205383, + "p99": 4555.456161499023 + }, + "roundtrip": { + "p50": 483.5520088672638, + "p90": 549.3760108947754, + "p95": 3259.200096130371, + "p99": 5318.687915802002 + }, + "isolatedSum": { + "p50": 468.191996216774, + "p90": 498.9439994096756, + "p95": 699.1679966449738, + "p99": 9535.96830368042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 351.29600763320923, + "p90": 483.0720126628876, + "p95": 1045.6000566482544, + "p99": 4775.1359939575195 + }, + "combine": { + "p50": 150.94399452209473, + "p90": 191.48799777030945, + "p95": 201.31200551986694, + "p99": 4438.623905181885 + }, + "roundtrip": { + "p50": 496.288001537323, + "p90": 688.9280080795288, + "p95": 3880.095958709717, + "p99": 4842.976093292236 + }, + "isolatedSum": { + "p50": 502.24000215530396, + "p90": 674.560010433197, + "p95": 1246.9120621681213, + "p99": 9213.759899139404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 
6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 371.5519905090332, + "p90": 460.83199977874756, + "p95": 550.4639744758606, + "p99": 4815.008163452148 + }, + "combine": { + "p50": 247.26399779319763, + "p90": 261.6960108280182, + "p95": 451.51999592781067, + "p99": 3648.7040519714355 + }, + "roundtrip": { + "p50": 596.9600081443787, + "p90": 727.2639870643616, + "p95": 3811.9359016418457, + "p99": 4745.247840881348 + }, + "isolatedSum": { + "p50": 618.8159883022308, + "p90": 722.5280106067657, + "p95": 1001.9839704036713, + "p99": 8463.712215423584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-688570bb", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_f33c868c", + "comparisonKey": "943bca112e9307e2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:59.432143+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 318.56000423431396, + "p90": 338.20798993110657, + "p95": 4134.335994720459, + "p99": 5522.592067718506 + }, + "combine": { + "p50": 128.60800325870514, + "p90": 138.08000087738037, + "p95": 184.51200425624847, + "p99": 5090.464115142822 + }, + "roundtrip": { + "p50": 457.2480022907257, + "p90": 482.5280010700226, + "p95": 3670.9439754486084, + "p99": 5834.559917449951 + }, + "isolatedSum": { + "p50": 447.1680074930191, + "p90": 476.28799080848694, + "p95": 4318.8479989767075, + "p99": 10613.056182861328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 318.87999176979065, + "p90": 336.2880051136017, + "p95": 4044.9600219726562, + "p99": 5692.319869995117 + }, + "combine": 
{ + "p50": 127.87200510501862, + "p90": 133.5040032863617, + "p95": 137.08800077438354, + "p99": 4126.431941986084 + }, + "roundtrip": { + "p50": 459.03998613357544, + "p90": 503.87197732925415, + "p95": 704.6080231666565, + "p99": 5637.663841247559 + }, + "isolatedSum": { + "p50": 446.75199687480927, + "p90": 469.7920083999634, + "p95": 4182.04802274704, + "p99": 9818.751811981201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 334.75199341773987, + "p90": 463.9680087566376, + "p95": 472.4479913711548, + "p99": 4864.192008972168 + }, + "combine": { + "p50": 131.00799918174744, + "p90": 196.76800072193146, + "p95": 207.2959989309311, + "p99": 4695.295810699463 + }, + "roundtrip": { + "p50": 478.39999198913574, + "p90": 652.6079773902893, + "p95": 751.3920068740845, + "p99": 5381.504058837891 + }, + "isolatedSum": { + "p50": 465.7599925994873, + "p90": 660.736009478569, + "p95": 679.7439903020859, + "p99": 9559.48781967163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 328.41598987579346, + "p90": 344.67199444770813, + "p95": 396.64000272750854, + "p99": 4950.431823730469 + }, + "combine": { + "p50": 128.48000228405, + "p90": 135.23200154304504, + "p95": 138.20800185203552, + "p99": 4003.0078887939453 + }, + "roundtrip": { + "p50": 468.4480130672455, + "p90": 500.67198276519775, + "p95": 671.8400120735168, + "p99": 5222.400188446045 + }, + "isolatedSum": { + "p50": 456.89599215984344, + "p90": 479.9039959907532, + "p95": 534.8480045795441, + "p99": 
8953.439712524414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 331.9680094718933, + "p90": 425.24799704551697, + "p95": 488.2879853248596, + "p99": 5005.695819854736 + }, + "combine": { + "p50": 128.4160017967224, + "p90": 135.00800728797913, + "p95": 141.05600118637085, + "p99": 4697.023868560791 + }, + "roundtrip": { + "p50": 469.2479968070984, + "p90": 501.3120174407959, + "p95": 634.4640254974365, + "p99": 5220.287799835205 + }, + "isolatedSum": { + "p50": 460.3840112686157, + "p90": 560.2560043334961, + "p95": 629.3439865112305, + "p99": 9702.719688415527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 347.26399183273315, + "p90": 362.7200126647949, + "p95": 387.1360123157501, + "p99": 4892.831802368164 + }, + "combine": { + "p50": 128.60800325870514, + "p90": 134.71999764442444, + "p95": 137.82399892807007, + "p99": 179.6800047159195 + }, + "roundtrip": { + "p50": 488.0000054836273, + "p90": 518.8159942626953, + "p95": 727.4240255355835, + "p99": 5505.216121673584 + }, + "isolatedSum": { + "p50": 475.8719950914383, + "p90": 497.44001030921936, + "p95": 524.9600112438202, + "p99": 5072.511807084084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 352.8960049152374, + "p90": 
479.16799783706665, + "p95": 524.4799852371216, + "p99": 5104.095935821533 + }, + "combine": { + "p50": 150.04800260066986, + "p90": 196.99199497699738, + "p95": 202.55999267101288, + "p99": 4277.599811553955 + }, + "roundtrip": { + "p50": 495.03999948501587, + "p90": 681.9519996643066, + "p95": 981.8559885025024, + "p99": 5231.904029846191 + }, + "isolatedSum": { + "p50": 502.9440075159073, + "p90": 676.159992814064, + "p95": 727.0399779081345, + "p99": 9381.695747375488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 374.2400109767914, + "p90": 440.3519928455353, + "p95": 455.32798767089844, + "p99": 4813.888072967529 + }, + "combine": { + "p50": 248.22400510311127, + "p90": 258.4959864616394, + "p95": 3171.3600158691406, + "p99": 4005.4078102111816 + }, + "roundtrip": { + "p50": 599.4880199432373, + "p90": 682.59197473526, + "p95": 3933.40802192688, + "p99": 4852.608203887939 + }, + "isolatedSum": { + "p50": 622.4640160799026, + "p90": 698.8479793071747, + "p95": 3626.688003540039, + "p99": 8819.295883178711 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-839c066a", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_2a048167", + "comparisonKey": "19b0f02d321c017e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:38.407505+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": 
"nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 365.34398794174194, + "p90": 386.59200072288513, + "p95": 479.10401225090027, + "p99": 4273.280143737793 + }, + "combine": { + "p50": 248.1279969215393, + "p90": 256.03199005126953, + "p95": 1133.0879926681519, + "p99": 4131.487846374512 + }, + "roundtrip": { + "p50": 593.280017375946, + "p90": 675.3280162811279, + 
"p95": 3885.7600688934326, + "p99": 4745.952129364014 + }, + "isolatedSum": { + "p50": 613.4719848632812, + "p90": 642.6239907741547, + "p95": 1612.1920049190521, + "p99": 8404.767990112305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 405.2799940109253, + "p90": 422.7519929409027, + "p95": 538.6559963226318, + "p99": 4026.6880989074707 + }, + "combine": { + "p50": 422.7519929409027, + "p90": 432.73600935935974, + "p95": 2363.2640838623047, + "p99": 3395.456075668335 + }, + "roundtrip": { + "p50": 809.503972530365, + "p90": 848.3840227127075, + "p95": 3533.6320400238037, + "p99": 4111.584186553955 + }, + "isolatedSum": { + "p50": 828.031986951828, + "p90": 855.4880023002625, + "p95": 2901.9200801849365, + "p99": 7422.144174575806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 494.81600522994995, + "p90": 526.9119739532471, + "p95": 2987.135887145996, + "p99": 3881.216049194336 + }, + "combine": { + "p50": 763.2960081100464, + "p90": 1681.8560361862183, + "p95": 1964.8640155792236, + "p99": 2743.040084838867 + }, + "roundtrip": { + "p50": 1234.1439723968506, + "p90": 2627.3279190063477, + "p95": 3035.2001190185547, + "p99": 3657.0560932159424 + }, + "isolatedSum": { + "p50": 1258.1120133399963, + "p90": 2208.7680101394653, + "p95": 4951.99990272522, + "p99": 6624.256134033203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + 
"recvTokensMax": 4198, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 720.4800248146057, + "p90": 801.5999794006348, + "p95": 2905.1198959350586, + "p99": 3408.0960750579834 + }, + "combine": { + "p50": 1441.5040016174316, + "p90": 1474.1120338439941, + "p95": 1801.8239736557007, + "p99": 2046.0801124572754 + }, + "roundtrip": { + "p50": 2127.7759075164795, + "p90": 2807.6798915863037, + "p95": 3317.919969558716, + "p99": 3687.2639656066895 + }, + "isolatedSum": { + "p50": 2161.9840264320374, + "p90": 2275.712013244629, + "p95": 4706.943869590759, + "p99": 5454.176187515259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8294, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1104.416012763977, + "p90": 2052.7679920196533, + "p95": 2542.527914047241, + "p99": 3052.4160861968994 + }, + "combine": { + "p50": 2792.8640842437744, + "p90": 2836.8639945983887, + "p95": 2929.7280311584473, + "p99": 3632.2240829467773 + }, + "roundtrip": { + "p50": 3853.8880348205566, + "p90": 3902.30393409729, + "p95": 4060.192108154297, + "p99": 4404.99210357666 + }, + "isolatedSum": { + "p50": 3897.2800970077515, + "p90": 4889.631986618042, + "p95": 5472.2559452056885, + "p99": 6684.640169143677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1904.736042022705, + "p90": 2042.815923690796, + "p95": 2240.031957626343, + "p99": 2710.047960281372 + }, + "combine": { + "p50": 
5477.119922637939, + "p90": 5525.504112243652, + "p95": 5542.655944824219, + "p99": 5568.575859069824 + }, + "roundtrip": { + "p50": 7298.1438636779785, + "p90": 7369.152069091797, + "p95": 7394.239902496338, + "p99": 7472.640037536621 + }, + "isolatedSum": { + "p50": 7381.8559646606445, + "p90": 7568.320035934448, + "p95": 7782.6879024505615, + "p99": 8278.623819351196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 32967, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ee434121", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_ad30ea71", + "comparisonKey": "8c2856dadba26eb6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:38.835152+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 358.8480055332184, + "p90": 374.1759955883026, + "p95": 398.71999621391296, + "p99": 4128.384113311768 + }, + "combine": { + "p50": 241.56799912452698, + "p90": 249.02400374412537, + "p95": 1566.6879415512085, + "p99": 4175.583839416504 + }, + "roundtrip": { + "p50": 580.3520083427429, + "p90": 601.8880009651184, + "p95": 2158.6239337921143, + "p99": 4763.872146606445 + }, + "isolatedSum": { + "p50": 600.4160046577454, + "p90": 623.199999332428, + "p95": 1965.4079377651215, + "p99": 8303.967952728271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 401.7919898033142, + "p90": 418.08000206947327, + "p95": 442.656010389328, + "p99": 4561.15198135376 + }, + "combine": { + "p50": 420.6399917602539, + "p90": 429.53601479530334, + "p95": 2477.7278900146484, + "p99": 3727.5519371032715 + }, + "roundtrip": { + "p50": 807.3279857635498, + "p90": 832.1599960327148, + "p95": 3350.97599029541, + "p99": 4074.240207672119 + }, + "isolatedSum": { + "p50": 
822.4319815635681, + "p90": 847.6160168647766, + "p95": 2920.3839004039764, + "p99": 8288.703918457031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 490.6879961490631, + "p90": 513.3119821548462, + "p95": 2236.2239360809326, + "p99": 3978.7518978118896 + }, + "combine": { + "p50": 758.5920095443726, + "p90": 1291.9360399246216, + "p95": 1895.840048789978, + "p99": 2673.151969909668 + }, + "roundtrip": { + "p50": 1229.7279834747314, + "p90": 1337.8560543060303, + "p95": 3148.2880115509033, + "p99": 3617.9521083831787 + }, + "isolatedSum": { + "p50": 1249.2800056934357, + "p90": 1805.2480220794678, + "p95": 4132.063984870911, + "p99": 6651.903867721558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 713.5360240936279, + "p90": 756.9599747657776, + "p95": 2571.9358921051025, + "p99": 3295.680046081543 + }, + "combine": { + "p50": 1439.296007156372, + "p90": 1467.4880504608154, + "p95": 1645.5999612808228, + "p99": 2077.3439407348633 + }, + "roundtrip": { + "p50": 2121.2480068206787, + "p90": 2470.8480834960938, + "p95": 3237.0240688323975, + "p99": 3596.4479446411133 + }, + "isolatedSum": { + "p50": 2152.83203125, + "p90": 2224.448025226593, + "p95": 4217.535853385925, + "p99": 5373.023986816406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1117.5040006637573, + "p90": 1239.4239902496338, + "p95": 2465.343952178955, + "p99": 2866.6560649871826 + }, + "combine": { + "p50": 2787.519931793213, + "p90": 2812.8321170806885, + "p95": 2822.943925857544, + "p99": 2860.2240085601807 + }, + "roundtrip": { + "p50": 3847.3598957061768, + "p90": 3928.7679195404053, + "p95": 4103.871822357178, + "p99": 4559.167861938477 + }, + "isolatedSum": { + "p50": 3905.02393245697, + "p90": 4052.2561073303223, + "p95": 5288.287878036499, + "p99": 5726.880073547363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1924.288034439087, + "p90": 2047.7759838104248, + "p95": 2312.4160766601562, + "p99": 2757.8558921813965 + }, + "combine": { + "p50": 5473.343849182129, + "p90": 5519.999980926514, + "p95": 5532.959938049316, + "p99": 5561.952114105225 + }, + "roundtrip": { + "p50": 7320.96004486084, + "p90": 7380.224227905273, + "p95": 7399.040222167969, + "p99": 7438.015937805176 + }, + "isolatedSum": { + "p50": 7397.631883621216, + "p90": 7567.7759647369385, + "p95": 7845.376014709473, + "p99": 8319.808006286621 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e4beb0a5", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_3addc428", + "comparisonKey": "c7f04f58d17f13e9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:19.811085+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 362.2719943523407, + "p90": 439.5520091056824, + "p95": 580.7679891586304, + "p99": 4255.231857299805 + }, + "combine": { + "p50": 240.60800671577454, + "p90": 
248.03200364112854, + "p95": 366.6880130767822, + "p99": 4383.264064788818 + }, + "roundtrip": { + "p50": 581.8560123443604, + "p90": 611.7439866065979, + "p95": 2499.232053756714, + "p99": 4931.392192840576 + }, + "isolatedSum": { + "p50": 602.8800010681152, + "p90": 687.5840127468109, + "p95": 947.4560022354126, + "p99": 8638.495922088623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 499.64800477027893, + "p90": 594.6879982948303, + "p95": 1765.663981437683, + "p99": 4418.464183807373 + }, + "combine": { + "p50": 762.4959945678711, + "p90": 1322.0800161361694, + "p95": 1876.5759468078613, + "p99": 2664.191961288452 + }, + "roundtrip": { + "p50": 1233.9199781417847, + "p90": 1645.9519863128662, + "p95": 3207.808017730713, + "p99": 3752.351999282837 + }, + "isolatedSum": { + "p50": 1262.14399933815, + "p90": 1916.7680144309998, + "p95": 3642.2399282455444, + "p99": 7082.656145095825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1108.5760593414307, + "p90": 1935.9999895095825, + "p95": 2561.568021774292, + "p99": 3037.5359058380127 + }, + "combine": { + "p50": 2788.0959510803223, + "p90": 2811.840057373047, + "p95": 2820.8959102630615, + "p99": 2835.2959156036377 + }, + "roundtrip": { + "p50": 3853.0240058898926, + "p90": 3910.9439849853516, + "p95": 4061.8557929992676, + "p99": 4533.696174621582 + }, + "isolatedSum": { + "p50": 3896.672010421753, + "p90": 4747.840046882629, + "p95": 5382.4639320373535, + "p99": 5872.83182144165 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-585060a4", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_a180493d", + "comparisonKey": "df58419ed3164a35", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:35.104861+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 368.47999691963196, + "p90": 381.28000497817993, + "p95": 415.16798734664917, + "p99": 4444.191932678223 + }, + "combine": { + "p50": 248.89600276947021, + "p90": 255.48800826072693, + "p95": 287.1359884738922, + "p99": 4270.9760665893555 + }, + "roundtrip": { + "p50": 597.6639986038208, + "p90": 619.2640066146851, + "p95": 3827.2318840026855, + "p99": 4739.071846008301 + }, + "isolatedSum": { + "p50": 617.3759996891022, + "p90": 636.7680132389069, + "p95": 702.3039758205414, + "p99": 8715.167999267578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 415.2959883213043, + "p90": 426.7840087413788, + "p95": 479.99998927116394, + "p99": 4281.983852386475 + }, + "combine": { + "p50": 430.88001012802124, + "p90": 439.07201290130615, + "p95": 2686.5921020507812, + "p99": 3847.7120399475098 + }, + "roundtrip": { + "p50": 823.9679932594299, + "p90": 842.7519798278809, + "p95": 3457.6001167297363, + "p99": 4322.944164276123 + }, + "isolatedSum": { + "p50": 846.1759984493256, + "p90": 865.8560216426849, + "p95": 3166.592091321945, + "p99": 8129.695892333984 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 2304, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 514.9760246276855, + "p90": 661.1199975013733, + "p95": 2148.224115371704, + "p99": 4107.935905456543 + }, + "combine": { + "p50": 778.6880135536194, + "p90": 1321.984052658081, + "p95": 1803.4240007400513, + "p99": 2544.0640449523926 + }, + "roundtrip": { + "p50": 1264.1279697418213, + "p90": 2064.9280548095703, + "p95": 3082.848072052002, + "p99": 3790.112018585205 + }, + "isolatedSum": { + "p50": 1293.664038181305, + "p90": 1983.1040501594543, + "p95": 3951.6481161117554, + "p99": 6651.999950408936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 4608, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 758.9439749717712, + "p90": 806.6880106925964, + "p95": 2730.207920074463, + "p99": 3234.3039512634277 + }, + "combine": { + "p50": 1468.1919813156128, + "p90": 1509.984016418457, + "p95": 1690.8799409866333, + "p99": 2005.8879852294922 + }, + "roundtrip": { + "p50": 2184.3841075897217, + "p90": 2710.047960281372, + "p95": 3274.1758823394775, + "p99": 3559.0078830718994 + }, + "isolatedSum": { + "p50": 2227.135956287384, + "p90": 2316.6720271110535, + "p95": 4421.087861061096, + "p99": 5240.19193649292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 9216, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1197.6640224456787, + "p90": 1625.7599592208862, + "p95": 2397.023916244507, + "p99": 3037.184000015259 + }, + "combine": { + "p50": 2841.5040969848633, + "p90": 2877.984046936035, + "p95": 2888.576030731201, + "p99": 2908.2560539245605 + }, + "roundtrip": { + "p50": 
3965.6639099121094, + "p90": 4030.5919647216797, + "p95": 4376.095771789551, + "p99": 4671.232223510742 + }, + "isolatedSum": { + "p50": 4039.168119430542, + "p90": 4503.744006156921, + "p95": 5285.599946975708, + "p99": 5945.440053939819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 18432, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2024.735927581787, + "p90": 2198.68803024292, + "p95": 2480.89599609375, + "p99": 2723.072052001953 + }, + "combine": { + "p50": 5581.791877746582, + "p90": 5661.8242263793945, + "p95": 5694.88000869751, + "p99": 5724.8640060424805 + }, + "roundtrip": { + "p50": 7511.199951171875, + "p90": 7572.735786437988, + "p95": 7599.679946899414, + "p99": 7641.695976257324 + }, + "isolatedSum": { + "p50": 7606.527805328369, + "p90": 7860.512256622314, + "p95": 8175.77600479126, + "p99": 8447.936058044434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 36864, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-59fc845d", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h100_9a966a67", + "comparisonKey": "68553012dea96d65", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:32.586211+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 387.1679902076721, + "p90": 410.6239974498749, + "p95": 490.30399322509766, + "p99": 4410.304069519043 + }, + "combine": { + "p50": 260.80000400543213, + "p90": 268.22400093078613, + "p95": 495.10401487350464, + "p99": 3879.6799182891846 + }, + "roundtrip": { + "p50": 637.6000046730042, + "p90": 673.632025718689, + "p95": 3447.7438926696777, + "p99": 4663.23184967041 + }, + "isolatedSum": { + "p50": 647.9679942131042, + "p90": 678.847998380661, + "p95": 985.4080080986023, + "p99": 8289.983987808228 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 454.17600870132446, + "p90": 476.28799080848694, + "p95": 1421.023964881897, + "p99": 4291.967868804932 + }, + "combine": { + "p50": 475.71200132369995, + "p90": 487.3279929161072, + "p95": 2364.032030105591, + "p99": 2992.4800395965576 + }, + "roundtrip": { + "p50": 916.6079759597778, + "p90": 990.8159971237183, + "p95": 3393.02396774292, + "p99": 3998.0480670928955 + }, + "isolatedSum": { + "p50": 929.8880100250244, + "p90": 963.6159837245941, + "p95": 3785.055994987488, + "p99": 7284.447908401489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 3755, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 604.8319935798645, + "p90": 650.3999829292297, + "p95": 2713.599920272827, + "p99": 3721.856117248535 + }, + "combine": { + "p50": 878.6879777908325, + "p90": 1383.6159706115723, + "p95": 1724.9280214309692, + "p99": 2356.44793510437 + }, + "roundtrip": { + "p50": 1457.792043685913, + "p90": 2570.336103439331, + "p95": 3109.855890274048, + "p99": 3522.592067718506 + }, + "isolatedSum": { + "p50": 1483.519971370697, + "p90": 2034.015953540802, + "p95": 4438.527941703796, + "p99": 6078.304052352905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 7556, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 946.4640021324158, + "p90": 
1066.1439895629883, + "p95": 2484.0641021728516, + "p99": 2933.759927749634 + }, + "combine": { + "p50": 1686.560034751892, + "p90": 1702.3999691009521, + "p95": 1709.7920179367065, + "p99": 1828.927993774414 + }, + "roundtrip": { + "p50": 2576.2879848480225, + "p90": 2809.0879917144775, + "p95": 3250.2079010009766, + "p99": 3727.679967880249 + }, + "isolatedSum": { + "p50": 2633.024036884308, + "p90": 2768.5439586639404, + "p95": 4193.856120109558, + "p99": 4762.687921524048 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 15163, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1554.975986480713, + "p90": 1810.304045677185, + "p95": 2120.448112487793, + "p99": 2525.4719257354736 + }, + "combine": { + "p50": 3304.4159412384033, + "p90": 3332.3519229888916, + "p95": 3338.1760120391846, + "p99": 3350.559949874878 + }, + "roundtrip": { + "p50": 4742.623805999756, + "p90": 4849.631786346436, + "p95": 4874.239921569824, + "p99": 5065.47212600708 + }, + "isolatedSum": { + "p50": 4859.391927719116, + "p90": 5142.655968666077, + "p95": 5458.6241245269775, + "p99": 5876.031875610352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 30215, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2897.088050842285, + "p90": 2961.9200229644775, + "p95": 3007.391929626465, + "p99": 3150.144100189209 + }, + "combine": { + "p50": 6466.495990753174, + "p90": 6626.0480880737305, + "p95": 6648.608207702637, + "p99": 6699.359893798828 + }, + "roundtrip": { + "p50": 9062.687873840332, + "p90": 9262.751579284668, + "p95": 9282.68814086914, + "p99": 
9312.000274658203 + }, + "isolatedSum": { + "p50": 9363.584041595459, + "p90": 9587.968111038208, + "p95": 9656.000137329102, + "p99": 9849.503993988037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 60512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dd2e687c", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_a847bfb7", + "comparisonKey": "ebcbb6dd8788544a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:26.896246+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 370.65601348876953, + "p90": 390.9119963645935, + "p95": 443.4880018234253, + "p99": 4336.128234863281 + }, + "combine": { + "p50": 247.0400035381317, + "p90": 254.01601195335388, + "p95": 402.6559889316559, + "p99": 4291.903972625732 + }, + "roundtrip": { + "p50": 595.3599810600281, + "p90": 620.3839778900146, + "p95": 3586.591958999634, + "p99": 4949.312210083008 + }, + "isolatedSum": { + "p50": 617.6960170269012, + "p90": 644.9280083179474, + "p95": 846.1439907550812, + "p99": 8628.032207489014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 1080, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 415.4239892959595, + "p90": 429.82399463653564, + "p95": 547.5519895553589, + "p99": 3977.152109146118 + }, + "combine": { + "p50": 423.007994890213, + "p90": 466.2080109119415, + "p95": 2451.040029525757, + "p99": 3227.9999256134033 + }, + "roundtrip": { + "p50": 815.8079981803894, + "p90": 842.5920009613037, + "p95": 3303.328037261963, + "p99": 4157.792091369629 + }, + "isolatedSum": { + "p50": 838.4319841861725, + "p90": 896.0320055484772, + "p95": 2998.5920190811157, + "p99": 7205.1520347595215 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 2102, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 503.07202339172363, + "p90": 522.0159888267517, + "p95": 2160.4158878326416, + "p99": 4054.111957550049 + }, + "combine": { + "p50": 765.3120160102844, + "p90": 1391.584038734436, + "p95": 1847.3600149154663, + "p99": 2652.575969696045 + }, + "roundtrip": { + "p50": 1239.7760152816772, + "p90": 2475.1360416412354, + "p95": 3215.1360511779785, + "p99": 3765.727996826172 + }, + "isolatedSum": { + "p50": 1268.384039402008, + "p90": 1913.6000275611877, + "p95": 4007.775902748108, + "p99": 6706.687927246094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 4207, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 726.0479927062988, + "p90": 780.5119752883911, + "p95": 2793.855905532837, + "p99": 3417.9840087890625 + }, + "combine": { + "p50": 1445.7919597625732, + "p90": 1514.464020729065, + "p95": 1694.5600509643555, + "p99": 1991.8080568313599 + }, + "roundtrip": { + "p50": 2135.3600025177, + "p90": 2569.983959197998, + "p95": 3152.0960330963135, + "p99": 3512.511968612671 + }, + "isolatedSum": { + "p50": 2171.839952468872, + "p90": 2294.975996017456, + "p95": 4488.415956497192, + "p99": 5409.792065620422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8365, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1113.312005996704, + "p90": 
1284.0640544891357, + "p95": 2495.3598976135254, + "p99": 2942.528009414673 + }, + "combine": { + "p50": 2788.7039184570312, + "p90": 2811.039924621582, + "p95": 2819.391965866089, + "p99": 2840.480089187622 + }, + "roundtrip": { + "p50": 3852.447986602783, + "p90": 3928.607940673828, + "p95": 4107.48815536499, + "p99": 4508.19206237793 + }, + "isolatedSum": { + "p50": 3902.0159244537354, + "p90": 4095.103979110718, + "p95": 5314.751863479614, + "p99": 5783.008098602295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 16483, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1902.4959802627563, + "p90": 2054.431915283203, + "p95": 2497.5359439849854, + "p99": 2761.023998260498 + }, + "combine": { + "p50": 5473.919868469238, + "p90": 5528.831958770752, + "p95": 5540.671825408936, + "p99": 5573.184013366699 + }, + "roundtrip": { + "p50": 7309.023857116699, + "p90": 7363.103866577148, + "p95": 7394.752025604248, + "p99": 7437.727928161621 + }, + "isolatedSum": { + "p50": 7376.415848731995, + "p90": 7583.263874053955, + "p95": 8038.207769393921, + "p99": 8334.208011627197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 32777, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bfabfbe1", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_728444b7", + "comparisonKey": "1df1b47a7c02fe69", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:33.843434+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + 
"sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 373.4399974346161, + "p90": 387.7759873867035, + "p95": 406.23998641967773, + "p99": 3976.0639667510986 + }, + "combine": { + "p50": 246.17600440979004, + "p90": 252.83199548721313, + "p95": 295.52000761032104, + "p99": 
4060.7681274414062 + }, + "roundtrip": { + "p50": 599.1680026054382, + "p90": 619.1999912261963, + "p95": 824.8639702796936, + "p99": 4931.039810180664 + }, + "isolatedSum": { + "p50": 619.6160018444061, + "p90": 640.6079828739166, + "p95": 701.7599940299988, + "p99": 8036.832094192505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 1064, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 419.42399740219116, + "p90": 433.1519901752472, + "p95": 457.8239917755127, + "p99": 3952.0959854125977 + }, + "combine": { + "p50": 423.99999499320984, + "p90": 431.8400025367737, + "p95": 2500.9920597076416, + "p99": 3473.599910736084 + }, + "roundtrip": { + "p50": 829.5040130615234, + "p90": 915.3599739074707, + "p95": 3286.7839336395264, + "p99": 4243.648052215576 + }, + "isolatedSum": { + "p50": 843.423992395401, + "p90": 864.9919927120209, + "p95": 2958.8160514831543, + "p99": 7425.695896148682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 2081, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 504.7680139541626, + "p90": 521.9839811325073, + "p95": 621.5680241584778, + "p99": 4217.343807220459 + }, + "combine": { + "p50": 762.9759907722473, + "p90": 790.5920147895813, + "p95": 1736.3840341567993, + "p99": 2564.6719932556152 + }, + "roundtrip": { + "p50": 1241.2480115890503, + "p90": 1333.5039615631104, + "p95": 3162.463903427124, + "p99": 3701.119899749756 + }, + "isolatedSum": { + "p50": 1267.74400472641, + "p90": 1312.5759959220886, + "p95": 2357.952058315277, + "p99": 6782.015800476074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 4153, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 728.5760045051575, + "p90": 771.1679935455322, + "p95": 1884.28795337677, + "p99": 3538.4960174560547 + }, + "combine": { + "p50": 1442.2719478607178, + "p90": 1486.240029335022, + "p95": 1868.7679767608643, + "p99": 1967.0720100402832 + }, + "roundtrip": { + "p50": 2131.9680213928223, + "p90": 2356.4159870147705, + "p95": 3193.1519508361816, + "p99": 3568.160057067871 + }, + "isolatedSum": { + "p50": 2170.8479523658752, + "p90": 2257.408022880554, + "p95": 3753.0559301376343, + "p99": 5505.568027496338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8313, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1126.5920400619507, + "p90": 1206.5919637680054, + "p95": 2357.2800159454346, + "p99": 2817.5361156463623 + }, + "combine": { + "p50": 2790.1759147644043, + "p90": 2815.6800270080566, + "p95": 2820.9919929504395, + "p99": 2842.911958694458 + }, + "roundtrip": { + "p50": 3870.431900024414, + "p90": 3939.6159648895264, + "p95": 4128.223896026611, + "p99": 4542.816162109375 + }, + "isolatedSum": { + "p50": 3916.767954826355, + "p90": 4022.271990776062, + "p95": 5178.272008895874, + "p99": 5660.44807434082 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 16581, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1903.648018836975, + "p90": 2069.4079399108887, + "p95": 
2478.3360958099365, + "p99": 2797.2159385681152 + }, + "combine": { + "p50": 5475.2960205078125, + "p90": 5529.2158126831055, + "p95": 5544.447898864746, + "p99": 5583.583831787109 + }, + "roundtrip": { + "p50": 7309.792041778564, + "p90": 7373.0878829956055, + "p95": 7406.847953796387, + "p99": 7469.024181365967 + }, + "isolatedSum": { + "p50": 7378.944039344788, + "p90": 7598.623752593994, + "p95": 8022.783994674683, + "p99": 8380.799770355225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 32887, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-32fd012a", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_33cbd342", + "comparisonKey": "88aa4a402879cf54", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:46.541949+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 493.21600794792175, + "p90": 508.512020111084, + "p95": 1124.7999668121338, + "p99": 3718.751907348633 + }, + "combine": { + "p50": 383.35999846458435, + "p90": 393.47198605537415, + "p95": 2617.664098739624, + "p99": 3502.624034881592 + }, + "roundtrip": { + "p50": 858.240008354187, + "p90": 956.991970539093, + "p95": 3423.840045928955, + "p99": 3877.8560161590576 + }, + "isolatedSum": { + "p50": 876.5760064125061, + "p90": 901.9840061664581, + "p95": 3742.464065551758, + "p99": 7221.375942230225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 667.6160097122192, + "p90": 743.7120079994202, + "p95": 2317.5039291381836, + "p99": 3474.112033843994 + }, + "combine": { + "p50": 709.2159986495972, + "p90": 1428.320050239563, + "p95": 1904.703974723816, + "p99": 2370.7520961761475 + }, + "roundtrip": { + "p50": 1333.6960077285767, + "p90": 2368.4799671173096, + "p95": 
2903.424024581909, + "p99": 3524.8000621795654 + }, + "isolatedSum": { + "p50": 1376.8320083618164, + "p90": 2172.032058238983, + "p95": 4222.2079038619995, + "p99": 5844.864130020142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 1027.1040201187134, + "p90": 1288.0959510803223, + "p95": 2425.0879287719727, + "p99": 2791.5520668029785 + }, + "combine": { + "p50": 1350.6879806518555, + "p90": 1366.752028465271, + "p95": 1374.9760389328003, + "p99": 1483.6159944534302 + }, + "roundtrip": { + "p50": 2297.7919578552246, + "p90": 2528.640031814575, + "p95": 2968.6079025268555, + "p99": 3402.208089828491 + }, + "isolatedSum": { + "p50": 2377.792000770569, + "p90": 2654.8479795455933, + "p95": 3800.063967704773, + "p99": 4275.168061256409 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1763.7759447097778, + "p90": 1888.8640403747559, + "p95": 1973.5360145568848, + "p99": 2114.784002304077 + }, + "combine": { + "p50": 2522.0160484313965, + "p90": 2633.471965789795, + "p95": 2645.695924758911, + "p99": 2663.7120246887207 + }, + "roundtrip": { + "p50": 4212.224006652832, + "p90": 4358.687877655029, + "p95": 4371.840000152588, + "p99": 4435.264110565186 + }, + "isolatedSum": { + "p50": 4285.791993141174, + "p90": 4522.336006164551, + "p95": 4619.231939315796, + "p99": 4778.496026992798 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + 
"recvTokensMax": 40211, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 3221.951961517334, + "p90": 3256.767988204956, + "p95": 3292.0639514923096, + "p99": 3401.9839763641357 + }, + "combine": { + "p50": 5015.5839920043945, + "p90": 5099.647998809814, + "p95": 5105.504035949707, + "p99": 5119.935989379883 + }, + "roundtrip": { + "p50": 8116.127967834473, + "p90": 8151.712417602539, + "p95": 8165.887832641602, + "p99": 8194.751739501953 + }, + "isolatedSum": { + "p50": 8237.535953521729, + "p90": 8356.41598701477, + "p95": 8397.567987442017, + "p99": 8521.919965744019 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 6738.880157470703, + "p90": 6801.248073577881, + "p95": 7087.488174438477, + "p99": 7158.175945281982 + }, + "combine": { + "p50": 10516.575813293457, + "p90": 10588.383674621582, + "p95": 10606.047630310059, + "p99": 10638.463973999023 + }, + "roundtrip": { + "p50": 17122.46322631836, + "p90": 17180.67169189453, + "p95": 17195.903778076172, + "p99": 17249.0234375 + }, + "isolatedSum": { + "p50": 17255.45597076416, + "p90": 17389.631748199463, + "p95": 17693.535804748535, + "p99": 17796.639919281006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-904a6090", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_ab04d1e8", + 
"comparisonKey": "05f95f74610a8012", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:26.501319+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 577.4080157279968, + "p90": 595.2320098876953, + 
"p95": 2221.2159633636475, + "p99": 3418.11203956604 + }, + "combine": { + "p50": 481.31200671195984, + "p90": 491.67999625205994, + "p95": 1986.143946647644, + "p99": 2708.064079284668 + }, + "roundtrip": { + "p50": 1020.4479694366455, + "p90": 1075.808048248291, + "p95": 3015.6478881835938, + "p99": 3679.8720359802246 + }, + "isolatedSum": { + "p50": 1058.7200224399567, + "p90": 1086.9120061397552, + "p95": 4207.3599100112915, + "p99": 6126.176118850708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 832.9600095748901, + "p90": 1009.8559856414795, + "p95": 2023.360013961792, + "p99": 2953.727960586548 + }, + "combine": { + "p50": 904.7039747238159, + "p90": 938.975989818573, + "p95": 1313.215970993042, + "p99": 1733.2160472869873 + }, + "roundtrip": { + "p50": 1673.8879680633545, + "p90": 2120.6719875335693, + "p95": 2615.391969680786, + "p99": 3364.0639781951904 + }, + "isolatedSum": { + "p50": 1737.663984298706, + "p90": 1948.8319754600525, + "p95": 3336.575984954834, + "p99": 4686.944007873535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 15151, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 1349.7920036315918, + "p90": 1722.1759557724, + "p95": 1967.3919677734375, + "p99": 2440.51194190979 + }, + "combine": { + "p50": 1733.9199781417847, + "p90": 1758.1119537353516, + "p95": 1762.6240253448486, + "p99": 1785.7279777526855 + }, + "roundtrip": { + "p50": 2954.14400100708, + "p90": 3082.655906677246, + "p95": 3169.152021408081, + "p99": 3534.303903579712 + }, + "isolatedSum": { 
+ "p50": 3083.7119817733765, + "p90": 3480.2879095077515, + "p95": 3730.015993118286, + "p99": 4226.239919662476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 30290, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 2465.984106063843, + "p90": 2534.3680381774902, + "p95": 2579.200029373169, + "p99": 2683.135986328125 + }, + "combine": { + "p50": 3270.944118499756, + "p90": 3412.1921062469482, + "p95": 3425.312042236328, + "p99": 3438.6239051818848 + }, + "roundtrip": { + "p50": 5581.024169921875, + "p90": 5799.168109893799, + "p95": 5822.080135345459, + "p99": 5884.191989898682 + }, + "isolatedSum": { + "p50": 5736.928224563599, + "p90": 5946.5601444244385, + "p95": 6004.512071609497, + "p99": 6121.75989151001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 60548, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 4527.488231658936, + "p90": 4556.447982788086, + "p95": 4575.87194442749, + "p99": 4632.863998413086 + }, + "combine": { + "p50": 6458.911895751953, + "p90": 6590.688228607178, + "p95": 6602.3359298706055, + "p99": 6620.096206665039 + }, + "roundtrip": { + "p50": 10867.80834197998, + "p90": 10922.528266906738, + "p95": 10943.072319030762, + "p99": 11034.943580627441 + }, + "isolatedSum": { + "p50": 10986.400127410889, + "p90": 11147.136211395264, + "p95": 11178.207874298096, + "p99": 11252.960205078125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 121046, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 8812.383651733398, + "p90": 8836.19213104248, + "p95": 8844.832420349121, + "p99": 8870.016098022461 + }, + "combine": { + "p50": 12841.631889343262, + "p90": 12924.063682556152, + "p95": 12944.255828857422, + "p99": 12969.440460205078 + }, + "roundtrip": { + "p50": 21457.632064819336, + "p90": 21581.087112426758, + "p95": 21615.903854370117, + "p99": 21658.016204833984 + }, + "isolatedSum": { + "p50": 21654.01554107666, + "p90": 21760.255813598633, + "p95": 21789.088249206543, + "p99": 21839.45655822754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 242154, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b1b9d1f3", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_a407f0c6", + "comparisonKey": "d51824302977e0b1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:24.105881+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 373.3760118484497, + "p90": 389.3440067768097, + "p95": 406.0479998588562, + "p99": 4283.328056335449 + }, + "combine": { + "p50": 245.31200528144836, + "p90": 251.583993434906, + "p95": 266.36800169944763, + "p99": 4626.976013183594 + }, + "roundtrip": { + "p50": 598.3039736747742, + "p90": 614.5600080490112, + "p95": 975.3599762916565, + "p99": 4933.951854705811 + }, + "isolatedSum": { + "p50": 618.6880171298981, + "p90": 640.9280002117157, + "p95": 672.4160015583038, + "p99": 8910.304069519043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 1049, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 420.5760061740875, + "p90": 434.688001871109, + "p95": 580.4799795150757, + 
"p99": 4597.8240966796875 + }, + "combine": { + "p50": 423.39199781417847, + "p90": 431.2320053577423, + "p95": 2354.1440963745117, + "p99": 3188.8959407806396 + }, + "roundtrip": { + "p50": 820.2239871025085, + "p90": 849.6639728546143, + "p95": 3613.055944442749, + "p99": 4251.296043395996 + }, + "isolatedSum": { + "p50": 843.968003988266, + "p90": 865.9200072288513, + "p95": 2934.6240758895874, + "p99": 7786.720037460327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 2084, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 507.423996925354, + "p90": 521.888017654419, + "p95": 3281.599998474121, + "p99": 4249.055862426758 + }, + "combine": { + "p50": 766.048014163971, + "p90": 1553.3759593963623, + "p95": 1873.568058013916, + "p99": 2733.6959838867188 + }, + "roundtrip": { + "p50": 1244.5119619369507, + "p90": 2544.800043106079, + "p95": 3233.1199645996094, + "p99": 3738.6560440063477 + }, + "isolatedSum": { + "p50": 1273.472011089325, + "p90": 2075.2639770507812, + "p95": 5155.168056488037, + "p99": 6982.751846313477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 4126, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 725.9520292282104, + "p90": 757.7919960021973, + "p95": 2883.455991744995, + "p99": 3414.8480892181396 + }, + "combine": { + "p50": 1442.6560401916504, + "p90": 1574.560046195984, + "p95": 1770.9439992904663, + "p99": 2411.871910095215 + }, + "roundtrip": { + "p50": 2132.0641040802, + "p90": 2878.9761066436768, + "p95": 3388.0960941314697, + "p99": 3641.37601852417 + }, + "isolatedSum": { + "p50": 
2168.608069419861, + "p90": 2332.352042198181, + "p95": 4654.399991035461, + "p99": 5826.7199993133545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8234, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1112.287998199463, + "p90": 1207.7120542526245, + "p95": 2534.6240997314453, + "p99": 3145.440101623535 + }, + "combine": { + "p50": 2790.112018585205, + "p90": 2813.663959503174, + "p95": 2820.9280967712402, + "p99": 2835.360050201416 + }, + "roundtrip": { + "p50": 3857.088088989258, + "p90": 3924.031972885132, + "p95": 4111.616134643555, + "p99": 4461.7600440979 + }, + "isolatedSum": { + "p50": 3902.400016784668, + "p90": 4021.3760137557983, + "p95": 5355.552196502686, + "p99": 5980.800151824951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 16480, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1904.255986213684, + "p90": 2043.071985244751, + "p95": 2382.944107055664, + "p99": 2785.9840393066406 + }, + "combine": { + "p50": 5469.791889190674, + "p90": 5522.0160484313965, + "p95": 5539.968013763428, + "p99": 5569.759845733643 + }, + "roundtrip": { + "p50": 7299.935817718506, + "p90": 7364.992141723633, + "p95": 7387.775897979736, + "p99": 7451.712131500244 + }, + "isolatedSum": { + "p50": 7374.047875404358, + "p90": 7565.0880336761475, + "p95": 7922.912120819092, + "p99": 8355.743885040283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 32889, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + } + ] + }, + { + "id": "cx-b63f7950", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_d9d7f063", + "comparisonKey": "1bca5a0421563fd0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:12.636051+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + 
"createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 413.4719967842102, + "p90": 429.31199073791504, + "p95": 765.3120160102844, + "p99": 4225.599765777588 + }, + "combine": { + "p50": 296.1919903755188, + "p90": 306.0159981250763, + "p95": 3099.9040603637695, + "p99": 3853.2159328460693 + }, + "roundtrip": { + "p50": 700.160026550293, + "p90": 728.5760045051575, + "p95": 3790.2400493621826, + "p99": 4400.576114654541 + }, + "isolatedSum": { + "p50": 709.663987159729, + "p90": 735.3279888629913, + "p95": 3865.216076374054, + "p99": 8078.815698623657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 515.5519843101501, + "p90": 548.3520030975342, + "p95": 2254.879951477051, + "p99": 3861.664056777954 + }, + "combine": { + "p50": 527.8720259666443, + "p90": 1342.527985572815, + "p95": 2347.7439880371094, + "p99": 3295.2640056610107 + }, + "roundtrip": { + "p50": 1032.7359437942505, + "p90": 1137.6960277557373, + "p95": 3370.8159923553467, + "p99": 3788.383960723877 + }, + "isolatedSum": { + "p50": 1043.4240102767944, + "p90": 1890.8799886703491, + "p95": 4602.62393951416, + "p99": 7156.928062438965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 5302, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 713.1839990615845, + "p90": 804.032027721405, + "p95": 2688.86399269104, + "p99": 3411.776065826416 + }, + "combine": { + "p50": 990.4959797859192, + "p90": 
1112.6400232315063, + "p95": 1529.1199684143066, + "p99": 1996.6720342636108 + }, + "roundtrip": { + "p50": 1664.0000343322754, + "p90": 2375.648021697998, + "p95": 2841.599941253662, + "p99": 3237.855911254883 + }, + "isolatedSum": { + "p50": 1703.6799788475037, + "p90": 1916.6720509529114, + "p95": 4217.983961105347, + "p99": 5408.448100090027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 10587, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1164.3520593643188, + "p90": 1492.35200881958, + "p95": 2294.464111328125, + "p99": 2877.72798538208 + }, + "combine": { + "p50": 1901.9520282745361, + "p90": 1920.2560186386108, + "p95": 1926.6879558563232, + "p99": 1949.728012084961 + }, + "roundtrip": { + "p50": 3010.240077972412, + "p90": 3112.6720905303955, + "p95": 3385.88809967041, + "p99": 3790.623903274536 + }, + "isolatedSum": { + "p50": 3066.304087638855, + "p90": 3412.608027458191, + "p95": 4221.152067184448, + "p99": 4827.455997467041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 21014, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1927.9680252075195, + "p90": 2049.2160320281982, + "p95": 2151.2959003448486, + "p99": 2307.4560165405273 + }, + "combine": { + "p50": 3635.9360218048096, + "p90": 3748.703956604004, + "p95": 3764.8000717163086, + "p99": 3800.6720542907715 + }, + "roundtrip": { + "p50": 5438.528060913086, + "p90": 5595.967769622803, + "p95": 5611.936092376709, + "p99": 5647.264003753662 + }, + "isolatedSum": { + "p50": 5563.904047012329, + "p90": 5797.919988632202, + "p95": 5916.095972061157, + "p99": 
6108.128070831299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 41814, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3884.000062942505, + "p90": 3954.047918319702, + "p95": 3972.0640182495117, + "p99": 4002.7198791503906 + }, + "combine": { + "p50": 7346.464157104492, + "p90": 7433.279991149902, + "p95": 7446.879863739014, + "p99": 7466.1760330200195 + }, + "roundtrip": { + "p50": 10897.600173950195, + "p90": 10927.96802520752, + "p95": 10942.239761352539, + "p99": 10961.631774902344 + }, + "isolatedSum": { + "p50": 11230.464220046997, + "p90": 11387.327909469604, + "p95": 11418.943881988525, + "p99": 11468.89591217041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 83417, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8bb71c0", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_4816949b", + "comparisonKey": "d5f2374033c53cac", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:52.215116+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + 
"topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 362.87999153137207, + "p90": 378.62399220466614, + "p95": 426.144003868103, + "p99": 4874.720096588135 + }, + "combine": { + "p50": 247.13599681854248, + "p90": 253.1839907169342, + "p95": 255.42399287223816, + "p99": 3463.9999866485596 + }, + "roundtrip": { + "p50": 595.583975315094, + "p90": 616.3520216941833, + "p95": 800.544023513794, + "p99": 4877.823829650879 + }, + "isolatedSum": { + "p50": 610.0159883499146, + "p90": 631.8079829216003, + "p95": 681.5679967403412, + "p99": 8338.720083236694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 1067, + "stragglerRank": 3, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 404.2240083217621, + "p90": 427.7440011501312, + "p95": 563.2320046424866, + "p99": 3967.4880504608154 + }, + "combine": { + "p50": 423.48799109458923, + "p90": 432.2560131549835, + "p95": 2432.800054550171, + "p99": 3457.8239917755127 + }, + "roundtrip": { + "p50": 809.1520071029663, + "p90": 855.8400273323059, + "p95": 3540.7679080963135, + "p99": 4228.608131408691 + }, + "isolatedSum": { + "p50": 827.7119994163513, + "p90": 860.0000143051147, + "p95": 2996.0320591926575, + "p99": 7425.312042236328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 2097, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 492.000013589859, + "p90": 524.2239832878113, + "p95": 617.8560256958008, + "p99": 4081.632137298584 + }, + "combine": { + "p50": 762.5280022621155, + "p90": 1307.1680068969727, + "p95": 1850.43203830719, + "p99": 3169.503927230835 + }, + "roundtrip": { + "p50": 1231.8400144577026, + "p90": 1377.0560026168823, + "p95": 3175.2960681915283, + "p99": 3611.392021179199 + }, + "isolatedSum": { + "p50": 1254.5280158519745, + "p90": 1831.391990184784, + "p95": 2468.2880640029907, + "p99": 7251.136064529419 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 4163, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 717.5679802894592, + "p90": 766.5280103683472, + "p95": 2787.935972213745, + "p99": 3699.455976486206 + }, + "combine": { + "p50": 1443.1040287017822, + "p90": 1491.26398563385, + "p95": 
1709.8560333251953, + "p99": 1847.9679822921753 + }, + "roundtrip": { + "p50": 2126.3680458068848, + "p90": 2545.4719066619873, + "p95": 3070.591926574707, + "p99": 3590.847969055176 + }, + "isolatedSum": { + "p50": 2160.6720089912415, + "p90": 2257.7919960021973, + "p95": 4497.79200553894, + "p99": 5547.423958778381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8305, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1110.2720499038696, + "p90": 1592.2240018844604, + "p95": 2328.160047531128, + "p99": 2938.2081031799316 + }, + "combine": { + "p50": 2789.088010787964, + "p90": 2813.728094100952, + "p95": 2822.495937347412, + "p99": 2849.4720458984375 + }, + "roundtrip": { + "p50": 3855.232000350952, + "p90": 3934.432029724121, + "p95": 4033.7281227111816, + "p99": 4357.952117919922 + }, + "isolatedSum": { + "p50": 3899.3600606918335, + "p90": 4405.952095985413, + "p95": 5150.65598487854, + "p99": 5787.680149078369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 16529, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1904.6399593353271, + "p90": 2047.327995300293, + "p95": 2269.9201107025146, + "p99": 2665.087938308716 + }, + "combine": { + "p50": 5480.319976806641, + "p90": 5529.632091522217, + "p95": 5543.456077575684, + "p99": 5571.487903594971 + }, + "roundtrip": { + "p50": 7301.695823669434, + "p90": 7362.624168395996, + "p95": 7385.503768920898, + "p99": 7466.879844665527 + }, + "isolatedSum": { + "p50": 7384.959936141968, + "p90": 7576.96008682251, + "p95": 7813.376188278198, + "p99": 8236.575841903687 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 32880, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8208024a", + "identity": "h100|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_955629d8", + "comparisonKey": "2ed4273c21ecb534", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:43.292362+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 496.6079890727997, + "p90": 513.6319994926453, + "p95": 590.5280113220215, + "p99": 3590.5280113220215 + }, + "combine": { + "p50": 382.88000226020813, + "p90": 391.7759954929352, + "p95": 2389.8239135742188, + "p99": 3328.9918899536133 + }, + "roundtrip": { + "p50": 861.8879914283752, + "p90": 916.1919951438904, + "p95": 3354.2399406433105, + "p99": 4141.34407043457 + }, + "isolatedSum": { + "p50": 879.4879913330078, + "p90": 905.4079949855804, + "p95": 2980.3519248962402, + "p99": 6919.519901275635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 671.0720062255859, + "p90": 776.960015296936, + "p95": 2147.0720767974854, + "p99": 3235.9681129455566 + }, + "combine": { + "p50": 710.0800275802612, + "p90": 1205.9839963912964, + "p95": 1874.6240139007568, + "p99": 2454.5280933380127 + }, + "roundtrip": { + "p50": 1338.4000062942505, + "p90": 2233.1199645996094, + "p95": 3010.688066482544, + "p99": 3423.840045928955 + }, + "isolatedSum": { + "p50": 1381.1520338058472, + "p90": 1982.9440116882324, + "p95": 4021.696090698242, + "p99": 5690.496206283569 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 1030.1439762115479, + "p90": 1282.9760313034058, + "p95": 2337.5039100646973, + "p99": 2845.8878993988037 + }, + "combine": { + "p50": 1350.8479595184326, + "p90": 1369.215965270996, + "p95": 1380.2560567855835, + "p99": 1589.408040046692 + }, + "roundtrip": { + "p50": 2305.567979812622, + "p90": 2513.6959552764893, + "p95": 2927.680015563965, + "p99": 3485.503911972046 + }, + "isolatedSum": { + "p50": 2380.9919357299805, + "p90": 2652.191996574402, + "p95": 3717.7599668502808, + "p99": 4435.295939445496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1766.752004623413, + "p90": 1888.0000114440918, + "p95": 1991.9999837875366, + "p99": 2118.4000968933105 + }, + "combine": { + "p50": 2535.7439517974854, + "p90": 2628.607988357544, + "p95": 2642.303943634033, + "p99": 2659.3921184539795 + }, + "roundtrip": { + "p50": 4217.76008605957, + "p90": 4363.584041595459, + "p95": 4378.079891204834, + "p99": 4445.695877075195 + }, + "isolatedSum": { + "p50": 4302.495956420898, + "p90": 4516.607999801636, + "p95": 4634.30392742157, + "p99": 4777.79221534729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 40211, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 3227.1039485931396, + "p90": 3260.864019393921, + "p95": 3308.2239627838135, + "p99": 3383.039951324463 + }, + "combine": { + "p50": 5014.7199630737305, + "p90": 5103.775978088379, + "p95": 5113.376140594482, + "p99": 5132.351875305176 + }, + 
"roundtrip": { + "p50": 8121.024131774902, + "p90": 8156.160354614258, + "p95": 8172.608375549316, + "p99": 8196.703910827637 + }, + "isolatedSum": { + "p50": 8241.82391166687, + "p90": 8364.6399974823, + "p95": 8421.600103378296, + "p99": 8515.391826629639 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 6746.560096740723, + "p90": 6820.96004486084, + "p95": 7106.239795684814, + "p99": 7181.119918823242 + }, + "combine": { + "p50": 10519.200325012207, + "p90": 10591.872215270996, + "p95": 10608.768463134766, + "p99": 10639.455795288086 + }, + "roundtrip": { + "p50": 17124.927520751953, + "p90": 17189.151763916016, + "p95": 17215.808868408203, + "p99": 17264.991760253906 + }, + "isolatedSum": { + "p50": 17265.76042175293, + "p90": 17412.832260131836, + "p95": 17715.00825881958, + "p99": 17820.575714111328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2cab1a80", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_56a428b6", + "comparisonKey": "87869d8ea4271754", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:22.283298+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 375.67999958992004, + "p90": 392.38399267196655, + "p95": 460.57599782943726, + "p99": 4399.680137634277 + }, + "combine": { + "p50": 245.79200148582458, + "p90": 255.3279995918274, + "p95": 2443.8719749450684, + "p99": 4183.487892150879 + }, + "roundtrip": { + "p50": 594.7200059890747, + "p90": 616.6719794273376, + "p95": 2713.43994140625, + "p99": 5163.680076599121 + }, + "isolatedSum": { + "p50": 
621.4720010757446, + "p90": 647.711992263794, + "p95": 2904.4479727745056, + "p99": 8583.168029785156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 415.19999504089355, + "p90": 431.71200156211853, + "p95": 1425.6960153579712, + "p99": 4182.464122772217 + }, + "combine": { + "p50": 423.1039881706238, + "p90": 432.19199776649475, + "p95": 2629.3439865112305, + "p99": 3896.1920738220215 + }, + "roundtrip": { + "p50": 818.1120157241821, + "p90": 846.8800187110901, + "p95": 3420.6080436706543, + "p99": 4387.167930603027 + }, + "isolatedSum": { + "p50": 838.3039832115173, + "p90": 863.9039993286133, + "p95": 4055.0400018692017, + "p99": 8078.656196594238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 504.7039985656738, + "p90": 521.1520195007324, + "p95": 627.0080208778381, + "p99": 4168.960094451904 + }, + "combine": { + "p50": 762.4319791793823, + "p90": 1134.335994720459, + "p95": 1661.471962928772, + "p99": 2712.9600048065186 + }, + "roundtrip": { + "p50": 1242.9440021514893, + "p90": 1354.5600175857544, + "p95": 3184.864044189453, + "p99": 3742.8159713745117 + }, + "isolatedSum": { + "p50": 1267.1359777450562, + "p90": 1655.4880142211914, + "p95": 2288.47998380661, + "p99": 6881.920099258423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 723.9360213279724, + "p90": 760.1919770240784, + "p95": 2381.824016571045, + "p99": 3424.7679710388184 + }, + "combine": { + "p50": 1440.7360553741455, + "p90": 1481.2159538269043, + "p95": 1740.6400442123413, + "p99": 2224.479913711548 + }, + "roundtrip": { + "p50": 2132.8320503234863, + "p90": 2715.456008911133, + "p95": 3258.2719326019287, + "p99": 3600.127935409546 + }, + "isolatedSum": { + "p50": 2164.672076702118, + "p90": 2241.4079308509827, + "p95": 4122.464060783386, + "p99": 5649.247884750366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1112.768054008484, + "p90": 2082.5281143188477, + "p95": 2501.471996307373, + "p99": 3049.9520301818848 + }, + "combine": { + "p50": 2788.032054901123, + "p90": 2813.7600421905518, + "p95": 2821.984052658081, + "p99": 2842.400074005127 + }, + "roundtrip": { + "p50": 3856.99200630188, + "p90": 3908.672094345093, + "p95": 4121.407985687256, + "p99": 4463.136196136475 + }, + "isolatedSum": { + "p50": 3900.800108909607, + "p90": 4896.288156509399, + "p95": 5323.456048965454, + "p99": 5892.352104187012 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1904.960036277771, + "p90": 2050.9119033813477, + "p95": 2355.423927307129, + "p99": 2762.3040676116943 + }, + "combine": { + "p50": 5479.263782501221, + "p90": 5527.359962463379, + "p95": 5540.160179138184, + "p99": 5573.984146118164 + }, + 
"roundtrip": { + "p50": 7304.448127746582, + "p90": 7360.256195068359, + "p95": 7379.456043243408, + "p99": 7460.8001708984375 + }, + "isolatedSum": { + "p50": 7384.223818778992, + "p90": 7578.271865844727, + "p95": 7895.5841064453125, + "p99": 8336.288213729858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-012f2a96", + "identity": "h100|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_f33c868c", + "comparisonKey": "d02e919fe9979369", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:38.927447+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_18", + "sku": "h100", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 367.42401123046875, + "p90": 385.18399000167847, + "p95": 487.39200830459595, + "p99": 4310.0481033325195 + }, + "combine": { + "p50": 244.54399943351746, + "p90": 255.45600056648254, + "p95": 319.90399956703186, + "p99": 4274.816036224365 + }, + "roundtrip": { + "p50": 592.6079750061035, + "p90": 626.8799901008606, + "p95": 3956.063985824585, + "p99": 4895.4877853393555 + }, + "isolatedSum": { + "p50": 611.9680106639862, + "p90": 640.639990568161, + "p95": 807.2960078716278, + "p99": 8584.864139556885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 410.17600893974304, + "p90": 428.5440146923065, + "p95": 526.528000831604, + "p99": 4199.967861175537 + }, + "combine": { + "p50": 423.23198914527893, + "p90": 431.93599581718445, + "p95": 2360.3200912475586, + "p99": 3661.5679264068604 + }, + "roundtrip": { + "p50": 815.0079846382141, + "p90": 863.6159896850586, + "p95": 3586.6880416870117, + "p99": 4237.887859344482 + }, + "isolatedSum": { + "p50": 833.407998085022, + "p90": 860.480010509491, + "p95": 
2886.8480920791626, + "p99": 7861.5357875823975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 498.879998922348, + "p90": 536.736011505127, + "p95": 1436.959981918335, + "p99": 4111.487865447998 + }, + "combine": { + "p50": 762.8800272941589, + "p90": 1438.1120204925537, + "p95": 1962.0800018310547, + "p99": 2897.279977798462 + }, + "roundtrip": { + "p50": 1238.5599613189697, + "p90": 1566.2399530410767, + "p95": 3309.8878860473633, + "p99": 3761.023998260498 + }, + "isolatedSum": { + "p50": 1261.760026216507, + "p90": 1974.8480319976807, + "p95": 3399.0399837493896, + "p99": 7008.76784324646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 724.128007888794, + "p90": 763.9679908752441, + "p95": 2791.424036026001, + "p99": 3403.2320976257324 + }, + "combine": { + "p50": 1443.0400133132935, + "p90": 1478.6560535430908, + "p95": 1720.255970954895, + "p99": 1970.1759815216064 + }, + "roundtrip": { + "p50": 2132.0960521698, + "p90": 2936.0640048980713, + "p95": 3306.4639568328857, + "p99": 3576.672077178955 + }, + "isolatedSum": { + "p50": 2167.1680212020874, + "p90": 2242.624044418335, + "p95": 4511.680006980896, + "p99": 5373.408079147339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 
16384, + "dispatch": { + "p50": 1113.53600025177, + "p90": 1263.9679908752441, + "p95": 2443.7758922576904, + "p99": 3040.191888809204 + }, + "combine": { + "p50": 2789.3118858337402, + "p90": 2816.767930984497, + "p95": 2823.9359855651855, + "p99": 2844.032049179077 + }, + "roundtrip": { + "p50": 3859.584093093872, + "p90": 3937.376022338867, + "p95": 4110.976219177246, + "p99": 4576.96008682251 + }, + "isolatedSum": { + "p50": 3902.8478860855103, + "p90": 4080.735921859741, + "p95": 5267.711877822876, + "p99": 5884.223937988281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1906.4960479736328, + "p90": 2034.3680381774902, + "p95": 2312.5760555267334, + "p99": 2778.7840366363525 + }, + "combine": { + "p50": 5480.480194091797, + "p90": 5530.303955078125, + "p95": 5555.295944213867, + "p99": 5599.391937255859 + }, + "roundtrip": { + "p50": 7307.712078094482, + "p90": 7372.704029083252, + "p95": 7409.440040588379, + "p99": 7494.944095611572 + }, + "isolatedSum": { + "p50": 7386.97624206543, + "p90": 7564.671993255615, + "p95": 7867.871999740601, + "p99": 8378.175973892212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02d8e8f1", + "identity": "h100|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_ef4af798", + "comparisonKey": "c226930f8eeb85e4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:45.644838+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 128.31999361515045, + "p90": 181.7920058965683, + "p95": 185.02399325370789, + "p99": 188.73600661754608 + }, + "combine": { + "p50": 74.17599856853485, + "p90": 86.71999722719193, + "p95": 87.77599781751633, + "p99": 90.7519981265068 + }, + 
"roundtrip": { + "p50": 185.72799861431122, + "p90": 206.14400506019592, + "p95": 241.72799289226532, + "p99": 251.74400210380554 + }, + "isolatedSum": { + "p50": 202.4959921836853, + "p90": 268.5120031237602, + "p95": 272.7999910712242, + "p99": 279.4880047440529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 125.18399953842163, + "p90": 131.74399733543396, + "p95": 134.36800241470337, + "p99": 142.17600226402283 + }, + "combine": { + "p50": 74.52800124883652, + "p90": 78.68800312280655, + "p95": 79.26400005817413, + "p99": 85.08799970149994 + }, + "roundtrip": { + "p50": 188.73600661754608, + "p90": 194.04800236225128, + "p95": 197.08800315856934, + "p99": 203.45599949359894 + }, + "isolatedSum": { + "p50": 199.71200078725815, + "p90": 210.4320004582405, + "p95": 213.6320024728775, + "p99": 227.26400196552277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 128.28800082206726, + "p90": 190.2720034122467, + "p95": 194.20799612998962, + "p99": 203.3279985189438 + }, + "combine": { + "p50": 77.85599678754807, + "p90": 94.08000111579895, + "p95": 95.10400146245956, + "p99": 97.53599762916565 + }, + "roundtrip": { + "p50": 192.22399592399597, + "p90": 257.0880055427551, + "p95": 261.6960108280182, + "p99": 288.32000494003296 + }, + "isolatedSum": { + "p50": 206.14399760961533, + "p90": 284.35200452804565, + "p95": 289.3119975924492, + "p99": 300.86399614810944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + 
"fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.58399546146393, + "p90": 184.1920018196106, + "p95": 190.23999571800232, + "p99": 224.48000311851501 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 94.87999975681305, + "p95": 95.32800316810608, + "p99": 98.7199991941452 + }, + "roundtrip": { + "p50": 193.15199553966522, + "p90": 252.60800123214722, + "p95": 254.36800718307495, + "p99": 260.1599991321564 + }, + "isolatedSum": { + "p50": 206.30399882793427, + "p90": 279.07200157642365, + "p95": 285.5679988861084, + "p99": 323.2000023126602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.61600315570831, + "p90": 189.2160028219223, + "p95": 194.7840005159378, + "p99": 203.39199900627136 + }, + "combine": { + "p50": 78.97599786520004, + "p90": 95.23200243711472, + "p95": 97.75999933481216, + "p99": 101.08800232410431 + }, + "roundtrip": { + "p50": 193.50400567054749, + "p90": 259.6159875392914, + "p95": 263.7439966201782, + "p99": 291.3599908351898 + }, + "isolatedSum": { + "p50": 206.59200102090836, + "p90": 284.448005259037, + "p95": 292.54399985074997, + "p99": 304.48000133037567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 128.51199507713318, + "p90": 186.68800592422485, + "p95": 189.79200720787048, + "p99": 221.5999960899353 + }, + "combine": { + "p50": 81.69600367546082, + 
"p90": 98.49599748849869, + "p95": 100.16000270843506, + "p99": 111.10399663448334 + }, + "roundtrip": { + "p50": 194.62400674819946, + "p90": 256.54399394989014, + "p95": 261.6319954395294, + "p99": 289.0239953994751 + }, + "isolatedSum": { + "p50": 210.207998752594, + "p90": 285.18400341272354, + "p95": 289.95200991630554, + "p99": 332.70399272441864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.4479959011078, + "p90": 192.76799261569977, + "p95": 196.383997797966, + "p99": 212.92799711227417 + }, + "combine": { + "p50": 89.47200328111649, + "p90": 104.51199859380722, + "p95": 106.65600001811981, + "p99": 112.2559979557991 + }, + "roundtrip": { + "p50": 203.2639980316162, + "p90": 273.6319899559021, + "p95": 277.47198939323425, + "p99": 293.92001032829285 + }, + "isolatedSum": { + "p50": 225.91999918222427, + "p90": 297.279991209507, + "p95": 303.0399978160858, + "p99": 325.1839950680733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 144.83200013637543, + "p90": 193.24800372123718, + "p95": 196.1279958486557, + "p99": 209.50399339199066 + }, + "combine": { + "p50": 102.14400291442871, + "p90": 119.23199892044067, + "p95": 121.05599790811539, + "p99": 123.96799772977829 + }, + "roundtrip": { + "p50": 216.5759950876236, + "p90": 281.8560004234314, + "p95": 285.75998544692993, + "p99": 303.5520017147064 + }, + "isolatedSum": { + "p50": 246.97600305080414, + "p90": 312.48000264167786, + "p95": 317.1839937567711, + "p99": 
333.47199112176895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d5a44344", + "identity": "h100|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_ef4af798", + "comparisonKey": "2f4ead02f3d91530", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:44.323756+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 124.54400211572647, + "p90": 130.94399869441986, + "p95": 132.28799402713776, + "p99": 139.77600634098053 + }, + "combine": { + "p50": 78.72000336647034, + "p90": 80.76799660921097, + "p95": 81.56800270080566, + "p99": 86.84799820184708 + }, + "roundtrip": { + "p50": 189.31199610233307, + "p90": 195.23200392723083, + "p95": 197.76000082492828, + "p99": 203.96800339221954 + }, + "isolatedSum": { + "p50": 203.2640054821968, + "p90": 211.71199530363083, + "p95": 213.85599672794342, + "p99": 226.6240045428276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 124.28800016641617, + "p90": 130.0799995660782, + "p95": 131.99999928474426, + "p99": 138.33600282669067 + }, + "combine": { + "p50": 79.83999699354172, + "p90": 81.69600367546082, + "p95": 82.71999657154083, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 190.91199338436127, + "p90": 195.93599438667297, + "p95": 198.2399970293045, + "p99": 203.64800095558167 + }, + "isolatedSum": { + "p50": 204.12799715995789, + "p90": 211.776003241539, + "p95": 214.7199958562851, + "p99": 225.91999918222427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 127.87200510501862, + "p90": 185.92000007629395, + "p95": 188.73600661754608, + "p99": 194.91200149059296 + }, + "combine": { + "p50": 81.15199953317642, + "p90": 96.83199971914291, + "p95": 98.04800152778625, + "p99": 101.31199657917023 + }, + "roundtrip": { + "p50": 194.14399564266205, + "p90": 256.76798820495605, + "p95": 259.2320144176483, + "p99": 263.35999369621277 + }, + "isolatedSum": { + "p50": 209.02400463819504, + "p90": 282.75199979543686, + "p95": 286.78400814533234, + "p99": 296.2239980697632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.82400441169739, + "p90": 180.95999956130981, + "p95": 184.03199315071106, + "p99": 188.12799453735352 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 82.14399963617325, + "p95": 83.23200047016144, + "p99": 88.92799913883209 + }, + "roundtrip": { + "p50": 193.12000274658203, + "p90": 198.4959989786148, + "p95": 200.6720006465912, + "p99": 203.93599569797516 + }, + "isolatedSum": { + "p50": 206.11200481653214, + "p90": 263.10399919748306, + "p95": 267.2639936208725, + "p99": 277.0559936761856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.58399546146393, + "p90": 188.89600038528442, + "p95": 191.103994846344, + "p99": 193.95199418067932 + }, + "combine": { + "p50": 81.56800270080566, + "p90": 98.04800152778625, + "p95": 99.29600358009338, + "p99": 104.86400127410889 + }, + "roundtrip": { + "p50": 196.57599925994873, + "p90": 258.87998938560486, 
+ "p95": 262.0159983634949, + "p99": 265.28000831604004 + }, + "isolatedSum": { + "p50": 209.1519981622696, + "p90": 286.9440019130707, + "p95": 290.3999984264374, + "p99": 298.8159954547882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 127.03999876976013, + "p90": 184.76800620555878, + "p95": 186.91200017929077, + "p99": 191.3280040025711 + }, + "combine": { + "p50": 88.22400122880936, + "p90": 104.00000214576721, + "p95": 105.02400249242783, + "p99": 107.29599744081497 + }, + "roundtrip": { + "p50": 199.77599382400513, + "p90": 260.6399953365326, + "p95": 262.7840042114258, + "p99": 265.7279968261719 + }, + "isolatedSum": { + "p50": 215.2639999985695, + "p90": 288.768008351326, + "p95": 291.9360026717186, + "p99": 298.6240014433861 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.54400408267975, + "p90": 247.1040040254593, + "p95": 251.90401077270508, + "p99": 255.51998615264893 + }, + "combine": { + "p50": 96.3520035147667, + "p90": 137.11999356746674, + "p95": 137.66400516033173, + "p99": 139.52000439167023 + }, + "roundtrip": { + "p50": 208.25600624084473, + "p90": 331.712007522583, + "p95": 336.2559974193573, + "p99": 345.63198685646057 + }, + "isolatedSum": { + "p50": 232.89600759744644, + "p90": 384.223997592926, + "p95": 389.5680159330368, + "p99": 395.03999054431915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 151.39199793338776, + "p90": 190.2720034122467, + "p95": 192.22399592399597, + "p99": 196.6399997472763 + }, + "combine": { + "p50": 107.96800255775452, + "p90": 124.32000041007996, + "p95": 128.7360042333603, + "p99": 130.75199723243713 + }, + "roundtrip": { + "p50": 222.75200486183167, + "p90": 286.27198934555054, + "p95": 289.3120050430298, + "p99": 303.3919930458069 + }, + "isolatedSum": { + "p50": 259.3600004911423, + "p90": 314.59200382232666, + "p95": 320.96000015735626, + "p99": 327.39199697971344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-28bb6cb1", + "identity": "h100|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ef4af798", + "comparisonKey": "911c7727fb4fd5b0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:47.048171+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 178.20799350738525, + "p90": 185.72799861431122, + "p95": 189.53600525856018, + "p99": 230.97600042819977 + }, + "combine": { + "p50": 92.00000017881393, + "p90": 98.24000298976898, + "p95": 100.28800368309021, + "p99": 106.175996363163 + }, + "roundtrip": { + "p50": 247.42400646209717, + "p90": 256.54399394989014, + "p95": 259.16799902915955, + "p99": 269.1519856452942 + }, + "isolatedSum": { + "p50": 270.2079936861992, + "p90": 283.9680016040802, + "p95": 289.8240089416504, + "p99": 337.15199679136276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 127.07200646400452, + "p90": 182.78400599956512, + "p95": 185.56800484657288, + "p99": 192.35199689865112 + }, + "combine": { + "p50": 
81.82399719953537, + "p90": 94.84799951314926, + "p95": 95.8079993724823, + "p99": 98.27200323343277 + }, + "roundtrip": { + "p50": 198.43199849128723, + "p90": 248.25599789619446, + "p95": 250.43201446533203, + "p99": 257.56800174713135 + }, + "isolatedSum": { + "p50": 208.8960036635399, + "p90": 277.6320055127144, + "p95": 281.3760042190552, + "p99": 290.6240001320839 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 127.83999741077423, + "p90": 182.3360025882721, + "p95": 186.8479996919632, + "p99": 282.0799946784973 + }, + "combine": { + "p50": 83.13599973917007, + "p90": 98.1760025024414, + "p95": 99.61599856615067, + "p99": 104.35199737548828 + }, + "roundtrip": { + "p50": 197.76000082492828, + "p90": 260.80000400543213, + "p95": 266.01600646972656, + "p99": 387.8079950809479 + }, + "isolatedSum": { + "p50": 210.9759971499443, + "p90": 280.5120050907135, + "p95": 286.46399825811386, + "p99": 386.4319920539856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.03999876976013, + "p90": 184.92799997329712, + "p95": 189.4720047712326, + "p99": 195.3279972076416 + }, + "combine": { + "p50": 85.7279971241951, + "p90": 102.36799716949463, + "p95": 103.84000092744827, + "p99": 105.72800040245056 + }, + "roundtrip": { + "p50": 198.97599518299103, + "p90": 262.5280022621155, + "p95": 265.53601026535034, + "p99": 283.3600044250488 + }, + "isolatedSum": { + "p50": 212.76799589395523, + "p90": 287.29599714279175, + "p95": 293.3120056986809, + "p99": 
301.05599761009216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 129.7599971294403, + "p90": 186.94399297237396, + "p95": 192.80000030994415, + "p99": 212.76800334453583 + }, + "combine": { + "p50": 87.52000331878662, + "p90": 101.75999999046326, + "p95": 105.24799674749374, + "p99": 108.12799632549286 + }, + "roundtrip": { + "p50": 200.1280039548874, + "p90": 265.02400636672974, + "p95": 268.3520019054413, + "p99": 276.0320007801056 + }, + "isolatedSum": { + "p50": 217.28000044822693, + "p90": 288.7039929628372, + "p95": 298.0479970574379, + "p99": 320.8959996700287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 129.50399518013, + "p90": 184.54399704933167, + "p95": 187.45599687099457, + "p99": 193.56800615787506 + }, + "combine": { + "p50": 89.56799656152725, + "p90": 105.69600015878677, + "p95": 106.62399977445602, + "p99": 112.86400258541107 + }, + "roundtrip": { + "p50": 205.05599677562714, + "p90": 267.3279941082001, + "p95": 272.41599559783936, + "p99": 282.1440100669861 + }, + "isolatedSum": { + "p50": 219.07199174165726, + "p90": 290.23999720811844, + "p95": 294.0799966454506, + "p99": 306.43200874328613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 145.50399780273438, + "p90": 
179.45599555969238, + "p95": 182.75199830532074, + "p99": 189.37599658966064 + }, + "combine": { + "p50": 98.27200323343277, + "p90": 111.55200004577637, + "p95": 115.10399729013443, + "p99": 118.43200027942657 + }, + "roundtrip": { + "p50": 214.65599536895752, + "p90": 275.9679853916168, + "p95": 277.8240144252777, + "p99": 285.43999791145325 + }, + "isolatedSum": { + "p50": 243.77600103616714, + "p90": 291.00799560546875, + "p95": 297.85599559545517, + "p99": 307.8079968690872 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 152.99199521541595, + "p90": 196.6399997472763, + "p95": 197.9839950799942, + "p99": 202.87999510765076 + }, + "combine": { + "p50": 114.3999993801117, + "p90": 127.20000743865967, + "p95": 128.4479945898056, + "p99": 133.7279975414276 + }, + "roundtrip": { + "p50": 230.5919975042343, + "p90": 290.20801186561584, + "p95": 293.2159900665283, + "p99": 299.5840013027191 + }, + "isolatedSum": { + "p50": 267.39199459552765, + "p90": 323.840007185936, + "p95": 326.4319896697998, + "p99": 336.60799264907837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5861756e", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_b521aff0", + "comparisonKey": "d2fd76f5ec2f3d88", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:05.134552+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": 
"uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 129.63199615478516, + "p90": 186.5600049495697, + "p95": 188.89600038528442, + "p99": 196.57599925994873 + }, + "combine": { + "p50": 86.7839977145195, + "p90": 103.96800190210342, + "p95": 105.15200346708298, + "p99": 108.31999778747559 + }, + "roundtrip": { + "p50": 200.9280025959015, + "p90": 261.9200050830841, + "p95": 
263.8719975948334, + "p99": 271.232008934021 + }, + "isolatedSum": { + "p50": 216.41599386930466, + "p90": 290.5280068516731, + "p95": 294.0480038523674, + "p99": 304.8959970474243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 127.07200646400452, + "p90": 135.00800728797913, + "p95": 138.91200721263885, + "p99": 149.72800016403198 + }, + "combine": { + "p50": 87.10400015115738, + "p90": 89.66399729251862, + "p95": 90.62399715185165, + "p99": 96.6079980134964 + }, + "roundtrip": { + "p50": 201.53599977493286, + "p90": 206.01600408554077, + "p95": 208.064004778862, + "p99": 214.65599536895752 + }, + "isolatedSum": { + "p50": 214.1760066151619, + "p90": 224.67200458049774, + "p95": 229.5360043644905, + "p99": 246.33599817752838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 131.071999669075, + "p90": 195.360004901886, + "p95": 197.76000082492828, + "p99": 202.94399559497833 + }, + "combine": { + "p50": 88.8959988951683, + "p90": 104.47999835014343, + "p95": 110.46399921178818, + "p99": 112.38399893045425 + }, + "roundtrip": { + "p50": 203.5840004682541, + "p90": 271.232008934021, + "p95": 274.56000447273254, + "p99": 279.6480059623718 + }, + "isolatedSum": { + "p50": 219.96799856424332, + "p90": 299.8400032520294, + "p95": 308.22400003671646, + "p99": 315.3279945254326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 130.72000443935394, + "p90": 182.27200210094452, + "p95": 186.52799725532532, + "p99": 612.0319962501526 + }, + "combine": { + "p50": 89.40800279378891, + "p90": 104.60799932479858, + "p95": 105.85600137710571, + "p99": 110.55999994277954 + }, + "roundtrip": { + "p50": 204.79999482631683, + "p90": 259.93600487709045, + "p95": 261.53600215911865, + "p99": 269.1839933395386 + }, + "isolatedSum": { + "p50": 220.12800723314285, + "p90": 286.8800014257431, + "p95": 292.38399863243103, + "p99": 722.5919961929321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 128.86400520801544, + "p90": 192.00000166893005, + "p95": 194.5600062608719, + "p99": 200.44800639152527 + }, + "combine": { + "p50": 89.91999924182892, + "p90": 107.4879989027977, + "p95": 111.7120012640953, + "p99": 115.10399729013443 + }, + "roundtrip": { + "p50": 205.9839963912964, + "p90": 271.87201380729675, + "p95": 282.368004322052, + "p99": 365.2479946613312 + }, + "isolatedSum": { + "p50": 218.78400444984436, + "p90": 299.48800057172775, + "p95": 306.2720075249672, + "p99": 315.5520036816597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 135.96799969673157, + "p90": 186.27199530601501, + "p95": 190.0160014629364, + "p99": 195.16800343990326 + }, + "combine": { + "p50": 96.8639999628067, + "p90": 112.60800063610077, + "p95": 113.88800293207169, + "p99": 120.31999975442886 + }, + 
"roundtrip": { + "p50": 212.0639979839325, + "p90": 270.24000883102417, + "p95": 272.7360129356384, + "p99": 280.2880108356476 + }, + "isolatedSum": { + "p50": 232.83199965953827, + "p90": 298.8799959421158, + "p95": 303.9040043950081, + "p99": 315.4880031943321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 148.73600006103516, + "p90": 194.84800100326538, + "p95": 199.61600005626678, + "p99": 205.9520035982132 + }, + "combine": { + "p50": 105.79200088977814, + "p90": 126.71999633312225, + "p95": 127.93600559234619, + "p99": 128.76799702644348 + }, + "roundtrip": { + "p50": 221.72799706459045, + "p90": 289.72798585891724, + "p95": 293.2159900665283, + "p99": 299.4239926338196 + }, + "isolatedSum": { + "p50": 254.5280009508133, + "p90": 321.56799733638763, + "p95": 327.552005648613, + "p99": 334.7200006246567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.07200491428375, + "p90": 201.34399831295013, + "p95": 204.6079933643341, + "p99": 211.45600080490112 + }, + "combine": { + "p50": 122.27199971675873, + "p90": 142.4960047006607, + "p95": 144.3520039319992, + "p99": 146.04799449443817 + }, + "roundtrip": { + "p50": 245.85600197315216, + "p90": 307.16800689697266, + "p95": 310.1760149002075, + "p99": 321.1840093135834 + }, + "isolatedSum": { + "p50": 285.3440046310425, + "p90": 343.84000301361084, + "p95": 348.9599972963333, + "p99": 357.5039952993393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + 
"combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ebb1ca96", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ef4af798", + "comparisonKey": "9dbbe77a83e79437", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:03.911824+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": 
{ + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 174.8799979686737, + "p90": 187.80800700187683, + "p95": 190.8479928970337, + "p99": 197.85599410533905 + }, + "combine": { + "p50": 86.84799820184708, + "p90": 104.06400263309479, + "p95": 105.66399991512299, + "p99": 107.4879989027977 + }, + "roundtrip": { + "p50": 203.2639980316162, + "p90": 256.8959891796112, + "p95": 260.672003030777, + "p99": 268.44799518585205 + }, + "isolatedSum": { + "p50": 261.7279961705208, + "p90": 291.8720096349716, + "p95": 296.5119928121567, + "p99": 305.34399300813675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 129.85600531101227, + "p90": 176.35199427604675, + "p95": 178.847998380661, + "p99": 185.248002409935 + }, + "combine": { + "p50": 88.0960002541542, + "p90": 97.15200215578079, + "p95": 98.08000177145004, + "p99": 103.32799702882767 + }, + "roundtrip": { + "p50": 202.2079974412918, + "p90": 252.48000025749207, + "p95": 255.16799092292786, + "p99": 259.2639923095703 + }, + "isolatedSum": { + "p50": 217.95200556516647, + "p90": 273.50399643182755, + "p95": 276.92800015211105, + "p99": 288.57599943876266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 143.0719941854477, + "p90": 183.29599499702454, + "p95": 186.3040030002594, + "p99": 
191.9039934873581 + }, + "combine": { + "p50": 94.65599805116653, + "p90": 102.68799960613251, + "p95": 103.7760004401207, + "p99": 109.8560020327568 + }, + "roundtrip": { + "p50": 205.6960016489029, + "p90": 265.50400257110596, + "p95": 267.551988363266, + "p99": 272.8640139102936 + }, + "isolatedSum": { + "p50": 237.72799223661423, + "p90": 285.98399460315704, + "p95": 290.0800034403801, + "p99": 301.7599955201149 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 130.3360015153885, + "p90": 188.35200369358063, + "p95": 192.1599954366684, + "p99": 198.55999946594238 + }, + "combine": { + "p50": 88.83199840784073, + "p90": 105.66399991512299, + "p95": 106.4319983124733, + "p99": 108.73600095510483 + }, + "roundtrip": { + "p50": 204.16000485420227, + "p90": 267.4880027770996, + "p95": 270.30399441719055, + "p99": 275.29600262641907 + }, + "isolatedSum": { + "p50": 219.16799992322922, + "p90": 294.0160036087036, + "p95": 298.5919937491417, + "p99": 307.2960004210472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 129.31199371814728, + "p90": 182.3039948940277, + "p95": 185.2159947156906, + "p99": 194.2719966173172 + }, + "combine": { + "p50": 90.2400016784668, + "p90": 104.19200360774994, + "p95": 108.12799632549286, + "p99": 122.78400361537933 + }, + "roundtrip": { + "p50": 206.36799931526184, + "p90": 268.095999956131, + "p95": 271.0399925708771, + "p99": 276.41600370407104 + }, + "isolatedSum": { + "p50": 219.55199539661407, + "p90": 286.49599850177765, 
+ "p95": 293.3439910411835, + "p99": 317.05600023269653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 135.3600025177002, + "p90": 186.20799481868744, + "p95": 189.69599902629852, + "p99": 196.4160054922104 + }, + "combine": { + "p50": 96.67199850082397, + "p90": 113.88800293207169, + "p95": 114.72000181674957, + "p99": 115.68000167608261 + }, + "roundtrip": { + "p50": 209.82399582862854, + "p90": 274.7519910335541, + "p95": 277.21598744392395, + "p99": 281.72799944877625 + }, + "isolatedSum": { + "p50": 232.03200101852417, + "p90": 300.0959977507591, + "p95": 304.4160008430481, + "p99": 312.096007168293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 147.77599275112152, + "p90": 188.03200125694275, + "p95": 190.528005361557, + "p99": 200.8640021085739 + }, + "combine": { + "p50": 105.50399869680405, + "p90": 119.55200135707855, + "p95": 126.94400548934937, + "p99": 439.9999976158142 + }, + "roundtrip": { + "p50": 220.5439954996109, + "p90": 284.9920094013214, + "p95": 287.77599334716797, + "p99": 301.472008228302 + }, + "isolatedSum": { + "p50": 253.27999144792557, + "p90": 307.5840026140213, + "p95": 317.4720108509064, + "p99": 640.8639997243881 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + 
"p50": 164.60800170898438, + "p90": 202.01599597930908, + "p95": 203.90400290489197, + "p99": 209.02399718761444 + }, + "combine": { + "p50": 121.85599654912949, + "p90": 135.48800349235535, + "p95": 136.76799833774567, + "p99": 147.5519984960556 + }, + "roundtrip": { + "p50": 243.00800263881683, + "p90": 301.34400725364685, + "p95": 304.4480085372925, + "p99": 316.79999828338623 + }, + "isolatedSum": { + "p50": 286.46399825811386, + "p90": 337.50399947166443, + "p95": 340.67200124263763, + "p99": 356.57599568367004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-212a1fcb", + "identity": "h100|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_ef4af798", + "comparisonKey": "e498dabd3da9c207", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:45.273648+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 123.87199699878693, + "p90": 131.1360001564026, + "p95": 134.94400680065155, + "p99": 139.93600010871887 + }, + "combine": { + "p50": 82.59200304746628, + "p90": 88.25600147247314, + "p95": 88.92799913883209, + "p99": 90.91199934482574 + }, + "roundtrip": { + "p50": 195.3279972076416, + "p90": 202.39999890327454, + "p95": 204.19199764728546, + "p99": 211.8079960346222 + }, + "isolatedSum": { + "p50": 206.4640000462532, + "p90": 219.39200162887573, + "p95": 223.87200593948364, + "p99": 230.84799945354462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 124.12799894809723, + "p90": 130.87999820709229, + "p95": 133.40799510478973, + "p99": 137.56799697875977 + }, + "combine": { + "p50": 86.71999722719193, + "p90": 88.73599767684937, + "p95": 89.24800157546997, + "p99": 93.85599941015244 + }, + "roundtrip": { + "p50": 197.05599546432495, + "p90": 
204.22400534152985, + "p95": 206.59199357032776, + "p99": 211.10400557518005 + }, + "isolatedSum": { + "p50": 210.84799617528915, + "p90": 219.61599588394165, + "p95": 222.6559966802597, + "p99": 231.4239963889122 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 127.87200510501862, + "p90": 192.80000030994415, + "p95": 194.97600197792053, + "p99": 199.2959976196289 + }, + "combine": { + "p50": 87.93599903583527, + "p90": 104.99200224876404, + "p95": 106.4319983124733, + "p99": 111.96800321340561 + }, + "roundtrip": { + "p50": 201.08799636363983, + "p90": 267.90401339530945, + "p95": 271.4560031890869, + "p99": 277.0560085773468 + }, + "isolatedSum": { + "p50": 215.80800414085388, + "p90": 297.7920025587082, + "p95": 301.40800029039383, + "p99": 311.2640008330345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 123.83999675512314, + "p90": 131.1360001564026, + "p95": 133.2480013370514, + "p99": 139.39200341701508 + }, + "combine": { + "p50": 87.26400136947632, + "p90": 89.4400030374527, + "p95": 94.68799829483032, + "p99": 116.09599739313126 + }, + "roundtrip": { + "p50": 199.20000433921814, + "p90": 205.4399996995926, + "p95": 208.0959975719452, + "p99": 217.02399849891663 + }, + "isolatedSum": { + "p50": 211.10399812459946, + "p90": 220.57600319385529, + "p95": 227.9359996318817, + "p99": 255.48800081014633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.36000120639801, + "p90": 191.00800156593323, + "p95": 192.83199310302734, + "p99": 199.61600005626678 + }, + "combine": { + "p50": 89.63199704885483, + "p90": 109.0560033917427, + "p95": 114.84800279140472, + "p99": 375.10401010513306 + }, + "roundtrip": { + "p50": 204.92799580097198, + "p90": 272.67199754714966, + "p95": 275.6800055503845, + "p99": 280.9920012950897 + }, + "isolatedSum": { + "p50": 216.99199825525284, + "p90": 300.06400495767593, + "p95": 307.67999589443207, + "p99": 574.7200101613998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 137.7280056476593, + "p90": 188.86399269104004, + "p95": 192.57600605487823, + "p99": 201.47199928760529 + }, + "combine": { + "p50": 96.3200032711029, + "p90": 113.3119985461235, + "p95": 114.14399743080139, + "p99": 145.4399973154068 + }, + "roundtrip": { + "p50": 207.5520008802414, + "p90": 215.64799547195435, + "p95": 228.09599339962006, + "p99": 272.5119888782501 + }, + "isolatedSum": { + "p50": 234.0480089187622, + "p90": 302.17599123716354, + "p95": 306.7200034856796, + "p99": 346.9119966030121 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 148.0959951877594, + "p90": 193.53599846363068, + "p95": 196.06399536132812, + "p99": 201.9840031862259 + }, + "combine": { + "p50": 104.73600029945374, + "p90": 121.91999703645706, + "p95": 
122.75200337171555, + "p99": 128.48000228405 + }, + "roundtrip": { + "p50": 219.80799734592438, + "p90": 287.58400678634644, + "p95": 290.8479869365692, + "p99": 296.640008687973 + }, + "isolatedSum": { + "p50": 252.83199548721313, + "p90": 315.45599550008774, + "p95": 318.81599873304367, + "p99": 330.4640054702759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 164.2560064792633, + "p90": 198.11199605464935, + "p95": 200.47999918460846, + "p99": 206.91199600696564 + }, + "combine": { + "p50": 124.44800138473511, + "p90": 138.72000575065613, + "p95": 139.55199718475342, + "p99": 143.71199905872345 + }, + "roundtrip": { + "p50": 257.4720084667206, + "p90": 304.22401428222656, + "p95": 307.3279857635498, + "p99": 321.24799489974976 + }, + "isolatedSum": { + "p50": 288.7040078639984, + "p90": 336.8320018053055, + "p95": 340.0319963693619, + "p99": 350.6239950656891 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56bffd8c", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h100_9baf7d14", + "comparisonKey": "1570d5846b97eb4b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:14.714400+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 182.11199343204498, + "p90": 191.52000546455383, + "p95": 195.23200392723083, + "p99": 231.64799809455872 + }, + "combine": { + "p50": 102.88000106811523, + "p90": 110.84800213575363, + "p95": 111.80800199508667, + "p99": 120.28799951076508 + }, + "roundtrip": { + "p50": 269.47200298309326, + "p90": 323.5520124435425, + "p95": 361.7280125617981, + "p99": 440.89600443840027 + }, + "isolatedSum": { + "p50": 284.9919945001602, 
+ "p90": 302.36800760030746, + "p95": 307.0400059223175, + "p99": 351.9359976053238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 187.68000602722168, + "p90": 207.23199844360352, + "p95": 234.20800268650055, + "p99": 272.2240090370178 + }, + "combine": { + "p50": 111.96800321340561, + "p90": 120.4800009727478, + "p95": 122.40000069141388, + "p99": 133.66399705410004 + }, + "roundtrip": { + "p50": 277.9519855976105, + "p90": 287.80800104141235, + "p95": 292.28800535202026, + "p99": 338.6879861354828 + }, + "isolatedSum": { + "p50": 299.6480092406273, + "p90": 327.7119994163513, + "p95": 356.60800337791443, + "p99": 405.88800609111786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 204.22400534152985, + "p90": 244.60799992084503, + "p95": 258.2719922065735, + "p99": 313.02401423454285 + }, + "combine": { + "p50": 143.77599954605103, + "p90": 154.4959992170334, + "p95": 162.81600296497345, + "p99": 200.73600113391876 + }, + "roundtrip": { + "p50": 311.96799874305725, + "p90": 357.5040102005005, + "p95": 372.70399928092957, + "p99": 401.66398882865906 + }, + "isolatedSum": { + "p50": 348.0000048875809, + "p90": 399.1039991378784, + "p95": 421.08799517154694, + "p99": 513.7600153684616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-1d0fddf9", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h100_23b2a104", + "comparisonKey": "42512b3ff7b58fec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:39.686964+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + 
"createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 178.30400168895721, + "p90": 194.5600062608719, + "p95": 196.3520050048828, + "p99": 204.54399287700653 + }, + "combine": { + "p50": 97.59999811649323, + "p90": 106.23999685049057, + "p95": 111.07199639081955, + "p99": 112.5440001487732 + }, + "roundtrip": { + "p50": 251.48800015449524, + "p90": 262.1760070323944, + "p95": 266.7199969291687, + "p99": 272.96000719070435 + }, + "isolatedSum": { + "p50": 275.90399980545044, + "p90": 300.80000311136246, + "p95": 307.42400139570236, + "p99": 317.0879930257797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 177.91999876499176, + "p90": 249.02400374412537, + "p95": 252.73600220680237, + "p99": 257.9520046710968 + }, + "combine": { + "p50": 104.3199971318245, + "p90": 135.96799969673157, + "p95": 136.63999736309052, + "p99": 138.08000087738037 + }, + "roundtrip": { + "p50": 263.9999985694885, + "p90": 335.1680040359497, + "p95": 339.55198526382446, + "p99": 350.1119911670685 + }, + "isolatedSum": { + "p50": 282.23999589681625, + "p90": 384.99200344085693, + "p95": 389.3759995698929, + "p99": 396.0320055484772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 165.79200327396393, + "p90": 195.13599574565887, + "p95": 197.76000082492828, + "p99": 202.87999510765076 + }, + "combine": { + "p50": 130.91200590133667, + "p90": 
153.60000729560852, + "p95": 159.67999398708344, + "p99": 161.47199273109436 + }, + "roundtrip": { + "p50": 254.72000241279602, + "p90": 317.02399253845215, + "p95": 320.25599479675293, + "p99": 332.2240114212036 + }, + "isolatedSum": { + "p50": 296.7040091753006, + "p90": 348.7360030412674, + "p95": 357.4399948120117, + "p99": 364.3519878387451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-187757d3", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h100_26b2a5bd", + "comparisonKey": "3702161a6b1e7696", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:08:04.424512+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": 
false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 185.7919991016388, + "p90": 196.28800451755524, + "p95": 199.8399943113327, + "p99": 209.98400449752808 + }, + "combine": { + "p50": 105.72800040245056, + "p90": 112.19199746847153, + "p95": 113.69600147008896, + "p99": 115.61600118875504 + }, + "roundtrip": { + "p50": 269.4399952888489, + "p90": 277.5680124759674, + "p95": 280.0000011920929, + "p99": 288.1599962711334 + }, + "isolatedSum": { + "p50": 291.51999950408936, + "p90": 308.48000198602676, + "p95": 313.53599578142166, + "p99": 325.6000056862831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 186.88000738620758, + "p90": 195.0719952583313, + "p95": 198.7839937210083, + "p99": 207.2959989309311 + }, + "combine": { + "p50": 113.72800171375275, + "p90": 119.74400281906128, + "p95": 121.5360015630722, + "p99": 122.8799968957901 + }, + "roundtrip": { + "p50": 279.231995344162, + "p90": 286.3039970397949, + "p95": 288.2240116596222, + "p99": 293.15200448036194 + }, + "isolatedSum": { + 
"p50": 300.6080090999603, + "p90": 314.8159980773926, + "p95": 320.3199952840805, + "p99": 330.1759958267212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 192.76799261569977, + "p90": 212.99199759960175, + "p95": 214.6880030632019, + "p99": 220.67199647426605 + }, + "combine": { + "p50": 147.039994597435, + "p90": 153.6960005760193, + "p95": 154.65599298477173, + "p99": 156.25600516796112 + }, + "roundtrip": { + "p50": 311.2320005893707, + "p90": 320.3519880771637, + "p95": 323.2319951057434, + "p99": 346.8480110168457 + }, + "isolatedSum": { + "p50": 339.80798721313477, + "p90": 366.68799817562103, + "p95": 369.34399604797363, + "p99": 376.9280016422272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f6841101", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h100_25b2a42a", + "comparisonKey": "a334a25f37bea2f6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:08:28.763560+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · alternating-groups@s3", + 
"model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 196.51199877262115, + "p90": 204.96000349521637, + "p95": 207.5520008802414, + "p99": 228.0000001192093 + }, + "combine": { + "p50": 111.42399907112122, + "p90": 113.11999708414078, + "p95": 118.8800036907196, + "p99": 120.54400146007538 + }, + "roundtrip": { + "p50": 269.567996263504, + "p90": 283.1999957561493, + "p95": 286.080002784729, + "p99": 297.7280020713806 + }, + "isolatedSum": { + "p50": 307.93599784374237, + "p90": 318.08000057935715, + "p95": 326.432004570961, + "p99": 348.54400157928467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 
3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 195.71200013160706, + "p90": 204.3839991092682, + "p95": 206.62400126457214, + "p99": 217.69599616527557 + }, + "combine": { + "p50": 119.23199892044067, + "p90": 122.36800044775009, + "p95": 128.1919926404953, + "p99": 136.76799833774567 + }, + "roundtrip": { + "p50": 285.7919931411743, + "p90": 293.2800054550171, + "p95": 296.35199904441833, + "p99": 313.1519854068756 + }, + "isolatedSum": { + "p50": 314.94399905204773, + "p90": 326.7519995570183, + "p95": 334.81599390506744, + "p99": 354.46399450302124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 193.08799505233765, + "p90": 203.8400024175644, + "p95": 206.01600408554077, + "p99": 221.53599560260773 + }, + "combine": { + "p50": 144.83200013637543, + "p90": 154.30399775505066, + "p95": 155.71199357509613, + "p99": 160.5760008096695 + }, + "roundtrip": { + "p50": 293.3120131492615, + "p90": 324.16000962257385, + "p95": 327.0080089569092, + "p99": 335.6800079345703 + }, + "isolatedSum": { + "p50": 337.9199951887131, + "p90": 358.14400017261505, + "p95": 361.7279976606369, + "p99": 382.1119964122772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f27f1299", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h100_74591c48", + "comparisonKey": 
"71f0988953294527", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:45.300028+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 122.94399738311768, + "p90": 130.78400492668152, + "p95": 134.71999764442444, + 
"p99": 188.1919950246811 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 89.79199826717377, + "p95": 91.20000153779984, + "p99": 102.14400291442871 + }, + "roundtrip": { + "p50": 199.072003364563, + "p90": 206.2399983406067, + "p95": 208.5759937763214, + "p99": 225.79200565814972 + }, + "isolatedSum": { + "p50": 210.719995200634, + "p90": 220.57600319385529, + "p95": 225.91999918222427, + "p99": 290.3359979391098 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 122.97599762678146, + "p90": 130.3039938211441, + "p95": 132.6719969511032, + "p99": 139.42399621009827 + }, + "combine": { + "p50": 88.3840024471283, + "p90": 90.27200192213058, + "p95": 94.04800087213516, + "p99": 99.23200309276581 + }, + "roundtrip": { + "p50": 200.80000162124634, + "p90": 206.88000321388245, + "p95": 209.27999913692474, + "p99": 269.4399952888489 + }, + "isolatedSum": { + "p50": 211.36000007390976, + "p90": 220.5759957432747, + "p95": 226.71999782323837, + "p99": 238.65599930286407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 126.36800110340118, + "p90": 188.06399405002594, + "p95": 191.3280040025711, + "p99": 196.9279944896698 + }, + "combine": { + "p50": 89.21600133180618, + "p90": 103.5199984908104, + "p95": 106.20799660682678, + "p99": 118.14399808645248 + }, + "roundtrip": { + "p50": 204.03200387954712, + "p90": 267.42398738861084, + "p95": 270.3999876976013, + "p99": 277.0879864692688 + }, + "isolatedSum": { + "p50": 215.58400243520737, + "p90": 291.58399254083633, + "p95": 
297.5360006093979, + "p99": 315.0719925761223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 124.12799894809723, + "p90": 188.06399405002594, + "p95": 195.0400024652481, + "p99": 210.1760059595108 + }, + "combine": { + "p50": 88.44800293445587, + "p90": 90.7519981265068, + "p95": 94.17600184679031, + "p99": 97.53599762916565 + }, + "roundtrip": { + "p50": 202.27199792861938, + "p90": 207.2640061378479, + "p95": 210.40000021457672, + "p99": 215.29600024223328 + }, + "isolatedSum": { + "p50": 212.5760018825531, + "p90": 278.81599217653275, + "p95": 289.2160043120384, + "p99": 307.71200358867645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 126.71999633312225, + "p90": 187.71199882030487, + "p95": 190.5599981546402, + "p99": 199.3280053138733 + }, + "combine": { + "p50": 94.17600184679031, + "p90": 111.7440015077591, + "p95": 113.0559965968132, + "p99": 120.03199756145477 + }, + "roundtrip": { + "p50": 206.27200603485107, + "p90": 271.7440128326416, + "p95": 274.4640111923218, + "p99": 280.7680070400238 + }, + "isolatedSum": { + "p50": 220.89599817991257, + "p90": 299.45600032806396, + "p95": 303.6159947514534, + "p99": 319.36000287532806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 135.3919953107834, + 
"p90": 188.7039989233017, + "p95": 190.72000682353973, + "p99": 197.24799692630768 + }, + "combine": { + "p50": 97.75999933481216, + "p90": 113.82400244474411, + "p95": 114.56000059843063, + "p99": 120.51200121641159 + }, + "roundtrip": { + "p50": 213.56800198554993, + "p90": 276.1920094490051, + "p95": 279.2640030384064, + "p99": 647.0400094985962 + }, + "isolatedSum": { + "p50": 233.15199464559555, + "p90": 302.5280013680458, + "p95": 305.28000742197037, + "p99": 317.75999814271927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 151.13599598407745, + "p90": 195.13599574565887, + "p95": 197.28000462055206, + "p99": 202.72000133991241 + }, + "combine": { + "p50": 111.93600296974182, + "p90": 128.28800082206726, + "p95": 129.5360028743744, + "p99": 137.53600418567657 + }, + "roundtrip": { + "p50": 224.92800652980804, + "p90": 291.0720109939575, + "p95": 294.2720055580139, + "p99": 302.8480112552643 + }, + "isolatedSum": { + "p50": 263.0719989538193, + "p90": 323.42399656772614, + "p95": 326.81600749492645, + "p99": 340.256005525589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 177.7919977903366, + "p90": 219.32800114154816, + "p95": 221.82400524616241, + "p99": 226.30399465560913 + }, + "combine": { + "p50": 143.0400013923645, + "p90": 156.70399367809296, + "p95": 160.35200655460358, + "p99": 163.4880006313324 + }, + "roundtrip": { + "p50": 279.231995344162, + "p90": 323.0400085449219, + "p95": 326.04798674583435, + "p99": 332.15999603271484 + }, + 
"isolatedSum": { + "p50": 320.8319991827011, + "p90": 376.0319948196411, + "p95": 382.176011800766, + "p99": 389.7919952869415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-100011e2", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h100_2707bc2b", + "comparisonKey": "b18a71c502b29ae5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:45.458428+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 186.68800592422485, + "p90": 195.6160068511963, + "p95": 199.5519995689392, + "p99": 210.01599729061127 + }, + "combine": { + "p50": 84.927998483181, + "p90": 103.71199995279312, + "p95": 105.43999820947647, + "p99": 108.15999656915665 + }, + "roundtrip": { + "p50": 201.37600600719452, + "p90": 271.807998418808, + "p95": 275.2000093460083, + "p99": 283.90398621559143 + }, + "isolatedSum": { + "p50": 271.61600440740585, + "p90": 299.3280068039894, + "p95": 304.9919977784157, + "p99": 318.1759938597679 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 180.2240014076233, + "p90": 191.29599630832672, + "p95": 195.5839991569519, + "p99": 213.34399282932281 + }, + "combine": { + "p50": 89.6959975361824, + "p90": 96.12800180912018, + "p95": 97.02400118112564, + "p99": 103.20000350475311 + }, + "roundtrip": { + "p50": 251.52000784873962, + "p90": 260.6399953365326, + "p95": 264.41600918769836, + "p99": 291.9999957084656 + }, + "isolatedSum": { + "p50": 269.9199989438057, + "p90": 287.4239981174469, + "p95": 292.60800033807755, + "p99": 316.5439963340759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + 
"recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 186.0799938440323, + "p90": 193.2159960269928, + "p95": 196.57599925994873, + "p99": 208.28799903392792 + }, + "combine": { + "p50": 102.11200267076492, + "p90": 105.43999820947647, + "p95": 110.04800349473953, + "p99": 112.83200234174728 + }, + "roundtrip": { + "p50": 258.7839961051941, + "p90": 270.4960107803345, + "p95": 274.04800057411194, + "p99": 278.4639894962311 + }, + "isolatedSum": { + "p50": 288.1919965147972, + "p90": 298.65599423646927, + "p95": 306.62400275468826, + "p99": 321.1200013756752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 180.4479956626892, + "p90": 193.7599927186966, + "p95": 195.80799341201782, + "p99": 202.33599841594696 + }, + "combine": { + "p50": 102.39999741315842, + "p90": 107.00800269842148, + "p95": 110.68800091743469, + "p99": 118.84800344705582 + }, + "roundtrip": { + "p50": 260.3200078010559, + "p90": 268.640011548996, + "p95": 270.9439992904663, + "p99": 277.72799134254456 + }, + "isolatedSum": { + "p50": 282.8479930758476, + "p90": 300.7679954171181, + "p95": 306.4959943294525, + "p99": 321.1840018630028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d1ef5c9b", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h100_6cf5e0a6", + "comparisonKey": "ed10183a10ab4f12", + "schemaVersion": 
3, + "generatedAt": "2026-07-02T10:57:41.644089+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 177.88800597190857, + "p90": 189.4720047712326, + "p95": 194.30400431156158, + "p99": 204.92799580097198 + 
}, + "combine": { + "p50": 91.64799749851227, + "p90": 98.24000298976898, + "p95": 99.04000163078308, + "p99": 106.01600259542465 + }, + "roundtrip": { + "p50": 245.88799476623535, + "p90": 253.34399938583374, + "p95": 254.8159956932068, + "p99": 259.5199942588806 + }, + "isolatedSum": { + "p50": 269.53600347042084, + "p90": 287.7120077610016, + "p95": 293.34400594234467, + "p99": 310.94399839639664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 130.46400249004364, + "p90": 180.38399517536163, + "p95": 183.9359998703003, + "p99": 189.53600525856018 + }, + "combine": { + "p50": 80.70400357246399, + "p90": 95.551997423172, + "p95": 96.44799679517746, + "p99": 98.14400225877762 + }, + "roundtrip": { + "p50": 195.0400024652481, + "p90": 251.8720030784607, + "p95": 253.4080147743225, + "p99": 257.3759853839874 + }, + "isolatedSum": { + "p50": 211.16800606250763, + "p90": 275.93599259853363, + "p95": 280.38399666547775, + "p99": 287.6800075173378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 129.37599420547485, + "p90": 321.6319978237152, + "p95": 342.9439961910248, + "p99": 357.56799578666687 + }, + "combine": { + "p50": 81.216000020504, + "p90": 96.8639999628067, + "p95": 99.48799759149551, + "p99": 205.50400018692017 + }, + "roundtrip": { + "p50": 195.26399672031403, + "p90": 351.80801153182983, + "p95": 363.0400002002716, + "p99": 780.672013759613 + }, + "isolatedSum": { + "p50": 210.59199422597885, + "p90": 418.4959977865219, + "p95": 442.4319937825203, + "p99": 
563.071995973587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.15199995040894, + "p90": 194.33599710464478, + "p95": 195.8719938993454, + "p99": 201.4400064945221 + }, + "combine": { + "p50": 81.37600123882294, + "p90": 98.08000177145004, + "p95": 98.65599870681763, + "p99": 104.22399640083313 + }, + "roundtrip": { + "p50": 196.3520050048828, + "p90": 261.28000020980835, + "p95": 264.44798707962036, + "p99": 268.8960134983063 + }, + "isolatedSum": { + "p50": 210.52800118923187, + "p90": 292.4159988760948, + "p95": 294.527992606163, + "p99": 305.6640028953552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 178.39999496936798, + "p90": 193.7279999256134, + "p95": 195.90400159358978, + "p99": 202.30400562286377 + }, + "combine": { + "p50": 94.14400160312653, + "p90": 103.29599678516388, + "p95": 104.00000214576721, + "p99": 105.8880016207695 + }, + "roundtrip": { + "p50": 199.45600628852844, + "p90": 265.9519910812378, + "p95": 268.12800765037537, + "p99": 275.4240036010742 + }, + "isolatedSum": { + "p50": 272.5439965724945, + "p90": 297.0239967107773, + "p95": 299.904003739357, + "p99": 308.19200724363327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 129.63199615478516, + "p90": 192.9599940776825, + "p95": 
195.68000733852386, + "p99": 201.92000269889832 + }, + "combine": { + "p50": 85.56800335645676, + "p90": 103.64799946546555, + "p95": 104.44799810647964, + "p99": 107.32799768447876 + }, + "roundtrip": { + "p50": 200.3840059041977, + "p90": 265.9519910812378, + "p95": 268.7999904155731, + "p99": 274.7200131416321 + }, + "isolatedSum": { + "p50": 215.1999995112419, + "p90": 296.60799354314804, + "p95": 300.1280054450035, + "p99": 309.2480003833771 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 131.29599392414093, + "p90": 191.6159987449646, + "p95": 195.16800343990326, + "p99": 203.2639980316162 + }, + "combine": { + "p50": 88.8959988951683, + "p90": 104.73600029945374, + "p95": 105.3759977221489, + "p99": 112.28799819946289 + }, + "roundtrip": { + "p50": 203.13599705696106, + "p90": 270.9760069847107, + "p95": 274.6880054473877, + "p99": 282.8800082206726 + }, + "isolatedSum": { + "p50": 220.19199281930923, + "p90": 296.35199904441833, + "p95": 300.54400116205215, + "p99": 315.5519962310791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 145.21600306034088, + "p90": 271.7759907245636, + "p95": 303.00799012184143, + "p99": 349.15199875831604 + }, + "combine": { + "p50": 99.71199929714203, + "p90": 120.51200121641159, + "p95": 121.31199985742569, + "p99": 127.58399546146393 + }, + "roundtrip": { + "p50": 216.8319970369339, + "p90": 282.6879918575287, + "p95": 284.7039997577667, + "p99": 292.54400730133057 + }, + "isolatedSum": { + "p50": 244.9280023574829, + 
"p90": 392.2879919409752, + "p95": 424.3199899792671, + "p99": 476.73599421977997 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-98986ce9", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h100_aae11d50", + "comparisonKey": "0b3ff94152566a2a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:34.976878+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 184.57600474357605, + "p90": 194.04800236225128, + "p95": 195.77600061893463, + "p99": 202.4960070848465 + }, + "combine": { + "p50": 102.88000106811523, + "p90": 106.75200074911118, + "p95": 108.22399705648422, + "p99": 139.42399621009827 + }, + "roundtrip": { + "p50": 260.73598861694336, + "p90": 269.9199914932251, + "p95": 274.1760015487671, + "p99": 577.4400234222412 + }, + "isolatedSum": { + "p50": 287.4560058116913, + "p90": 300.80000311136246, + "p95": 303.99999767541885, + "p99": 341.92000329494476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 188.63999843597412, + "p90": 250.7840096950531, + "p95": 254.88001108169556, + "p99": 257.88798928260803 + }, + "combine": { + "p50": 114.01599645614624, + "p90": 136.22400164604187, + "p95": 137.95199990272522, + "p99": 141.02399349212646 + }, + "roundtrip": { + "p50": 273.72801303863525, + "p90": 339.00800347328186, + "p95": 342.4000144004822, + "p99": 348.9919900894165 + }, + "isolatedSum": { + "p50": 302.65599489212036, + "p90": 387.00801134109497, + "p95": 392.8320109844208, + "p99": 398.9119827747345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 
7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 197.56799936294556, + "p90": 209.08799767494202, + "p95": 211.776003241539, + "p99": 221.98399901390076 + }, + "combine": { + "p50": 144.3520039319992, + "p90": 151.96800231933594, + "p95": 152.70400047302246, + "p99": 154.84799444675446 + }, + "roundtrip": { + "p50": 303.9039969444275, + "p90": 315.36000967025757, + "p95": 317.82400608062744, + "p99": 711.7440104484558 + }, + "isolatedSum": { + "p50": 341.92000329494476, + "p90": 361.05599999427795, + "p95": 364.48000371456146, + "p99": 376.8319934606552 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-76095677", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h100_14d47c58", + "comparisonKey": "e1b174b4a3075765", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:59.923816+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 184.1920018196106, + "p90": 193.34399700164795, + "p95": 196.54400646686554, + "p99": 214.36800062656403 + }, + "combine": { + "p50": 103.64799946546555, + "p90": 106.49599879980087, + "p95": 110.52799969911575, + "p99": 113.08799684047699 + }, + "roundtrip": { + "p50": 261.3759934902191, + "p90": 269.3440020084381, + "p95": 271.807998418808, + "p99": 281.8880081176758 + }, + "isolatedSum": { + "p50": 287.84000128507614, + "p90": 299.8399958014488, + "p95": 307.0720061659813, + "p99": 327.455997467041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 183.74399840831757, + "p90": 192.73599982261658, + "p95": 196.86399400234222, + "p99": 211.87199652194977 + 
}, + "combine": { + "p50": 111.68000102043152, + "p90": 113.66400122642517, + "p95": 118.07999759912491, + "p99": 121.37600034475327 + }, + "roundtrip": { + "p50": 269.72800493240356, + "p90": 277.6640057563782, + "p95": 280.0320088863373, + "p99": 288.06400299072266 + }, + "isolatedSum": { + "p50": 295.4239994287491, + "p90": 306.40000104904175, + "p95": 314.94399160146713, + "p99": 333.24799686670303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 172.19200730323792, + "p90": 204.57600057125092, + "p95": 206.7199945449829, + "p99": 213.34399282932281 + }, + "combine": { + "p50": 131.16799294948578, + "p90": 152.19199657440186, + "p95": 153.08800339698792, + "p99": 179.3919950723648 + }, + "roundtrip": { + "p50": 263.61599564552307, + "p90": 312.6719892024994, + "p95": 315.2639865875244, + "p99": 325.3119885921478 + }, + "isolatedSum": { + "p50": 303.3600002527237, + "p90": 356.76799714565277, + "p95": 359.8079979419708, + "p99": 392.7359879016876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cb475596", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h100_17d48111", + "comparisonKey": "45d862755937ad85", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:24.932871+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 187.3600035905838, + "p90": 196.6399997472763, + "p95": 199.68000054359436, + "p99": 205.88800311088562 + }, + "combine": { + "p50": 103.5199984908104, + "p90": 111.23199760913849, + "p95": 111.90400272607803, + "p99": 115.93600362539291 + }, + "roundtrip": { + "p50": 266.07999205589294, + "p90": 274.3360102176666, + "p95": 
277.15200185775757, + "p99": 284.7039997577667 + }, + "isolatedSum": { + "p50": 290.8800020813942, + "p90": 307.8719973564148, + "p95": 311.5840032696724, + "p99": 321.82400673627853 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 187.48800456523895, + "p90": 197.40800559520721, + "p95": 201.27999782562256, + "p99": 293.5999929904938 + }, + "combine": { + "p50": 111.55200004577637, + "p90": 113.53600025177002, + "p95": 118.84800344705582, + "p99": 120.80000340938568 + }, + "roundtrip": { + "p50": 270.30399441719055, + "p90": 279.5200049877167, + "p95": 282.1440100669861, + "p99": 290.43200612068176 + }, + "isolatedSum": { + "p50": 299.0400046110153, + "p90": 310.94400584697723, + "p95": 320.1280012726784, + "p99": 414.39999639987946 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 201.56799256801605, + "p90": 209.53600108623505, + "p95": 210.78400313854218, + "p99": 215.7440036535263 + }, + "combine": { + "p50": 143.5839980840683, + "p90": 150.14399588108063, + "p95": 152.54400670528412, + "p99": 155.07200360298157 + }, + "roundtrip": { + "p50": 264.19198513031006, + "p90": 314.6879971027374, + "p95": 316.8320059776306, + "p99": 322.2079873085022 + }, + "isolatedSum": { + "p50": 345.15199065208435, + "p90": 359.6799969673157, + "p95": 363.3280098438263, + "p99": 370.8160072565079 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-20808623", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h100_16d47f7e", + "comparisonKey": "621e19638b6376f4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:49.841823+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 181.92000687122345, + "p90": 192.4159973859787, + "p95": 196.25599682331085, + "p99": 204.19199764728546 + }, + "combine": { + "p50": 98.94400089979172, + "p90": 105.82400113344193, + "p95": 106.59199953079224, + "p99": 112.83200234174728 + }, + "roundtrip": { + "p50": 260.8959972858429, + "p90": 269.567996263504, + "p95": 272.15999364852905, + "p99": 277.536004781723 + }, + "isolatedSum": { + "p50": 280.86400777101517, + "p90": 298.2399985194206, + "p95": 302.8479963541031, + "p99": 317.02399998903275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 183.16799402236938, + "p90": 191.67999923229218, + "p95": 195.3279972076416, + "p99": 205.63200116157532 + }, + "combine": { + "p50": 109.6000000834465, + "p90": 114.3679991364479, + "p95": 115.39199948310852, + "p99": 138.68799805641174 + }, + "roundtrip": { + "p50": 270.7520127296448, + "p90": 279.231995344162, + "p95": 283.7759852409363, + "p99": 311.3600015640259 + }, + "isolatedSum": { + "p50": 292.7679941058159, + "p90": 306.0479983687401, + "p95": 310.7199966907501, + "p99": 344.31999921798706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 201.24800503253937, + "p90": 211.58400177955627, + "p95": 213.15200626850128, + "p99": 
833.0240249633789 + }, + "combine": { + "p50": 143.48800480365753, + "p90": 149.08799529075623, + "p95": 153.24799716472626, + "p99": 155.35999834537506 + }, + "roundtrip": { + "p50": 304.1599988937378, + "p90": 314.04799222946167, + "p95": 317.6639974117279, + "p99": 324.41601157188416 + }, + "isolatedSum": { + "p50": 344.7360098361969, + "p90": 360.6719970703125, + "p95": 366.40000343322754, + "p99": 988.384023308754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1ce8d13a", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h100_bd3f0c52", + "comparisonKey": "d0672685ad51d4ed", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:39.796926+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 177.95200645923615, + "p90": 188.48000466823578, + "p95": 193.50400567054749, + "p99": 262.5280022621155 + }, + "combine": { + "p50": 97.9200005531311, + "p90": 104.5759990811348, + "p95": 106.33599758148193, + "p99": 189.66400623321533 + }, + "roundtrip": { + "p50": 251.77600979804993, + "p90": 264.8960053920746, + "p95": 267.1999931335449, + "p99": 282.6240062713623 + }, + "isolatedSum": { + "p50": 275.87200701236725, + "p90": 293.0560037493706, + "p95": 299.8400032520294, + "p99": 452.1920084953308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 129.40800189971924, + "p90": 184.35199558734894, + "p95": 187.68000602722168, + "p99": 197.1520036458969 + }, + "combine": { + "p50": 87.8399983048439, + "p90": 103.55199873447418, + "p95": 104.2879968881607, + "p99": 106.9440022110939 + }, + "roundtrip": { + "p50": 202.59200036525726, + "p90": 258.11201333999634, + "p95": 259.42400097846985, + 
"p99": 262.87999749183655 + }, + "isolatedSum": { + "p50": 217.24800020456314, + "p90": 287.9039943218231, + "p95": 291.9680029153824, + "p99": 304.0960058569908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 129.31199371814728, + "p90": 190.14400243759155, + "p95": 192.83199310302734, + "p99": 200.15999674797058 + }, + "combine": { + "p50": 88.16000074148178, + "p90": 103.20000350475311, + "p95": 103.93600165843964, + "p99": 110.84800213575363 + }, + "roundtrip": { + "p50": 202.97600328922272, + "p90": 266.55998826026917, + "p95": 271.13598585128784, + "p99": 277.536004781723 + }, + "isolatedSum": { + "p50": 217.47199445962906, + "p90": 293.34400594234467, + "p95": 296.767994761467, + "p99": 311.0079988837242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.15199995040894, + "p90": 188.73600661754608, + "p95": 193.2159960269928, + "p99": 197.4720060825348 + }, + "combine": { + "p50": 88.86399865150452, + "p90": 104.73600029945374, + "p95": 106.08000308275223, + "p99": 108.44799876213074 + }, + "roundtrip": { + "p50": 203.19999754428864, + "p90": 264.384001493454, + "p95": 266.975998878479, + "p99": 271.84000611305237 + }, + "isolatedSum": { + "p50": 218.01599860191345, + "p90": 293.4720069169998, + "p95": 299.295999109745, + "p99": 305.9200048446655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 130.2720010280609, + "p90": 187.99999356269836, + "p95": 191.0720020532608, + "p99": 194.11200284957886 + }, + "combine": { + "p50": 90.33600240945816, + "p90": 104.8320010304451, + "p95": 108.47999900579453, + "p99": 112.70400136709213 + }, + "roundtrip": { + "p50": 205.72799444198608, + "p90": 269.8560059070587, + "p95": 272.6080119609833, + "p99": 278.8800001144409 + }, + "isolatedSum": { + "p50": 220.60800343751907, + "p90": 292.83199459314346, + "p95": 299.5520010590553, + "p99": 306.816004216671 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 138.2399946451187, + "p90": 189.91999328136444, + "p95": 194.5600062608719, + "p99": 326.9760012626648 + }, + "combine": { + "p50": 97.08800166845322, + "p90": 112.89600282907486, + "p95": 115.26399850845337, + "p99": 121.47200107574463 + }, + "roundtrip": { + "p50": 212.3199999332428, + "p90": 273.824006319046, + "p95": 277.47198939323425, + "p99": 297.5679934024811 + }, + "isolatedSum": { + "p50": 235.32799631357193, + "p90": 302.8159961104393, + "p95": 309.82400476932526, + "p99": 448.4480023384094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 148.8640010356903, + "p90": 190.5599981546402, + "p95": 193.15199553966522, + "p99": 197.88800179958344 + }, + "combine": { + "p50": 106.46399855613708, + "p90": 120.57600170373917, + "p95": 127.23200023174286, + "p99": 128.48000228405 + }, + 
"roundtrip": { + "p50": 224.0000069141388, + "p90": 288.9600098133087, + "p95": 291.456013917923, + "p99": 296.25600576400757 + }, + "isolatedSum": { + "p50": 255.3279995918274, + "p90": 311.13599985837936, + "p95": 320.3839957714081, + "p99": 326.3680040836334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 169.44000124931335, + "p90": 206.91199600696564, + "p95": 209.79200303554535, + "p99": 214.33599293231964 + }, + "combine": { + "p50": 131.071999669075, + "p90": 145.82400023937225, + "p95": 150.87999403476715, + "p99": 152.73599326610565 + }, + "roundtrip": { + "p50": 264.8639976978302, + "p90": 312.73600459098816, + "p95": 316.3520097732544, + "p99": 321.5680122375488 + }, + "isolatedSum": { + "p50": 300.51200091838837, + "p90": 352.7359962463379, + "p95": 360.6719970703125, + "p99": 367.0719861984253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4fff15e7", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h100_7fb780dc", + "comparisonKey": "383797c085f837cc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:37.039000+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 124.70400333404541, + "p90": 133.12000036239624, + "p95": 137.1839940547943, + "p99": 417.1519875526428 + }, + "combine": { + "p50": 86.2400010228157, + "p90": 88.48000317811966, + "p95": 89.37600255012512, + "p99": 92.44800359010696 + }, + "roundtrip": { + "p50": 195.71200013160706, + "p90": 203.23200523853302, + "p95": 205.1520049571991, + "p99": 208.38400721549988 + }, + "isolatedSum": { + "p50": 210.94400435686111, + "p90": 
221.6000035405159, + "p95": 226.55999660491943, + "p99": 509.5999911427498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 124.57600235939026, + "p90": 131.52000308036804, + "p95": 134.3040019273758, + "p99": 139.74399864673615 + }, + "combine": { + "p50": 86.97599917650223, + "p90": 89.12000060081482, + "p95": 89.79199826717377, + "p99": 96.03200107812881 + }, + "roundtrip": { + "p50": 197.1839964389801, + "p90": 204.28800582885742, + "p95": 205.85599541664124, + "p99": 211.58400177955627 + }, + "isolatedSum": { + "p50": 211.5520015358925, + "p90": 220.64000368118286, + "p95": 224.09600019454956, + "p99": 235.77599972486496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 128.4160017967224, + "p90": 192.00000166893005, + "p95": 193.66399943828583, + "p99": 201.60000026226044 + }, + "combine": { + "p50": 88.54400366544724, + "p90": 105.34399747848511, + "p95": 106.23999685049057, + "p99": 113.18399757146835 + }, + "roundtrip": { + "p50": 202.91200280189514, + "p90": 265.7279968261719, + "p95": 302.4640083312988, + "p99": 335.2000117301941 + }, + "isolatedSum": { + "p50": 216.96000546216965, + "p90": 297.34399914741516, + "p95": 299.9039962887764, + "p99": 314.7839978337288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": 
{ + "p50": 124.7360035777092, + "p90": 131.99999928474426, + "p95": 135.3600025177002, + "p99": 141.95199310779572 + }, + "combine": { + "p50": 87.52000331878662, + "p90": 89.66399729251862, + "p95": 90.20800143480301, + "p99": 95.74399888515472 + }, + "roundtrip": { + "p50": 201.08799636363983, + "p90": 206.11199736595154, + "p95": 207.96799659729004, + "p99": 213.02400529384613 + }, + "isolatedSum": { + "p50": 212.25600689649582, + "p90": 221.66399657726288, + "p95": 225.5680039525032, + "p99": 237.69599199295044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 124.1919994354248, + "p90": 132.09599256515503, + "p95": 134.65599715709686, + "p99": 138.3039951324463 + }, + "combine": { + "p50": 89.05600011348724, + "p90": 90.52799642086029, + "p95": 94.81599926948547, + "p99": 97.82399982213974 + }, + "roundtrip": { + "p50": 203.80799472332, + "p90": 208.44799280166626, + "p95": 210.36800742149353, + "p99": 217.8560048341751 + }, + "isolatedSum": { + "p50": 213.24799954891205, + "p90": 222.62398898601532, + "p95": 229.47199642658234, + "p99": 236.12799495458603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 135.29600203037262, + "p90": 138.43199610710144, + "p95": 140.3840035200119, + "p99": 143.61600577831268 + }, + "combine": { + "p50": 95.32800316810608, + "p90": 97.4079966545105, + "p95": 98.04800152778625, + "p99": 103.42399775981903 + }, + "roundtrip": { + "p50": 208.19200575351715, + "p90": 213.1199985742569, + "p95": 214.91199731826782, + 
"p99": 219.39200162887573 + }, + "isolatedSum": { + "p50": 230.6240051984787, + "p90": 235.83999276161194, + "p95": 238.43200504779816, + "p99": 247.0400035381317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 146.65600657463074, + "p90": 190.46400487422943, + "p95": 192.9280012845993, + "p99": 200.15999674797058 + }, + "combine": { + "p50": 105.50399869680405, + "p90": 120.19199877977371, + "p95": 120.80000340938568, + "p99": 127.74400413036346 + }, + "roundtrip": { + "p50": 221.0559993982315, + "p90": 287.200003862381, + "p95": 291.3280129432678, + "p99": 353.43998670578003 + }, + "isolatedSum": { + "p50": 252.16000527143478, + "p90": 310.65600365400314, + "p95": 313.728004693985, + "p99": 327.90400087833405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.01600432395935, + "p90": 189.5039975643158, + "p95": 194.5600062608719, + "p99": 201.88799500465393 + }, + "combine": { + "p50": 121.79200351238251, + "p90": 126.43200159072876, + "p95": 127.83999741077423, + "p99": 129.08799946308136 + }, + "roundtrip": { + "p50": 243.20000410079956, + "p90": 248.3839988708496, + "p95": 249.66399371623993, + "p99": 256.03199005126953 + }, + "isolatedSum": { + "p50": 283.80800783634186, + "p90": 315.93599915504456, + "p95": 322.4000036716461, + "p99": 330.9759944677353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 
7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5759428c", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h100_a2256bc0", + "comparisonKey": "4c7049366b768ec0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:08:53.715698+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 184.4799965620041, + "p90": 194.87999379634857, + "p95": 197.24799692630768, + "p99": 208.8640034198761 + }, + "combine": { + "p50": 103.29599678516388, + "p90": 106.65600001811981, + "p95": 108.60799998044968, + "p99": 115.00799655914307 + }, + "roundtrip": { + "p50": 260.8639895915985, + "p90": 270.27198672294617, + "p95": 273.0880081653595, + "p99": 281.66401386260986 + }, + "isolatedSum": { + "p50": 287.77599334716797, + "p90": 301.5359938144684, + "p95": 305.85599690675735, + "p99": 323.87199997901917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 184.9920004606247, + "p90": 193.88799369335175, + "p95": 197.34400510787964, + "p99": 203.36000621318817 + }, + "combine": { + "p50": 111.84000223875046, + "p90": 115.00799655914307, + "p95": 119.61600184440613, + "p99": 126.81600451469421 + }, + "roundtrip": { + "p50": 269.72800493240356, + "p90": 279.35999631881714, + "p95": 282.24000334739685, + "p99": 290.8799946308136 + }, + "isolatedSum": { + "p50": 296.83200269937515, + "p90": 308.8959902524948, + "p95": 316.96000695228577, + "p99": 330.1760107278824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 204.48000729084015, + "p90": 212.38400042057037, + "p95": 214.56000208854675, + "p99": 
220.92799842357635 + }, + "combine": { + "p50": 143.13599467277527, + "p90": 150.14399588108063, + "p95": 151.0079950094223, + "p99": 155.07200360298157 + }, + "roundtrip": { + "p50": 264.8319900035858, + "p90": 313.08799982070923, + "p95": 316.4159953594208, + "p99": 332.0640027523041 + }, + "isolatedSum": { + "p50": 347.6160019636154, + "p90": 362.527996301651, + "p95": 365.56799709796906, + "p99": 376.0000020265579 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-99fd7a38", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h100_9a44fbfe", + "comparisonKey": "6c9de5a8c1998e3d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:41.330168+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 127.61600315570831, + "p90": 134.49600338935852, + "p95": 137.37599551677704, + "p99": 145.7280069589615 + }, + "combine": { + "p50": 82.65600353479385, + "p90": 87.3280018568039, + "p95": 88.76799792051315, + "p99": 92.06400066614151 + }, + "roundtrip": { + "p50": 199.64799284934998, + "p90": 205.21600544452667, + "p95": 209.1200053691864, + "p99": 220.22399306297302 + }, + "isolatedSum": { + "p50": 210.27200669050217, + "p90": 221.82400524616241, + "p95": 226.1439934372902, + "p99": 237.792007625103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 128.03199887275696, + "p90": 134.14399325847626, + "p95": 137.5039964914322, + "p99": 146.36799693107605 + }, + "combine": { + "p50": 85.24800091981888, + "p90": 88.54400366544724, + "p95": 89.88799899816513, + "p99": 94.84799951314926 + }, + "roundtrip": { + "p50": 199.8399943113327, + "p90": 205.88800311088562, + "p95": 
209.3760073184967, + "p99": 217.75999665260315 + }, + "isolatedSum": { + "p50": 213.27999979257584, + "p90": 222.6879969239235, + "p95": 227.39199548959732, + "p99": 241.2159964442253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 130.3360015153885, + "p90": 197.91999459266663, + "p95": 202.07999646663666, + "p99": 209.9200040102005 + }, + "combine": { + "p50": 88.95999938249588, + "p90": 105.85600137710571, + "p95": 109.82400178909302, + "p99": 407.45601058006287 + }, + "roundtrip": { + "p50": 203.90400290489197, + "p90": 268.99200677871704, + "p95": 272.2240090370178, + "p99": 283.7440073490143 + }, + "isolatedSum": { + "p50": 219.29600089788437, + "p90": 303.77599596977234, + "p95": 311.9039982557297, + "p99": 617.3760145902634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.83999741077423, + "p90": 134.75200533866882, + "p95": 138.17599415779114, + "p99": 143.96800100803375 + }, + "combine": { + "p50": 87.13600039482117, + "p90": 89.59999680519104, + "p95": 90.55999666452408, + "p99": 94.91200000047684 + }, + "roundtrip": { + "p50": 200.9280025959015, + "p90": 207.23199844360352, + "p95": 209.79200303554535, + "p99": 218.1120067834854 + }, + "isolatedSum": { + "p50": 214.9759978055954, + "p90": 224.35200214385986, + "p95": 228.73599082231522, + "p99": 238.8800010085106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 7, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 130.94399869441986, + "p90": 196.83200120925903, + "p95": 198.2720047235489, + "p99": 204.03200387954712 + }, + "combine": { + "p50": 89.9839997291565, + "p90": 110.88000237941742, + "p95": 112.12799698114395, + "p99": 114.20799791812897 + }, + "roundtrip": { + "p50": 206.30399882793427, + "p90": 274.78399872779846, + "p95": 276.8000066280365, + "p99": 298.11200499534607 + }, + "isolatedSum": { + "p50": 220.92799842357635, + "p90": 307.71200358867645, + "p95": 310.40000170469284, + "p99": 318.2400017976761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 137.2160017490387, + "p90": 193.05600225925446, + "p95": 196.31999731063843, + "p99": 214.91199731826782 + }, + "combine": { + "p50": 96.70399874448776, + "p90": 113.63200098276138, + "p95": 114.656001329422, + "p99": 117.24799871444702 + }, + "roundtrip": { + "p50": 211.61599457263947, + "p90": 271.4560031890869, + "p95": 273.8560140132904, + "p99": 277.47198939323425 + }, + "isolatedSum": { + "p50": 233.92000049352646, + "p90": 306.68800324201584, + "p95": 310.9759986400604, + "p99": 332.15999603271484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 148.83199334144592, + "p90": 195.5520063638687, + "p95": 198.4959989786148, + "p99": 206.40000700950623 + }, + "combine": { + "p50": 105.56799918413162, + "p90": 122.78400361537933, + "p95": 123.87199699878693, + 
"p99": 186.8479996919632 + }, + "roundtrip": { + "p50": 221.91999852657318, + "p90": 290.20801186561584, + "p95": 297.85600304603577, + "p99": 821.6000199317932 + }, + "isolatedSum": { + "p50": 254.39999252557755, + "p90": 318.33600997924805, + "p95": 322.36799597740173, + "p99": 393.2480067014694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 165.24800658226013, + "p90": 202.2400051355362, + "p95": 205.85599541664124, + "p99": 267.39200949668884 + }, + "combine": { + "p50": 122.84799665212631, + "p90": 140.22399485111237, + "p95": 141.7279988527298, + "p99": 147.67999947071075 + }, + "roundtrip": { + "p50": 245.08799612522125, + "p90": 305.4719865322113, + "p95": 308.0640137195587, + "p99": 313.53598833084106 + }, + "isolatedSum": { + "p50": 288.09600323438644, + "p90": 342.46399998664856, + "p95": 347.58399426937103, + "p99": 415.0720089673996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-26d4de9e", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_896e9933", + "comparisonKey": "1fa6fb2a4a0fd829", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:36.995904+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 182.36799538135529, + "p90": 190.14400243759155, + "p95": 192.06400215625763, + "p99": 202.7519941329956 + }, + "combine": { + "p50": 84.22400057315826, + "p90": 98.4639972448349, + "p95": 105.05600273609161, + "p99": 405.15199303627014 + }, + "roundtrip": { + "p50": 198.08000326156616, + "p90": 261.6640031337738, + "p95": 263.71198892593384, + "p99": 268.70399713516235 + }, + "isolatedSum": { + "p50": 266.59199595451355, + "p90": 288.60799968242645, + "p95": 297.12000489234924, + "p99": 
607.9039871692657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 127.23200023174286, + "p90": 187.3600035905838, + "p95": 190.91199338436127, + "p99": 196.96000218391418 + }, + "combine": { + "p50": 82.5280025601387, + "p90": 103.64799946546555, + "p95": 104.86400127410889, + "p99": 107.61599987745285 + }, + "roundtrip": { + "p50": 198.14400374889374, + "p90": 260.1599991321564, + "p95": 262.4320089817047, + "p99": 268.0320143699646 + }, + "isolatedSum": { + "p50": 209.76000279188156, + "p90": 291.00800305604935, + "p95": 295.77599465847015, + "p99": 304.57600206136703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 127.20000743865967, + "p90": 190.17599523067474, + "p95": 193.31200420856476, + "p99": 199.96799528598785 + }, + "combine": { + "p50": 86.97599917650223, + "p90": 103.29599678516388, + "p95": 103.96800190210342, + "p99": 105.24799674749374 + }, + "roundtrip": { + "p50": 197.56799936294556, + "p90": 265.0879919528961, + "p95": 267.8079903125763, + "p99": 274.1760015487671 + }, + "isolatedSum": { + "p50": 214.1760066151619, + "p90": 293.4719920158386, + "p95": 297.2800061106682, + "p99": 305.2159920334816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.48000228405, + "p90": 188.22400271892548, 
+ "p95": 191.0720020532608, + "p99": 195.39199769496918 + }, + "combine": { + "p50": 87.39200234413147, + "p90": 104.16000336408615, + "p95": 104.99200224876404, + "p99": 106.08000308275223 + }, + "roundtrip": { + "p50": 197.91999459266663, + "p90": 263.96799087524414, + "p95": 265.6640112400055, + "p99": 268.99200677871704 + }, + "isolatedSum": { + "p50": 215.87200462818146, + "p90": 292.3840060830116, + "p95": 296.06400430202484, + "p99": 301.4720007777214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.74400413036346, + "p90": 191.80800020694733, + "p95": 195.68000733852386, + "p99": 265.3760015964508 + }, + "combine": { + "p50": 88.41600269079208, + "p90": 105.21599650382996, + "p95": 105.82400113344193, + "p99": 137.34400272369385 + }, + "roundtrip": { + "p50": 201.664000749588, + "p90": 265.9519910812378, + "p95": 268.0320143699646, + "p99": 275.2639949321747 + }, + "isolatedSum": { + "p50": 216.16000682115555, + "p90": 297.0239967107773, + "p95": 301.5040084719658, + "p99": 402.72000432014465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 132.47999548912048, + "p90": 189.85599279403687, + "p95": 195.51999866962433, + "p99": 270.30399441719055 + }, + "combine": { + "p50": 95.64799815416336, + "p90": 112.2559979557991, + "p95": 113.47199976444244, + "p99": 116.73600226640701 + }, + "roundtrip": { + "p50": 206.43199980258942, + "p90": 273.8879919052124, + "p95": 276.44801139831543, + "p99": 281.0879945755005 + }, + "isolatedSum": { + "p50": 
228.12799364328384, + "p90": 302.11199074983597, + "p95": 308.9919984340668, + "p99": 387.03999668359756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 145.50399780273438, + "p90": 188.73600661754608, + "p95": 191.3599967956543, + "p99": 196.383997797966 + }, + "combine": { + "p50": 104.96000200510025, + "p90": 120.64000219106674, + "p95": 121.24799937009811, + "p99": 127.55200266838074 + }, + "roundtrip": { + "p50": 217.31199324131012, + "p90": 282.78398513793945, + "p95": 286.24001145362854, + "p99": 291.3280129432678 + }, + "isolatedSum": { + "p50": 250.46399980783463, + "p90": 309.3760088086128, + "p95": 312.6079961657524, + "p99": 323.93600046634674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 170.1119989156723, + "p90": 207.13600516319275, + "p95": 209.18400585651398, + "p99": 219.35999393463135 + }, + "combine": { + "p50": 128.1599998474121, + "p90": 144.896000623703, + "p95": 145.53600549697876, + "p99": 149.1840034723282 + }, + "roundtrip": { + "p50": 260.25599241256714, + "p90": 307.776004076004, + "p95": 310.8159899711609, + "p99": 342.3680067062378 + }, + "isolatedSum": { + "p50": 298.2719987630844, + "p90": 352.03200578689575, + "p95": 354.72001135349274, + "p99": 368.54399740695953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + 
] + }, + { + "id": "cx-a2beb17d", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h100_05d96d49", + "comparisonKey": "08fb0837b9d0232a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:38.535918+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": 
"2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 183.87199938297272, + "p90": 190.0160014629364, + "p95": 192.57600605487823, + "p99": 196.06399536132812 + }, + "combine": { + "p50": 97.24800288677216, + "p90": 105.53599894046783, + "p95": 106.20799660682678, + "p99": 112.15999722480774 + }, + "roundtrip": { + "p50": 261.9520127773285, + "p90": 269.79199051856995, + "p95": 271.58400416374207, + "p99": 276.2880027294159 + }, + "isolatedSum": { + "p50": 281.1200022697449, + "p90": 295.55200040340424, + "p95": 298.784002661705, + "p99": 308.22399258613586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 139.13600146770477, + "p90": 206.59199357032776, + "p95": 214.56000208854675, + "p99": 460.5120122432709 + }, + "combine": { + "p50": 82.8159973025322, + "p90": 96.09600156545639, + "p95": 96.99200093746185, + "p99": 98.91200065612793 + }, + "roundtrip": { + "p50": 203.90400290489197, + "p90": 251.74400210380554, + "p95": 254.11200523376465, + "p99": 263.39200139045715 + }, + "isolatedSum": { + "p50": 221.95199877023697, + "p90": 302.68799513578415, + "p95": 311.5520030260086, + "p99": 559.4240128993988 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 132.57600367069244, + "p90": 195.0400024652481, + "p95": 198.88000190258026, + "p99": 203.10400426387787 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 100.60799866914749, + "p95": 104.54399883747101, 
+ "p99": 113.27999830245972 + }, + "roundtrip": { + "p50": 204.73599433898926, + "p90": 274.6880054473877, + "p95": 277.0560085773468, + "p99": 283.52001309394836 + }, + "isolatedSum": { + "p50": 214.24000710248947, + "p90": 295.6480011343956, + "p95": 303.42400074005127, + "p99": 316.3840025663376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 176.4480024576187, + "p90": 182.8480064868927, + "p95": 185.7600063085556, + "p99": 190.65600633621216 + }, + "combine": { + "p50": 90.20800143480301, + "p90": 97.50399738550186, + "p95": 98.36799651384354, + "p99": 100.73599964380264 + }, + "roundtrip": { + "p50": 246.14399671554565, + "p90": 254.36800718307495, + "p95": 256.54399394989014, + "p99": 261.56800985336304 + }, + "isolatedSum": { + "p50": 266.6560038924217, + "p90": 280.35200387239456, + "p95": 284.12800282239914, + "p99": 291.3920059800148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 180.16000092029572, + "p90": 189.43999707698822, + "p95": 191.45600497722626, + "p99": 195.90400159358978 + }, + "combine": { + "p50": 96.8639999628067, + "p90": 104.032002389431, + "p95": 104.96000200510025, + "p99": 107.744000852108 + }, + "roundtrip": { + "p50": 252.31999158859253, + "p90": 263.10399174690247, + "p95": 265.56798815727234, + "p99": 270.3999876976013 + }, + "isolatedSum": { + "p50": 277.0240008831024, + "p90": 293.4719994664192, + "p95": 296.4160069823265, + "p99": 303.6480024456978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + 
"combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 132.57600367069244, + "p90": 190.88000059127808, + "p95": 192.60799884796143, + "p99": 196.19199633598328 + }, + "combine": { + "p50": 89.66399729251862, + "p90": 106.39999806880951, + "p95": 107.32799768447876, + "p99": 122.52800166606903 + }, + "roundtrip": { + "p50": 205.85599541664124, + "p90": 263.13599944114685, + "p95": 293.8239872455597, + "p99": 456.31998777389526 + }, + "isolatedSum": { + "p50": 222.24000096321106, + "p90": 297.2799986600876, + "p95": 299.9359965324402, + "p99": 318.7199980020523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 144.31999623775482, + "p90": 191.80800020694733, + "p95": 194.62400674819946, + "p99": 198.14400374889374 + }, + "combine": { + "p50": 99.0080013871193, + "p90": 113.92000317573547, + "p95": 119.35999989509583, + "p99": 121.50400131940842 + }, + "roundtrip": { + "p50": 214.39999341964722, + "p90": 281.21599555015564, + "p95": 283.32799673080444, + "p99": 288.7359857559204 + }, + "isolatedSum": { + "p50": 243.32799762487411, + "p90": 305.7280033826828, + "p95": 313.9840066432953, + "p99": 319.64800506830215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 166.4319932460785, + "p90": 201.88799500465393, + "p95": 204.57600057125092, + "p99": 
210.24000644683838 + }, + "combine": { + "p50": 122.75200337171555, + "p90": 138.75199854373932, + "p95": 140.25600254535675, + "p99": 145.9839940071106 + }, + "roundtrip": { + "p50": 253.12000513076782, + "p90": 302.7519881725311, + "p95": 305.85598945617676, + "p99": 309.56798791885376 + }, + "isolatedSum": { + "p50": 289.18399661779404, + "p90": 340.63999354839325, + "p95": 344.83200311660767, + "p99": 356.224000453949 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-240cd58b", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h100_e4941d05", + "comparisonKey": "67b976683305a2fb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:03:37.211296+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", 
+ "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 127.20000743865967, + "p90": 133.91999900341034, + "p95": 135.71199774742126, + "p99": 144.6080058813095 + }, + "combine": { + "p50": 82.11199939250946, + "p90": 86.65599673986435, + "p95": 87.45600283145905, + "p99": 89.91999924182892 + }, + "roundtrip": { + "p50": 199.10399615764618, + "p90": 204.92799580097198, + "p95": 207.519993185997, + "p99": 224.03199970722198 + }, + "isolatedSum": { + "p50": 209.31200683116913, + "p90": 220.5759957432747, + "p95": 223.1680005788803, + "p99": 234.52800512313843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 127.87200510501862, + "p90": 134.20799374580383, + "p95": 138.20800185203552, + "p99": 148.60799908638 + }, + "combine": { + "p50": 83.3280012011528, + "p90": 88.76799792051315, + "p95": 89.50400352478027, + "p99": 97.24800288677216 + }, + "roundtrip": { + "p50": 199.0080028772354, + "p90": 205.79199492931366, + "p95": 
209.21599864959717, + "p99": 217.72800385951996 + }, + "isolatedSum": { + "p50": 211.20000630617142, + "p90": 222.975991666317, + "p95": 227.7120053768158, + "p99": 245.85600197315216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 131.55199587345123, + "p90": 196.60800695419312, + "p95": 199.5519995689392, + "p99": 213.44000101089478 + }, + "combine": { + "p50": 89.28000181913376, + "p90": 105.59999942779541, + "p95": 106.46399855613708, + "p99": 113.27999830245972 + }, + "roundtrip": { + "p50": 204.16000485420227, + "p90": 268.70399713516235, + "p95": 272.352010011673, + "p99": 277.7920067310333 + }, + "isolatedSum": { + "p50": 220.831997692585, + "p90": 302.2080063819885, + "p95": 306.0159981250763, + "p99": 326.7199993133545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 128.1599998474121, + "p90": 134.20799374580383, + "p95": 136.00000739097595, + "p99": 140.4159963130951 + }, + "combine": { + "p50": 88.32000195980072, + "p90": 90.17600119113922, + "p95": 92.12800115346909, + "p99": 97.28000313043594 + }, + "roundtrip": { + "p50": 203.2639980316162, + "p90": 208.22399854660034, + "p95": 210.55999398231506, + "p99": 221.6320037841797 + }, + "isolatedSum": { + "p50": 216.48000180721283, + "p90": 224.38399493694305, + "p95": 228.12800854444504, + "p99": 237.69599944353104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 130.40000200271606, + "p90": 194.62400674819946, + "p95": 196.22400403022766, + "p99": 201.7280012369156 + }, + "combine": { + "p50": 89.82399851083755, + "p90": 106.04800283908844, + "p95": 107.10400342941284, + "p99": 112.35199868679047 + }, + "roundtrip": { + "p50": 207.16799795627594, + "p90": 270.687997341156, + "p95": 273.24798703193665, + "p99": 277.1199941635132 + }, + "isolatedSum": { + "p50": 220.22400051355362, + "p90": 300.6720095872879, + "p95": 303.3280074596405, + "p99": 314.07999992370605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 131.04000687599182, + "p90": 138.3039951324463, + "p95": 140.44800400733948, + "p99": 148.83199334144592 + }, + "combine": { + "p50": 95.8079993724823, + "p90": 97.85600006580353, + "p95": 98.4639972448349, + "p99": 103.84000092744827 + }, + "roundtrip": { + "p50": 209.34399962425232, + "p90": 214.65599536895752, + "p95": 217.66400337219238, + "p99": 225.95199942588806 + }, + "isolatedSum": { + "p50": 226.84800624847412, + "p90": 236.15999519824982, + "p95": 238.91200125217438, + "p99": 252.6719942688942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 147.48799800872803, + "p90": 196.57599925994873, + "p95": 198.43199849128723, + "p99": 204.48000729084015 + }, + "combine": { + "p50": 105.72800040245056, + "p90": 122.40000069141388, + "p95": 126.71999633312225, + "p99": 
129.72800433635712 + }, + "roundtrip": { + "p50": 222.6240038871765, + "p90": 293.11999678611755, + "p95": 296.86400294303894, + "p99": 312.8960132598877 + }, + "isolatedSum": { + "p50": 253.2159984111786, + "p90": 318.9759999513626, + "p95": 325.1519948244095, + "p99": 334.20801162719727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 165.66400229930878, + "p90": 199.8720020055771, + "p95": 202.01599597930908, + "p99": 207.48800039291382 + }, + "combine": { + "p50": 121.98399752378464, + "p90": 139.48799669742584, + "p95": 142.04800128936768, + "p99": 145.9839940071106 + }, + "roundtrip": { + "p50": 245.40799856185913, + "p90": 302.68800258636475, + "p95": 305.759996175766, + "p99": 311.16798520088196 + }, + "isolatedSum": { + "p50": 287.6479998230934, + "p90": 339.35999870300293, + "p95": 344.06399726867676, + "p99": 353.4719944000244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8905e94", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h100_260e03e4", + "comparisonKey": "2caa7b9be4327ded", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:36.070418+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 123.10399860143661, + "p90": 130.97600638866425, + "p95": 134.94400680065155, + "p99": 146.17599546909332 + }, + "combine": { + "p50": 82.2720006108284, + "p90": 88.3840024471283, + "p95": 89.24800157546997, + "p99": 90.7839983701706 + }, + "roundtrip": { + "p50": 195.8719938993454, + "p90": 200.99200308322906, + "p95": 204.28800582885742, + "p99": 208.99200439453125 + }, + "isolatedSum": { + "p50": 205.37599921226501, + "p90": 219.36000883579254, + "p95": 
224.19200837612152, + "p99": 236.95999383926392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 123.83999675512314, + "p90": 131.04000687599182, + "p95": 134.783998131752, + "p99": 139.3280029296875 + }, + "combine": { + "p50": 86.20800077915192, + "p90": 88.92799913883209, + "p95": 89.63199704885483, + "p99": 91.16800129413605 + }, + "roundtrip": { + "p50": 195.51999866962433, + "p90": 202.07999646663666, + "p95": 204.73599433898926, + "p99": 216.60800278186798 + }, + "isolatedSum": { + "p50": 210.04799753427505, + "p90": 219.9680060148239, + "p95": 224.41599518060684, + "p99": 230.49600422382355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 126.88000500202179, + "p90": 191.52000546455383, + "p95": 193.79200041294098, + "p99": 200.6399929523468 + }, + "combine": { + "p50": 88.19200098514557, + "p90": 104.5759990811348, + "p95": 104.99200224876404, + "p99": 111.32799834012985 + }, + "roundtrip": { + "p50": 199.52000677585602, + "p90": 263.7760043144226, + "p95": 266.400009393692, + "p99": 271.5519964694977 + }, + "isolatedSum": { + "p50": 215.07200598716736, + "p90": 296.09600454568863, + "p95": 298.784002661705, + "p99": 311.96799129247665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
123.96799772977829, + "p90": 130.91200590133667, + "p95": 136.03200018405914, + "p99": 253.02401185035706 + }, + "combine": { + "p50": 87.48800307512283, + "p90": 89.28000181913376, + "p95": 90.17600119113922, + "p99": 97.24800288677216 + }, + "roundtrip": { + "p50": 197.34400510787964, + "p90": 203.2639980316162, + "p95": 205.76000213623047, + "p99": 220.06399929523468 + }, + "isolatedSum": { + "p50": 211.45600080490112, + "p90": 220.19200772047043, + "p95": 226.20800137519836, + "p99": 350.2720147371292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 126.75200402736664, + "p90": 190.528005361557, + "p95": 192.51200556755066, + "p99": 197.9839950799942 + }, + "combine": { + "p50": 89.59999680519104, + "p90": 106.23999685049057, + "p95": 110.62400043010712, + "p99": 114.14399743080139 + }, + "roundtrip": { + "p50": 206.14400506019592, + "p90": 268.5439884662628, + "p95": 271.64798974990845, + "p99": 329.02398705482483 + }, + "isolatedSum": { + "p50": 216.35200083255768, + "p90": 296.7680022120476, + "p95": 303.1360059976578, + "p99": 312.1279925107956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 138.2399946451187, + "p90": 186.20799481868744, + "p95": 188.1919950246811, + "p99": 192.7040070295334 + }, + "combine": { + "p50": 96.73599898815155, + "p90": 114.01599645614624, + "p95": 114.75200206041336, + "p99": 120.2239990234375 + }, + "roundtrip": { + "p50": 207.2640061378479, + "p90": 270.7839906215668, + "p95": 275.29600262641907, + "p99": 
279.1999876499176 + }, + "isolatedSum": { + "p50": 234.97599363327026, + "p90": 300.2239912748337, + "p95": 302.94399708509445, + "p99": 312.9280060529709 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 149.08799529075623, + "p90": 195.39199769496918, + "p95": 205.21600544452667, + "p99": 258.59200954437256 + }, + "combine": { + "p50": 105.40799796581268, + "p90": 121.40800058841705, + "p95": 122.68800288438797, + "p99": 128.60800325870514 + }, + "roundtrip": { + "p50": 221.02400660514832, + "p90": 285.40799021720886, + "p95": 289.5359992980957, + "p99": 293.37599873542786 + }, + "isolatedSum": { + "p50": 254.4959932565689, + "p90": 316.79999828338623, + "p95": 327.90400832891464, + "p99": 387.2000128030777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 169.91999745368958, + "p90": 201.60000026226044, + "p95": 203.80799472332, + "p99": 207.35999941825867 + }, + "combine": { + "p50": 129.95199859142303, + "p90": 146.62399888038635, + "p95": 147.32800424098969, + "p99": 152.8320014476776 + }, + "roundtrip": { + "p50": 262.65600323677063, + "p90": 309.08799171447754, + "p95": 311.13600730895996, + "p99": 315.5519962310791 + }, + "isolatedSum": { + "p50": 299.8719960451126, + "p90": 348.2239991426468, + "p95": 351.1359989643097, + "p99": 360.1920008659363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-40375d0c", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h100_75843a0a", + "comparisonKey": "ca4efd5594e6ed9c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:02.481813+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 132.1280002593994, + "p90": 157.31200575828552, + "p95": 163.42400014400482, + "p99": 170.78399658203125 + }, + "combine": { + "p50": 87.87199854850769, + "p90": 98.2080027461052, + "p95": 106.20799660682678, + "p99": 443.807989358902 + }, + "roundtrip": { + "p50": 201.12000405788422, + "p90": 216.70399606227875, + "p95": 222.9440063238144, + "p99": 235.9360009431839 + }, + "isolatedSum": { + "p50": 219.9999988079071, + "p90": 255.52000850439072, + "p95": 269.6319967508316, + "p99": 614.5919859409332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 129.34400141239166, + "p90": 137.9839926958084, + "p95": 140.86399972438812, + "p99": 159.67999398708344 + }, + "combine": { + "p50": 86.07999980449677, + "p90": 89.12000060081482, + "p95": 90.36800265312195, + "p99": 94.30400282144547 + }, + "roundtrip": { + "p50": 202.33599841594696, + "p90": 208.00000429153442, + "p95": 209.9519968032837, + "p99": 213.56800198554993 + }, + "isolatedSum": { + "p50": 215.42400121688843, + "p90": 227.10399329662323, + "p95": 231.23200237751007, + "p99": 253.9839968085289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 131.20000064373016, + "p90": 198.36799800395966, + "p95": 200.095996260643, + "p99": 204.92799580097198 + }, + "combine": 
{ + "p50": 88.76799792051315, + "p90": 105.21599650382996, + "p95": 106.175996363163, + "p99": 114.17599767446518 + }, + "roundtrip": { + "p50": 206.33600652217865, + "p90": 273.72801303863525, + "p95": 276.38399600982666, + "p99": 283.9680016040802 + }, + "isolatedSum": { + "p50": 219.96799856424332, + "p90": 303.5839945077896, + "p95": 306.271992623806, + "p99": 319.10399347543716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.05600666999817, + "p90": 135.5839967727661, + "p95": 138.14400136470795, + "p99": 147.5840061903 + }, + "combine": { + "p50": 88.0960002541542, + "p90": 91.13600105047226, + "p95": 96.00000083446503, + "p99": 249.5039999485016 + }, + "roundtrip": { + "p50": 204.6079933643341, + "p90": 211.84000372886658, + "p95": 224.99200701713562, + "p99": 311.74400448799133 + }, + "isolatedSum": { + "p50": 217.15200692415237, + "p90": 226.71999782323837, + "p95": 234.14400219917297, + "p99": 397.0880061388016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 132.1280002593994, + "p90": 196.70400023460388, + "p95": 198.65599274635315, + "p99": 211.19999885559082 + }, + "combine": { + "p50": 90.4960036277771, + "p90": 107.93600231409073, + "p95": 110.78400164842606, + "p99": 113.8560026884079 + }, + "roundtrip": { + "p50": 223.26399385929108, + "p90": 275.64799785614014, + "p95": 277.75999903678894, + "p99": 286.72000765800476 + }, + "isolatedSum": { + "p50": 222.6240038871765, + "p90": 304.6400025486946, + "p95": 309.4399943947792, + "p99": 
325.0560015439987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 138.17599415779114, + "p90": 192.76799261569977, + "p95": 196.3520050048828, + "p99": 201.37600600719452 + }, + "combine": { + "p50": 97.82399982213974, + "p90": 114.88000303506851, + "p95": 116.44800007343292, + "p99": 121.31199985742569 + }, + "roundtrip": { + "p50": 211.84000372886658, + "p90": 217.8879976272583, + "p95": 221.24800086021423, + "p99": 296.00000381469727 + }, + "isolatedSum": { + "p50": 235.99999397993088, + "p90": 307.6479956507683, + "p95": 312.80000507831573, + "p99": 322.6880058646202 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 150.27199685573578, + "p90": 199.16799664497375, + "p95": 202.91200280189514, + "p99": 264.6079957485199 + }, + "combine": { + "p50": 105.85600137710571, + "p90": 122.46400117874146, + "p95": 125.88800489902496, + "p99": 129.37599420547485 + }, + "roundtrip": { + "p50": 223.29600155353546, + "p90": 293.40800642967224, + "p95": 296.31999135017395, + "p99": 302.2400140762329 + }, + "isolatedSum": { + "p50": 256.1279982328415, + "p90": 321.6319978237152, + "p95": 328.8000077009201, + "p99": 393.98398995399475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
166.52800142765045, + "p90": 202.04800367355347, + "p95": 204.99199628829956, + "p99": 213.21600675582886 + }, + "combine": { + "p50": 122.40000069141388, + "p90": 139.615997672081, + "p95": 142.39999651908875, + "p99": 144.896000623703 + }, + "roundtrip": { + "p50": 244.60799992084503, + "p90": 308.47999453544617, + "p95": 310.43198704719543, + "p99": 314.8159980773926 + }, + "isolatedSum": { + "p50": 288.92800211906433, + "p90": 341.66400134563446, + "p95": 347.3919928073883, + "p99": 358.11200737953186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b7cc9420", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h100_d41b0513", + "comparisonKey": "2c29a44f66b7cc22", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:36.327467+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + 
"configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 124.89599734544754, + "p90": 132.9919993877411, + "p95": 135.3279948234558, + "p99": 142.7839994430542 + }, + "combine": { + "p50": 81.7599967122078, + "p90": 86.81599795818329, + "p95": 88.03199976682663, + "p99": 92.32000261545181 + }, + "roundtrip": { + "p50": 196.1279958486557, + "p90": 201.53599977493286, + "p95": 204.28800582885742, + "p99": 209.1200053691864 + }, + "isolatedSum": { + "p50": 206.65599405765533, + "p90": 219.80799734592438, + "p95": 223.35999459028244, + "p99": 235.104002058506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 125.2480000257492, + "p90": 134.71999764442444, + "p95": 140.8960074186325, + "p99": 187.26399540901184 + }, + "combine": { + "p50": 82.49600231647491, + "p90": 88.95999938249588, + "p95": 93.91999989748001, + "p99": 129.82399761676788 + }, + "roundtrip": { + "p50": 
198.71999323368073, + "p90": 205.9520035982132, + "p95": 212.54399418830872, + "p99": 364.3839955329895 + }, + "isolatedSum": { + "p50": 207.74400234222412, + "p90": 223.67999702692032, + "p95": 234.81600731611252, + "p99": 317.0879930257797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 128.4160017967224, + "p90": 194.91200149059296, + "p95": 196.1279958486557, + "p99": 203.2960057258606 + }, + "combine": { + "p50": 86.68799698352814, + "p90": 102.9760017991066, + "p95": 104.06400263309479, + "p99": 110.97600311040878 + }, + "roundtrip": { + "p50": 201.37600600719452, + "p90": 263.5520100593567, + "p95": 265.82399010658264, + "p99": 270.2080011367798 + }, + "isolatedSum": { + "p50": 215.10399878025055, + "p90": 297.88800328969955, + "p95": 300.1919984817505, + "p99": 314.2720088362694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 126.08000636100769, + "p90": 134.62400436401367, + "p95": 139.5840048789978, + "p99": 155.10399639606476 + }, + "combine": { + "p50": 82.97599852085114, + "p90": 87.80799806118011, + "p95": 89.12000060081482, + "p99": 92.96000003814697 + }, + "roundtrip": { + "p50": 199.61600005626678, + "p90": 205.76000213623047, + "p95": 209.31200683116913, + "p99": 243.29599738121033 + }, + "isolatedSum": { + "p50": 209.05600488185883, + "p90": 222.4320024251938, + "p95": 228.70400547981262, + "p99": 248.06399643421173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 
3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 127.55200266838074, + "p90": 191.83999300003052, + "p95": 194.30400431156158, + "p99": 218.78400444984436 + }, + "combine": { + "p50": 87.42400258779526, + "p90": 90.01599997282028, + "p95": 91.07200056314468, + "p99": 97.31200337409973 + }, + "roundtrip": { + "p50": 200.25600492954254, + "p90": 206.496000289917, + "p95": 209.82399582862854, + "p99": 214.7199958562851 + }, + "isolatedSum": { + "p50": 214.976005256176, + "p90": 281.8559929728508, + "p95": 285.37600487470627, + "p99": 316.0960078239441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 130.5599957704544, + "p90": 136.22400164604187, + "p95": 139.23199474811554, + "p99": 145.91999351978302 + }, + "combine": { + "p50": 94.87999975681305, + "p90": 97.50399738550186, + "p95": 98.52799773216248, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 208.28799903392792, + "p90": 213.72799575328827, + "p95": 217.6000028848648, + "p99": 228.83200645446777 + }, + "isolatedSum": { + "p50": 225.43999552726746, + "p90": 233.72799903154373, + "p95": 237.75999248027802, + "p99": 249.37599152326584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 146.01600170135498, + "p90": 192.9599940776825, + "p95": 195.19999623298645, + "p99": 203.48800718784332 + }, + "combine": { + "p50": 104.89600151777267, + "p90": 
120.57600170373917, + "p95": 121.34400010108948, + "p99": 128.35200130939484 + }, + "roundtrip": { + "p50": 219.42399442195892, + "p90": 290.49599170684814, + "p95": 292.7359938621521, + "p99": 298.335999250412 + }, + "isolatedSum": { + "p50": 250.91200321912766, + "p90": 313.53599578142166, + "p95": 316.5439963340759, + "p99": 331.84000849723816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 169.88800466060638, + "p90": 195.93599438667297, + "p95": 197.7279931306839, + "p99": 201.1519968509674 + }, + "combine": { + "p50": 128.4479945898056, + "p90": 145.34400403499603, + "p95": 146.43199741840363, + "p99": 148.47999811172485 + }, + "roundtrip": { + "p50": 260.1599991321564, + "p90": 309.63200330734253, + "p95": 313.79199028015137, + "p99": 318.4320032596588 + }, + "isolatedSum": { + "p50": 298.335999250412, + "p90": 341.279998421669, + "p95": 344.1599905490875, + "p99": 349.63199496269226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19786fa8", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_811b044b", + "comparisonKey": "dd3ff9ece515024c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:02:03.201708+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 128.1919926404953, + "p90": 137.69599795341492, + "p95": 145.56799829006195, + "p99": 209.85600352287292 + }, + "combine": { + "p50": 82.94399827718735, + "p90": 88.95999938249588, + "p95": 90.17600119113922, + "p99": 98.81599992513657 + }, + "roundtrip": { + "p50": 199.45600628852844, + "p90": 206.7520022392273, + "p95": 211.64800226688385, + "p99": 
230.46399652957916 + }, + "isolatedSum": { + "p50": 211.13599091768265, + "p90": 226.6559973359108, + "p95": 235.74399948120117, + "p99": 308.6720034480095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 128.25599312782288, + "p90": 137.60000467300415, + "p95": 147.20000326633453, + "p99": 216.35200083255768 + }, + "combine": { + "p50": 86.496002972126, + "p90": 89.9519994854927, + "p95": 90.87999910116196, + "p99": 102.39999741315842 + }, + "roundtrip": { + "p50": 201.27999782562256, + "p90": 210.4319930076599, + "p95": 219.00799870491028, + "p99": 508.63999128341675 + }, + "isolatedSum": { + "p50": 214.75199609994888, + "p90": 227.55200415849686, + "p95": 238.0800023674965, + "p99": 318.7519982457161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 131.55199587345123, + "p90": 196.51199877262115, + "p95": 198.81600141525269, + "p99": 207.93600380420685 + }, + "combine": { + "p50": 88.73599767684937, + "p90": 105.69600015878677, + "p95": 106.4319983124733, + "p99": 115.167997777462 + }, + "roundtrip": { + "p50": 203.64800095558167, + "p90": 272.09600806236267, + "p95": 275.9360074996948, + "p99": 291.3919985294342 + }, + "isolatedSum": { + "p50": 220.2879935503006, + "p90": 302.20799893140793, + "p95": 305.247999727726, + "p99": 323.10400158166885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.87200510501862, + "p90": 135.19999384880066, + "p95": 139.23199474811554, + "p99": 148.92800152301788 + }, + "combine": { + "p50": 87.80799806118011, + "p90": 91.80799871683121, + "p95": 97.21600264310837, + "p99": 106.20799660682678 + }, + "roundtrip": { + "p50": 203.16800475120544, + "p90": 210.40000021457672, + "p95": 216.73600375652313, + "p99": 290.78400135040283 + }, + "isolatedSum": { + "p50": 215.68000316619873, + "p90": 227.00799256563187, + "p95": 236.4479973912239, + "p99": 255.13599812984467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 131.9040060043335, + "p90": 195.45599818229675, + "p95": 198.08000326156616, + "p99": 203.93599569797516 + }, + "combine": { + "p50": 90.08000046014786, + "p90": 107.07200318574905, + "p95": 111.10399663448334, + "p99": 114.52800035476685 + }, + "roundtrip": { + "p50": 207.45599269866943, + "p90": 274.4320034980774, + "p95": 277.9200077056885, + "p99": 283.00800919532776 + }, + "isolatedSum": { + "p50": 221.98400646448135, + "p90": 302.5280013680458, + "p95": 309.1839998960495, + "p99": 318.463996052742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 134.91199910640717, + "p90": 143.96800100803375, + "p95": 181.72800540924072, + "p99": 190.08000195026398 + }, + "combine": { + "p50": 95.10400146245956, + "p90": 98.81599992513657, + "p95": 103.2319962978363, + "p99": 169.18399930000305 + }, + "roundtrip": { + 
"p50": 210.52800118923187, + "p90": 220.89600563049316, + "p95": 228.96000742912292, + "p99": 309.2480003833771 + }, + "isolatedSum": { + "p50": 230.01600056886673, + "p90": 242.78400093317032, + "p95": 284.960001707077, + "p99": 359.26400125026703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 148.0959951877594, + "p90": 194.4960057735443, + "p95": 197.76000082492828, + "p99": 205.1839977502823 + }, + "combine": { + "p50": 105.66399991512299, + "p90": 121.88799679279327, + "p95": 122.84799665212631, + "p99": 124.28800016641617 + }, + "roundtrip": { + "p50": 223.1999933719635, + "p90": 287.9999876022339, + "p95": 291.3280129432678, + "p99": 316.3520097732544 + }, + "isolatedSum": { + "p50": 253.75999510288239, + "p90": 316.3840025663376, + "p95": 320.6079974770546, + "p99": 329.47199791669846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 164.92800414562225, + "p90": 200.00000298023224, + "p95": 203.64800095558167, + "p99": 212.70400285720825 + }, + "combine": { + "p50": 122.5920021533966, + "p90": 140.06400108337402, + "p95": 144.41600441932678, + "p99": 148.28799664974213 + }, + "roundtrip": { + "p50": 245.4719990491867, + "p90": 307.68001079559326, + "p95": 312.3840093612671, + "p99": 321.1199939250946 + }, + "isolatedSum": { + "p50": 287.52000629901886, + "p90": 340.06400406360626, + "p95": 348.06400537490845, + "p99": 360.9919995069504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 
76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eed3fa08", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h100_ff8c31eb", + "comparisonKey": "ee213ccd80c44d1c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:37.309873+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 129.56799566745758, + "p90": 136.09600067138672, + "p95": 138.43199610710144, + "p99": 147.64800667762756 + }, + "combine": { + "p50": 84.60800349712372, + "p90": 88.41600269079208, + "p95": 89.28000181913376, + "p99": 93.08800101280212 + }, + "roundtrip": { + "p50": 200.3519982099533, + "p90": 206.59199357032776, + "p95": 208.76799523830414, + "p99": 219.00799870491028 + }, + "isolatedSum": { + "p50": 214.1759991645813, + "p90": 224.5120033621788, + "p95": 227.7119979262352, + "p99": 240.7360076904297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 129.4720023870468, + "p90": 136.00000739097595, + "p95": 138.87999951839447, + "p99": 149.6960073709488 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 89.88799899816513, + "p95": 90.91199934482574, + "p99": 95.64799815416336 + }, + "roundtrip": { + "p50": 201.82399451732635, + "p90": 207.42399990558624, + "p95": 210.4640007019043, + "p99": 215.68000316619873 + }, + "isolatedSum": { + "p50": 216.76800400018692, + "p90": 225.88800638914108, + "p95": 229.79199886322021, + "p99": 245.34400552511215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 131.67999684810638, + "p90": 196.96000218391418, 
+ "p95": 199.5519995689392, + "p99": 206.1759978532791 + }, + "combine": { + "p50": 89.15200084447861, + "p90": 110.11199653148651, + "p95": 111.32799834012985, + "p99": 208.41600000858307 + }, + "roundtrip": { + "p50": 205.9199959039688, + "p90": 275.2000093460083, + "p95": 277.75999903678894, + "p99": 282.6240062713623 + }, + "isolatedSum": { + "p50": 220.831997692585, + "p90": 307.0719987154007, + "p95": 310.87999790906906, + "p99": 414.5919978618622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.63199615478516, + "p90": 136.22400164604187, + "p95": 139.74399864673615, + "p99": 151.99999511241913 + }, + "combine": { + "p50": 88.95999938249588, + "p90": 91.39200299978256, + "p95": 94.40000355243683, + "p99": 104.73600029945374 + }, + "roundtrip": { + "p50": 203.87199521064758, + "p90": 208.8640034198761, + "p95": 212.41599321365356, + "p99": 226.46400332450867 + }, + "isolatedSum": { + "p50": 218.59199553728104, + "p90": 227.61600464582443, + "p95": 234.14400219917297, + "p99": 256.73599541187286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 131.20000064373016, + "p90": 192.6400065422058, + "p95": 196.22400403022766, + "p99": 202.30400562286377 + }, + "combine": { + "p50": 89.12000060081482, + "p90": 92.0960009098053, + "p95": 94.46399658918381, + "p99": 98.7199991941452 + }, + "roundtrip": { + "p50": 205.72799444198608, + "p90": 210.4959934949875, + "p95": 212.41599321365356, + "p99": 221.5999960899353 + }, + "isolatedSum": { + "p50": 
220.32000124454498, + "p90": 284.7360074520111, + "p95": 290.68800061941147, + "p99": 301.024004817009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 134.33599472045898, + "p90": 139.71200585365295, + "p95": 142.17600226402283, + "p99": 151.93599462509155 + }, + "combine": { + "p50": 94.97600048780441, + "p90": 97.6639986038208, + "p95": 98.62399846315384, + "p99": 105.40799796581268 + }, + "roundtrip": { + "p50": 211.96800470352173, + "p90": 215.87200462818146, + "p95": 217.21599996089935, + "p99": 223.80800545215607 + }, + "isolatedSum": { + "p50": 229.3119952082634, + "p90": 237.37600445747375, + "p95": 240.80000072717667, + "p99": 257.34399259090424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 146.464005112648, + "p90": 200.1280039548874, + "p95": 203.64800095558167, + "p99": 269.6320116519928 + }, + "combine": { + "p50": 105.82400113344193, + "p90": 126.68800354003906, + "p95": 127.80800461769104, + "p99": 157.44000673294067 + }, + "roundtrip": { + "p50": 223.07200729846954, + "p90": 293.3120131492615, + "p95": 296.57599329948425, + "p99": 374.36801195144653 + }, + "isolatedSum": { + "p50": 252.28800624608994, + "p90": 326.81600749492645, + "p95": 331.4560055732727, + "p99": 427.0720183849335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.6880019903183, + "p90": 203.16800475120544, + "p95": 205.34400641918182, + "p99": 213.21600675582886 + }, + "combine": { + "p50": 122.65600264072418, + "p90": 140.1599943637848, + "p95": 143.61600577831268, + "p99": 146.5280055999756 + }, + "roundtrip": { + "p50": 244.9920028448105, + "p90": 307.9040050506592, + "p95": 310.7840120792389, + "p99": 317.6319897174835 + }, + "isolatedSum": { + "p50": 285.3440046310425, + "p90": 343.32799911499023, + "p95": 348.9600121974945, + "p99": 359.74401235580444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-18baf062", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_508883cc", + "comparisonKey": "f11235fb1653ae0a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:04.048289+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 114.49600011110306, + "p90": 146.91199362277985, + "p95": 149.59999918937683, + "p99": 164.000004529953 + }, + "combine": { + "p50": 89.53599631786346, + "p90": 97.69599884748459, + "p95": 98.4639972448349, + "p99": 104.25599664449692 + }, + "roundtrip": { + "p50": 173.95199835300446, + "p90": 197.34400510787964, + "p95": 206.01600408554077, + "p99": 220.15999257564545 + }, + "isolatedSum": { + "p50": 204.03199642896652, + "p90": 244.60799247026443, + "p95": 248.06399643421173, + "p99": 268.2560011744499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 99.67999905347824, + "p90": 108.5439994931221, + "p95": 110.62400043010712, + "p99": 124.7360035777092 + }, + "combine": { + "p50": 86.56000345945358, + "p90": 89.4400030374527, + "p95": 90.97599983215332, + "p99": 
105.40799796581268 + }, + "roundtrip": { + "p50": 173.34400117397308, + "p90": 179.29600179195404, + "p95": 183.32800269126892, + "p99": 200.47999918460846 + }, + "isolatedSum": { + "p50": 186.24000251293182, + "p90": 197.9840025305748, + "p95": 201.60000026226044, + "p99": 230.14400154352188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 102.55999863147736, + "p90": 152.99199521541595, + "p95": 154.91199493408203, + "p99": 160.5760008096695 + }, + "combine": { + "p50": 88.99199962615967, + "p90": 105.02400249242783, + "p95": 106.84800148010254, + "p99": 111.77600175142288 + }, + "roundtrip": { + "p50": 178.3359944820404, + "p90": 229.8559993505478, + "p95": 233.21600258350372, + "p99": 239.6479994058609 + }, + "isolatedSum": { + "p50": 191.55199825763702, + "p90": 258.0159977078438, + "p95": 261.75999641418457, + "p99": 272.3520025610924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 103.26399654150009, + "p90": 154.01600301265717, + "p95": 156.5759927034378, + "p99": 163.64799439907074 + }, + "combine": { + "p50": 89.12000060081482, + "p90": 105.69600015878677, + "p95": 107.07200318574905, + "p99": 130.46400249004364 + }, + "roundtrip": { + "p50": 179.29600179195404, + "p90": 227.2000014781952, + "p95": 229.40799593925476, + "p99": 232.9919934272766 + }, + "isolatedSum": { + "p50": 192.3839971423149, + "p90": 259.71200317144394, + "p95": 263.64799588918686, + "p99": 294.1119968891144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + 
"combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 103.07200253009796, + "p90": 151.5520066022873, + "p95": 153.56799960136414, + "p99": 158.4639996290207 + }, + "combine": { + "p50": 90.08000046014786, + "p90": 106.9440022110939, + "p95": 108.57599973678589, + "p99": 114.84800279140472 + }, + "roundtrip": { + "p50": 182.17599391937256, + "p90": 232.41600394248962, + "p95": 234.49599742889404, + "p99": 238.3359968662262 + }, + "isolatedSum": { + "p50": 193.15200299024582, + "p90": 258.4960088133812, + "p95": 262.14399933815, + "p99": 273.3120024204254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 113.0559965968132, + "p90": 152.0639955997467, + "p95": 154.08000349998474, + "p99": 157.9200029373169 + }, + "combine": { + "p50": 96.44799679517746, + "p90": 113.50400000810623, + "p95": 114.78400230407715, + "p99": 122.6240023970604 + }, + "roundtrip": { + "p50": 185.34399569034576, + "p90": 238.39999735355377, + "p95": 240.31999707221985, + "p99": 245.02399563789368 + }, + "isolatedSum": { + "p50": 209.50399339199066, + "p90": 265.56799560785294, + "p95": 268.8640058040619, + "p99": 280.5440053343773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 120.28799951076508, + "p90": 155.10399639606476, + "p95": 157.27999806404114, + "p99": 162.04799711704254 + }, + 
"combine": { + "p50": 105.34399747848511, + "p90": 121.18399888277054, + "p95": 123.23199957609177, + "p99": 127.58399546146393 + }, + "roundtrip": { + "p50": 194.815993309021, + "p90": 247.48800694942474, + "p95": 250.0160038471222, + "p99": 259.99999046325684 + }, + "isolatedSum": { + "p50": 225.63199698925018, + "p90": 276.2879952788353, + "p95": 280.5119976401329, + "p99": 289.63199257850647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.66400516033173, + "p90": 163.7759953737259, + "p95": 166.30400717258453, + "p99": 171.77599668502808 + }, + "combine": { + "p50": 121.76000326871872, + "p90": 138.2720023393631, + "p95": 139.74399864673615, + "p99": 145.1520025730133 + }, + "roundtrip": { + "p50": 221.11999988555908, + "p90": 266.400009393692, + "p95": 269.72800493240356, + "p99": 346.3039994239807 + }, + "isolatedSum": { + "p50": 259.42400842905045, + "p90": 302.047997713089, + "p95": 306.0480058193207, + "p99": 316.9279992580414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ead7d45", + "identity": "h100|uccl|n-a|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_c75ff0c9", + "comparisonKey": "2d7fd5e177013955", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:08.218071+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.89600241184235, + "p90": 72.51200079917908, + "p95": 73.08799773454666, + "p99": 78.87999713420868 + }, + "combine": { + "p50": 51.77599936723709, + "p90": 53.3440001308918, + "p95": 54.46400120854378, + "p99": 60.7680007815361 + }, + "roundtrip": { + "p50": 91.61599725484848, + "p90": 99.23200309276581, + "p95": 99.90400075912476, + "p99": 105.12000322341919 + }, + "isolatedSum": { + "p50": 
116.67200177907944, + "p90": 125.85600093007088, + "p95": 127.55199894309044, + "p99": 139.64799791574478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 55.16799911856651, + "p90": 72.03199714422226, + "p95": 72.73600250482559, + "p99": 79.19999957084656 + }, + "combine": { + "p50": 43.71200129389763, + "p90": 53.279999643564224, + "p95": 53.50400134921074, + "p99": 55.71199953556061 + }, + "roundtrip": { + "p50": 73.72800260782242, + "p90": 98.01600128412247, + "p95": 99.5199978351593, + "p99": 103.5199984908104 + }, + "isolatedSum": { + "p50": 98.88000041246414, + "p90": 125.31199678778648, + "p95": 126.24000385403633, + "p99": 134.91199910640717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 55.52000179886818, + "p90": 72.03199714422226, + "p95": 72.92799651622772, + "p99": 78.84799689054489 + }, + "combine": { + "p50": 43.90399903059006, + "p90": 53.53600159287453, + "p95": 53.85600030422211, + "p99": 55.67999929189682 + }, + "roundtrip": { + "p50": 73.98399710655212, + "p90": 98.78399968147278, + "p95": 100.03200173377991, + "p99": 105.31199723482132 + }, + "isolatedSum": { + "p50": 99.42400082945824, + "p90": 125.56799873709679, + "p95": 126.78399682044983, + "p99": 134.5279961824417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 70.65600156784058, + "p90": 73.05599749088287, + "p95": 73.34399968385696, + "p99": 80.6720033288002 + }, + "combine": { + "p50": 52.960000932216644, + "p90": 53.98400127887726, + "p95": 54.78399991989136, + "p99": 60.67200005054474 + }, + "roundtrip": { + "p50": 95.77599912881851, + "p90": 99.64799880981445, + "p95": 100.28800368309021, + "p99": 105.50399869680405 + }, + "isolatedSum": { + "p50": 123.61600250005722, + "p90": 127.03999876976013, + "p95": 128.12799960374832, + "p99": 141.34400337934494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.45600020885468, + "p90": 78.52800190448761, + "p95": 81.15199953317642, + "p99": 210.87999641895294 + }, + "combine": { + "p50": 53.408000618219376, + "p90": 59.87200140953064, + "p95": 60.7680007815361, + "p99": 61.88800185918808 + }, + "roundtrip": { + "p50": 98.2080027461052, + "p90": 104.44799810647964, + "p95": 105.3759977221489, + "p99": 107.71200060844421 + }, + "isolatedSum": { + "p50": 124.86400082707405, + "p90": 138.40000331401825, + "p95": 141.92000031471252, + "p99": 272.767998278141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 63.58399987220764, + "p90": 79.52000200748444, + "p95": 80.4160013794899, + "p99": 82.33600109815598 + }, + "combine": { + "p50": 53.727999329566956, + "p90": 62.33600154519081, + "p95": 63.64800035953522, + "p99": 69.18399780988693 + }, + "roundtrip": { + "p50": 91.64799749851227, + "p90": 108.03200304508209, + "p95": 
109.11999642848969, + "p99": 115.99999666213989 + }, + "isolatedSum": { + "p50": 117.3119992017746, + "p90": 141.85600355267525, + "p95": 144.06400173902512, + "p99": 151.5199989080429 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.00799942016602, + "p90": 90.65599739551544, + "p95": 93.47199648618698, + "p99": 96.19200229644775 + }, + "combine": { + "p50": 77.56800204515457, + "p90": 79.52000200748444, + "p95": 80.25600016117096, + "p99": 85.24800091981888 + }, + "roundtrip": { + "p50": 116.15999788045883, + "p90": 132.64000415802002, + "p95": 135.29600203037262, + "p99": 140.1599943637848 + }, + "isolatedSum": { + "p50": 164.5760014653206, + "p90": 170.17599940299988, + "p95": 173.72799664735794, + "p99": 181.44000321626663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.0640019774437, + "p90": 113.82400244474411, + "p95": 115.39199948310852, + "p99": 120.64000219106674 + }, + "combine": { + "p50": 101.79200023412704, + "p90": 111.51999980211258, + "p95": 112.5119999051094, + "p99": 113.98400366306305 + }, + "roundtrip": { + "p50": 173.69599640369415, + "p90": 187.96800076961517, + "p95": 189.7599995136261, + "p99": 192.73599982261658 + }, + "isolatedSum": { + "p50": 201.85600221157074, + "p90": 225.3440022468567, + "p95": 227.90399938821793, + "p99": 234.6240058541298 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7e5e2f55", + "identity": "h100|uccl|n-a|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_651c5849", + "comparisonKey": "a0afbfa4665e0837", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:33.255198+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.96000289916992, + "p90": 71.61600142717361, + "p95": 72.1919983625412, + "p99": 78.5600021481514 + }, + "combine": { + "p50": 52.51200124621391, + "p90": 53.599998354911804, + "p95": 54.84800040721893, + "p99": 61.02399900555611 + }, + "roundtrip": { + "p50": 96.0640013217926, + "p90": 98.30400347709656, + "p95": 99.35999661684036, + "p99": 105.34399747848511 + }, + "isolatedSum": { + "p50": 117.47200414538383, + "p90": 125.21599978208542, + "p95": 127.03999876976013, + "p99": 139.5840011537075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 55.296000093221664, + "p90": 70.94399631023407, + "p95": 71.45600020885468, + "p99": 77.66400277614594 + }, + "combine": { + "p50": 43.327998369932175, + "p90": 53.31199988722801, + "p95": 53.727999329566956, + "p99": 59.87200140953064 + }, + "roundtrip": { + "p50": 73.60000163316727, + "p90": 97.47199714183807, + "p95": 98.08000177145004, + "p99": 100.60799866914749 + }, + "isolatedSum": { + "p50": 98.62399846315384, + "p90": 124.25599619746208, + "p95": 125.18399953842163, + "p99": 137.53600418567657 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 55.58399856090546, + "p90": 70.91200351715088, + "p95": 71.48800045251846, + "p99": 77.88799703121185 + }, + "combine": { + 
"p50": 43.74400153756142, + "p90": 52.76799947023392, + "p95": 59.67999994754791, + "p99": 60.95999851822853 + }, + "roundtrip": { + "p50": 73.85600358247757, + "p90": 98.08000177145004, + "p95": 104.12800312042236, + "p99": 230.78399896621704 + }, + "isolatedSum": { + "p50": 99.32800009846687, + "p90": 123.6800029873848, + "p95": 131.16800040006638, + "p99": 138.84799554944038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 65.05600363016129, + "p90": 71.74400240182877, + "p95": 72.51200079917908, + "p99": 79.19999957084656 + }, + "combine": { + "p50": 52.671998739242554, + "p90": 53.727999329566956, + "p95": 54.55999821424484, + "p99": 60.19200012087822 + }, + "roundtrip": { + "p50": 80.6720033288002, + "p90": 97.9200005531311, + "p95": 99.0080013871193, + "p99": 104.54399883747101 + }, + "isolatedSum": { + "p50": 117.72800236940384, + "p90": 125.47200173139572, + "p95": 127.07199901342392, + "p99": 139.39199969172478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.42399996519089, + "p90": 78.59200239181519, + "p95": 79.26400005817413, + "p99": 80.25600016117096 + }, + "combine": { + "p50": 58.52799862623215, + "p90": 60.19200012087822, + "p95": 60.416001826524734, + "p99": 61.88800185918808 + }, + "roundtrip": { + "p50": 97.98400104045868, + "p90": 105.21599650382996, + "p95": 105.98400235176086, + "p99": 113.6000007390976 + }, + "isolatedSum": { + "p50": 129.95199859142303, + "p90": 138.7840025126934, + "p95": 139.68000188469887, + "p99": 
142.14400202035904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 63.519999384880066, + "p90": 79.00799810886383, + "p95": 79.74400371313095, + "p99": 81.18399977684021 + }, + "combine": { + "p50": 53.599998354911804, + "p90": 61.91999837756157, + "p95": 67.391999065876, + "p99": 68.89600306749344 + }, + "roundtrip": { + "p50": 91.13600105047226, + "p90": 112.41599917411804, + "p95": 113.18399757146835, + "p99": 114.62400108575821 + }, + "isolatedSum": { + "p50": 117.11999773979187, + "p90": 140.9279964864254, + "p95": 147.13600277900696, + "p99": 150.08000284433365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 74.5920017361641, + "p90": 93.79199892282486, + "p95": 95.42399644851685, + "p99": 96.44799679517746 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 79.03999835252762, + "p95": 79.83999699354172, + "p99": 87.5839963555336 + }, + "roundtrip": { + "p50": 116.83200299739838, + "p90": 133.4719955921173, + "p95": 136.73600554466248, + "p99": 140.09599387645721 + }, + "isolatedSum": { + "p50": 143.67999881505966, + "p90": 172.83199727535248, + "p95": 175.26399344205856, + "p99": 184.03199315071106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 100.41599720716476, + "p90": 
117.98399686813354, + "p95": 130.43199479579926, + "p99": 155.29599785804749 + }, + "combine": { + "p50": 102.65599936246872, + "p90": 112.19199746847153, + "p95": 112.92800307273865, + "p99": 118.33599954843521 + }, + "roundtrip": { + "p50": 173.98400604724884, + "p90": 188.6720061302185, + "p95": 189.98399376869202, + "p99": 191.74399971961975 + }, + "isolatedSum": { + "p50": 203.07199656963348, + "p90": 230.17599433660507, + "p95": 243.3599978685379, + "p99": 273.6319974064827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7ead3660", + "identity": "h100|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "8fcab4ce81acc739", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:45.780791+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 263.808012008667, + "p90": 272.96000719070435, + "p95": 276.06400847435, + "p99": 305.9520125389099 + }, + "combine": { + "p50": 64.4799992442131, + "p90": 68.1919977068901, + "p95": 70.8480030298233, + "p99": 74.94399696588516 + }, + "roundtrip": { + "p50": 315.61601161956787, + "p90": 325.1200020313263, + "p95": 328.8640081882477, + "p99": 356.28798604011536 + }, + "isolatedSum": { + "p50": 328.2880112528801, + "p90": 341.15200489759445, + "p95": 346.9120115041733, + "p99": 380.8960095047951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 263.9999985694885, + "p90": 271.87201380729675, + "p95": 276.5119969844818, + "p99": 288.2559895515442 + }, + "combine": { + "p50": 65.88800251483917, + "p90": 69.47200000286102, + "p95": 71.58400118350983, + "p99": 75.71200281381607 + }, + "roundtrip": { + "p50": 316.8320059776306, + "p90": 325.76000690460205, + "p95": 329.24801111221313, + "p99": 
340.2880132198334 + }, + "isolatedSum": { + "p50": 329.8880010843277, + "p90": 341.3440138101578, + "p95": 348.09599816799164, + "p99": 363.96799236536026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 269.3760097026825, + "p90": 385.15201210975647, + "p95": 390.8480107784271, + "p99": 412.03200817108154 + }, + "combine": { + "p50": 67.32799857854843, + "p90": 85.50400286912918, + "p95": 87.13600039482117, + "p99": 92.76799857616425 + }, + "roundtrip": { + "p50": 321.0879862308502, + "p90": 456.0000002384186, + "p95": 460.83199977874756, + "p99": 472.57599234580994 + }, + "isolatedSum": { + "p50": 336.7040082812309, + "p90": 470.65601497888565, + "p95": 477.9840111732483, + "p99": 504.8000067472458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 264.5440101623535, + "p90": 273.47201108932495, + "p95": 277.40800380706787, + "p99": 297.88801074028015 + }, + "combine": { + "p50": 67.19999760389328, + "p90": 70.8480030298233, + "p95": 72.83200323581696, + "p99": 76.1599987745285 + }, + "roundtrip": { + "p50": 316.6399896144867, + "p90": 326.1120021343231, + "p95": 330.1759958267212, + "p99": 346.52799367904663 + }, + "isolatedSum": { + "p50": 331.7440077662468, + "p90": 344.32001411914825, + "p95": 350.2400070428848, + "p99": 374.04800951480865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 269.0559923648834, + "p90": 378.464013338089, + "p95": 383.58399271965027, + "p99": 389.50398564338684 + }, + "combine": { + "p50": 69.31199878454208, + "p90": 85.37600189447403, + "p95": 86.2400010228157, + "p99": 90.84799885749817 + }, + "roundtrip": { + "p50": 318.65599751472473, + "p90": 328.73600721359253, + "p95": 332.15999603271484, + "p99": 343.07199716567993 + }, + "isolatedSum": { + "p50": 338.3679911494255, + "p90": 463.840015232563, + "p95": 469.823993742466, + "p99": 480.351984500885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 264.16000723838806, + "p90": 272.7679908275604, + "p95": 275.9999930858612, + "p99": 286.49601340293884 + }, + "combine": { + "p50": 71.48800045251846, + "p90": 75.9039968252182, + "p95": 77.95199751853943, + "p99": 79.96799796819687 + }, + "roundtrip": { + "p50": 322.6560056209564, + "p90": 330.49601316452026, + "p95": 333.6640000343323, + "p99": 341.3119912147522 + }, + "isolatedSum": { + "p50": 335.6480076909065, + "p90": 348.6719876527786, + "p95": 353.95199060440063, + "p99": 366.4640113711357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 269.6000039577484, + "p90": 386.1120045185089, + "p95": 390.49598574638367, + "p99": 401.15201473236084 + }, + "combine": { + "p50": 80.51200211048126, + "p90": 99.5199978351593, + "p95": 101.56799852848053, + "p99": 106.39999806880951 + }, + "roundtrip": { + "p50": 
336.7680013179779, + "p90": 462.97600865364075, + "p95": 467.16800332069397, + "p99": 477.4720072746277 + }, + "isolatedSum": { + "p50": 350.1120060682297, + "p90": 485.6320023536682, + "p95": 492.0639842748642, + "p99": 507.55201280117035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 270.8800137042999, + "p90": 387.03998923301697, + "p95": 391.36001467704773, + "p99": 399.9359905719757 + }, + "combine": { + "p50": 92.00000017881393, + "p90": 109.72800105810165, + "p95": 110.75200140476227, + "p99": 116.35199934244156 + }, + "roundtrip": { + "p50": 348.89599680900574, + "p90": 469.215989112854, + "p95": 474.047988653183, + "p99": 482.91200399398804 + }, + "isolatedSum": { + "p50": 362.88001388311386, + "p90": 496.7679902911186, + "p95": 502.11201608181, + "p99": 516.2879899144173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6c6ba827", + "identity": "h100|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "f2242a31a5df00fa", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:43.144482+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 270.3680098056793, + "p90": 279.32798862457275, + "p95": 283.6480140686035, + "p99": 300.6080090999603 + }, + "combine": { + "p50": 69.5360004901886, + "p90": 72.92799651622772, + "p95": 75.19999891519547, + "p99": 78.94399762153625 + }, + "roundtrip": { + "p50": 326.1759877204895, + "p90": 334.46401357650757, + "p95": 338.3359909057617, + "p99": 358.72000455856323 + }, + "isolatedSum": { + "p50": 339.9040102958679, + "p90": 352.2559851408005, + "p95": 358.848012983799, + "p99": 379.5520067214966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + 
"combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 270.01601457595825, + "p90": 279.4240117073059, + "p95": 282.81599283218384, + "p99": 291.8719947338104 + }, + "combine": { + "p50": 70.36799937486649, + "p90": 74.17599856853485, + "p95": 75.74400305747986, + "p99": 81.24800026416779 + }, + "roundtrip": { + "p50": 326.4000117778778, + "p90": 334.7199857234955, + "p95": 337.47199177742004, + "p99": 352.8960049152374 + }, + "isolatedSum": { + "p50": 340.38401395082474, + "p90": 353.60001027584076, + "p95": 358.5599958896637, + "p99": 373.1199949979782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 275.9999930858612, + "p90": 403.4239947795868, + "p95": 409.37599539756775, + "p99": 735.4879975318909 + }, + "combine": { + "p50": 73.11999797821045, + "p90": 87.96799927949905, + "p95": 88.99199962615967, + "p99": 93.82399916648865 + }, + "roundtrip": { + "p50": 333.98398756980896, + "p90": 437.9520118236542, + "p95": 444.60800290107727, + "p99": 451.9999921321869 + }, + "isolatedSum": { + "p50": 349.11999106407166, + "p90": 491.39199405908585, + "p95": 498.3679950237274, + "p99": 829.3119966983795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 270.27198672294617, + "p90": 280.8000147342682, + "p95": 287.9999876022339, + "p99": 308.31998586654663 + }, + "combine": { + "p50": 
73.2479989528656, + "p90": 76.28799974918365, + "p95": 77.47200131416321, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 330.6559920310974, + "p90": 337.92001008987427, + "p95": 340.7360017299652, + "p99": 347.1679985523224 + }, + "isolatedSum": { + "p50": 343.51998567581177, + "p90": 357.08801448345184, + "p95": 365.4719889163971, + "p99": 388.73598724603653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 270.59200406074524, + "p90": 280.0000011920929, + "p95": 285.504013299942, + "p99": 306.5280020236969 + }, + "combine": { + "p50": 73.79200309515, + "p90": 76.64000242948532, + "p95": 78.11199873685837, + "p99": 82.07999914884567 + }, + "roundtrip": { + "p50": 331.5199911594391, + "p90": 339.4559919834137, + "p95": 342.1120047569275, + "p99": 347.84001111984253 + }, + "isolatedSum": { + "p50": 344.38400715589523, + "p90": 356.6400036215782, + "p95": 363.6160120368004, + "p99": 388.6080011725426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 270.143985748291, + "p90": 279.87200021743774, + "p95": 285.47200560569763, + "p99": 305.9839904308319 + }, + "combine": { + "p50": 77.37600058317184, + "p90": 80.35200089216232, + "p95": 82.30400085449219, + "p99": 84.79999750852585 + }, + "roundtrip": { + "p50": 334.879994392395, + "p90": 347.7120101451874, + "p95": 359.6160113811493, + "p99": 377.53599882125854 + }, + "isolatedSum": { + "p50": 347.51998633146286, + "p90": 360.22400110960007, + "p95": 367.7760064601898, + "p99": 390.78398793935776 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 275.84001421928406, + "p90": 400.5120098590851, + "p95": 404.448002576828, + "p99": 414.46399688720703 + }, + "combine": { + "p50": 87.71199733018875, + "p90": 106.175996363163, + "p95": 107.61599987745285, + "p99": 112.28799819946289 + }, + "roundtrip": { + "p50": 349.40800070762634, + "p90": 484.3519926071167, + "p95": 487.8399968147278, + "p99": 494.87999081611633 + }, + "isolatedSum": { + "p50": 363.5520115494728, + "p90": 506.6880062222481, + "p95": 512.0640024542809, + "p99": 526.7519950866699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 278.9120078086853, + "p90": 362.87999153137207, + "p95": 367.7760064601898, + "p99": 374.84800815582275 + }, + "combine": { + "p50": 101.50399804115295, + "p90": 112.67200112342834, + "p95": 113.56800049543381, + "p99": 119.00799721479416 + }, + "roundtrip": { + "p50": 366.14400148391724, + "p90": 455.9679925441742, + "p95": 459.29598808288574, + "p99": 465.2799963951111 + }, + "isolatedSum": { + "p50": 380.41600584983826, + "p90": 475.5519926548004, + "p95": 481.3440069556236, + "p99": 493.8560053706169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f13540d0", + "identity": 
"h100|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "784a18b5a955f1ee", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:43.736663+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 262.0159983634949, + "p90": 270.9119915962219, + "p95": 274.0800082683563, + "p99": 292.4799919128418 + }, + "combine": { + "p50": 71.87200337648392, + "p90": 75.16799867153168, + "p95": 77.27999985218048, + "p99": 80.32000064849854 + }, + "roundtrip": { + "p50": 322.9120075702667, + "p90": 330.9760093688965, + "p95": 333.407998085022, + "p99": 341.37600660324097 + }, + "isolatedSum": { + "p50": 333.8880017399788, + "p90": 346.0799902677536, + "p95": 351.3600081205368, + "p99": 372.79999256134033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 261.1519992351532, + "p90": 271.10400795936584, + "p95": 274.6880054473877, + "p99": 290.367990732193 + }, + "combine": { + "p50": 73.21599870920181, + "p90": 76.4480009675026, + "p95": 78.23999971151352, + "p99": 83.00799876451492 + }, + "roundtrip": { + "p50": 321.82401418685913, + "p90": 330.7200074195862, + "p95": 332.38399028778076, + "p99": 337.66400814056396 + }, + "isolatedSum": { + "p50": 334.367997944355, + "p90": 347.55200892686844, + "p95": 352.9280051589012, + "p99": 373.3759894967079 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 267.07199215888977, + "p90": 385.50400733947754, + "p95": 391.4560079574585, + "p99": 438.4959936141968 + }, + "combine": { + "p50": 74.5920017361641, + "p90": 92.44800359010696, + "p95": 93.31200271844864, + "p99": 97.63199836015701 + }, + "roundtrip": { + "p50": 328.2560110092163, + "p90": 460.25601029396057, + "p95": 
484.3200147151947, + "p99": 522.7519869804382 + }, + "isolatedSum": { + "p50": 341.66399389505386, + "p90": 477.9520109295845, + "p95": 484.76801067590714, + "p99": 536.1279919743538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 262.87999749183655, + "p90": 273.0560004711151, + "p95": 276.95998549461365, + "p99": 308.1600069999695 + }, + "combine": { + "p50": 74.07999783754349, + "p90": 77.72800326347351, + "p95": 79.3600007891655, + "p99": 83.42400193214417 + }, + "roundtrip": { + "p50": 324.8319923877716, + "p90": 332.4800133705139, + "p95": 334.81600880622864, + "p99": 342.4000144004822 + }, + "isolatedSum": { + "p50": 336.95999532938004, + "p90": 350.7840037345886, + "p95": 356.31998628377914, + "p99": 391.58400893211365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 261.31200790405273, + "p90": 271.32800221443176, + "p95": 275.4560112953186, + "p99": 286.655992269516 + }, + "combine": { + "p50": 75.77600330114365, + "p90": 80.03199845552444, + "p95": 81.7599967122078, + "p99": 84.70399677753448 + }, + "roundtrip": { + "p50": 325.3439962863922, + "p90": 333.3120048046112, + "p95": 335.6800079345703, + "p99": 344.0319895744324 + }, + "isolatedSum": { + "p50": 337.0880112051964, + "p90": 351.3600006699562, + "p95": 357.2160080075264, + "p99": 371.3599890470505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 262.33598589897156, + "p90": 271.93599939346313, + "p95": 276.16000175476074, + "p99": 292.928010225296 + }, + "combine": { + "p50": 80.28800040483475, + "p90": 83.67999643087387, + "p95": 85.66399663686752, + "p99": 89.1840010881424 + }, + "roundtrip": { + "p50": 330.3999900817871, + "p90": 338.6879861354828, + "p95": 342.4000144004822, + "p99": 351.74399614334106 + }, + "isolatedSum": { + "p50": 342.6239863038063, + "p90": 355.615995824337, + "p95": 361.82399839162827, + "p99": 382.1120113134384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 268.6080038547516, + "p90": 472.51200675964355, + "p95": 527.616024017334, + "p99": 543.1360006332397 + }, + "combine": { + "p50": 90.01599997282028, + "p90": 109.43999886512756, + "p95": 112.19199746847153, + "p99": 115.167997777462 + }, + "roundtrip": { + "p50": 346.20800614356995, + "p90": 474.07999634742737, + "p95": 477.05599665641785, + "p99": 487.36000061035156 + }, + "isolatedSum": { + "p50": 358.62400382757187, + "p90": 581.9520056247711, + "p95": 639.8080214858055, + "p99": 658.3039984107018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 270.7520127296448, + "p90": 443.1360065937042, + "p95": 452.09598541259766, + "p99": 469.9839949607849 + }, + "combine": { + "p50": 105.3759977221489, + "p90": 123.23199957609177, + "p95": 124.67200309038162, + "p99": 136.4479959011078 + }, 
+ "roundtrip": { + "p50": 362.4959886074066, + "p90": 466.592013835907, + "p95": 470.5600142478943, + "p99": 480.19200563430786 + }, + "isolatedSum": { + "p50": 376.12801045179367, + "p90": 566.368006169796, + "p95": 576.7679885029793, + "p99": 606.4319908618927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a474931d", + "identity": "h100|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_4e920b93", + "comparisonKey": "aec1e872b8bd2708", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:03.305059+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 131.6159963607788, + "p90": 139.64800536632538, + "p95": 143.93599331378937, + "p99": 157.02399611473083 + }, + "combine": { + "p50": 76.4160007238388, + "p90": 81.28000050783157, + "p95": 85.91999858617783, + "p99": 102.78400033712387 + }, + "roundtrip": { + "p50": 233.2800030708313, + "p90": 241.18399620056152, + "p95": 245.728000998497, + "p99": 252.25600600242615 + }, + "isolatedSum": { + "p50": 208.03199708461761, + "p90": 220.92800587415695, + "p95": 229.8559918999672, + "p99": 259.8079964518547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 132.09599256515503, + "p90": 139.00800049304962, + "p95": 142.5279974937439, + "p99": 150.39999783039093 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 80.86399734020233, + "p95": 82.46400207281113, + "p99": 85.85599809885025 + }, + "roundtrip": { + "p50": 234.9119931459427, + "p90": 242.8160011768341, + "p95": 245.66400051116943, + "p99": 252.6719868183136 + }, + "isolatedSum": { + "p50": 208.70399475097656, + "p90": 219.87199783325195, + "p95": 224.99199956655502, + "p99": 236.25599592924118 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 137.5039964914322, + "p90": 202.36800611019135, + "p95": 205.6639939546585, + "p99": 228.92799973487854 + }, + "combine": { + "p50": 79.1039988398552, + "p90": 97.85600006580353, + "p95": 100.00000149011612, + "p99": 104.54399883747101 + }, + "roundtrip": { + "p50": 239.3600046634674, + "p90": 325.6320059299469, + "p95": 330.6559920310974, + "p99": 343.51998567581177 + }, + "isolatedSum": { + "p50": 216.60799533128738, + "p90": 300.2240061759949, + "p95": 305.6639954447746, + "p99": 333.47199857234955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 133.31200182437897, + "p90": 252.16001272201538, + "p95": 457.5360119342804, + "p99": 473.5040068626404 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 82.5280025601387, + "p95": 84.09599959850311, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 236.1920028924942, + "p90": 247.3279982805252, + "p95": 256.0639977455139, + "p99": 352.35199332237244 + }, + "isolatedSum": { + "p50": 211.71200275421143, + "p90": 334.6880152821541, + "p95": 541.6320115327835, + "p99": 563.616007566452 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 136.19199395179749, + "p90": 198.2399970293045, + "p95": 201.27999782562256, + "p99": 
206.9759964942932 + }, + "combine": { + "p50": 81.15199953317642, + "p90": 99.71199929714203, + "p95": 101.6639992594719, + "p99": 106.65600001811981 + }, + "roundtrip": { + "p50": 241.11999571323395, + "p90": 325.8560001850128, + "p95": 329.72800731658936, + "p99": 426.9759953022003 + }, + "isolatedSum": { + "p50": 217.3439934849739, + "p90": 297.95199632644653, + "p95": 302.94399708509445, + "p99": 313.631996512413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 134.88000631332397, + "p90": 196.51199877262115, + "p95": 198.97599518299103, + "p99": 206.81600272655487 + }, + "combine": { + "p50": 86.30400151014328, + "p90": 103.39199751615524, + "p95": 104.19200360774994, + "p99": 109.15199667215347 + }, + "roundtrip": { + "p50": 243.16799640655518, + "p90": 251.93598866462708, + "p95": 256.00001215934753, + "p99": 278.27200293540955 + }, + "isolatedSum": { + "p50": 221.18400782346725, + "p90": 299.9039962887764, + "p95": 303.16799879074097, + "p99": 315.96799939870834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.99199259281158, + "p90": 258.14399123191833, + "p95": 261.75999641418457, + "p99": 494.33600902557373 + }, + "combine": { + "p50": 96.25600278377533, + "p90": 115.48800021409988, + "p95": 118.56000125408173, + "p99": 121.88799679279327 + }, + "roundtrip": { + "p50": 257.9199969768524, + "p90": 347.29599952697754, + "p95": 352.9599905014038, + "p99": 374.08000230789185 + }, + "isolatedSum": { + "p50": 233.2479953765869, + "p90": 
373.6319914460182, + "p95": 380.3199976682663, + "p99": 616.224005818367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 142.752006649971, + "p90": 197.91999459266663, + "p95": 200.99200308322906, + "p99": 213.34399282932281 + }, + "combine": { + "p50": 113.11999708414078, + "p90": 130.62399625778198, + "p95": 132.25600123405457, + "p99": 136.86400651931763 + }, + "roundtrip": { + "p50": 275.35998821258545, + "p90": 361.6639971733093, + "p95": 364.6399974822998, + "p99": 372.2879886627197 + }, + "isolatedSum": { + "p50": 255.8720037341118, + "p90": 328.5439908504486, + "p95": 333.24800431728363, + "p99": 350.20799934864044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a7d606bd", + "identity": "h100|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "989dd7ddfd81cb90", + "schemaVersion": 3, + "generatedAt": "2026-07-02T09:00:06.077890+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_02", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 
256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577791037", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577791037", + "createdAt": "2026-07-02T08:53:45Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 261.05600595474243, + "p90": 271.2959945201874, + "p95": 274.2080092430115, + "p99": 289.4720137119293 + }, + "combine": { + "p50": 75.6160020828247, + "p90": 78.36800068616867, + "p95": 80.54400235414505, + "p99": 83.67999643087387 + }, + "roundtrip": { + "p50": 323.90400767326355, + "p90": 334.20801162719727, + "p95": 336.7359936237335, + "p99": 343.392014503479 + }, + "isolatedSum": { + "p50": 336.67200803756714, + "p90": 349.66399520635605, + "p95": 354.7520115971565, + "p99": 373.1520101428032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 261.3759934902191, + "p90": 271.84000611305237, + "p95": 274.56000447273254, + "p99": 286.01598739624023 + }, + "combine": { + "p50": 76.80000364780426, + "p90": 81.08799904584885, + "p95": 83.10399949550629, + "p99": 109.3439981341362 + }, + "roundtrip": { + "p50": 324.38400387763977, + "p90": 335.7439935207367, + "p95": 338.6240005493164, + "p99": 351.4559864997864 + }, + "isolatedSum": { + "p50": 338.1759971380234, + "p90": 352.9280051589012, + "p95": 357.66400396823883, + "p99": 395.35998553037643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 266.88000559806824, + "p90": 384.67198610305786, + "p95": 388.5439932346344, + "p99": 398.3039855957031 + }, + "combine": { + "p50": 78.40000092983246, + "p90": 96.63999825716019, + "p95": 98.08000177145004, + "p99": 102.36799716949463 + }, + "roundtrip": { + "p50": 331.03999495506287, + "p90": 454.3040096759796, + "p95": 459.74400639533997, + "p99": 464.92800116539 + }, + "isolatedSum": { + "p50": 345.2800065279007, + "p90": 481.31198436021805, + "p95": 486.62399500608444, + "p99": 500.67198276519775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 260.672003030777, + "p90": 269.6639895439148, + "p95": 272.352010011673, + "p99": 277.72799134254456 + }, + "combine": { + "p50": 78.49600166082382, + "p90": 82.14399963617325, + "p95": 83.93599838018417, + "p99": 87.07199990749359 + }, + "roundtrip": { + "p50": 326.6240060329437, + "p90": 
336.544007062912, + "p95": 339.1039967536926, + "p99": 347.5840091705322 + }, + "isolatedSum": { + "p50": 339.1680046916008, + "p90": 351.80798918008804, + "p95": 356.28800839185715, + "p99": 364.79999125003815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 266.7199969291687, + "p90": 372.3199963569641, + "p95": 375.36001205444336, + "p99": 382.207989692688 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 98.52799773216248, + "p95": 99.5199978351593, + "p99": 102.84800082445145 + }, + "roundtrip": { + "p50": 332.67199993133545, + "p90": 444.19199228286743, + "p95": 447.9359984397888, + "p99": 454.78400588035583 + }, + "isolatedSum": { + "p50": 347.6799950003624, + "p90": 470.8479940891266, + "p95": 474.88000988960266, + "p99": 485.05599051713943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 263.0400061607361, + "p90": 272.7360129356384, + "p95": 276.73599123954773, + "p99": 288.4800136089325 + }, + "combine": { + "p50": 85.37600189447403, + "p90": 89.6959975361824, + "p95": 91.80799871683121, + "p99": 93.9520001411438 + }, + "roundtrip": { + "p50": 335.5199992656708, + "p90": 345.3119993209839, + "p95": 348.06400537490845, + "p99": 356.06399178504944 + }, + "isolatedSum": { + "p50": 348.4160080552101, + "p90": 362.43201047182083, + "p95": 368.54398995637894, + "p99": 382.4320137500763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 268.41598749160767, + "p90": 388.0639970302582, + "p95": 392.192006111145, + "p99": 407.3919951915741 + }, + "combine": { + "p50": 95.61599791049957, + "p90": 114.23999816179276, + "p95": 116.5120005607605, + "p99": 119.58400160074234 + }, + "roundtrip": { + "p50": 351.20001435279846, + "p90": 476.25601291656494, + "p95": 481.6319942474365, + "p99": 492.000013589859 + }, + "isolatedSum": { + "p50": 364.03198540210724, + "p90": 502.30399519205093, + "p95": 508.7040066719055, + "p99": 526.9759967923164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 276.16000175476074, + "p90": 377.56800651550293, + "p95": 380.511999130249, + "p99": 389.18399810791016 + }, + "combine": { + "p50": 113.40799927711487, + "p90": 130.5920034646988, + "p95": 131.6159963607788, + "p99": 134.8479986190796 + }, + "roundtrip": { + "p50": 373.02398681640625, + "p90": 486.36800050735474, + "p95": 489.50400948524475, + "p99": 508.60798358917236 + }, + "isolatedSum": { + "p50": 389.5680010318756, + "p90": 508.1600099802017, + "p95": 512.1279954910278, + "p99": 524.0319967269897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-171b38d4", + "identity": "h100|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "90821e79203ec575", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T10:43:41.341646+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 276.8000066280365, + "p90": 289.95200991630554, + "p95": 293.08798909187317, + "p99": 307.13599920272827 + }, + "combine": { + "p50": 76.9599974155426, + 
"p90": 79.64800298213959, + "p95": 81.91999793052673, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 340.5120074748993, + "p90": 350.14399886131287, + "p95": 351.936012506485, + "p99": 360.79999804496765 + }, + "isolatedSum": { + "p50": 353.7600040435791, + "p90": 369.60001289844513, + "p95": 375.0079870223999, + "p99": 394.0479978919029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 278.2079875469208, + "p90": 290.6239926815033, + "p95": 294.75200176239014, + "p99": 306.5919876098633 + }, + "combine": { + "p50": 77.40800082683563, + "p90": 80.48000186681747, + "p95": 83.23200047016144, + "p99": 87.99999952316284 + }, + "roundtrip": { + "p50": 340.5120074748993, + "p90": 349.8559892177582, + "p95": 352.4160087108612, + "p99": 360.8640134334564 + }, + "isolatedSum": { + "p50": 355.6159883737564, + "p90": 371.10399454832077, + "p95": 377.9840022325516, + "p99": 394.5919871330261 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 284.0000092983246, + "p90": 412.31998801231384, + "p95": 418.2719886302948, + "p99": 428.76800894737244 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 100.03200173377991, + "p95": 101.02400183677673, + "p99": 106.30399733781815 + }, + "roundtrip": { + "p50": 352.3840010166168, + "p90": 489.9199903011322, + "p95": 494.59201097488403, + "p99": 507.9039931297302 + }, + "isolatedSum": { + "p50": 363.16800862550735, + "p90": 512.3519897460938, + "p95": 519.2959904670715, + "p99": 535.0720062851906 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 277.72799134254456, + "p90": 289.8240089416504, + "p95": 294.5919930934906, + "p99": 315.42399525642395 + }, + "combine": { + "p50": 78.87999713420868, + "p90": 82.14399963617325, + "p95": 84.09599959850311, + "p99": 90.2400016784668 + }, + "roundtrip": { + "p50": 342.9119884967804, + "p90": 353.1840145587921, + "p95": 355.9679985046387, + "p99": 366.4639890193939 + }, + "isolatedSum": { + "p50": 356.60798847675323, + "p90": 371.96800857782364, + "p95": 378.6879926919937, + "p99": 405.66399693489075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 277.15200185775757, + "p90": 289.18400406837463, + "p95": 292.38399863243103, + "p99": 304.3839931488037 + }, + "combine": { + "p50": 80.9599980711937, + "p90": 83.93599838018417, + "p95": 85.91999858617783, + "p99": 89.63199704885483 + }, + "roundtrip": { + "p50": 344.7679877281189, + "p90": 354.8800051212311, + "p95": 358.36800932884216, + "p99": 366.239994764328 + }, + "isolatedSum": { + "p50": 358.11199992895126, + "p90": 373.1200024485588, + "p95": 378.30399721860886, + "p99": 394.01599019765854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 278.0799865722656, + "p90": 289.5359992980957, + "p95": 295.3920066356659, + "p99": 
311.42398715019226 + }, + "combine": { + "p50": 86.75199747085571, + "p90": 89.9839997291565, + "p95": 92.03200042247772, + "p99": 96.3520035147667 + }, + "roundtrip": { + "p50": 351.26399993896484, + "p90": 360.54399609565735, + "p95": 364.8320138454437, + "p99": 372.0000088214874 + }, + "isolatedSum": { + "p50": 364.83198404312134, + "p90": 379.5199990272522, + "p95": 387.4240070581436, + "p99": 407.77599066495895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 286.1120104789734, + "p90": 416.3840115070343, + "p95": 422.39999771118164, + "p99": 429.79198694229126 + }, + "combine": { + "p50": 96.57599776983261, + "p90": 115.87200313806534, + "p95": 117.63200163841248, + "p99": 122.3360002040863 + }, + "roundtrip": { + "p50": 367.0719861984253, + "p90": 506.5600275993347, + "p95": 512.6720070838928, + "p99": 548.6400127410889 + }, + "isolatedSum": { + "p50": 382.688008248806, + "p90": 532.2560146450996, + "p95": 540.0319993495941, + "p99": 552.1279871463776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 319.7439908981323, + "p90": 415.77601432800293, + "p95": 421.63199186325073, + "p99": 432.3840141296387 + }, + "combine": { + "p50": 120.19199877977371, + "p90": 132.7359974384308, + "p95": 134.36800241470337, + "p99": 138.20800185203552 + }, + "roundtrip": { + "p50": 385.15201210975647, + "p90": 514.9440169334412, + "p95": 520.1280117034912, + "p99": 528.7359952926636 + }, + "isolatedSum": { + "p50": 439.93598967790604, + "p90": 
548.5120117664337, + "p95": 555.9999942779541, + "p99": 570.5920159816742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7b25e329", + "identity": "h100|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_fd11f54b", + "comparisonKey": "1e74a302081ec143", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:03.910648+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 105.34399747848511, + "p90": 117.24799871444702, + "p95": 123.83999675512314, + "p99": 133.5040032863617 + }, + "combine": { + "p50": 74.46400076150894, + "p90": 79.77599650621414, + "p95": 81.63200318813324, + "p99": 84.3840017914772 + }, + "roundtrip": { + "p50": 207.8399956226349, + "p90": 219.07199919223785, + "p95": 226.33600234985352, + "p99": 243.77599358558655 + }, + "isolatedSum": { + "p50": 179.80799823999405, + "p90": 197.02399522066116, + "p95": 205.47199994325638, + "p99": 217.8880050778389 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 106.175996363163, + "p90": 124.7360035777092, + "p95": 127.55200266838074, + "p99": 171.64799571037292 + }, + "combine": { + "p50": 75.45600086450577, + "p90": 81.18399977684021, + "p95": 83.52000266313553, + "p99": 87.3280018568039 + }, + "roundtrip": { + "p50": 208.8319957256317, + "p90": 224.7679978609085, + "p95": 237.05600202083588, + "p99": 322.4959969520569 + }, + "isolatedSum": { + "p50": 181.63199722766876, + "p90": 205.9200033545494, + "p95": 211.07200533151627, + "p99": 258.9759975671768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 111.32799834012985, + "p90": 166.36799275875092, + "p95": 178.46399545669556, + "p99": 199.71199333667755 + }, + "combine": { + "p50": 78.5600021481514, + "p90": 96.96000069379807, + "p95": 99.5199978351593, + "p99": 108.38399827480316 + }, + "roundtrip": { + "p50": 214.20800685882568, + "p90": 289.63199257850647, + "p95": 293.4719920158386, + "p99": 330.9119939804077 + }, + "isolatedSum": { + "p50": 189.88800048828125, + "p90": 263.327993452549, + "p95": 277.98399329185486, + "p99": 308.0959916114807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 106.81600123643875, + "p90": 124.95999783277512, + "p95": 151.32799744606018, + "p99": 156.67200088500977 + }, + "combine": { + "p50": 77.31200009584427, + "p90": 82.8159973025322, + "p95": 84.99199897050858, + "p99": 87.90399879217148 + }, + "roundtrip": { + "p50": 210.207998752594, + "p90": 219.7760045528412, + "p95": 224.09600019454956, + "p99": 240.38399755954742 + }, + "isolatedSum": { + "p50": 184.12800133228302, + "p90": 207.7759951353073, + "p95": 236.31999641656876, + "p99": 244.57599967718124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 111.26399785280228, + "p90": 161.21600568294525, + "p95": 164.41600024700165, + "p99": 188.86399269104004 + }, + "combine": { + "p50": 80.73599636554718, + "p90": 98.9760011434555, + "p95": 100.16000270843506, + "p99": 104.3199971318245 + }, + "roundtrip": { + "p50": 217.056006193161, + 
"p90": 288.5439991950989, + "p95": 291.80800914764404, + "p99": 296.60800099372864 + }, + "isolatedSum": { + "p50": 191.99999421834946, + "p90": 260.19200682640076, + "p95": 264.5760029554367, + "p99": 293.18398982286453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 110.84800213575363, + "p90": 159.2320054769516, + "p95": 162.81600296497345, + "p99": 168.44800114631653 + }, + "combine": { + "p50": 86.17600053548813, + "p90": 104.5759990811348, + "p95": 106.9440022110939, + "p99": 112.22399771213531 + }, + "roundtrip": { + "p50": 222.81600534915924, + "p90": 286.6879999637604, + "p95": 290.0480031967163, + "p99": 294.65600848197937 + }, + "isolatedSum": { + "p50": 197.02400267124176, + "p90": 263.8080045580864, + "p95": 269.76000517606735, + "p99": 280.67199885845184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 111.7120012640953, + "p90": 161.9199961423874, + "p95": 167.1680063009262, + "p99": 185.18400192260742 + }, + "combine": { + "p50": 95.36000341176987, + "p90": 113.72800171375275, + "p95": 115.42399972677231, + "p99": 119.6800023317337 + }, + "roundtrip": { + "p50": 234.047994017601, + "p90": 311.13600730895996, + "p95": 314.5599961280823, + "p99": 340.5759930610657 + }, + "isolatedSum": { + "p50": 207.07200467586517, + "p90": 275.64799785614014, + "p95": 282.5920060276985, + "p99": 304.8640042543411 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + 
"recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.29599940776825, + "p90": 161.5999937057495, + "p95": 165.3759926557541, + "p99": 189.69599902629852 + }, + "combine": { + "p50": 112.57600039243698, + "p90": 133.37600231170654, + "p95": 138.5599970817566, + "p99": 143.39199662208557 + }, + "roundtrip": { + "p50": 253.60000133514404, + "p90": 326.4960050582886, + "p95": 329.4079899787903, + "p99": 351.83998942375183 + }, + "isolatedSum": { + "p50": 231.87199980020523, + "p90": 294.97599601745605, + "p95": 303.9359897375107, + "p99": 333.0879956483841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-262b3d7a", + "identity": "h100|uccl|n-a|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_46addd92", + "comparisonKey": "3fce8421920fa811", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:23.359877+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + 
"combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 50.944000482559204, + "p90": 54.91200089454651, + "p95": 56.671999394893646, + "p99": 59.84000116586685 + }, + "combine": { + "p50": 43.74400153756142, + "p90": 47.16800153255463, + "p95": 50.016000866889954, + "p99": 59.39200147986412 + }, + "roundtrip": { + "p50": 2086.591958999634, + "p90": 2090.0158882141113, + "p95": 2092.0960903167725, + "p99": 2098.367929458618 + }, + "isolatedSum": { + "p50": 94.68800202012062, + "p90": 102.08000242710114, + "p95": 106.6880002617836, + "p99": 119.23200264573097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 50.36799982190132, + "p90": 55.48800155520439, + "p95": 56.96000158786774, + "p99": 89.05600011348724 + }, + "combine": { + "p50": 44.03200000524521, + 
"p90": 48.22399839758873, + "p95": 51.4880008995533, + "p99": 69.60000097751617 + }, + "roundtrip": { + "p50": 2087.199926376343, + "p90": 2091.264009475708, + "p95": 2093.3759212493896, + "p99": 2114.975929260254 + }, + "isolatedSum": { + "p50": 94.39999982714653, + "p90": 103.71199995279312, + "p95": 108.44800248742104, + "p99": 158.65600109100342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 52.671998739242554, + "p90": 73.95199686288834, + "p95": 74.78400319814682, + "p99": 80.60800284147263 + }, + "combine": { + "p50": 44.79999840259552, + "p90": 49.02400076389313, + "p95": 53.47200110554695, + "p99": 60.83200126886368 + }, + "roundtrip": { + "p50": 2087.455987930298, + "p90": 2091.4878845214844, + "p95": 2093.5680866241455, + "p99": 2105.664014816284 + }, + "isolatedSum": { + "p50": 97.47199714183807, + "p90": 122.97599762678146, + "p95": 128.25600430369377, + "p99": 141.4400041103363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 51.16799846291542, + "p90": 55.904000997543335, + "p95": 57.34400078654289, + "p99": 61.503998935222626 + }, + "combine": { + "p50": 45.471999794244766, + "p90": 49.247998744249344, + "p95": 51.64799839258194, + "p99": 64.44799900054932 + }, + "roundtrip": { + "p50": 2088.7680053710938, + "p90": 2092.1599864959717, + "p95": 2094.0799713134766, + "p99": 2110.7521057128906 + }, + "isolatedSum": { + "p50": 96.63999825716019, + "p90": 105.15199974179268, + "p95": 108.99199917912483, + "p99": 125.95199793577194 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 52.70399898290634, + "p90": 56.92800134420395, + "p95": 58.400001376867294, + "p99": 60.35200133919716 + }, + "combine": { + "p50": 49.47200044989586, + "p90": 53.02400141954422, + "p95": 55.87200075387955, + "p99": 66.01600348949432 + }, + "roundtrip": { + "p50": 2094.9440002441406, + "p90": 2098.0799198150635, + "p95": 2100.7680892944336, + "p99": 2113.759994506836 + }, + "isolatedSum": { + "p50": 102.1759994328022, + "p90": 109.95200276374817, + "p95": 114.27200213074684, + "p99": 126.36800482869148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.16799911856651, + "p90": 60.54399907588959, + "p95": 67.32799857854843, + "p99": 229.66399788856506 + }, + "combine": { + "p50": 56.8000003695488, + "p90": 59.90400165319443, + "p95": 61.055999249219894, + "p99": 74.8480036854744 + }, + "roundtrip": { + "p50": 2104.480028152466, + "p90": 2108.2561016082764, + "p95": 2111.2639904022217, + "p99": 2160.320043563843 + }, + "isolatedSum": { + "p50": 111.96799948811531, + "p90": 120.44800072908401, + "p95": 128.38399782776833, + "p99": 304.51200157403946 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 61.5679994225502, + "p90": 81.82399719953537, + "p95": 
83.16799998283386, + "p99": 86.87999844551086 + }, + "combine": { + "p50": 70.39999961853027, + "p90": 82.24000036716461, + "p95": 83.26400071382523, + "p99": 87.13600039482117 + }, + "roundtrip": { + "p50": 2125.3440380096436, + "p90": 2143.0399417877197, + "p95": 2144.063949584961, + "p99": 2149.280071258545 + }, + "isolatedSum": { + "p50": 131.96799904108047, + "p90": 164.06399756669998, + "p95": 166.4320006966591, + "p99": 174.01599884033203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.42400062084198, + "p90": 77.66400277614594, + "p95": 79.1039988398552, + "p99": 81.63200318813324 + }, + "combine": { + "p50": 99.20000284910202, + "p90": 102.59199887514114, + "p95": 103.45599800348282, + "p99": 105.79200088977814 + }, + "roundtrip": { + "p50": 2166.624069213867, + "p90": 2170.2721118927, + "p95": 2171.488046646118, + "p99": 2177.5360107421875 + }, + "isolatedSum": { + "p50": 174.624003469944, + "p90": 180.25600165128708, + "p95": 182.559996843338, + "p99": 187.42400407791138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3f7d6dfa", + "identity": "h100|uccl|n-a|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h100_f06b5f2e", + "comparisonKey": "9120d2a8a6208dd6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:13.416503+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "mode": "ll", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 51.45600065588951, + "p90": 55.87200075387955, + "p95": 57.98399820923805, + "p99": 66.65600091218948 + }, + "combine": { + "p50": 43.327998369932175, + "p90": 46.46399989724159, + "p95": 48.928000032901764, + "p99": 60.54399907588959 + }, + "roundtrip": { + "p50": 2087.552070617676, + "p90": 2090.0158882141113, + "p95": 2091.8400287628174, + "p99": 2097.759962081909 + }, + 
"isolatedSum": { + "p50": 94.78399902582169, + "p90": 102.33600065112114, + "p95": 106.91199824213982, + "p99": 127.19999998807907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 50.40000006556511, + "p90": 55.52000179886818, + "p95": 57.151999324560165, + "p99": 60.70400029420853 + }, + "combine": { + "p50": 44.35199871659279, + "p90": 47.10400104522705, + "p95": 48.70399832725525, + "p99": 52.15999856591225 + }, + "roundtrip": { + "p50": 2088.1600379943848, + "p90": 2091.520071029663, + "p95": 2093.6319828033447, + "p99": 2114.176034927368 + }, + "isolatedSum": { + "p50": 94.7519987821579, + "p90": 102.62400284409523, + "p95": 105.85599765181541, + "p99": 112.86399886012077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 51.231998950242996, + "p90": 56.28800019621849, + "p95": 58.14399942755699, + "p99": 61.43999844789505 + }, + "combine": { + "p50": 44.79999840259552, + "p90": 47.807998955249786, + "p95": 50.175998359918594, + "p99": 57.760000228881836 + }, + "roundtrip": { + "p50": 2088.479995727539, + "p90": 2091.6800498962402, + "p95": 2093.791961669922, + "p99": 2135.5841159820557 + }, + "isolatedSum": { + "p50": 96.03199735283852, + "p90": 104.09599915146828, + "p95": 108.31999778747559, + "p99": 119.19999867677689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 51.7439991235733, + "p90": 56.703999638557434, + "p95": 58.46399813890457, + "p99": 65.98400324583054 + }, + "combine": { + "p50": 46.112000942230225, + "p90": 48.96000027656555, + "p95": 50.4320003092289, + "p99": 57.50399827957153 + }, + "roundtrip": { + "p50": 2090.4319286346436, + "p90": 2094.496011734009, + "p95": 2097.343921661377, + "p99": 2106.8480014801025 + }, + "isolatedSum": { + "p50": 97.85600006580353, + "p90": 105.66399991512299, + "p95": 108.89599844813347, + "p99": 123.48800152540207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 52.73599922657013, + "p90": 56.96000158786774, + "p95": 58.62399935722351, + "p99": 62.52799928188324 + }, + "combine": { + "p50": 48.70399832725525, + "p90": 51.711998879909515, + "p95": 53.85600030422211, + "p99": 59.07199904322624 + }, + "roundtrip": { + "p50": 2094.8479175567627, + "p90": 2098.0799198150635, + "p95": 2099.6479988098145, + "p99": 2109.312057495117 + }, + "isolatedSum": { + "p50": 101.43999755382538, + "p90": 108.67200046777725, + "p95": 112.47999966144562, + "p99": 121.59999832510948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 54.91200089454651, + "p90": 59.967998415231705, + "p95": 62.94400244951248, + "p99": 92.57599711418152 + }, + "combine": { + "p50": 55.84000051021576, + "p90": 58.240000158548355, + "p95": 59.61599946022034, + "p99": 63.840001821517944 + }, + "roundtrip": { + "p50": 2104.9280166625977, + "p90": 
2108.351945877075, + "p95": 2109.9839210510254, + "p99": 2122.7200031280518 + }, + "isolatedSum": { + "p50": 110.75200140476227, + "p90": 118.20799857378006, + "p95": 122.56000190973282, + "p99": 156.41599893569946 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 62.49599903821945, + "p90": 82.17599987983704, + "p95": 83.55200290679932, + "p99": 87.3280018568039 + }, + "combine": { + "p50": 69.05599683523178, + "p90": 82.5280025601387, + "p95": 84.35200154781342, + "p99": 89.1840010881424 + }, + "roundtrip": { + "p50": 2124.44806098938, + "p90": 2146.6879844665527, + "p95": 2148.416042327881, + "p99": 2156.4478874206543 + }, + "isolatedSum": { + "p50": 131.55199587345123, + "p90": 164.70400243997574, + "p95": 167.90400445461273, + "p99": 176.5120029449463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 75.39200037717819, + "p90": 78.36800068616867, + "p95": 80.22399991750717, + "p99": 83.39200168848038 + }, + "combine": { + "p50": 96.79999947547913, + "p90": 101.1200025677681, + "p95": 103.39199751615524, + "p99": 156.0640037059784 + }, + "roundtrip": { + "p50": 2165.855884552002, + "p90": 2169.7919368743896, + "p95": 2172.4159717559814, + "p99": 2204.479932785034 + }, + "isolatedSum": { + "p50": 172.19199985265732, + "p90": 179.48800325393677, + "p95": 183.61599743366241, + "p99": 239.45600539445877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + 
"recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5bb00a2e", + "identity": "h100|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_ef4af798", + "comparisonKey": "393a166a10201a81", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:15.352865+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 144.6399986743927, + "p90": 148.73600006103516, + "p95": 151.2320041656494, + "p99": 158.30400586128235 + }, + "combine": { + "p50": 98.30400347709656, + "p90": 103.96800190210342, + "p95": 105.31199723482132, + "p99": 110.68800091743469 + }, + "roundtrip": { + "p50": 212.54399418830872, + "p90": 218.6560034751892, + "p95": 221.18400037288666, + "p99": 227.32800245285034 + }, + "isolatedSum": { + "p50": 242.94400215148926, + "p90": 252.70400196313858, + "p95": 256.54400140047073, + "p99": 268.99200677871704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.52800011634827, + "p90": 164.8319959640503, + "p95": 166.4000004529953, + "p99": 174.81599748134613 + }, + "combine": { + "p50": 123.48800152540207, + "p90": 129.43999469280243, + "p95": 130.36799430847168, + "p99": 134.11200046539307 + }, + "roundtrip": { + "p50": 247.26399779319763, + "p90": 252.73600220680237, + "p95": 254.11200523376465, + "p99": 260.127991437912 + }, + "isolatedSum": { + "p50": 282.01600164175034, + "p90": 294.2719906568527, + "p95": 296.767994761467, + "p99": 308.9279979467392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 195.360004901886, + "p90": 201.12000405788422, + "p95": 204.28800582885742, + 
"p99": 212.3199999332428 + }, + "combine": { + "p50": 178.14399302005768, + "p90": 181.92000687122345, + "p95": 183.96799266338348, + "p99": 190.20800292491913 + }, + "roundtrip": { + "p50": 331.87198638916016, + "p90": 337.21598982810974, + "p95": 338.81598711013794, + "p99": 342.3359990119934 + }, + "isolatedSum": { + "p50": 373.50399792194366, + "p90": 383.04001092910767, + "p95": 388.2559984922409, + "p99": 402.5280028581619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 264.6400034427643, + "p90": 269.9519991874695, + "p95": 272.3200023174286, + "p99": 281.792014837265 + }, + "combine": { + "p50": 287.84000873565674, + "p90": 292.5119996070862, + "p95": 295.3599989414215, + "p99": 298.3680069446564 + }, + "roundtrip": { + "p50": 515.0399804115295, + "p90": 522.0800042152405, + "p95": 524.4799852371216, + "p99": 527.679979801178 + }, + "isolatedSum": { + "p50": 552.480012178421, + "p90": 562.4639987945557, + "p95": 567.6800012588501, + "p99": 580.1600217819214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 402.24000811576843, + "p90": 409.9839925765991, + "p95": 411.9360148906708, + "p99": 417.7280068397522 + }, + "combine": { + "p50": 476.7040014266968, + "p90": 483.8080108165741, + "p95": 487.0400130748749, + "p99": 493.8560128211975 + }, + "roundtrip": { + "p50": 837.119996547699, + "p90": 847.7439880371094, + "p95": 851.3919711112976, + "p99": 942.6239728927612 + }, + "isolatedSum": { + "p50": 878.9440095424652, 
+ "p90": 893.7920033931732, + "p95": 898.9760279655457, + "p99": 911.5840196609497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 669.75998878479, + "p90": 676.8320202827454, + "p95": 679.6479821205139, + "p99": 686.4320039749146 + }, + "combine": { + "p50": 853.1519770622253, + "p90": 862.9440069198608, + "p95": 866.6560053825378, + "p99": 874.7519850730896 + }, + "roundtrip": { + "p50": 1483.2639694213867, + "p90": 1493.056058883667, + "p95": 1497.0879554748535, + "p99": 1506.719946861267 + }, + "isolatedSum": { + "p50": 1522.9119658470154, + "p90": 1539.7760272026062, + "p95": 1546.3039875030518, + "p99": 1561.1839890480042 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b599d57c", + "identity": "h100|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_ef4af798", + "comparisonKey": "8cfd8a15095fecde", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:15.168720+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 
5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 147.2640037536621, + "p90": 152.67199277877808, + "p95": 154.78399395942688, + "p99": 159.0079963207245 + }, + "combine": { + "p50": 106.1440035700798, + "p90": 111.39199882745743, + "p95": 112.47999966144562, + "p99": 119.00799721479416 + }, + "roundtrip": { + "p50": 221.82400524616241, + "p90": 227.26400196552277, + "p95": 229.15199398994446, + "p99": 234.8479926586151 + }, + "isolatedSum": { + "p50": 253.4080073237419, + "p90": 264.0639916062355, + "p95": 267.2639936208725, + "p99": 278.01599353551865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 170.9440052509308, + "p90": 175.20000040531158, + "p95": 176.5120029449463, + "p99": 182.5920045375824 + }, + "combine": { + "p50": 137.05599308013916, + "p90": 139.55199718475342, + "p95": 140.57600498199463, + "p99": 145.4080045223236 + }, + "roundtrip": { + "p50": 265.82399010658264, + "p90": 270.687997341156, + "p95": 271.84000611305237, + "p99": 276.06400847435 + }, + "isolatedSum": { + "p50": 307.99999833106995, + "p90": 314.751997590065, + "p95": 317.0880079269409, + "p99": 328.000009059906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 209.88799631595612, + "p90": 215.07200598716736, + "p95": 216.86400473117828, + "p99": 220.64000368118286 + }, + "combine": { + "p50": 201.82399451732635, + "p90": 207.2959989309311, + "p95": 208.51199328899384, + "p99": 433.6639940738678 + }, + "roundtrip": { + "p50": 367.5200045108795, + "p90": 372.9279935359955, + "p95": 374.9760091304779, + "p99": 379.5520067214966 + }, + "isolatedSum": { + "p50": 411.71199083328247, + "p90": 422.36800491809845, + "p95": 425.3759980201721, + "p99": 654.3039977550507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 288.06400299072266, + "p90": 294.07998919487, + "p95": 296.671986579895, + "p99": 308.0959916114807 + }, + "combine": { + "p50": 311.19999289512634, + "p90": 316.8320059776306, + "p95": 318.30400228500366, + 
"p99": 323.39200377464294 + }, + "roundtrip": { + "p50": 560.5760216712952, + "p90": 566.8799877166748, + "p95": 569.3439841270447, + "p99": 575.2320289611816 + }, + "isolatedSum": { + "p50": 599.263995885849, + "p90": 610.9119951725006, + "p95": 614.9759888648987, + "p99": 631.4879953861237 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 450.23998618125916, + "p90": 459.1040015220642, + "p95": 462.3039960861206, + "p99": 473.02401065826416 + }, + "combine": { + "p50": 525.4080295562744, + "p90": 532.7360033988953, + "p95": 535.1359844207764, + "p99": 541.6640043258667 + }, + "roundtrip": { + "p50": 936.2559914588928, + "p90": 946.6879963874817, + "p95": 949.7600197792053, + "p99": 955.51997423172 + }, + "isolatedSum": { + "p50": 975.6480157375336, + "p90": 991.8400049209595, + "p95": 997.439980506897, + "p99": 1014.6880149841309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 788.1600260734558, + "p90": 806.8159818649292, + "p95": 815.1040077209473, + "p99": 828.0640244483948 + }, + "combine": { + "p50": 942.0480132102966, + "p90": 951.1039853096008, + "p95": 953.6319971084595, + "p99": 960.2239727973938 + }, + "roundtrip": { + "p50": 1684.0319633483887, + "p90": 1699.9679803848267, + "p95": 1705.407977104187, + "p99": 1713.696002960205 + }, + "isolatedSum": { + "p50": 1730.2080392837524, + "p90": 1757.91996717453, + "p95": 1768.7360048294067, + "p99": 1788.2879972457886 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3c5e498e", + "identity": "h100|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ef4af798", + "comparisonKey": "fbfbaaa4cc052b4a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:18.811892+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 151.8400013446808, + "p90": 156.89599514007568, + "p95": 158.720001578331, + "p99": 165.95199704170227 + }, + "combine": { + "p50": 113.43999952077866, + "p90": 117.3119992017746, + "p95": 119.74400281906128, + "p99": 122.14399874210358 + }, + "roundtrip": { + "p50": 229.98400032520294, + "p90": 235.3920042514801, + "p95": 238.11200261116028, + "p99": 247.0719963312149 + }, + "isolatedSum": { + "p50": 265.28000086545944, + "p90": 274.2079943418503, + "p95": 278.4640043973923, + "p99": 288.09599578380585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 181.11999332904816, + "p90": 193.05600225925446, + "p95": 196.76800072193146, + "p99": 206.62400126457214 + }, + "combine": { + "p50": 149.9519944190979, + "p90": 154.40000593662262, + "p95": 155.32800555229187, + "p99": 159.5200002193451 + }, + "roundtrip": { + "p50": 285.69599986076355, + "p90": 291.6480004787445, + "p95": 293.11999678611755, + "p99": 300.0960052013397 + }, + "isolatedSum": { + "p50": 331.07198774814606, + "p90": 347.4560081958771, + "p95": 352.0960062742233, + "p99": 366.14400148391724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + 
"p50": 225.75999796390533, + "p90": 230.97600042819977, + "p95": 232.80000686645508, + "p99": 236.67199909687042 + }, + "combine": { + "p50": 219.35999393463135, + "p90": 224.83199834823608, + "p95": 226.20800137519836, + "p99": 228.7999987602234 + }, + "roundtrip": { + "p50": 405.5359959602356, + "p90": 411.16800904273987, + "p95": 413.6959910392761, + "p99": 417.6639914512634 + }, + "isolatedSum": { + "p50": 445.1199918985367, + "p90": 455.80799877643585, + "p95": 459.00800824165344, + "p99": 465.4719978570938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 318.7839984893799, + "p90": 324.22399520874023, + "p95": 326.01600885391235, + "p99": 332.41599798202515 + }, + "combine": { + "p50": 342.0160114765167, + "p90": 347.3280072212219, + "p95": 348.5119938850403, + "p99": 353.1840145587921 + }, + "roundtrip": { + "p50": 623.2960224151611, + "p90": 632.1280002593994, + "p95": 635.968029499054, + "p99": 646.6559767723083 + }, + "isolatedSum": { + "p50": 660.8000099658966, + "p90": 671.5520024299622, + "p95": 674.5280027389526, + "p99": 685.6000125408173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 508.7040066719055, + "p90": 519.3920135498047, + "p95": 523.2959985733032, + "p99": 532.2880148887634 + }, + "combine": { + "p50": 587.1999859809875, + "p90": 595.1679944992065, + "p95": 597.760021686554, + "p99": 604.0639877319336 + }, + "roundtrip": { + "p50": 1055.8079481124878, + "p90": 1066.4639472961426, + "p95": 
1070.847988128662, + "p99": 1244.9920177459717 + }, + "isolatedSum": { + "p50": 1095.903992652893, + "p90": 1114.5600080490112, + "p95": 1121.0560202598572, + "p99": 1136.352002620697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 893.7600255012512, + "p90": 911.8720293045044, + "p95": 915.8400297164917, + "p99": 927.7439713478088 + }, + "combine": { + "p50": 1067.039966583252, + "p90": 1074.3680000305176, + "p95": 1077.888011932373, + "p99": 1083.456039428711 + }, + "roundtrip": { + "p50": 1913.7920141220093, + "p90": 1928.9920330047607, + "p95": 1935.1359605789185, + "p99": 1964.8000001907349 + }, + "isolatedSum": { + "p50": 1960.7999920845032, + "p90": 1986.240029335022, + "p95": 1993.7280416488647, + "p99": 2011.2000107765198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bff99c63", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_b521aff0", + "comparisonKey": "4f16a23c02cdc2c5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:37.815485+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.79199516773224, + "p90": 165.69599509239197, + "p95": 167.4879938364029, + "p99": 173.567995429039 + }, + "combine": { + "p50": 120.35199999809265, + "p90": 122.49600142240524, + "p95": 123.29600006341934, + "p99": 128.9920061826706 + }, + "roundtrip": { + "p50": 240.22400379180908, + "p90": 244.35199797153473, + "p95": 246.14399671554565, + "p99": 250.46399235725403 + }, + "isolatedSum": { + "p50": 282.1439951658249, + "p90": 288.1919965147972, + "p95": 290.78399389982224, + "p99": 302.5600016117096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + 
"combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.62400543689728, + "p90": 191.3599967956543, + "p95": 193.31200420856476, + "p99": 199.5840072631836 + }, + "combine": { + "p50": 163.7759953737259, + "p90": 167.93599724769592, + "p95": 169.27999258041382, + "p99": 179.1040003299713 + }, + "roundtrip": { + "p50": 305.4719865322113, + "p90": 309.6640110015869, + "p95": 311.0080063343048, + "p99": 317.6000118255615 + }, + "isolatedSum": { + "p50": 350.40000081062317, + "p90": 359.2959940433502, + "p95": 362.5919967889786, + "p99": 378.6880075931549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 237.98400163650513, + "p90": 243.26400458812714, + "p95": 245.69599330425262, + "p99": 251.8720030784607 + }, + "combine": { + "p50": 241.5039986371994, + "p90": 244.89599466323853, + "p95": 246.20799720287323, + "p99": 251.00800395011902 + }, + "roundtrip": { + "p50": 441.43998622894287, + "p90": 446.6879963874817, + "p95": 448.96000623703003, + "p99": 452.35198736190796 + }, + "isolatedSum": { + "p50": 479.48800027370453, + "p90": 488.15999925136566, + "p95": 491.90399050712585, + "p99": 502.8800070285797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 346.72001004219055, + "p90": 353.5679876804352, + "p95": 357.7280044555664, + "p99": 
416.0960018634796 + }, + "combine": { + "p50": 377.0880103111267, + "p90": 382.56001472473145, + "p95": 383.93598794937134, + "p99": 387.1679902076721 + }, + "roundtrip": { + "p50": 686.303973197937, + "p90": 694.4000124931335, + "p95": 698.8480091094971, + "p99": 945.5040097236633 + }, + "isolatedSum": { + "p50": 723.8080203533173, + "p90": 736.1280024051666, + "p95": 741.6639924049377, + "p99": 803.2639920711517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 568.8959956169128, + "p90": 582.3040008544922, + "p95": 587.6479744911194, + "p99": 601.9840240478516 + }, + "combine": { + "p50": 648.7680077552795, + "p90": 659.2640280723572, + "p95": 663.4560227394104, + "p99": 672.8960275650024 + }, + "roundtrip": { + "p50": 1172.6399660110474, + "p90": 1184.7679615020752, + "p95": 1189.9199485778809, + "p99": 1200.3519535064697 + }, + "isolatedSum": { + "p50": 1217.6640033721924, + "p90": 1241.5680289268494, + "p95": 1251.1039972305298, + "p99": 1274.880051612854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1036.8640422821045, + "p90": 1060.096025466919, + "p95": 1067.7759647369385, + "p99": 1087.7759456634521 + }, + "combine": { + "p50": 1176.7359972000122, + "p90": 1187.648057937622, + "p95": 1191.6799545288086, + "p99": 1282.3679447174072 + }, + "roundtrip": { + "p50": 2160.320043563843, + "p90": 2177.248001098633, + "p95": 2184.3841075897217, + "p99": 2206.048011779785 + }, + "isolatedSum": { + "p50": 
2213.6000394821167, + "p90": 2247.744083404541, + "p95": 2259.455919265747, + "p99": 2370.1438903808594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4ed336fb", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ef4af798", + "comparisonKey": "3465e78d40901e38", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:36.643666+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.85599565505981, + "p90": 165.66400229930878, + "p95": 167.77600347995758, + "p99": 177.66399681568146 + }, + "combine": { + "p50": 120.38400024175644, + "p90": 122.40000069141388, + "p95": 123.29600006341934, + "p99": 128.38399410247803 + }, + "roundtrip": { + "p50": 241.56799912452698, + "p90": 245.9840029478073, + "p95": 248.48000705242157, + "p99": 283.58399868011475 + }, + "isolatedSum": { + "p50": 282.23999589681625, + "p90": 288.06400299072266, + "p95": 291.0720035433769, + "p99": 306.0479909181595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 188.09600174427032, + "p90": 194.04800236225128, + "p95": 197.2160041332245, + "p99": 227.13600099086761 + }, + "combine": { + "p50": 162.78399527072906, + "p90": 167.07199811935425, + "p95": 169.44000124931335, + "p99": 173.3119934797287 + }, + "roundtrip": { + "p50": 304.9919903278351, + "p90": 309.7279965877533, + "p95": 311.13600730895996, + "p99": 315.0080144405365 + }, + "isolatedSum": { + "p50": 350.8799970149994, + "p90": 361.12000048160553, + "p95": 366.65600538253784, + "p99": 400.4479944705963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.8480007648468, + "p90": 243.6159998178482, + "p95": 246.75199389457703, + "p99": 288.8000011444092 + }, + "combine": { + "p50": 239.84000086784363, + "p90": 246.17600440979004, + "p95": 251.71199440956116, + "p99": 280.64000606536865 + }, + "roundtrip": { + "p50": 441.9519901275635, + "p90": 448.5119879245758, + "p95": 463.6160135269165, + "p99": 896.1920142173767 + }, + "isolatedSum": { + "p50": 478.68800163269043, + "p90": 489.79200422763824, + "p95": 498.4639883041382, + "p99": 569.4400072097778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 347.03999757766724, + "p90": 352.1600067615509, + "p95": 355.23200035095215, + "p99": 400.5120098590851 + }, + "combine": { + "p50": 376.5760064125061, + "p90": 382.78400897979736, + "p95": 384.3199908733368, + "p99": 388.12801241874695 + }, + "roundtrip": { + "p50": 685.2800250053406, + "p90": 692.4480199813843, + "p95": 695.2639818191528, + "p99": 735.9039783477783 + }, + "isolatedSum": { + "p50": 723.6160039901733, + "p90": 734.9440157413483, + "p95": 739.5519912242889, + "p99": 788.640022277832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 569.8879957199097, + "p90": 579.9360275268555, + "p95": 583.6160182952881, + "p99": 592.8959846496582 + }, + "combine": { + "p50": 647.0080018043518, + "p90": 655.5200219154358, + 
"p95": 658.3359837532043, + "p99": 679.2960166931152 + }, + "roundtrip": { + "p50": 1173.7600564956665, + "p90": 1185.7600212097168, + "p95": 1189.8880004882812, + "p99": 1195.904016494751 + }, + "isolatedSum": { + "p50": 1216.8959975242615, + "p90": 1235.4560494422913, + "p95": 1241.9520020484924, + "p99": 1272.1920013427734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1034.7199440002441, + "p90": 1057.695984840393, + "p95": 1064.96000289917, + "p99": 1195.2639818191528 + }, + "combine": { + "p50": 1173.9519834518433, + "p90": 1184.5120191574097, + "p95": 1187.2320175170898, + "p99": 1196.7040300369263 + }, + "roundtrip": { + "p50": 2158.6880683898926, + "p90": 2177.9839992523193, + "p95": 2183.232069015503, + "p99": 2199.4879245758057 + }, + "isolatedSum": { + "p50": 2208.6719274520874, + "p90": 2242.2080039978027, + "p95": 2252.1920204162598, + "p99": 2391.968011856079 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c4a3d708", + "identity": "h100|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_ef4af798", + "comparisonKey": "7c1d7201338cff0a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:18.038337+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.30399906635284, + "p90": 166.4319932460785, + "p95": 167.87199676036835, + "p99": 171.6800034046173 + }, + "combine": { + "p50": 120.4800009727478, + "p90": 122.23999947309494, + "p95": 123.32800030708313, + "p99": 128.92800569534302 + }, + "roundtrip": { + "p50": 241.4720058441162, + "p90": 246.07999622821808, + "p95": 247.8400021791458, + "p99": 255.3279995918274 + }, + "isolatedSum": { + "p50": 282.78400003910065, + "p90": 288.67199271917343, + 
"p95": 291.1999970674515, + "p99": 300.6080090999603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 186.8479996919632, + "p90": 190.11199474334717, + "p95": 192.51200556755066, + "p99": 197.1520036458969 + }, + "combine": { + "p50": 163.2000058889389, + "p90": 167.67999529838562, + "p95": 168.99199783802032, + "p99": 171.2000072002411 + }, + "roundtrip": { + "p50": 310.8479976654053, + "p90": 314.7200047969818, + "p95": 316.73601269721985, + "p99": 389.9199962615967 + }, + "isolatedSum": { + "p50": 350.0480055809021, + "p90": 357.7919900417328, + "p95": 361.504003405571, + "p99": 368.352010846138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.58399891853333, + "p90": 243.45600605010986, + "p95": 245.82399427890778, + "p99": 249.53599274158478 + }, + "combine": { + "p50": 242.0479953289032, + "p90": 246.20799720287323, + "p95": 247.55200743675232, + "p99": 251.52000784873962 + }, + "roundtrip": { + "p50": 441.6320025920868, + "p90": 446.8800127506256, + "p95": 448.5760033130646, + "p99": 451.9999921321869 + }, + "isolatedSum": { + "p50": 481.6319942474365, + "p90": 489.6640032529831, + "p95": 493.3760017156601, + "p99": 501.0560005903244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 
8192, + "dispatch": { + "p50": 347.8719890117645, + "p90": 353.5360097885132, + "p95": 355.648010969162, + "p99": 365.6960129737854 + }, + "combine": { + "p50": 380.8319866657257, + "p90": 386.1120045185089, + "p95": 388.7679874897003, + "p99": 393.1199908256531 + }, + "roundtrip": { + "p50": 693.120002746582, + "p90": 700.9279727935791, + "p95": 702.9759883880615, + "p99": 711.3919854164124 + }, + "isolatedSum": { + "p50": 728.7039756774902, + "p90": 739.6480143070221, + "p95": 744.4159984588623, + "p99": 758.8160037994385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 562.2400045394897, + "p90": 572.4480152130127, + "p95": 576.4480233192444, + "p99": 588.9599919319153 + }, + "combine": { + "p50": 641.1200165748596, + "p90": 651.8080234527588, + "p95": 656.5759778022766, + "p99": 665.8239960670471 + }, + "roundtrip": { + "p50": 1161.3759994506836, + "p90": 1170.5280542373657, + "p95": 1174.2080450057983, + "p99": 1179.9039840698242 + }, + "isolatedSum": { + "p50": 1203.3600211143494, + "p90": 1224.2560386657715, + "p95": 1233.024001121521, + "p99": 1254.7839879989624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1034.7199440002441, + "p90": 1057.2479963302612, + "p95": 1062.9119873046875, + "p99": 1075.7759809494019 + }, + "combine": { + "p50": 1151.7759561538696, + "p90": 1159.3600511550903, + "p95": 1161.087989807129, + "p99": 1166.1440134048462 + }, + "roundtrip": { + "p50": 2130.5599212646484, + "p90": 
2150.5279541015625, + "p95": 2156.9600105285645, + "p99": 2166.879892349243 + }, + "isolatedSum": { + "p50": 2186.4959001541138, + "p90": 2216.6080474853516, + "p95": 2223.9999771118164, + "p99": 2241.919994354248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d53dd93e", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h100_74591c48", + "comparisonKey": "bf3e7ee72b605271", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:20.480201+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 178.24000120162964, + "p90": 182.0800006389618, + "p95": 183.4239959716797, + "p99": 188.7039989233017 + }, + "combine": { + "p50": 139.93600010871887, + "p90": 145.75999975204468, + "p95": 146.7519998550415, + "p99": 148.67199957370758 + }, + "roundtrip": { + "p50": 277.3439884185791, + "p90": 283.26401114463806, + "p95": 284.2240035533905, + "p99": 289.66400027275085 + }, + "isolatedSum": { + "p50": 318.1760013103485, + "p90": 327.84000039100647, + "p95": 330.1759958267212, + "p99": 337.3759984970093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 220.38400173187256, + "p90": 224.31999444961548, + "p95": 226.0800004005432, + "p99": 230.880007147789 + }, + "combine": { + "p50": 193.88799369335175, + "p90": 198.14400374889374, + "p95": 199.072003364563, + "p99": 202.43200659751892 + }, + "roundtrip": { + "p50": 372.0000088214874, + "p90": 376.3520121574402, + "p95": 378.2399892807007, + "p99": 381.21598958969116 + }, + "isolatedSum": { + "p50": 414.2719954252243, + "p90": 422.4639981985092, + "p95": 425.1520037651062, + "p99": 433.3120137453079 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 311.1039996147156, + "p90": 315.3280019760132, + "p95": 316.73601269721985, + "p99": 321.02400064468384 + }, + "combine": { + "p50": 286.9440019130707, + "p90": 291.51999950408936, + "p95": 292.60799288749695, + "p99": 294.94398832321167 + }, + "roundtrip": { + "p50": 558.8480234146118, + "p90": 564.5120143890381, + "p95": 567.1359896659851, + "p99": 838.3039832115173 + }, + "isolatedSum": { + "p50": 598.0480015277863, + "p90": 606.8480014801025, + "p95": 609.3440055847168, + "p99": 615.9679889678955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 489.47200179100037, + "p90": 495.13599276542664, + "p95": 497.1199929714203, + "p99": 505.5360198020935 + }, + "combine": { + "p50": 477.4079918861389, + "p90": 482.91200399398804, + "p95": 484.47999358177185, + "p99": 487.8399968147278 + }, + "roundtrip": { + "p50": 927.7439713478088, + "p90": 934.0800046920776, + "p95": 936.4799857139587, + "p99": 940.2559995651245 + }, + "isolatedSum": { + "p50": 966.8799936771393, + "p90": 978.0479967594147, + "p95": 981.5999865531921, + "p99": 993.3760166168213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 845.2159762382507, + "p90": 885.6959939002991, + "p95": 888.4159922599792, + "p99": 
893.6319947242737 + }, + "combine": { + "p50": 863.8399839401245, + "p90": 876.8960237503052, + "p95": 879.4559836387634, + "p99": 885.1839900016785 + }, + "roundtrip": { + "p50": 1665.1840209960938, + "p90": 1700.1919746398926, + "p95": 1706.272006034851, + "p99": 1712.3520374298096 + }, + "isolatedSum": { + "p50": 1709.0559601783752, + "p90": 1762.5920176506042, + "p95": 1767.8719758987427, + "p99": 1778.8159847259521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1548.2239723205566, + "p90": 1557.4079751968384, + "p95": 1560.3200197219849, + "p99": 1563.8400316238403 + }, + "combine": { + "p50": 1601.472020149231, + "p90": 1607.6799631118774, + "p95": 1609.5679998397827, + "p99": 1613.6640310287476 + }, + "roundtrip": { + "p50": 3113.568067550659, + "p90": 3124.608039855957, + "p95": 3127.6159286499023, + "p99": 3134.079933166504 + }, + "isolatedSum": { + "p50": 3149.6959924697876, + "p90": 3165.087938308716, + "p95": 3169.8880195617676, + "p99": 3177.504062652588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56a8d5a4", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h100_2707bc2b", + "comparisonKey": "3328d1e3abe251fb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:10.377967+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": 
"normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 138.20800185203552, + "p90": 183.03999304771423, + "p95": 185.66399812698364, + "p99": 190.40000438690186 + }, + "combine": { + "p50": 89.31200206279755, + "p90": 105.3759977221489, + "p95": 106.1440035700798, + "p99": 108.35199803113937 + }, + "roundtrip": { + "p50": 202.4960070848465, + "p90": 
262.1760070323944, + "p95": 264.384001493454, + "p99": 268.5120105743408 + }, + "isolatedSum": { + "p50": 227.52000391483307, + "p90": 288.41599076986313, + "p95": 291.80800169706345, + "p99": 298.75200241804123 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 184.7359985113144, + "p90": 196.51199877262115, + "p95": 199.16799664497375, + "p99": 207.07200467586517 + }, + "combine": { + "p50": 145.05599439144135, + "p90": 148.19200336933136, + "p95": 153.18399667739868, + "p99": 155.64799308776855 + }, + "roundtrip": { + "p50": 300.03198981285095, + "p90": 311.2320005893707, + "p95": 313.82399797439575, + "p99": 321.0560083389282 + }, + "isolatedSum": { + "p50": 329.79199290275574, + "p90": 344.7040021419525, + "p95": 352.35199332237244, + "p99": 362.7199977636337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 239.68000710010529, + "p90": 280.67201375961304, + "p95": 283.26401114463806, + "p99": 297.9840040206909 + }, + "combine": { + "p50": 302.4959862232208, + "p90": 318.65599751472473, + "p95": 320.92800736427307, + "p99": 323.3279883861542 + }, + "roundtrip": { + "p50": 499.64800477027893, + "p90": 538.3679866790771, + "p95": 540.831983089447, + "p99": 733.4079742431641 + }, + "isolatedSum": { + "p50": 542.1759933233261, + "p90": 599.3280112743378, + "p95": 604.1920185089111, + "p99": 621.3119924068451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 
2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a36f9715", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h100_6cf5e0a6", + "comparisonKey": "3c8fb05c3e6ceb18", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:10.551488+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 142.56000518798828, + "p90": 146.65600657463074, + "p95": 148.92800152301788, + "p99": 152.54400670528412 + }, + "combine": { + "p50": 97.98400104045868, + "p90": 103.42399775981903, + "p95": 104.44799810647964, + "p99": 106.97600245475769 + }, + "roundtrip": { + "p50": 213.44000101089478, + "p90": 219.4879949092865, + "p95": 222.33599424362183, + "p99": 229.44000363349915 + }, + "isolatedSum": { + "p50": 240.54400622844696, + "p90": 250.08000433444977, + "p95": 253.37599962949753, + "p99": 259.5200091600418 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.13600540161133, + "p90": 168.06399822235107, + "p95": 169.0559983253479, + "p99": 174.14399981498718 + }, + "combine": { + "p50": 119.77600306272507, + "p90": 121.91999703645706, + "p95": 122.65600264072418, + "p99": 127.71199643611908 + }, + "roundtrip": { + "p50": 243.23199689388275, + "p90": 247.5840002298355, + "p95": 249.31199848651886, + "p99": 255.0080120563507 + }, + "isolatedSum": { + "p50": 282.9120084643364, + "p90": 289.98399525880814, + "p95": 291.7120009660721, + "p99": 301.85599625110626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.84800100326538, + "p90": 204.03200387954712, + "p95": 224.0000069141388, + "p99": 
524.1600275039673 + }, + "combine": { + "p50": 155.10399639606476, + "p90": 161.43999993801117, + "p95": 162.08000481128693, + "p99": 164.32000696659088 + }, + "roundtrip": { + "p50": 311.71199679374695, + "p90": 316.6399896144867, + "p95": 318.11198592185974, + "p99": 391.7120099067688 + }, + "isolatedSum": { + "p50": 349.95199739933014, + "p90": 365.4720038175583, + "p95": 386.0800117254257, + "p99": 688.4800344705582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 250.97599625587463, + "p90": 256.03199005126953, + "p95": 257.6960027217865, + "p99": 263.8719975948334 + }, + "combine": { + "p50": 234.9119931459427, + "p90": 240.54400622844696, + "p95": 241.95200204849243, + "p99": 246.2719976902008 + }, + "roundtrip": { + "p50": 450.080007314682, + "p90": 455.00800013542175, + "p95": 456.959992647171, + "p99": 462.0159864425659 + }, + "isolatedSum": { + "p50": 485.8879894018173, + "p90": 496.5759962797165, + "p95": 499.64800477027893, + "p99": 510.1439952850342 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 374.752014875412, + "p90": 405.5039882659912, + "p95": 409.08798575401306, + "p99": 414.3039882183075 + }, + "combine": { + "p50": 384.16001200675964, + "p90": 390.3680145740509, + "p95": 393.66400241851807, + "p99": 400.38400888442993 + }, + "roundtrip": { + "p50": 713.8879895210266, + "p90": 721.8239903450012, + "p95": 725.4400253295898, + "p99": 750.6880164146423 + }, + "isolatedSum": { + "p50": 758.9120268821716, + "p90": 
795.8720028400421, + "p95": 802.7519881725311, + "p99": 814.6879971027374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 619.7760105133057, + "p90": 629.2160153388977, + "p95": 632.7360272407532, + "p99": 687.1039867401123 + }, + "combine": { + "p50": 658.3679914474487, + "p90": 665.2160286903381, + "p95": 667.3920154571533, + "p99": 671.8720197677612 + }, + "roundtrip": { + "p50": 1240.8640384674072, + "p90": 1249.6000528335571, + "p95": 1252.5759935379028, + "p99": 1288.9280319213867 + }, + "isolatedSum": { + "p50": 1278.1440019607544, + "p90": 1294.4320440292358, + "p95": 1300.1280426979065, + "p99": 1358.9760065078735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ecfcf95e", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h100_bd3f0c52", + "comparisonKey": "14500fcb2a1e94e5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:14.642036+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, 
+ "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 165.66400229930878, + "p90": 171.07200622558594, + "p95": 173.88799786567688, + "p99": 617.3440217971802 + }, + "combine": { + "p50": 130.94399869441986, + "p90": 132.51200318336487, + "p95": 136.35200262069702, + "p99": 139.55199718475342 + }, + "roundtrip": { + "p50": 263.2000148296356, + "p90": 268.0320143699646, + "p95": 269.47200298309326, + "p99": 274.4640111923218 + }, + "isolatedSum": { + "p50": 296.60800099372864, + "p90": 303.5840094089508, + "p95": 310.2400004863739, + "p99": 756.8960189819336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 203.93599569797516, + "p90": 211.0079973936081, + "p95": 212.51200139522552, + "p99": 217.3440009355545 + }, + "combine": { + "p50": 186.65599822998047, + "p90": 190.46400487422943, + "p95": 192.06400215625763, + "p99": 196.60800695419312 + }, + "roundtrip": { + "p50": 350.75199604034424, + "p90": 355.3920090198517, + "p95": 356.83199763298035, + "p99": 359.6799969673157 + }, + "isolatedSum": { + "p50": 390.5919939279556, + "p90": 401.4720022678375, + "p95": 404.57600355148315, + "p99": 413.9520078897476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 276.95998549461365, + "p90": 283.488005399704, + "p95": 285.504013299942, + "p99": 289.15199637413025 + }, + "combine": { + "p50": 281.66401386260986, + "p90": 286.4319980144501, + "p95": 288.09601068496704, + "p99": 293.88800263404846 + }, + "roundtrip": { + "p50": 523.967981338501, + "p90": 529.151976108551, + "p95": 530.784010887146, + "p99": 534.1119766235352 + }, + "isolatedSum": { + "p50": 558.6239993572235, + "p90": 569.920003414154, + "p95": 573.6000239849091, + "p99": 583.0399990081787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 426.2079894542694, + "p90": 433.4399998188019, + "p95": 435.2000057697296, + "p99": 517.9200172424316 + }, + "combine": { + "p50": 468.32001209259033, + "p90": 473.63200783729553, + "p95": 
475.16798973083496, + "p99": 479.0720045566559 + }, + "roundtrip": { + "p50": 859.4880104064941, + "p90": 866.0159707069397, + "p95": 867.8719997406006, + "p99": 872.4480271339417 + }, + "isolatedSum": { + "p50": 894.5280015468597, + "p90": 907.0720076560974, + "p95": 910.3679955005646, + "p99": 996.9920217990875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 748.6079931259155, + "p90": 758.8800191879272, + "p95": 766.048014163971, + "p99": 990.336000919342 + }, + "combine": { + "p50": 836.6400003433228, + "p90": 843.8720107078552, + "p95": 846.9120264053345, + "p99": 864.8959994316101 + }, + "roundtrip": { + "p50": 1545.632004737854, + "p90": 1553.1519651412964, + "p95": 1555.7760000228882, + "p99": 1559.8399639129639 + }, + "isolatedSum": { + "p50": 1585.2479934692383, + "p90": 1602.7520298957825, + "p95": 1612.9600405693054, + "p99": 1855.2320003509521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1406.9119691848755, + "p90": 1420.5119609832764, + "p95": 1424.415946006775, + "p99": 1433.0559968948364 + }, + "combine": { + "p50": 1550.879955291748, + "p90": 1557.6640367507935, + "p95": 1559.8399639129639, + "p99": 1564.4479990005493 + }, + "roundtrip": { + "p50": 2920.703887939453, + "p90": 2931.4560890197754, + "p95": 2934.688091278076, + "p99": 2945.472002029419 + }, + "isolatedSum": { + "p50": 2957.7919244766235, + "p90": 2978.17599773407, + "p95": 2984.2559099197388, + "p99": 2997.5039958953857 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d62add15", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h100_7fb780dc", + "comparisonKey": "861a07c19fcf2efb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:10.045583+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 
1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.6559942960739, + "p90": 166.87999665737152, + "p95": 168.60799491405487, + "p99": 172.8000044822693 + }, + "combine": { + "p50": 120.57600170373917, + "p90": 122.78400361537933, + "p95": 123.87199699878693, + "p99": 131.16799294948578 + }, + "roundtrip": { + "p50": 243.26400458812714, + "p90": 248.09600412845612, + "p95": 249.91999566555023, + "p99": 252.6719868183136 + }, + "isolatedSum": { + "p50": 283.2319959998131, + "p90": 289.66400027275085, + "p95": 292.4799919128418, + "p99": 303.96799743175507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 187.6479983329773, + "p90": 192.83199310302734, + "p95": 194.11200284957886, + "p99": 199.96799528598785 + }, + "combine": { + "p50": 163.80800306797028, + "p90": 169.8240041732788, + "p95": 171.51999473571777, + "p99": 173.37599396705627 + }, + "roundtrip": { + "p50": 308.0959916114807, + "p90": 312.9599988460541, + "p95": 314.62401151657104, + "p99": 317.56800413131714 + }, + "isolatedSum": { + "p50": 351.45600140094757, + "p90": 362.65599727630615, + "p95": 365.63199758529663, + "p99": 373.3439892530441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 242.2720044851303, + "p90": 247.51999974250793, + "p95": 249.439999461174, + "p99": 256.03199005126953 + }, + "combine": { + "p50": 241.05599522590637, + "p90": 246.20799720287323, + "p95": 247.80799448490143, + "p99": 252.70399451255798 + }, + "roundtrip": { + "p50": 445.3119933605194, + "p90": 450.78399777412415, + "p95": 452.57601141929626, + "p99": 457.0240080356598 + }, + "isolatedSum": { + "p50": 483.3279997110367, + "p90": 493.72799694538116, + "p95": 497.24799394607544, + "p99": 508.7359845638275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 349.7599959373474, + "p90": 354.46399450302124, + "p95": 356.86400532722473, + "p99": 361.31200194358826 + }, + "combine": { + "p50": 382.4000060558319, + "p90": 389.21600580215454, + "p95": 390.56000113487244, + "p99": 394.23999190330505 + }, + "roundtrip": { + "p50": 695.3920125961304, + "p90": 702.0480036735535, + "p95": 704.1919827461243, + "p99": 706.8160176277161 + }, + "isolatedSum": { + "p50": 732.1600019931793, + "p90": 743.6800003051758, + "p95": 747.4240064620972, + "p99": 755.5519938468933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 572.8319883346558, + "p90": 585.4079723358154, + "p95": 588.0640149116516, + "p99": 594.5280194282532 + }, + "combine": { + "p50": 648.4479904174805, + "p90": 
655.5839776992798, + "p95": 657.7280163764954, + "p99": 662.2400283813477 + }, + "roundtrip": { + "p50": 1178.6240339279175, + "p90": 1192.479968070984, + "p95": 1196.0639953613281, + "p99": 1200.2880573272705 + }, + "isolatedSum": { + "p50": 1221.2799787521362, + "p90": 1240.9919500350952, + "p95": 1245.792031288147, + "p99": 1256.7680478096008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1027.0400047302246, + "p90": 1051.103949546814, + "p95": 1056.3520193099976, + "p99": 1083.5200548171997 + }, + "combine": { + "p50": 1157.3760509490967, + "p90": 1166.3039922714233, + "p95": 1168.287992477417, + "p99": 1172.9600429534912 + }, + "roundtrip": { + "p50": 2131.3281059265137, + "p90": 2149.5680809020996, + "p95": 2156.287908554077, + "p99": 2167.2000885009766 + }, + "isolatedSum": { + "p50": 2184.4160556793213, + "p90": 2217.4079418182373, + "p95": 2224.6400117874146, + "p99": 2256.480097770691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3be01792", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h100_9a44fbfe", + "comparisonKey": "07784dba087a0f54", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:14.860059+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 164.06400501728058, + "p90": 169.15200650691986, + "p95": 171.48800194263458, + "p99": 180.7039976119995 + }, + "combine": { + "p50": 121.2799996137619, + "p90": 123.99999797344208, + "p95": 129.37599420547485, + "p99": 132.89600610733032 + }, + "roundtrip": { + "p50": 241.72799289226532, + "p90": 252.3519992828369, + "p95": 256.415992975235, + 
"p99": 269.4079875946045 + }, + "isolatedSum": { + "p50": 285.3440046310425, + "p90": 293.15200448036194, + "p95": 300.86399614810944, + "p99": 313.60000371932983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 188.03200125694275, + "p90": 193.9840018749237, + "p95": 196.383997797966, + "p99": 200.95999538898468 + }, + "combine": { + "p50": 165.6000018119812, + "p90": 170.6240028142929, + "p95": 171.77599668502808, + "p99": 175.4239946603775 + }, + "roundtrip": { + "p50": 307.3279857635498, + "p90": 313.34400177001953, + "p95": 315.64798951148987, + "p99": 322.52800464630127 + }, + "isolatedSum": { + "p50": 353.63200306892395, + "p90": 364.6080046892166, + "p95": 368.1599944829941, + "p99": 376.3839900493622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 243.23199689388275, + "p90": 250.4960000514984, + "p95": 252.79998779296875, + "p99": 259.8719894886017 + }, + "combine": { + "p50": 243.83999407291412, + "p90": 248.51199984550476, + "p95": 250.75200200080872, + "p99": 253.60000133514404 + }, + "roundtrip": { + "p50": 447.2639858722687, + "p90": 453.0879855155945, + "p95": 455.58398962020874, + "p99": 463.1359875202179 + }, + "isolatedSum": { + "p50": 487.0719909667969, + "p90": 499.0079998970032, + "p95": 503.55198979377747, + "p99": 513.4719908237457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 347.680002450943, + "p90": 355.00800609588623, + "p95": 358.815997838974, + "p99": 366.7199909687042 + }, + "combine": { + "p50": 379.93600964546204, + "p90": 386.49600744247437, + "p95": 388.92799615859985, + "p99": 395.4879939556122 + }, + "roundtrip": { + "p50": 689.408004283905, + "p90": 698.9120244979858, + "p95": 701.8880248069763, + "p99": 711.2320065498352 + }, + "isolatedSum": { + "p50": 727.616012096405, + "p90": 741.5040135383606, + "p95": 747.7439939975739, + "p99": 762.2079849243164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 566.0799741744995, + "p90": 574.8800039291382, + "p95": 577.5359869003296, + "p99": 584.8640203475952 + }, + "combine": { + "p50": 638.1440162658691, + "p90": 646.8160152435303, + "p95": 649.2800116539001, + "p99": 681.119978427887 + }, + "roundtrip": { + "p50": 1165.2799844741821, + "p90": 1174.9119758605957, + "p95": 1177.6000261306763, + "p99": 1185.6000423431396 + }, + "isolatedSum": { + "p50": 1204.2239904403687, + "p90": 1221.6960191726685, + "p95": 1226.8159985542297, + "p99": 1265.9839987754822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1045.5360412597656, + "p90": 1076.5759944915771, + "p95": 1087.712049484253, + "p99": 1347.0079898834229 + }, + "combine": { + "p50": 1155.4239988327026, + "p90": 
1163.167953491211, + "p95": 1165.7600402832031, + "p99": 1171.455979347229 + }, + "roundtrip": { + "p50": 2146.8799114227295, + "p90": 2170.2399253845215, + "p95": 2174.623966217041, + "p99": 2183.4559440612793 + }, + "isolatedSum": { + "p50": 2200.9600400924683, + "p90": 2239.743947982788, + "p95": 2253.472089767456, + "p99": 2518.463969230652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c29b0731", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_896e9933", + "comparisonKey": "f08afbc19f2c9ac9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:11.466678+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 168.60799491405487, + "p90": 173.0239987373352, + "p95": 174.17599260807037, + "p99": 178.14399302005768 + }, + "combine": { + "p50": 128.03199887275696, + "p90": 130.62399625778198, + "p95": 131.16799294948578, + "p99": 137.40800321102142 + }, + "roundtrip": { + "p50": 260.44800877571106, + "p90": 264.70398902893066, + "p95": 266.59199595451355, + "p99": 271.36000990867615 + }, + "isolatedSum": { + "p50": 296.63999378681183, + "p90": 303.6479949951172, + "p95": 305.34398555755615, + "p99": 315.5519962310791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 203.23200523853302, + "p90": 206.7520022392273, + "p95": 208.70399475097656, + "p99": 213.50400149822235 + }, + "combine": { + "p50": 179.00800704956055, + "p90": 182.27200210094452, + "p95": 184.2239946126938, + "p99": 186.97600066661835 + }, + "roundtrip": { + "p50": 338.9759957790375, + "p90": 342.78398752212524, + "p95": 344.543993473053, + "p99": 348.86398911476135 + }, + "isolatedSum": { + "p50": 382.24001228809357, + "p90": 
389.0240043401718, + "p95": 392.92798936367035, + "p99": 400.4800021648407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 275.4560112953186, + "p90": 280.38400411605835, + "p95": 282.24000334739685, + "p99": 287.80800104141235 + }, + "combine": { + "p50": 273.8879919052124, + "p90": 279.6480059623718, + "p95": 281.15200996398926, + "p99": 285.0559949874878 + }, + "roundtrip": { + "p50": 510.1119875907898, + "p90": 515.3599977493286, + "p95": 517.2799825668335, + "p99": 519.5840001106262 + }, + "isolatedSum": { + "p50": 549.344003200531, + "p90": 560.0320100784302, + "p95": 563.3920133113861, + "p99": 572.8639960289001 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 416.22400283813477, + "p90": 423.9040017127991, + "p95": 427.13600397109985, + "p99": 770.1759934425354 + }, + "combine": { + "p50": 454.8160135746002, + "p90": 460.09600162506104, + "p95": 461.85600757598877, + "p99": 464.9919867515564 + }, + "roundtrip": { + "p50": 832.7360153198242, + "p90": 839.1039967536926, + "p95": 841.2479758262634, + "p99": 847.1999764442444 + }, + "isolatedSum": { + "p50": 871.040016412735, + "p90": 884.0000033378601, + "p95": 888.9920115470886, + "p99": 1235.1679801940918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 714.0799760818481, + "p90": 729.0880084037781, + "p95": 734.3999743461609, + "p99": 752.7999877929688 + }, + "combine": { + "p50": 812.5439882278442, + "p90": 819.7439908981323, + "p95": 822.1120238304138, + "p99": 828.0320167541504 + }, + "roundtrip": { + "p50": 1485.8880043029785, + "p90": 1495.2640533447266, + "p95": 1498.047947883606, + "p99": 1505.0560235977173 + }, + "isolatedSum": { + "p50": 1526.6239643096924, + "p90": 1548.8319993019104, + "p95": 1556.5119981765747, + "p99": 1580.8320045471191 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1312.4480247497559, + "p90": 1322.8479623794556, + "p95": 1327.6159763336182, + "p99": 1336.7359638214111 + }, + "combine": { + "p50": 1522.655963897705, + "p90": 1533.3759784698486, + "p95": 1551.7760515213013, + "p99": 1611.4879846572876 + }, + "roundtrip": { + "p50": 2795.9680557250977, + "p90": 2807.4240684509277, + "p95": 2811.840057373047, + "p99": 2821.7599391937256 + }, + "isolatedSum": { + "p50": 2835.103988647461, + "p90": 2856.223940849304, + "p95": 2879.3920278549194, + "p99": 2948.2239484786987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02909d9f", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h100_05d96d49", + "comparisonKey": "7f18e5acc5098e71", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:12.792788+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 165.3120070695877, + "p90": 169.3439930677414, + "p95": 171.424001455307, + "p99": 174.75199699401855 + }, + "combine": { + "p50": 122.84799665212631, + "p90": 
130.0799995660782, + "p95": 132.54399597644806, + "p99": 139.29599523544312 + }, + "roundtrip": { + "p50": 253.82399559020996, + "p90": 262.9440128803253, + "p95": 266.7199969291687, + "p99": 276.3519883155823 + }, + "isolatedSum": { + "p50": 288.160003721714, + "p90": 299.4239926338196, + "p95": 303.96799743175507, + "p99": 314.04799222946167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 200.03199577331543, + "p90": 207.23199844360352, + "p95": 208.639994263649, + "p99": 213.6320024728775 + }, + "combine": { + "p50": 169.8240041732788, + "p90": 172.54400253295898, + "p95": 173.08799922466278, + "p99": 177.279993891716 + }, + "roundtrip": { + "p50": 326.7199993133545, + "p90": 331.743985414505, + "p95": 333.5359990596771, + "p99": 337.0879888534546 + }, + "isolatedSum": { + "p50": 369.85599994659424, + "p90": 379.7760009765625, + "p95": 381.72799348831177, + "p99": 390.9119963645935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 271.07200026512146, + "p90": 280.5120050907135, + "p95": 285.0880026817322, + "p99": 299.55199360847473 + }, + "combine": { + "p50": 261.79200410842896, + "p90": 267.2959864139557, + "p95": 268.70399713516235, + "p99": 280.0000011920929 + }, + "roundtrip": { + "p50": 496.5440034866333, + "p90": 502.8480291366577, + "p95": 505.3120255470276, + "p99": 546.8800067901611 + }, + "isolatedSum": { + "p50": 532.8640043735504, + "p90": 547.8079915046692, + "p95": 553.7919998168945, + "p99": 579.5519948005676 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 414.14400935173035, + "p90": 421.9520092010498, + "p95": 426.2079894542694, + "p99": 741.4079904556274 + }, + "combine": { + "p50": 440.5440092086792, + "p90": 446.75201177597046, + "p95": 448.2240080833435, + "p99": 453.44001054763794 + }, + "roundtrip": { + "p50": 816.2559866905212, + "p90": 822.3360180854797, + "p95": 824.3839740753174, + "p99": 828.3519744873047 + }, + "isolatedSum": { + "p50": 854.6880185604095, + "p90": 868.7040209770203, + "p95": 874.4319975376129, + "p99": 1194.8480010032654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 700.3520131111145, + "p90": 710.2720141410828, + "p95": 713.3439779281616, + "p99": 718.1439995765686 + }, + "combine": { + "p50": 791.263997554779, + "p90": 797.1519827842712, + "p95": 799.9359965324402, + "p99": 804.3839931488037 + }, + "roundtrip": { + "p50": 1451.6799449920654, + "p90": 1461.0559940338135, + "p95": 1463.96803855896, + "p99": 1470.2080488204956 + }, + "isolatedSum": { + "p50": 1491.6160106658936, + "p90": 1507.423996925354, + "p95": 1513.2799744606018, + "p99": 1522.5279927253723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
1275.0719785690308, + "p90": 1288.159966468811, + "p95": 1292.672038078308, + "p99": 1328.2239437103271 + }, + "combine": { + "p50": 1469.3440198898315, + "p90": 1477.6320457458496, + "p95": 1480.2559614181519, + "p99": 1485.5999946594238 + }, + "roundtrip": { + "p50": 2705.6639194488525, + "p90": 2716.2559032440186, + "p95": 2719.42400932312, + "p99": 2724.9279022216797 + }, + "isolatedSum": { + "p50": 2744.4159984588623, + "p90": 2765.7920122146606, + "p95": 2772.92799949646, + "p99": 2813.823938369751 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f8c10702", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h100_e4941d05", + "comparisonKey": "b06717eb030d24bf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:09.988999+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.03999722003937, + "p90": 166.6879951953888, + "p95": 169.5680022239685, + "p99": 175.52000284194946 + }, + "combine": { + "p50": 120.7680031657219, + "p90": 122.72000312805176, + "p95": 123.48800152540207, + "p99": 128.63999605178833 + }, + "roundtrip": { + "p50": 241.31199717521667, + "p90": 246.20799720287323, + "p95": 247.77600169181824, + "p99": 252.3519992828369 + }, + "isolatedSum": { + "p50": 283.80800038576126, + "p90": 289.40799832344055, + "p95": 293.0560037493706, + "p99": 304.1599988937378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 192.03199446201324, + "p90": 196.48000597953796, + "p95": 198.97599518299103, + "p99": 203.16800475120544 + }, + "combine": { + "p50": 164.95999693870544, + "p90": 169.72799599170685, + 
"p95": 170.84799706935883, + "p99": 175.10400712490082 + }, + "roundtrip": { + "p50": 311.7760121822357, + "p90": 316.73601269721985, + "p95": 318.4320032596588, + "p99": 322.1760094165802 + }, + "isolatedSum": { + "p50": 356.9919914007187, + "p90": 366.2080019712448, + "p95": 369.82399225234985, + "p99": 378.27201187610626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 245.9840029478073, + "p90": 251.74400210380554, + "p95": 253.53598594665527, + "p99": 264.6079957485199 + }, + "combine": { + "p50": 239.9040013551712, + "p90": 244.32000517845154, + "p95": 245.40799856185913, + "p99": 248.3839988708496 + }, + "roundtrip": { + "p50": 446.8800127506256, + "p90": 452.57601141929626, + "p95": 454.5919895172119, + "p99": 459.7119987010956 + }, + "isolatedSum": { + "p50": 485.8880043029785, + "p90": 496.0640072822571, + "p95": 498.9439845085144, + "p99": 512.9919946193695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 356.4800024032593, + "p90": 361.37598752975464, + "p95": 363.1359934806824, + "p99": 366.87999963760376 + }, + "combine": { + "p50": 381.21598958969116, + "p90": 387.4559998512268, + "p95": 389.3760144710541, + "p99": 394.9759900569916 + }, + "roundtrip": { + "p50": 697.5039839744568, + "p90": 705.0880193710327, + "p95": 708.191990852356, + "p99": 713.919997215271 + }, + "isolatedSum": { + "p50": 737.6959919929504, + "p90": 748.8319873809814, + "p95": 752.5120079517365, + "p99": 761.8559896945953 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 577.2479772567749, + "p90": 587.1040225028992, + "p95": 590.9439921379089, + "p99": 595.3599810600281 + }, + "combine": { + "p50": 653.8559794425964, + "p90": 661.5999937057495, + "p95": 664.1920208930969, + "p99": 714.7200107574463 + }, + "roundtrip": { + "p50": 1192.896008491516, + "p90": 1203.2639980316162, + "p95": 1207.2319984436035, + "p99": 1214.1120433807373 + }, + "isolatedSum": { + "p50": 1231.1039566993713, + "p90": 1248.7040162086487, + "p95": 1255.1360130310059, + "p99": 1310.0799918174744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1052.3840188980103, + "p90": 1073.9200115203857, + "p95": 1079.9360275268555, + "p99": 1091.007947921753 + }, + "combine": { + "p50": 1193.6639547348022, + "p90": 1201.5039920806885, + "p95": 1203.8400173187256, + "p99": 1211.3920450210571 + }, + "roundtrip": { + "p50": 2199.5201110839844, + "p90": 2216.0000801086426, + "p95": 2218.559980392456, + "p99": 2225.6319522857666 + }, + "isolatedSum": { + "p50": 2246.0479736328125, + "p90": 2275.424003601074, + "p95": 2283.776044845581, + "p99": 2302.39999294281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bcaff7b5", + "identity": 
"h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h100_260e03e4", + "comparisonKey": "c2ae52423a6975c0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:36.435950+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 167.35999286174774, + "p90": 171.6800034046173, + "p95": 174.94399845600128, + "p99": 204.3199986219406 + }, + "combine": { + "p50": 129.37599420547485, + "p90": 131.3599944114685, + "p95": 134.88000631332397, + "p99": 136.89599931240082 + }, + "roundtrip": { + "p50": 258.7200105190277, + "p90": 262.65600323677063, + "p95": 263.9999985694885, + "p99": 266.81599020957947 + }, + "isolatedSum": { + "p50": 296.7359870672226, + "p90": 303.0399978160858, + "p95": 309.82400476932526, + "p99": 341.21599793434143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 203.0400037765503, + "p90": 209.24800634384155, + "p95": 210.27199923992157, + "p99": 212.73599565029144 + }, + "combine": { + "p50": 184.60799753665924, + "p90": 188.73600661754608, + "p95": 189.34400379657745, + "p99": 194.36800479888916 + }, + "roundtrip": { + "p50": 344.543993473053, + "p90": 348.7679958343506, + "p95": 350.3679931163788, + "p99": 354.7520041465759 + }, + "isolatedSum": { + "p50": 387.64800131320953, + "p90": 397.98401296138763, + "p95": 399.616003036499, + "p99": 407.1040004491806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 271.232008934021, + "p90": 276.92800760269165, + "p95": 278.75199913978577, + "p99": 281.43998980522156 + }, + "combine": { + "p50": 277.2800028324127, + "p90": 282.24000334739685, + "p95": 
283.32799673080444, + "p99": 286.20800375938416 + }, + "roundtrip": { + "p50": 512.5759840011597, + "p90": 518.0799961090088, + "p95": 519.8400020599365, + "p99": 524.2559909820557 + }, + "isolatedSum": { + "p50": 548.5120117664337, + "p90": 559.1680109500885, + "p95": 562.0799958705902, + "p99": 567.6479935646057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 417.08800196647644, + "p90": 427.4879992008209, + "p95": 430.59200048446655, + "p99": 440.8000111579895 + }, + "combine": { + "p50": 457.40801095962524, + "p90": 465.66399931907654, + "p95": 467.45601296424866, + "p99": 473.82399439811707 + }, + "roundtrip": { + "p50": 835.6480002403259, + "p90": 841.8880105018616, + "p95": 844.0319895744324, + "p99": 849.0880131721497 + }, + "isolatedSum": { + "p50": 874.4960129261017, + "p90": 893.1519985198975, + "p95": 898.0480134487152, + "p99": 914.6240055561066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 713.8559818267822, + "p90": 725.0880002975464, + "p95": 727.840006351471, + "p99": 733.2479953765869 + }, + "combine": { + "p50": 805.791974067688, + "p90": 812.2559785842896, + "p95": 815.1360154151917, + "p99": 820.2239871025085 + }, + "roundtrip": { + "p50": 1477.5359630584717, + "p90": 1487.328052520752, + "p95": 1490.3039932250977, + "p99": 1496.6720342636108 + }, + "isolatedSum": { + "p50": 1519.6479558944702, + "p90": 1537.343978881836, + "p95": 1542.9760217666626, + "p99": 1553.4719824790955 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1313.472032546997, + "p90": 1327.2000551223755, + "p95": 1330.399990081787, + "p99": 1338.1760120391846 + }, + "combine": { + "p50": 1509.6640586853027, + "p90": 1519.10400390625, + "p95": 1522.1120119094849, + "p99": 1530.1439762115479 + }, + "roundtrip": { + "p50": 2786.2401008605957, + "p90": 2798.880100250244, + "p95": 2803.6160469055176, + "p99": 2825.279951095581 + }, + "isolatedSum": { + "p50": 2823.1360912323, + "p90": 2846.3040590286255, + "p95": 2852.512001991272, + "p99": 2868.3199882507324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f850e513", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h100_75843a0a", + "comparisonKey": "c5c1a89fb715f829", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:09.390181+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + 
"routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.9440039396286, + "p90": 167.04000532627106, + "p95": 169.76000368595123, + "p99": 176.92799866199493 + }, + "combine": { + "p50": 121.31199985742569, + "p90": 122.8799968957901, + "p95": 123.96799772977829, + "p99": 134.49600338935852 + }, + "roundtrip": { + "p50": 242.5599992275238, + "p90": 247.0719963312149, + "p95": 249.31199848651886, + "p99": 253.91998887062073 + }, + "isolatedSum": { + "p50": 284.2560037970543, + "p90": 289.92000222206116, + "p95": 293.7280014157295, + "p99": 311.42400205135345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 1, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 190.40000438690186, + "p90": 194.72000002861023, + "p95": 197.34400510787964, + "p99": 201.63199305534363 + }, + "combine": { + "p50": 163.7759953737259, + "p90": 169.21600699424744, + "p95": 170.56000232696533, + "p99": 172.63999581336975 + }, + "roundtrip": { + "p50": 311.0080063343048, + "p90": 316.0319924354553, + "p95": 318.4320032596588, + "p99": 324.0959942340851 + }, + "isolatedSum": { + "p50": 354.17599976062775, + "p90": 363.93600702285767, + "p95": 367.90400743484497, + "p99": 374.2719888687134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 243.42399835586548, + "p90": 249.95200335979462, + "p95": 251.93598866462708, + "p99": 259.64799523353577 + }, + "combine": { + "p50": 243.16799640655518, + "p90": 247.13599681854248, + "p95": 250.30401349067688, + "p99": 254.01601195335388 + }, + "roundtrip": { + "p50": 448.3200013637543, + "p90": 453.69601249694824, + "p95": 455.6480050086975, + "p99": 461.43999695777893 + }, + "isolatedSum": { + "p50": 486.59199476242065, + "p90": 497.0880001783371, + "p95": 502.24000215530396, + "p99": 513.6640071868896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 350.816011428833, + "p90": 356.76801204681396, + "p95": 359.0399920940399, + "p99": 363.0400002002716 + }, + "combine": { + "p50": 377.6960074901581, + "p90": 383.55201482772827, + "p95": 386.27201318740845, + 
"p99": 389.75998759269714 + }, + "roundtrip": { + "p50": 688.9600157737732, + "p90": 696.287989616394, + "p95": 699.1040110588074, + "p99": 704.2880058288574 + }, + "isolatedSum": { + "p50": 728.5120189189911, + "p90": 740.3200268745422, + "p95": 745.3120052814484, + "p99": 752.7999877929688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 563.9039874076843, + "p90": 573.9840269088745, + "p95": 577.2799849510193, + "p99": 582.4639797210693 + }, + "combine": { + "p50": 642.1120166778564, + "p90": 648.9279866218567, + "p95": 650.7200002670288, + "p99": 654.6559929847717 + }, + "roundtrip": { + "p50": 1166.7200326919556, + "p90": 1176.6079664230347, + "p95": 1180.2560091018677, + "p99": 1188.1599426269531 + }, + "isolatedSum": { + "p50": 1206.0160040855408, + "p90": 1222.9120135307312, + "p95": 1227.999985218048, + "p99": 1237.119972705841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1010.6879472732544, + "p90": 1028.3839702606201, + "p95": 1035.4559421539307, + "p99": 1048.7680435180664 + }, + "combine": { + "p50": 1171.5199947357178, + "p90": 1180.6720495224, + "p95": 1183.1040382385254, + "p99": 1193.2480335235596 + }, + "roundtrip": { + "p50": 2139.3918991088867, + "p90": 2153.6641120910645, + "p95": 2160.0639820098877, + "p99": 2172.800064086914 + }, + "isolatedSum": { + "p50": 2182.207942008972, + "p90": 2209.05601978302, + "p95": 2218.559980392456, + "p99": 2242.016077041626 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-951b8d43", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h100_d41b0513", + "comparisonKey": "11d82110c5fddcfe", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:02:37.544577+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 166.4319932460785, + "p90": 171.80800437927246, + "p95": 173.37599396705627, + "p99": 523.7119793891907 + }, + "combine": { + "p50": 126.91199779510498, + "p90": 129.50399518013, + "p95": 130.43199479579926, + "p99": 133.53599607944489 + }, + "roundtrip": { + "p50": 258.14399123191833, + "p90": 261.9839906692505, + "p95": 262.7840042114258, + "p99": 266.2079930305481 + }, + "isolatedSum": { + "p50": 293.3439910411835, + "p90": 301.31199955940247, + "p95": 303.80798876285553, + "p99": 657.2479754686356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 201.1519968509674, + "p90": 206.7520022392273, + "p95": 208.70399475097656, + "p99": 212.51200139522552 + }, + "combine": { + "p50": 177.63200402259827, + "p90": 180.1919937133789, + "p95": 181.2479943037033, + "p99": 186.65599822998047 + }, + "roundtrip": { + "p50": 338.591992855072, + "p90": 343.1040048599243, + "p95": 344.4480001926422, + "p99": 355.3920090198517 + }, + "isolatedSum": { + "p50": 378.7840008735657, + "p90": 386.9439959526062, + "p95": 389.95198905467987, + "p99": 399.167999625206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 274.9119997024536, + "p90": 279.6480059623718, + "p95": 284.31999683380127, + "p99": 474.2079973220825 + }, + "combine": { + "p50": 273.79199862480164, + "p90": 279.90400791168213, + "p95": 284.7360074520111, + "p99": 399.3920087814331 + }, + "roundtrip": { + "p50": 510.0799798965454, + "p90": 515.9040093421936, + "p95": 518.4640288352966, + "p99": 534.2079997062683 + }, + "isolatedSum": { + "p50": 548.7039983272552, + "p90": 559.552013874054, + "p95": 569.0560042858124, + "p99": 873.6000061035156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 416.06399416923523, + "p90": 422.5600063800812, + "p95": 424.73599314689636, + "p99": 438.7519955635071 + }, + "combine": { + "p50": 454.17600870132446, + "p90": 459.00800824165344, + "p95": 460.89598536491394, + "p99": 464.9919867515564 + }, + "roundtrip": { + "p50": 832.4480056762695, + "p90": 838.9120101928711, + "p95": 840.8960103988647, + "p99": 845.1200127601624 + }, + "isolatedSum": { + "p50": 870.2400028705597, + "p90": 881.5680146217346, + "p95": 885.6319785118103, + "p99": 903.7439823150635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 711.4880084991455, + "p90": 722.7839827537537, + "p95": 726.4959812164307, + "p99": 731.007993221283 + }, + "combine": { + "p50": 813.3760094642639, + "p90": 820.6080198287964, + "p95": 822.3040103912354, + "p99": 827.2320032119751 + }, + "roundtrip": { + "p50": 
1485.6640100479126, + "p90": 1495.8720207214355, + "p95": 1500, + "p99": 1507.680058479309 + }, + "isolatedSum": { + "p50": 1524.8640179634094, + "p90": 1543.39200258255, + "p95": 1548.799991607666, + "p99": 1558.239996433258 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1313.1200075149536, + "p90": 1326.3360261917114, + "p95": 1329.5680284500122, + "p99": 1337.2479677200317 + }, + "combine": { + "p50": 1522.1760272979736, + "p90": 1530.176043510437, + "p95": 1532.5440168380737, + "p99": 1539.9680137634277 + }, + "roundtrip": { + "p50": 2797.9519367218018, + "p90": 2809.8559379577637, + "p95": 2813.0240440368652, + "p99": 2820.8320140838623 + }, + "isolatedSum": { + "p50": 2835.2960348129272, + "p90": 2856.5120697021484, + "p95": 2862.112045288086, + "p99": 2877.2159814834595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9eb8bebf", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_811b044b", + "comparisonKey": "3aff7ffe1802d1c2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:03:10.595235+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.43999993801117, + "p90": 166.75199568271637, + "p95": 168.03200542926788, + "p99": 171.55200242996216 + }, + "combine": { + "p50": 120.80000340938568, + "p90": 122.78400361537933, + "p95": 123.83999675512314, + "p99": 128.57599556446075 + }, + "roundtrip": { + "p50": 243.23199689388275, + "p90": 247.16800451278687, + "p95": 248.48000705242157, + "p99": 252.06398963928223 + }, + "isolatedSum": { + "p50": 282.24000334739685, + "p90": 289.5359992980957, + 
"p95": 291.872002184391, + "p99": 300.1279979944229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.85599958896637, + "p90": 191.0720020532608, + "p95": 193.1840032339096, + "p99": 199.16799664497375 + }, + "combine": { + "p50": 163.55200111865997, + "p90": 167.71200299263, + "p95": 168.89600455760956, + "p99": 173.5360026359558 + }, + "roundtrip": { + "p50": 307.8399896621704, + "p90": 312.73600459098816, + "p95": 314.4319951534271, + "p99": 320.3519880771637 + }, + "isolatedSum": { + "p50": 349.40800070762634, + "p90": 358.7840050458908, + "p95": 362.08000779151917, + "p99": 372.70399928092957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 241.63199961185455, + "p90": 246.62399291992188, + "p95": 248.73599410057068, + "p99": 253.91998887062073 + }, + "combine": { + "p50": 239.26399648189545, + "p90": 244.22399699687958, + "p95": 245.85600197315216, + "p99": 250.43201446533203 + }, + "roundtrip": { + "p50": 443.77601146698, + "p90": 449.40799474716187, + "p95": 451.84001326560974, + "p99": 458.68799090385437 + }, + "isolatedSum": { + "p50": 480.89599609375, + "p90": 490.84798991680145, + "p95": 494.59199607372284, + "p99": 504.35200333595276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + 
"globalTokens": 8192, + "dispatch": { + "p50": 347.8719890117645, + "p90": 352.4160087108612, + "p95": 354.0799915790558, + "p99": 357.695996761322 + }, + "combine": { + "p50": 374.11201000213623, + "p90": 379.8080086708069, + "p95": 381.76000118255615, + "p99": 384.5439851284027 + }, + "roundtrip": { + "p50": 683.5200190544128, + "p90": 690.9120082855225, + "p95": 692.8960084915161, + "p99": 702.5920152664185 + }, + "isolatedSum": { + "p50": 721.9839990139008, + "p90": 732.2240173816681, + "p95": 735.8399927616119, + "p99": 742.2399818897247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 565.7280087471008, + "p90": 574.783980846405, + "p95": 577.888011932373, + "p99": 582.2719931602478 + }, + "combine": { + "p50": 656.000018119812, + "p90": 664.3519997596741, + "p95": 666.0799980163574, + "p99": 671.3600158691406 + }, + "roundtrip": { + "p50": 1181.280016899109, + "p90": 1191.6160583496094, + "p95": 1194.3999528884888, + "p99": 1200.6399631500244 + }, + "isolatedSum": { + "p50": 1221.7280268669128, + "p90": 1239.135980606079, + "p95": 1243.9680099487305, + "p99": 1253.6320090293884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1025.7920026779175, + "p90": 1043.455958366394, + "p95": 1050.0160455703735, + "p99": 1059.5519542694092 + }, + "combine": { + "p50": 1170.7199811935425, + "p90": 1179.0399551391602, + "p95": 1181.3440322875977, + "p99": 1185.3439807891846 + }, + "roundtrip": { + "p50": 
2149.951934814453, + "p90": 2164.479970932007, + "p95": 2168.9279079437256, + "p99": 2181.8559169769287 + }, + "isolatedSum": { + "p50": 2196.51198387146, + "p90": 2222.495913505554, + "p95": 2231.360077857971, + "p99": 2244.8959350585938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bae449f", + "identity": "h100|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h100_ff8c31eb", + "comparisonKey": "7b4473a368b01569", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:10.117877+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + 
"routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.4880006313324, + "p90": 167.87199676036835, + "p95": 169.27999258041382, + "p99": 173.66400361061096 + }, + "combine": { + "p50": 121.5360015630722, + "p90": 123.61600250005722, + "p95": 126.52799487113953, + "p99": 130.72000443935394 + }, + "roundtrip": { + "p50": 242.5599992275238, + "p90": 247.67999351024628, + "p95": 249.2160052061081, + "p99": 252.54398584365845 + }, + "isolatedSum": { + "p50": 285.0240021944046, + "p90": 291.48799926042557, + "p95": 295.80798745155334, + "p99": 304.3840080499649 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 188.57599794864655, + "p90": 193.66399943828583, + "p95": 196.19199633598328, + "p99": 211.64800226688385 + }, + "combine": { + "p50": 164.41600024700165, + "p90": 168.47999393939972, + "p95": 170.1119989156723, + "p99": 173.7920045852661 + }, + "roundtrip": { + "p50": 307.5200021266937, + "p90": 312.4479949474335, + "p95": 314.14398550987244, + "p99": 316.99201464653015 + }, + "isolatedSum": { + "p50": 352.9919981956482, + "p90": 362.14399337768555, + "p95": 366.3039952516556, + "p99": 
385.44000685214996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 242.5280064344406, + "p90": 247.74399399757385, + "p95": 249.66399371623993, + "p99": 253.85600328445435 + }, + "combine": { + "p50": 240.1600033044815, + "p90": 244.73600089550018, + "p95": 246.20799720287323, + "p99": 249.11999702453613 + }, + "roundtrip": { + "p50": 444.2239999771118, + "p90": 449.3440091609955, + "p95": 450.9119987487793, + "p99": 454.6560049057007 + }, + "isolatedSum": { + "p50": 482.6880097389221, + "p90": 492.47999489307404, + "p95": 495.87199091911316, + "p99": 502.9760003089905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 348.4799861907959, + "p90": 353.43998670578003, + "p95": 355.29598593711853, + "p99": 361.1840009689331 + }, + "combine": { + "p50": 374.5279908180237, + "p90": 380.3839981555939, + "p95": 382.07998871803284, + "p99": 385.888010263443 + }, + "roundtrip": { + "p50": 684.2880249023438, + "p90": 690.8479928970337, + "p95": 693.2799816131592, + "p99": 697.8240013122559 + }, + "isolatedSum": { + "p50": 723.0079770088196, + "p90": 733.8239848613739, + "p95": 737.3759746551514, + "p99": 747.0720112323761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + 
"p50": 567.7760243415833, + "p90": 578.3039927482605, + "p95": 582.5600028038025, + "p99": 708.0000042915344 + }, + "combine": { + "p50": 653.439998626709, + "p90": 662.3039841651917, + "p95": 665.0879979133606, + "p99": 724.2239713668823 + }, + "roundtrip": { + "p50": 1181.0239553451538, + "p90": 1192.1279430389404, + "p95": 1194.8479413986206, + "p99": 1201.5039920806885 + }, + "isolatedSum": { + "p50": 1221.2160229682922, + "p90": 1240.6079769134521, + "p95": 1247.648000717163, + "p99": 1432.2239756584167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1026.9440412521362, + "p90": 1045.9840297698975, + "p95": 1051.8720149993896, + "p99": 1065.440058708191 + }, + "combine": { + "p50": 1168.1599617004395, + "p90": 1176.8640279769897, + "p95": 1179.0080070495605, + "p99": 1186.8159770965576 + }, + "roundtrip": { + "p50": 2146.0158824920654, + "p90": 2162.11199760437, + "p95": 2166.624069213867, + "p99": 2175.3599643707275 + }, + "isolatedSum": { + "p50": 2195.1040029525757, + "p90": 2222.848057746887, + "p95": 2230.88002204895, + "p99": 2252.2560358047485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-612d3ad7", + "identity": "h100|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_508883cc", + "comparisonKey": "85a9edc8b5ca3e56", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:37.130174+00:00", + "status": "valid", + "publicationStatus": 
"comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.15199315547943, + "p90": 137.9839926958084, + "p95": 139.8400068283081, + "p99": 144.0960019826889 + }, + "combine": { + "p50": 120.7680031657219, + "p90": 123.00799787044525, + "p95": 127.29600071907043, + "p99": 
130.0799995660782 + }, + "roundtrip": { + "p50": 220.09600698947906, + "p90": 223.39199483394623, + "p95": 224.5119959115982, + "p99": 230.880007147789 + }, + "isolatedSum": { + "p50": 253.91999632120132, + "p90": 260.99199056625366, + "p95": 267.13600754737854, + "p99": 274.1760015487671 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.59199380874634, + "p90": 167.67999529838562, + "p95": 169.37600076198578, + "p99": 177.95200645923615 + }, + "combine": { + "p50": 162.59199380874634, + "p90": 166.33599996566772, + "p95": 168.2880073785782, + "p99": 171.61600291728973 + }, + "roundtrip": { + "p50": 282.24000334739685, + "p90": 286.8799865245819, + "p95": 288.06400299072266, + "p99": 295.199990272522 + }, + "isolatedSum": { + "p50": 325.1839876174927, + "p90": 334.01599526405334, + "p95": 337.66400814056396, + "p99": 349.5680093765259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 216.92800521850586, + "p90": 234.40000414848328, + "p95": 238.52799832820892, + "p99": 254.55999374389648 + }, + "combine": { + "p50": 242.01600253582, + "p90": 246.24000489711761, + "p95": 247.871994972229, + "p99": 253.91998887062073 + }, + "roundtrip": { + "p50": 420.415997505188, + "p90": 424.80000853538513, + "p95": 426.4959990978241, + "p99": 429.82399463653564 + }, + "isolatedSum": { + "p50": 458.94400775432587, + "p90": 480.6400090456009, + "p95": 486.3999933004379, + "p99": 508.4799826145172 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 323.3279883861542, + "p90": 328.0960023403168, + "p95": 329.98400926589966, + "p99": 333.95200967788696 + }, + "combine": { + "p50": 378.2080113887787, + "p90": 384.223997592926, + "p95": 387.03998923301697, + "p99": 393.1199908256531 + }, + "roundtrip": { + "p50": 663.3920073509216, + "p90": 669.8240041732788, + "p95": 672.1280217170715, + "p99": 679.0720224380493 + }, + "isolatedSum": { + "p50": 701.5359997749329, + "p90": 712.3199999332428, + "p95": 717.0239984989166, + "p99": 727.07200050354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 547.7759838104248, + "p90": 556.8640232086182, + "p95": 559.3600273132324, + "p99": 570.4320073127747 + }, + "combine": { + "p50": 646.8799710273743, + "p90": 655.0400257110596, + "p95": 657.0240259170532, + "p99": 662.8479957580566 + }, + "roundtrip": { + "p50": 1153.7920236587524, + "p90": 1165.0240421295166, + "p95": 1168.67196559906, + "p99": 1174.9440431594849 + }, + "isolatedSum": { + "p50": 1194.655954837799, + "p90": 1211.9040489196777, + "p95": 1216.3840532302856, + "p99": 1233.2800030708313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1001.6319751739502, + "p90": 1022.9120254516602, 
+ "p95": 1029.4400453567505, + "p99": 1046.5600490570068 + }, + "combine": { + "p50": 1177.5039434432983, + "p90": 1186.1120462417603, + "p95": 1190.2079582214355, + "p99": 1196.4479684829712 + }, + "roundtrip": { + "p50": 2130.592107772827, + "p90": 2147.775888442993, + "p95": 2153.6319255828857, + "p99": 2165.663957595825 + }, + "isolatedSum": { + "p50": 2179.1359186172485, + "p90": 2209.0240716934204, + "p95": 2219.648003578186, + "p99": 2243.008017539978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d5c6c3d", + "identity": "h100|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "c27635c3e303d63e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:19.153031+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 275.6800055503845, + "p90": 284.0319871902466, + "p95": 286.97600960731506, + "p99": 309.85599756240845 + }, + "combine": { + "p50": 90.7519981265068, + "p90": 93.85599941015244, + "p95": 95.87199985980988, + "p99": 98.91200065612793 + }, + "roundtrip": { + "p50": 354.97599840164185, + "p90": 362.68800497055054, + "p95": 365.85599184036255, + "p99": 378.08001041412354 + }, + "isolatedSum": { + "p50": 366.4320036768913, + "p90": 377.887986600399, + "p95": 382.84800946712494, + "p99": 408.7679982185364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 297.1520125865936, + "p90": 303.5840094089508, + "p95": 307.1039915084839, + "p99": 334.75199341773987 + }, + "combine": { + "p50": 118.84800344705582, + "p90": 121.95199728012085, + "p95": 123.77600371837616, + "p99": 129.2800009250641 + }, + "roundtrip": { + "p50": 385.21599769592285, + "p90": 392.15999841690063, + "p95": 
394.8799967765808, + "p99": 399.6799886226654 + }, + "isolatedSum": { + "p50": 416.00001603364944, + "p90": 425.53600668907166, + "p95": 430.87999522686005, + "p99": 464.03199434280396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 385.8560025691986, + "p90": 439.58398699760437, + "p95": 442.75200366973877, + "p99": 450.1760005950928 + }, + "combine": { + "p50": 172.7360039949417, + "p90": 177.12000012397766, + "p95": 178.75200510025024, + "p99": 183.9040070772171 + }, + "roundtrip": { + "p50": 525.3120064735413, + "p90": 532.9920053482056, + "p95": 534.9760055541992, + "p99": 540.287971496582 + }, + "isolatedSum": { + "p50": 558.5920065641403, + "p90": 616.703987121582, + "p95": 621.504008769989, + "p99": 634.0800076723099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 538.5280251502991, + "p90": 545.4720258712769, + "p95": 548.3840107917786, + "p99": 561.2480044364929 + }, + "combine": { + "p50": 287.3600125312805, + "p90": 291.7119860649109, + "p95": 293.5360074043274, + "p99": 295.5839931964874 + }, + "roundtrip": { + "p50": 794.655978679657, + "p90": 802.5919795036316, + "p95": 804.9920201301575, + "p99": 810.6880187988281 + }, + "isolatedSum": { + "p50": 825.8880376815796, + "p90": 837.1840119361877, + "p95": 841.920018196106, + "p99": 856.8319976329803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 865.8239841461182, + "p90": 919.0719723701477, + "p95": 923.0719804763794, + "p99": 931.9679737091064 + }, + "combine": { + "p50": 473.66398572921753, + "p90": 483.7760031223297, + "p95": 485.9839975833893, + "p99": 494.1439926624298 + }, + "roundtrip": { + "p50": 1306.0799837112427, + "p90": 1343.1999683380127, + "p95": 1361.4720106124878, + "p99": 1497.3119497299194 + }, + "isolatedSum": { + "p50": 1339.4879698753357, + "p90": 1402.8479754924774, + "p95": 1409.0559780597687, + "p99": 1426.1119663715363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1592.6079750061035, + "p90": 1599.5839834213257, + "p95": 1602.4960279464722, + "p99": 1608.6399555206299 + }, + "combine": { + "p50": 851.1999845504761, + "p90": 860.5440258979797, + "p95": 862.9119992256165, + "p99": 866.6560053825378 + }, + "roundtrip": { + "p50": 2422.463893890381, + "p90": 2436.0640048980713, + "p95": 2440.9279823303223, + "p99": 2449.4400024414062 + }, + "isolatedSum": { + "p50": 2443.8079595565796, + "p90": 2460.1280093193054, + "p95": 2465.4080271720886, + "p99": 2475.2959609031677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e982185", + "identity": "h100|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "f05abd259a3a467c", 
+ "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:17.790921+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 264.76800441741943, + "p90": 274.30400252342224, + "p95": 277.3759961128235, + "p99": 289.4720137119293 + }, + 
"combine": { + "p50": 97.79199957847595, + "p90": 100.51199793815613, + "p95": 101.6639992594719, + "p99": 106.175996363163 + }, + "roundtrip": { + "p50": 350.816011428833, + "p90": 358.14398527145386, + "p95": 361.1840009689331, + "p99": 369.6640133857727 + }, + "isolatedSum": { + "p50": 362.5600039958954, + "p90": 374.81600046157837, + "p95": 379.0399953722954, + "p99": 395.6480100750923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 308.9919984340668, + "p90": 349.8240113258362, + "p95": 368.3840036392212, + "p99": 425.56801438331604 + }, + "combine": { + "p50": 129.72800433635712, + "p90": 150.91200172901154, + "p95": 153.6639928817749, + "p99": 158.87999534606934 + }, + "roundtrip": { + "p50": 409.1840088367462, + "p90": 416.48000478744507, + "p95": 419.3600118160248, + "p99": 454.27200198173523 + }, + "isolatedSum": { + "p50": 438.7200027704239, + "p90": 500.7360130548477, + "p95": 522.0479965209961, + "p99": 584.4480097293854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 402.24000811576843, + "p90": 410.14400124549866, + "p95": 412.80001401901245, + "p99": 432.15999007225037 + }, + "combine": { + "p50": 195.74399292469025, + "p90": 199.26400482654572, + "p95": 200.6720006465912, + "p99": 203.45599949359894 + }, + "roundtrip": { + "p50": 565.8559799194336, + "p90": 572.3519921302795, + "p95": 574.783980846405, + "p99": 582.5920104980469 + }, + "isolatedSum": { + "p50": 597.9840010404587, + "p90": 609.4080060720444, + 
"p95": 613.4720146656036, + "p99": 635.6159895658493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 579.6480178833008, + "p90": 585.536003112793, + "p95": 587.9039764404297, + "p99": 595.3279733657837 + }, + "combine": { + "p50": 305.2479922771454, + "p90": 310.5599880218506, + "p95": 312.28798627853394, + "p99": 315.39198756217957 + }, + "roundtrip": { + "p50": 856.5760254859924, + "p90": 864.4480109214783, + "p95": 867.0719861984253, + "p99": 874.4000196456909 + }, + "isolatedSum": { + "p50": 884.8960101604462, + "p90": 896.0959911346436, + "p95": 900.1919627189636, + "p99": 910.7199609279633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1002.560019493103, + "p90": 1008.0000162124634, + "p95": 1011.2639665603638, + "p99": 1024.351954460144 + }, + "combine": { + "p50": 522.4320292472839, + "p90": 529.2159914970398, + "p95": 532.5120091438293, + "p99": 537.3759865760803 + }, + "roundtrip": { + "p50": 1490.6879663467407, + "p90": 1499.135971069336, + "p95": 1501.7600059509277, + "p99": 1511.296033859253 + }, + "isolatedSum": { + "p50": 1524.992048740387, + "p90": 1537.2160077095032, + "p95": 1543.7759757041931, + "p99": 1561.7279410362244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + 
"globalTokens": 32768, + "dispatch": { + "p50": 1853.760004043579, + "p90": 1862.496018409729, + "p95": 1864.6399974822998, + "p99": 1869.53604221344 + }, + "combine": { + "p50": 940.0960206985474, + "p90": 949.0879774093628, + "p95": 951.4880180358887, + "p99": 958.8159918785095 + }, + "roundtrip": { + "p50": 2765.791893005371, + "p90": 2780.57599067688, + "p95": 2785.536050796509, + "p99": 2812.4160766601562 + }, + "isolatedSum": { + "p50": 2793.8560247421265, + "p90": 2811.583995819092, + "p95": 2816.1280155181885, + "p99": 2828.3520340919495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ee22b5cd", + "identity": "h100|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "b99c8b688d8c57c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:20.114144+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + 
"configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 273.75999093055725, + "p90": 310.2079927921295, + "p95": 322.07998633384705, + "p99": 395.9679901599884 + }, + "combine": { + "p50": 105.92000186443329, + "p90": 117.50400066375732, + "p95": 120.67200243473053, + "p99": 127.00800597667694 + }, + "roundtrip": { + "p50": 369.28001046180725, + "p90": 412.8960072994232, + "p95": 422.5600063800812, + "p99": 441.6959881782532 + }, + "isolatedSum": { + "p50": 379.67999279499054, + "p90": 427.71199345588684, + "p95": 442.7519887685776, + "p99": 522.9759961366653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 327.87200808525085, + "p90": 336.1920118331909, + "p95": 338.9120101928711, + "p99": 350.3359854221344 + }, + "combine": { + "p50": 142.71999895572662, + "p90": 146.43199741840363, + "p95": 147.96799421310425, + "p99": 154.84799444675446 + }, 
+ "roundtrip": { + "p50": 449.18400049209595, + "p90": 458.71999859809875, + "p95": 462.2719883918762, + "p99": 515.7439708709717 + }, + "isolatedSum": { + "p50": 470.5920070409775, + "p90": 482.62400925159454, + "p95": 486.88000440597534, + "p99": 505.18397986888885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 438.30400705337524, + "p90": 447.1360146999359, + "p95": 450.5920112133026, + "p99": 475.2640128135681 + }, + "combine": { + "p50": 213.76000344753265, + "p90": 217.631995677948, + "p95": 218.75199675559998, + "p99": 224.67200458049774 + }, + "roundtrip": { + "p50": 625.6319880485535, + "p90": 657.3759913444519, + "p95": 723.5519886016846, + "p99": 805.8559894561768 + }, + "isolatedSum": { + "p50": 652.0640105009079, + "p90": 664.7680103778839, + "p95": 669.3440079689026, + "p99": 699.9360173940659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 644.2559957504272, + "p90": 652.351975440979, + "p95": 655.9039950370789, + "p99": 667.6480174064636 + }, + "combine": { + "p50": 336.38399839401245, + "p90": 340.7680094242096, + "p95": 342.3680067062378, + "p99": 346.97601199150085 + }, + "roundtrip": { + "p50": 948.6719965934753, + "p90": 956.8639993667603, + "p95": 959.9360227584839, + "p99": 978.4960150718689 + }, + "isolatedSum": { + "p50": 980.6399941444397, + "p90": 993.1199848651886, + "p95": 998.2720017433167, + "p99": 1014.6240293979645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + 
"combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1150.048017501831, + "p90": 1155.9040546417236, + "p95": 1158.1759452819824, + "p99": 1163.648009300232 + }, + "combine": { + "p50": 581.3760161399841, + "p90": 587.7760052680969, + "p95": 589.5040035247803, + "p99": 603.9999723434448 + }, + "roundtrip": { + "p50": 1710.0160121917725, + "p90": 1728.4480333328247, + "p95": 1733.8240146636963, + "p99": 1752.7680397033691 + }, + "isolatedSum": { + "p50": 1731.4240336418152, + "p90": 1743.6800599098206, + "p95": 1747.6799488067627, + "p99": 1767.6479816436768 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2137.120008468628, + "p90": 2144.320011138916, + "p95": 2147.871971130371, + "p99": 2155.679941177368 + }, + "combine": { + "p50": 1059.0399503707886, + "p90": 1067.0080184936523, + "p95": 1069.599986076355, + "p99": 1076.6079425811768 + }, + "roundtrip": { + "p50": 3258.6560249328613, + "p90": 3284.480094909668, + "p95": 3294.6879863739014, + "p99": 3311.4240169525146 + }, + "isolatedSum": { + "p50": 3196.1599588394165, + "p90": 3211.3280296325684, + "p95": 3217.471957206726, + "p99": 3232.287883758545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1e98e9a", + "identity": 
"h100|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_4e920b93", + "comparisonKey": "5a2d0a92e1d0cee5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:37.115077+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": 
[ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 138.59200477600098, + "p90": 144.70399916172028, + "p95": 147.45600521564484, + "p99": 155.71199357509613 + }, + "combine": { + "p50": 110.72000116109848, + "p90": 114.88000303506851, + "p95": 116.99199676513672, + "p99": 119.23199892044067 + }, + "roundtrip": { + "p50": 271.36000990867615, + "p90": 278.56001257896423, + "p95": 280.8000147342682, + "p99": 286.01598739624023 + }, + "isolatedSum": { + "p50": 249.31200593709946, + "p90": 259.5840021967888, + "p95": 264.44800198078156, + "p99": 274.9439924955368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 156.8319946527481, + "p90": 162.75200247764587, + "p95": 164.63999450206757, + "p99": 170.30400037765503 + }, + "combine": { + "p50": 156.38400614261627, + "p90": 159.67999398708344, + "p95": 161.5999937057495, + "p99": 165.27999937534332 + }, + "roundtrip": { + "p50": 379.10398840904236, + "p90": 383.93598794937134, + "p95": 385.4080140590668, + "p99": 389.8240029811859 + }, + "isolatedSum": { + "p50": 313.2160007953644, + "p90": 322.4319964647293, + "p95": 326.2399882078171, + "p99": 335.58399975299835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.24800372123718, + "p90": 198.68800044059753, + "p95": 200.47999918460846, + "p99": 204.79999482631683 + }, + "combine": { + "p50": 233.95200073719025, + "p90": 237.47199773788452, + "p95": 239.45599794387817, + "p99": 243.0720031261444 + }, + 
"roundtrip": { + "p50": 577.344000339508, + "p90": 585.1519703865051, + "p95": 593.0560231208801, + "p99": 647.9679942131042 + }, + "isolatedSum": { + "p50": 427.20000445842743, + "p90": 436.15999817848206, + "p95": 439.93599712848663, + "p99": 447.87199795246124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 263.10399174690247, + "p90": 268.70399713516235, + "p95": 270.30399441719055, + "p99": 275.4560112953186 + }, + "combine": { + "p50": 370.49600481987, + "p90": 374.81600046157837, + "p95": 376.41599774360657, + "p99": 379.0079951286316 + }, + "roundtrip": { + "p50": 942.9759979248047, + "p90": 949.504017829895, + "p95": 951.2640237808228, + "p99": 963.0079865455627 + }, + "isolatedSum": { + "p50": 633.5999965667725, + "p90": 643.5199975967407, + "p95": 646.7199921607971, + "p99": 654.4640064239502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 393.50399374961853, + "p90": 399.26400780677795, + "p95": 400.86400508880615, + "p99": 404.9600064754486 + }, + "combine": { + "p50": 639.3600106239319, + "p90": 647.0720171928406, + "p95": 649.6319770812988, + "p99": 653.8879871368408 + }, + "roundtrip": { + "p50": 1663.7120246887207, + "p90": 1672.0960140228271, + "p95": 1675.3920316696167, + "p99": 1787.6160144805908 + }, + "isolatedSum": { + "p50": 1032.8640043735504, + "p90": 1046.3360249996185, + "p95": 1050.495982170105, + "p99": 1058.8479936122894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + 
"combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 665.7599806785583, + "p90": 674.7840046882629, + "p95": 681.119978427887, + "p99": 864.0959858894348 + }, + "combine": { + "p50": 1167.9680347442627, + "p90": 1177.0559549331665, + "p95": 1180.3200244903564, + "p99": 1194.5600509643555 + }, + "roundtrip": { + "p50": 3117.503881454468, + "p90": 3130.079984664917, + "p95": 3135.456085205078, + "p99": 3144.7360515594482 + }, + "isolatedSum": { + "p50": 1833.728015422821, + "p90": 1851.8399596214294, + "p95": 1861.4400029182434, + "p99": 2058.6560368537903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bcdd43a", + "identity": "h100|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "d6c3b1ccff0a879e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:43.018270+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": 
false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 274.6880054473877, + "p90": 284.67199206352234, + "p95": 293.3120131492615, + "p99": 459.1679871082306 + }, + "combine": { + "p50": 111.90400272607803, + "p90": 117.72800236940384, + "p95": 136.86400651931763, + "p99": 140.6719982624054 + }, + "roundtrip": { + "p50": 367.42401123046875, + "p90": 379.96798753738403, + "p95": 389.6639943122864, + "p99": 451.58401131629944 + }, + "isolatedSum": { + "p50": 386.59200817346573, + "p90": 402.3999944329262, + "p95": 430.1760196685791, + "p99": 599.839985370636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 340.5439853668213, + "p90": 
349.15199875831604, + "p95": 352.7039885520935, + "p99": 388.38401436805725 + }, + "combine": { + "p50": 157.85600244998932, + "p90": 161.56800091266632, + "p95": 163.29599916934967, + "p99": 170.01600563526154 + }, + "roundtrip": { + "p50": 474.65598583221436, + "p90": 483.93601179122925, + "p95": 487.8399968147278, + "p99": 516.1280035972595 + }, + "isolatedSum": { + "p50": 498.3999878168106, + "p90": 510.71999967098236, + "p95": 515.9999877214432, + "p99": 558.4000200033188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 462.1120095252991, + "p90": 470.7520008087158, + "p95": 475.8079946041107, + "p99": 493.9199984073639 + }, + "combine": { + "p50": 234.9119931459427, + "p90": 238.78400027751923, + "p95": 240.31999707221985, + "p99": 244.159996509552 + }, + "roundtrip": { + "p50": 672.7679967880249, + "p90": 681.3759803771973, + "p95": 684.6399903297424, + "p99": 730.3360104560852 + }, + "isolatedSum": { + "p50": 697.0240026712418, + "p90": 709.536001086235, + "p95": 716.1279916763306, + "p99": 738.0799949169159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 704.6719789505005, + "p90": 711.2640142440796, + "p95": 716.159999370575, + "p99": 788.9599800109863 + }, + "combine": { + "p50": 371.8720078468323, + "p90": 377.47201323509216, + "p95": 379.5199990272522, + "p99": 393.8240110874176 + }, + "roundtrip": { + "p50": 1046.496033668518, + "p90": 1053.439974784851, + "p95": 1056.480050086975, + "p99": 1123.6799955368042 + 
}, + "isolatedSum": { + "p50": 1076.5439867973328, + "p90": 1088.7360274791718, + "p95": 1095.6799983978271, + "p99": 1182.783991098404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1275.10404586792, + "p90": 1282.9439640045166, + "p95": 1286.6239547729492, + "p99": 1514.240026473999 + }, + "combine": { + "p50": 645.1839804649353, + "p90": 654.2720198631287, + "p95": 657.0559740066528, + "p99": 668.7039732933044 + }, + "roundtrip": { + "p50": 1879.1999816894531, + "p90": 1888.64004611969, + "p95": 1892.4800157546997, + "p99": 1917.3120260238647 + }, + "isolatedSum": { + "p50": 1920.2880263328552, + "p90": 1937.2159838676453, + "p95": 1943.679928779602, + "p99": 2182.9439997673035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2385.215997695923, + "p90": 2394.239902496338, + "p95": 2399.168014526367, + "p99": 2479.6481132507324 + }, + "combine": { + "p50": 1170.6559658050537, + "p90": 1179.7759532928467, + "p95": 1182.8800439834595, + "p99": 1191.5839910507202 + }, + "roundtrip": { + "p50": 3534.496068954468, + "p90": 3549.407958984375, + "p95": 3560.0640773773193, + "p99": 3761.5039348602295 + }, + "isolatedSum": { + "p50": 3555.8719635009766, + "p90": 3574.0158557891846, + "p95": 3582.0480585098267, + "p99": 3671.2321043014526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cfaaae9d", + "identity": "h100|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h100_ef2b14f7", + "comparisonKey": "09d914f290c7827f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:18.956958+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + 
"createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 280.86400032043457, + "p90": 288.7040078639984, + "p95": 291.20001196861267, + "p99": 300.86401104927063 + }, + "combine": { + "p50": 113.3119985461235, + "p90": 116.70400202274323, + "p95": 118.43200027942657, + "p99": 126.68800354003906 + }, + "roundtrip": { + "p50": 382.4320137500763, + "p90": 390.24001359939575, + "p95": 393.66400241851807, + "p99": 402.20800042152405 + }, + "isolatedSum": { + "p50": 394.1759988665581, + "p90": 405.40800988674164, + "p95": 409.63201224803925, + "p99": 427.5520145893097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 346.6559946537018, + "p90": 353.7920117378235, + "p95": 355.45599460601807, + "p99": 364.9280071258545 + }, + "combine": { + "p50": 157.02399611473083, + "p90": 160.35200655460358, + "p95": 161.3440066576004, + "p99": 164.8000031709671 + }, + "roundtrip": { + "p50": 482.87999629974365, + "p90": 491.58400297164917, + "p95": 501.4079809188843, + "p99": 1007.1359872817993 + }, + "isolatedSum": { + "p50": 503.6799907684326, + "p90": 514.1440182924271, + "p95": 516.8000012636185, + "p99": 529.7280102968216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 469.184011220932, + "p90": 511.4560127258301, + "p95": 523.1680274009705, + "p99": 546.3039875030518 + }, + "combine": { + "p50": 239.1040027141571, + 
"p90": 254.40001487731934, + "p95": 258.976012468338, + "p99": 263.71198892593384 + }, + "roundtrip": { + "p50": 679.5520186424255, + "p90": 690.3359889984131, + "p95": 705.7600021362305, + "p99": 745.0559735298157 + }, + "isolatedSum": { + "p50": 708.2880139350891, + "p90": 765.8560276031494, + "p95": 782.1440398693085, + "p99": 810.0159764289856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 702.2079825401306, + "p90": 707.6159715652466, + "p95": 710.9439969062805, + "p99": 723.6480116844177 + }, + "combine": { + "p50": 372.8959858417511, + "p90": 378.08001041412354, + "p95": 379.71198558807373, + "p99": 385.8239948749542 + }, + "roundtrip": { + "p50": 1047.5200414657593, + "p90": 1056.6400289535522, + "p95": 1059.872031211853, + "p99": 1098.0160236358643 + }, + "isolatedSum": { + "p50": 1075.1039683818817, + "p90": 1085.6959819793701, + "p95": 1090.6559824943542, + "p99": 1109.472006559372 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1267.9680585861206, + "p90": 1273.1839418411255, + "p95": 1275.4559516906738, + "p99": 1290.079951286316 + }, + "combine": { + "p50": 635.6800198554993, + "p90": 642.4000263214111, + "p95": 644.3520188331604, + "p99": 649.8240232467651 + }, + "roundtrip": { + "p50": 1869.6000576019287, + "p90": 1877.9840469360352, + "p95": 1881.119966506958, + "p99": 1895.9039449691772 + }, + "isolatedSum": { + "p50": 1903.6480784416199, + "p90": 1915.5839681625366, + "p95": 1919.8079705238342, + 
"p99": 1939.903974533081 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2382.9119205474854, + "p90": 2390.655994415283, + "p95": 2393.2480812072754, + "p99": 2401.6640186309814 + }, + "combine": { + "p50": 1150.7519483566284, + "p90": 1159.5200300216675, + "p95": 1161.5040302276611, + "p99": 1166.2720441818237 + }, + "roundtrip": { + "p50": 3520.416021347046, + "p90": 3543.1039333343506, + "p95": 3550.431966781616, + "p99": 3566.175937652588 + }, + "isolatedSum": { + "p50": 3533.6638689041138, + "p90": 3550.1760244369507, + "p95": 3554.7521114349365, + "p99": 3567.936062812805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d398936", + "identity": "h100|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h100_fd11f54b", + "comparisonKey": "6ec21bdd69aca4c5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:37.543709+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h100-dgxc-slurm_17", + "sku": "h100", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h100-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H100 EP8 · uccl · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 
256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.68000167608261, + "p90": 121.18399888277054, + "p95": 122.81599640846252, + "p99": 126.65599584579468 + }, + "combine": { + "p50": 111.07199639081955, + "p90": 115.10399729013443, + "p95": 117.24799871444702, + "p99": 118.94399672746658 + }, + "roundtrip": { + "p50": 249.08800423145294, + "p90": 257.05599784851074, + "p95": 261.56800985336304, + "p99": 265.82399010658264 + }, + "isolatedSum": { + "p50": 226.75199806690216, + "p90": 236.28799617290497, + "p95": 240.06399512290955, + "p99": 245.59999257326126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 133.95200669765472, + "p90": 139.96799290180206, + "p95": 141.66399836540222, + "p99": 145.82400023937225 + }, + "combine": { + "p50": 157.18400478363037, + "p90": 160.863995552063, + "p95": 162.56000101566315, + "p99": 165.8560037612915 + }, + "roundtrip": { + "p50": 356.0959994792938, + "p90": 361.4400029182434, + "p95": 364.1279935836792, + "p99": 375.8080005645752 + }, + "isolatedSum": { + "p50": 291.1360114812851, + "p90": 300.83198845386505, + "p95": 304.22399938106537, + "p99": 311.68000400066376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 171.58399522304535, + "p90": 175.99999904632568, + "p95": 177.824005484581, + "p99": 183.00800025463104 + }, + "combine": { + "p50": 233.5360050201416, + "p90": 237.40799725055695, + "p95": 238.91200125217438, + "p99": 243.42399835586548 + }, + "roundtrip": { + "p50": 555.1360249519348, + "p90": 560.5440139770508, + "p95": 562.6879930496216, + "p99": 567.8719878196716 + }, + "isolatedSum": { + "p50": 405.12000024318695, + "p90": 413.40799629688263, + "p95": 416.73600673675537, + "p99": 426.4319986104965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 239.6160066127777, + "p90": 244.76799368858337, + "p95": 246.68799340724945, + "p99": 252.51200795173645 + }, + "combine": { + "p50": 370.6879913806915, + "p90": 375.5519986152649, + "p95": 377.0880103111267, + "p99": 
379.4879913330078 + }, + "roundtrip": { + "p50": 920.0959801673889, + "p90": 925.9840250015259, + "p95": 928.0959963798523, + "p99": 938.6879801750183 + }, + "isolatedSum": { + "p50": 610.3039979934692, + "p90": 620.3199923038483, + "p95": 623.7760037183762, + "p99": 631.9999992847443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 372.0319867134094, + "p90": 377.6319921016693, + "p95": 380.16000390052795, + "p99": 388.89598846435547 + }, + "combine": { + "p50": 639.19997215271, + "p90": 645.2159881591797, + "p95": 647.9359865188599, + "p99": 654.0799736976624 + }, + "roundtrip": { + "p50": 1641.37601852417, + "p90": 1650.4000425338745, + "p95": 1653.3759832382202, + "p99": 1659.8399877548218 + }, + "isolatedSum": { + "p50": 1011.2319588661194, + "p90": 1022.847980260849, + "p95": 1028.0959904193878, + "p99": 1042.9759621620178 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 635.9040141105652, + "p90": 645.6000208854675, + "p95": 654.2720198631287, + "p99": 848.9279747009277 + }, + "combine": { + "p50": 1167.9680347442627, + "p90": 1177.6959896087646, + "p95": 1180.4800033569336, + "p99": 1186.4639520645142 + }, + "roundtrip": { + "p50": 3096.224069595337, + "p90": 3116.9919967651367, + "p95": 3123.552083969116, + "p99": 3146.752119064331 + }, + "isolatedSum": { + "p50": 1803.8720488548279, + "p90": 1823.2960104942322, + "p95": 1834.7520232200623, + "p99": 2035.391926765442 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1b8bf813", + "identity": "h200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_25d672be", + "comparisonKey": "1a0b6c85c8beecc7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:15.211481+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 66.23999774456024, + "p90": 93.02400052547455, + "p95": 101.1200025677681, + "p99": 120.12799829244614 + }, + "combine": { + "p50": 57.21599981188774, + "p90": 67.23199784755707, + "p95": 73.63200187683105, + "p99": 84.63999629020691 + }, + "roundtrip": { + "p50": 107.4879989027977, + "p90": 132.35199451446533, + "p95": 143.16800236701965, + "p99": 159.13599729537964 + }, + "isolatedSum": { + "p50": 123.45599755644798, + "p90": 160.25599837303162, + "p95": 174.75200444459915, + "p99": 204.76799458265305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 65.88800251483917, + "p90": 90.11200070381165, + "p95": 97.63199836015701, + "p99": 108.76800119876862 + }, + "combine": { + "p50": 58.01599845290184, + "p90": 66.14399701356888, + "p95": 73.60000163316727, + "p99": 82.17599987983704 + }, + "roundtrip": { + "p50": 109.47199910879135, + "p90": 134.49600338935852, + "p95": 143.61600577831268, + "p99": 180.00000715255737 + }, + "isolatedSum": { + "p50": 123.90400096774101, + "p90": 156.25599771738052, + "p95": 171.23199999332428, + "p99": 190.94400107860565 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 67.84000247716904, + "p90": 
95.74399888515472, + "p95": 105.85600137710571, + "p99": 117.37599968910217 + }, + "combine": { + "p50": 59.42400172352791, + "p90": 70.01599669456482, + "p95": 76.06399804353714, + "p99": 88.70399743318558 + }, + "roundtrip": { + "p50": 109.92000252008438, + "p90": 134.24000144004822, + "p95": 144.25599575042725, + "p99": 153.85599434375763 + }, + "isolatedSum": { + "p50": 127.26400420069695, + "p90": 165.75999557971954, + "p95": 181.91999942064285, + "p99": 206.07999712228775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.8480030298233, + "p90": 93.82399916648865, + "p95": 101.85600072145462, + "p99": 118.78400295972824 + }, + "combine": { + "p50": 59.487998485565186, + "p90": 69.98399645090103, + "p95": 76.25599950551987, + "p99": 85.60000360012054 + }, + "roundtrip": { + "p50": 112.15999722480774, + "p90": 141.53599739074707, + "p95": 150.62400698661804, + "p99": 166.46400094032288 + }, + "isolatedSum": { + "p50": 130.3360015153885, + "p90": 163.80799561738968, + "p95": 178.1120002269745, + "p99": 204.38400655984879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.83200323581696, + "p90": 93.91999989748001, + "p95": 103.2319962978363, + "p99": 116.5120005607605 + }, + "combine": { + "p50": 60.22400036454201, + "p90": 69.69600170850754, + "p95": 74.87999647855759, + "p99": 80.70400357246399 + }, + "roundtrip": { + "p50": 112.8000020980835, + "p90": 139.52000439167023, + "p95": 148.83199334144592, + "p99": 162.7199947834015 + }, + 
"isolatedSum": { + "p50": 133.05600360035896, + "p90": 163.61600160598755, + "p95": 178.1119927763939, + "p99": 197.2160041332245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 73.98399710655212, + "p90": 104.12800312042236, + "p95": 111.13599687814713, + "p99": 123.00799787044525 + }, + "combine": { + "p50": 63.45599889755249, + "p90": 76.80000364780426, + "p95": 82.24000036716461, + "p99": 89.21600133180618 + }, + "roundtrip": { + "p50": 114.52800035476685, + "p90": 138.94400000572205, + "p95": 147.64800667762756, + "p99": 165.6319946050644 + }, + "isolatedSum": { + "p50": 137.43999600410461, + "p90": 180.92800676822662, + "p95": 193.37599724531174, + "p99": 212.22399920225143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 80.1599994301796, + "p90": 104.89600151777267, + "p95": 115.07199704647064, + "p99": 167.9680049419403 + }, + "combine": { + "p50": 72.73600250482559, + "p90": 91.32800251245499, + "p95": 95.42399644851685, + "p99": 117.79200285673141 + }, + "roundtrip": { + "p50": 130.94399869441986, + "p90": 165.69599509239197, + "p95": 172.92800545692444, + "p99": 195.68000733852386 + }, + "isolatedSum": { + "p50": 152.8960019350052, + "p90": 196.22400403022766, + "p95": 210.4959934949875, + "p99": 285.7600077986717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, 
+ "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.67999708652496, + "p90": 108.92800241708755, + "p95": 116.64000153541565, + "p99": 144.44799721240997 + }, + "combine": { + "p50": 81.66400343179703, + "p90": 93.47199648618698, + "p95": 98.91200065612793, + "p99": 107.87200182676315 + }, + "roundtrip": { + "p50": 148.99200201034546, + "p90": 167.52000153064728, + "p95": 172.63999581336975, + "p99": 182.91200697422028 + }, + "isolatedSum": { + "p50": 169.344000518322, + "p90": 202.39999890327454, + "p95": 215.55200219154358, + "p99": 252.31999903917313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bf31e3fe", + "identity": "h200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_25d672be", + "comparisonKey": "f8f167a03805b9a5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:41.791824+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 65.66400080919266, + "p90": 75.13599842786789, + "p95": 80.22399991750717, + "p99": 93.18400174379349 + }, + "combine": { + "p50": 60.7680007815361, + "p90": 64.35199826955795, + "p95": 68.92800331115723, + "p99": 78.40000092983246 + }, + "roundtrip": { + "p50": 113.63200098276138, + "p90": 123.80799651145935, + "p95": 129.95199859142303, + "p99": 160.38399934768677 + }, + "isolatedSum": { + "p50": 126.43200159072876, + "p90": 139.48799669742584, + "p95": 149.1520032286644, + "p99": 171.58400267362595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 66.75200164318085, + "p90": 78.27199995517731, + "p95": 82.84799754619598, + "p99": 100.25600343942642 + }, + "combine": { + "p50": 61.24800071120262, + "p90": 65.11999666690826, + "p95": 69.7920024394989, 
+ "p99": 96.73599898815155 + }, + "roundtrip": { + "p50": 112.67200112342834, + "p90": 123.96799772977829, + "p95": 129.92000579833984, + "p99": 143.0400013923645 + }, + "isolatedSum": { + "p50": 128.00000235438347, + "p90": 143.39199662208557, + "p95": 152.63999998569489, + "p99": 196.99200242757797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 68.15999746322632, + "p90": 79.93599772453308, + "p95": 88.57599645853043, + "p99": 112.86400258541107 + }, + "combine": { + "p50": 61.91999837756157, + "p90": 66.17599725723267, + "p95": 73.66400212049484, + "p99": 83.77599716186523 + }, + "roundtrip": { + "p50": 116.22399836778641, + "p90": 127.71199643611908, + "p95": 140.9599930047989, + "p99": 164.8000031709671 + }, + "isolatedSum": { + "p50": 130.0799958407879, + "p90": 146.11199498176575, + "p95": 162.23999857902527, + "p99": 196.6399997472763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.66400146484375, + "p90": 83.00799876451492, + "p95": 94.65599805116653, + "p99": 149.98400211334229 + }, + "combine": { + "p50": 62.94400244951248, + "p90": 67.58400052785873, + "p95": 71.52000069618225, + "p99": 79.52000200748444 + }, + "roundtrip": { + "p50": 116.03199690580368, + "p90": 127.83999741077423, + "p95": 135.0719928741455, + "p99": 147.71200716495514 + }, + "isolatedSum": { + "p50": 132.60800391435623, + "p90": 150.59199929237366, + "p95": 166.17599874734879, + "p99": 229.50400412082672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + 
"combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.6160020828247, + "p90": 87.8399983048439, + "p95": 95.87199985980988, + "p99": 113.92000317573547 + }, + "combine": { + "p50": 63.74400109052658, + "p90": 71.99999690055847, + "p95": 78.11199873685837, + "p99": 87.52000331878662 + }, + "roundtrip": { + "p50": 115.48800021409988, + "p90": 131.071999669075, + "p95": 144.86399292945862, + "p99": 166.46400094032288 + }, + "isolatedSum": { + "p50": 139.3600031733513, + "p90": 159.83999520540237, + "p95": 173.98399859666824, + "p99": 201.4400064945221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.39200037717819, + "p90": 86.56000345945358, + "p95": 92.51199662685394, + "p99": 115.07199704647064 + }, + "combine": { + "p50": 69.023996591568, + "p90": 75.71200281381607, + "p95": 82.68799632787704, + "p99": 92.57599711418152 + }, + "roundtrip": { + "p50": 121.56800180673599, + "p90": 132.1599930524826, + "p95": 141.79199934005737, + "p99": 165.24800658226013 + }, + "isolatedSum": { + "p50": 144.41599696874619, + "p90": 162.27200627326965, + "p95": 175.199992954731, + "p99": 207.64799416065216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 90.36800265312195, + "p90": 98.78399968147278, + "p95": 105.53599894046783, + "p99": 123.74400347471237 + }, + "combine": 
{ + "p50": 76.99199765920639, + "p90": 80.44800162315369, + "p95": 84.19200032949448, + "p99": 97.69599884748459 + }, + "roundtrip": { + "p50": 139.5840048789978, + "p90": 151.48800611495972, + "p95": 160.60799360275269, + "p99": 178.30400168895721 + }, + "isolatedSum": { + "p50": 167.36000031232834, + "p90": 179.23200130462646, + "p95": 189.7279992699623, + "p99": 221.44000232219696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 97.120001912117, + "p90": 108.89600217342377, + "p95": 115.10399729013443, + "p99": 129.63199615478516 + }, + "combine": { + "p50": 88.06400001049042, + "p90": 95.83999961614609, + "p95": 101.27999633550644, + "p99": 110.43199896812439 + }, + "roundtrip": { + "p50": 165.8879965543747, + "p90": 176.79999768733978, + "p95": 186.14399433135986, + "p99": 199.23199713230133 + }, + "isolatedSum": { + "p50": 185.18400192260742, + "p90": 204.73600178956985, + "p95": 216.38399362564087, + "p99": 240.06399512290955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c9c1a87b", + "identity": "h200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_25d672be", + "comparisonKey": "a1972377f2469047", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:10.405626+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.66400146484375, + "p90": 78.23999971151352, + "p95": 88.16000074148178, + "p99": 104.92800176143646 + }, + "combine": { + "p50": 62.04799935221672, + "p90": 72.28799909353256, + "p95": 79.26400005817413, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 111.35999858379364, + "p90": 127.74400413036346, + "p95": 139.71200585365295, + "p99": 155.35999834537506 + }, + "isolatedSum": { + "p50": 
131.71200081706047, + "p90": 150.52799880504608, + "p95": 167.42400079965591, + "p99": 194.40000504255295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.01599669456482, + "p90": 94.11200135946274, + "p95": 104.19200360774994, + "p99": 118.68800222873688 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 78.23999971151352, + "p95": 85.08799970149994, + "p99": 98.55999797582626 + }, + "roundtrip": { + "p50": 118.81600320339203, + "p90": 147.93600142002106, + "p95": 158.65600109100342, + "p99": 177.3120015859604 + }, + "isolatedSum": { + "p50": 136.25599443912506, + "p90": 172.35200107097626, + "p95": 189.28000330924988, + "p99": 217.24800020456314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.27199864387512, + "p90": 76.4160007238388, + "p95": 86.87999844551086, + "p99": 101.21600329875946 + }, + "combine": { + "p50": 65.40799885988235, + "p90": 70.56000083684921, + "p95": 77.15199887752533, + "p99": 88.92799913883209 + }, + "roundtrip": { + "p50": 113.92000317573547, + "p90": 122.68800288438797, + "p95": 135.19999384880066, + "p99": 151.296004652977 + }, + "isolatedSum": { + "p50": 135.67999750375748, + "p90": 146.97600156068802, + "p95": 164.0319973230362, + "p99": 190.14400243759155 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 71.23199850320816, + "p90": 83.20000022649765, + "p95": 95.87199985980988, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 66.72000139951706, + "p90": 71.3919997215271, + "p95": 80.99199831485748, + "p99": 91.42400324344635 + }, + "roundtrip": { + "p50": 116.22399836778641, + "p90": 134.65599715709686, + "p95": 145.53600549697876, + "p99": 166.6879951953888 + }, + "isolatedSum": { + "p50": 137.95199990272522, + "p90": 154.59199994802475, + "p95": 176.86399817466736, + "p99": 202.4640068411827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 70.30399888753891, + "p90": 82.11199939250946, + "p95": 91.2960022687912, + "p99": 107.32799768447876 + }, + "combine": { + "p50": 67.07199662923813, + "p90": 70.27199864387512, + "p95": 75.96799731254578, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 117.98399686813354, + "p90": 127.71199643611908, + "p95": 138.75199854373932, + "p99": 158.1760048866272 + }, + "isolatedSum": { + "p50": 137.37599551677704, + "p90": 152.38399803638458, + "p95": 167.26399958133698, + "p99": 196.06399536132812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.96799796819687, + "p90": 90.59199690818787, + "p95": 99.0080013871193, + "p99": 110.04800349473953 + }, + "combine": { + "p50": 70.27199864387512, + "p90": 76.60800218582153, + "p95": 86.81599795818329, + "p99": 101.18400305509567 + }, + "roundtrip": { + "p50": 126.14400684833527, + "p90": 139.20000195503235, + 
"p95": 146.59200608730316, + "p99": 156.89599514007568 + }, + "isolatedSum": { + "p50": 150.239996612072, + "p90": 167.1999990940094, + "p95": 185.82399934530258, + "p99": 211.2320065498352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 86.81599795818329, + "p90": 99.07200187444687, + "p95": 110.52799969911575, + "p99": 122.81599640846252 + }, + "combine": { + "p50": 81.4720019698143, + "p90": 85.88799834251404, + "p95": 97.6639986038208, + "p99": 111.93600296974182 + }, + "roundtrip": { + "p50": 146.11199498176575, + "p90": 155.42399883270264, + "p95": 167.32800006866455, + "p99": 186.0159933567047 + }, + "isolatedSum": { + "p50": 168.2879999279976, + "p90": 184.9600002169609, + "p95": 208.19199830293655, + "p99": 234.75199937820435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.84000092744827, + "p90": 117.0559972524643, + "p95": 128.63999605178833, + "p99": 136.86400651931763 + }, + "combine": { + "p50": 94.40000355243683, + "p90": 103.35999727249146, + "p95": 112.09599673748016, + "p99": 122.6240023970604 + }, + "roundtrip": { + "p50": 179.03999984264374, + "p90": 199.20000433921814, + "p95": 206.1759978532791, + "p99": 218.87999773025513 + }, + "isolatedSum": { + "p50": 198.2400044798851, + "p90": 220.41599452495575, + "p95": 240.7359927892685, + "p99": 259.488008916378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dc6a8308", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_608ab302", + "comparisonKey": "670bec81c18ed4ee", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:13.669316+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.98399710655212, + "p90": 100.92800110578537, + "p95": 113.63200098276138, + "p99": 132.09599256515503 + }, + "combine": { + "p50": 69.7920024394989, + "p90": 83.64800363779068, + "p95": 90.81599861383438, + "p99": 127.77599692344666 + }, + "roundtrip": { + "p50": 125.44000148773193, + "p90": 158.52800011634827, + "p95": 166.49599373340607, + "p99": 206.2080055475235 + }, + "isolatedSum": { + "p50": 143.77599954605103, + "p90": 184.57600474357605, + "p95": 204.44799959659576, + "p99": 259.8719894886017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.30399954319, + "p90": 103.45599800348282, + "p95": 110.20799726247787, + "p99": 122.52800166606903 + }, + "combine": { + "p50": 68.92800331115723, + "p90": 86.30400151014328, + "p95": 92.03200042247772, + "p99": 108.0000028014183 + }, + "roundtrip": { + "p50": 123.26399981975555, + "p90": 156.41599893569946, + "p95": 169.69600319862366, + "p99": 199.42399859428406 + }, + "isolatedSum": { + "p50": 143.23200285434723, + "p90": 189.7599995136261, + "p95": 202.2399976849556, + "p99": 230.52800446748734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.30399954319, + "p90": 98.24000298976898, + "p95": 107.58399963378906, + "p99": 123.90399724245071 + }, + "combine": { + 
"p50": 69.98399645090103, + "p90": 83.3280012011528, + "p95": 89.12000060081482, + "p99": 110.52799969911575 + }, + "roundtrip": { + "p50": 125.2480000257492, + "p90": 155.61600029468536, + "p95": 166.01599752902985, + "p99": 190.08000195026398 + }, + "isolatedSum": { + "p50": 144.28799599409103, + "p90": 181.56800419092178, + "p95": 196.70400023460388, + "p99": 234.43199694156647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.65600222349167, + "p90": 102.91200131177902, + "p95": 112.12799698114395, + "p99": 127.61600315570831 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 86.40000224113464, + "p95": 91.2960022687912, + "p99": 102.81600058078766 + }, + "roundtrip": { + "p50": 125.37600100040436, + "p90": 159.87199544906616, + "p95": 173.95199835300446, + "p99": 193.27999651432037 + }, + "isolatedSum": { + "p50": 144.6719989180565, + "p90": 189.31200355291367, + "p95": 203.42399924993515, + "p99": 230.43200373649597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.89600372314453, + "p90": 107.42399841547012, + "p95": 119.84000355005264, + "p99": 150.78400075435638 + }, + "combine": { + "p50": 72.35199958086014, + "p90": 90.14400094747543, + "p95": 96.16000205278397, + "p99": 127.9039978981018 + }, + "roundtrip": { + "p50": 130.2720010280609, + "p90": 164.000004529953, + "p95": 179.36000227928162, + "p99": 208.73600244522095 + }, + "isolatedSum": { + "p50": 145.24800330400467, + "p90": 197.56799936294556, + "p95": 216.0000056028366, + 
"p99": 278.6879986524582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.46400207281113, + "p90": 108.67200046777725, + "p95": 115.99999666213989, + "p99": 136.60800457000732 + }, + "combine": { + "p50": 77.85599678754807, + "p90": 90.68799763917923, + "p95": 96.70399874448776, + "p99": 110.36799848079681 + }, + "roundtrip": { + "p50": 135.3919953107834, + "p90": 158.65600109100342, + "p95": 163.07200491428375, + "p99": 179.29600179195404 + }, + "isolatedSum": { + "p50": 160.3199988603592, + "p90": 199.35999810695648, + "p95": 212.70399540662766, + "p99": 246.97600305080414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.13600236177444, + "p90": 113.98400366306305, + "p95": 121.05599790811539, + "p99": 146.464005112648 + }, + "combine": { + "p50": 86.75199747085571, + "p90": 100.38399696350098, + "p95": 106.75200074911118, + "p99": 126.39999389648438 + }, + "roundtrip": { + "p50": 159.71200168132782, + "p90": 179.00800704956055, + "p95": 186.75200641155243, + "p99": 217.82399713993073 + }, + "isolatedSum": { + "p50": 185.88799983263016, + "p90": 214.36800062656403, + "p95": 227.80799865722656, + "p99": 272.8639990091324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
118.43200027942657, + "p90": 138.08000087738037, + "p95": 146.30399644374847, + "p99": 185.248002409935 + }, + "combine": { + "p50": 107.10400342941284, + "p90": 121.60000205039978, + "p95": 124.44800138473511, + "p99": 142.91200041770935 + }, + "roundtrip": { + "p50": 199.8399943113327, + "p90": 222.84799814224243, + "p95": 228.09599339962006, + "p99": 257.34400749206543 + }, + "isolatedSum": { + "p50": 225.53600370883942, + "p90": 259.68000292778015, + "p95": 270.7519978284836, + "p99": 328.16000282764435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-34056a5c", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_25d672be", + "comparisonKey": "6a4e0e764c1e9bc2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:40.879173+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.55200093984604, + "p90": 87.2960016131401, + "p95": 99.7759997844696, + "p99": 111.23199760913849 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 75.19999891519547, + "p95": 82.20800012350082, + "p99": 91.90399944782257 + }, + "roundtrip": { + "p50": 119.90399658679962, + "p90": 141.59999787807465, + "p95": 153.31199765205383, + "p99": 168.86399686336517 + }, + "isolatedSum": { + "p50": 139.77599889039993, + "p90": 162.49600052833557, + "p95": 181.98399990797043, + "p99": 203.13599705696106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.89600372314453, + "p90": 93.53599697351456, + "p95": 103.13600301742554, + "p99": 205.4399996995926 + }, + "combine": { + "p50": 68.06399673223495, + "p90": 75.48800110816956, + "p95": 81.4720019698143, + "p99": 91.74399822950363 + }, + "roundtrip": { + "p50": 120.41600048542023, + "p90": 139.8400068283081, + 
"p95": 147.90399372577667, + "p99": 162.7199947834015 + }, + "isolatedSum": { + "p50": 140.9600004553795, + "p90": 169.0239980816841, + "p95": 184.60800498723984, + "p99": 297.1839979290962 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.66400212049484, + "p90": 85.34400165081024, + "p95": 98.94400089979172, + "p99": 111.90400272607803 + }, + "combine": { + "p50": 68.64000111818314, + "p90": 80.06399869918823, + "p95": 85.95199882984161, + "p99": 97.95200079679489 + }, + "roundtrip": { + "p50": 120.99199742078781, + "p90": 144.6399986743927, + "p95": 155.58399260044098, + "p99": 170.68800330162048 + }, + "isolatedSum": { + "p50": 142.30400323867798, + "p90": 165.40800034999847, + "p95": 184.89599972963333, + "p99": 209.85600352287292 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.95199686288834, + "p90": 122.6240023970604, + "p95": 130.91200590133667, + "p99": 142.46399700641632 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 78.62400263547897, + "p95": 85.31200140714645, + "p99": 96.73599898815155 + }, + "roundtrip": { + "p50": 123.26399981975555, + "p90": 145.31199634075165, + "p95": 153.6639928817749, + "p99": 165.3439998626709 + }, + "isolatedSum": { + "p50": 143.327996134758, + "p90": 201.24800503253937, + "p95": 216.22400730848312, + "p99": 239.19999599456787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.64000177383423, + "p90": 94.43199634552002, + "p95": 106.72000050544739, + "p99": 118.97599697113037 + }, + "combine": { + "p50": 70.14399766921997, + "p90": 79.74400371313095, + "p95": 83.42400193214417, + "p99": 91.10400080680847 + }, + "roundtrip": { + "p50": 122.97599762678146, + "p90": 158.87999534606934, + "p95": 182.75199830532074, + "p99": 208.5759937763214 + }, + "isolatedSum": { + "p50": 142.7839994430542, + "p90": 174.17600005865097, + "p95": 190.14400243759155, + "p99": 210.07999777793884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.23199981451035, + "p90": 101.02400183677673, + "p95": 109.18399691581726, + "p99": 127.71199643611908 + }, + "combine": { + "p50": 77.31200009584427, + "p90": 87.26400136947632, + "p95": 92.12800115346909, + "p99": 99.10400211811066 + }, + "roundtrip": { + "p50": 133.85599851608276, + "p90": 151.07199549674988, + "p95": 159.10400450229645, + "p99": 175.26400089263916 + }, + "isolatedSum": { + "p50": 156.54399991035461, + "p90": 188.28800320625305, + "p95": 201.31199806928635, + "p99": 226.81599855422974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.65599805116653, + "p90": 107.39199817180634, + "p95": 117.27999895811081, + "p99": 190.2720034122467 + }, + "combine": { + "p50": 86.56000345945358, + "p90": 95.2640026807785, + "p95": 103.4879982471466, + "p99": 
112.5119999051094 + }, + "roundtrip": { + "p50": 157.69599378108978, + "p90": 166.9120043516159, + "p95": 175.00799894332886, + "p99": 187.1359944343567 + }, + "isolatedSum": { + "p50": 181.21600151062012, + "p90": 202.65600085258484, + "p95": 220.76799720525742, + "p99": 302.7840033173561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.10399729013443, + "p90": 129.43999469280243, + "p95": 138.49599659442902, + "p99": 150.4639983177185 + }, + "combine": { + "p50": 105.15200346708298, + "p90": 115.39199948310852, + "p95": 121.76000326871872, + "p99": 129.69599664211273 + }, + "roundtrip": { + "p50": 196.8960016965866, + "p90": 210.55999398231506, + "p95": 218.33600103855133, + "p99": 252.0959973335266 + }, + "isolatedSum": { + "p50": 220.2560007572174, + "p90": 244.83199417591095, + "p95": 260.25599986314774, + "p99": 280.15999495983124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ba7518cd", + "identity": "h200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_25d672be", + "comparisonKey": "a83631469e97f980", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:45.472003+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", 
+ "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.03199714422226, + "p90": 94.97600048780441, + "p95": 100.03200173377991, + "p99": 121.69600278139114 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 74.81600344181061, + "p95": 82.84799754619598, + "p99": 89.9839997291565 + }, + "roundtrip": { + "p50": 119.4240003824234, + "p90": 142.5279974937439, + "p95": 150.59199929237366, + "p99": 167.9680049419403 + }, + "isolatedSum": { + "p50": 139.3279954791069, + "p90": 169.79200392961502, + "p95": 182.8799992799759, + "p99": 211.68000251054764 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.7040022611618, + "p90": 97.37599641084671, + "p95": 105.76000064611435, + "p99": 126.20800733566284 + }, + "combine": { + "p50": 67.391999065876, + "p90": 78.04799824953079, + "p95": 87.26400136947632, + "p99": 98.78399968147278 + }, + "roundtrip": { + "p50": 118.81600320339203, + "p90": 145.63199877738953, + "p95": 154.94400262832642, + "p99": 171.03999853134155 + }, + "isolatedSum": { + "p50": 140.0960013270378, + "p90": 175.4239946603775, + "p95": 193.02400201559067, + "p99": 224.99200701713562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.76800274848938, + "p90": 86.5280032157898, + "p95": 100.41599720716476, + "p99": 113.53600025177002 + }, + "combine": { + "p50": 67.391999065876, + "p90": 71.03999704122543, + "p95": 79.45600152015686, + "p99": 92.3520028591156 + }, + "roundtrip": { + "p50": 119.9679970741272, + "p90": 142.68800616264343, + "p95": 155.16799688339233, + "p99": 178.20799350738525 + }, + "isolatedSum": { + "p50": 140.1600018143654, + "p90": 157.56800025701523, + "p95": 179.87199872732162, + "p99": 205.88800311088562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.05599749088287, + "p90": 96.19200229644775, + "p95": 104.032002389431, + 
"p99": 111.39199882745743 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 76.83199644088745, + "p95": 87.64799684286118, + "p99": 99.7759997844696 + }, + "roundtrip": { + "p50": 120.25599926710129, + "p90": 146.55999839305878, + "p95": 156.8319946527481, + "p99": 169.50400173664093 + }, + "isolatedSum": { + "p50": 141.27999544143677, + "p90": 173.0239987373352, + "p95": 191.67999923229218, + "p99": 211.16799861192703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.89600372314453, + "p90": 85.95199882984161, + "p95": 99.39199686050415, + "p99": 112.03200370073318 + }, + "combine": { + "p50": 70.72000205516815, + "p90": 77.40800082683563, + "p95": 87.55200356245041, + "p99": 104.51199859380722 + }, + "roundtrip": { + "p50": 123.3920007944107, + "p90": 143.90400052070618, + "p95": 155.32800555229187, + "p99": 170.23999989032745 + }, + "isolatedSum": { + "p50": 143.61600577831268, + "p90": 163.35999965667725, + "p95": 186.94400042295456, + "p99": 216.5440022945404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.25600016117096, + "p90": 103.80800068378448, + "p95": 113.21599781513214, + "p99": 127.51999497413635 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 88.67199718952179, + "p95": 96.8639999628067, + "p99": 103.7760004401207 + }, + "roundtrip": { + "p50": 134.75200533866882, + "p90": 158.49600732326508, + "p95": 170.0800061225891, + "p99": 195.16800343990326 + }, + "isolatedSum": { + "p50": 157.50399976968765, + "p90": 
192.47999787330627, + "p95": 210.07999777793884, + "p99": 231.29599541425705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.8079993724823, + "p90": 111.07199639081955, + "p95": 119.55200135707855, + "p99": 202.4639993906021 + }, + "combine": { + "p50": 86.33600175380707, + "p90": 101.53599828481674, + "p95": 105.15200346708298, + "p99": 117.18399822711945 + }, + "roundtrip": { + "p50": 158.75199437141418, + "p90": 171.87200486660004, + "p95": 180.67200481891632, + "p99": 195.16800343990326 + }, + "isolatedSum": { + "p50": 182.14400112628937, + "p90": 212.6079946756363, + "p95": 224.70400482416153, + "p99": 319.64799761772156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.23199826478958, + "p90": 124.06399846076965, + "p95": 137.34400272369385, + "p99": 148.28799664974213 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 111.42399907112122, + "p95": 120.09599804878235, + "p99": 130.62399625778198 + }, + "roundtrip": { + "p50": 195.80799341201782, + "p90": 206.2399983406067, + "p95": 215.2000069618225, + "p99": 235.71200668811798 + }, + "isolatedSum": { + "p50": 218.30400079488754, + "p90": 235.48799753189087, + "p95": 257.4400007724762, + "p99": 278.9119929075241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": 
"cx-07a0efce", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h200_b93edec2", + "comparisonKey": "7e9fcfd90631d352", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:13.651415+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.87999647855759, + "p90": 125.88800489902496, + "p95": 134.8160058259964, + "p99": 155.93600273132324 + }, + "combine": { + "p50": 68.86400282382965, + "p90": 90.52799642086029, + "p95": 92.22400188446045, + "p99": 102.75200009346008 + }, + "roundtrip": { + "p50": 118.07999759912491, + "p90": 145.31199634075165, + "p95": 159.36000645160675, + "p99": 193.88799369335175 + }, + "isolatedSum": { + "p50": 143.74399930238724, + "p90": 216.41600131988525, + "p95": 227.04000771045685, + "p99": 258.6880028247833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 92.79999881982803, + "p90": 125.15200674533844, + "p95": 133.69600474834442, + "p99": 143.90400052070618 + }, + "combine": { + "p50": 76.7040029168129, + "p90": 99.13600236177444, + "p95": 100.44799745082855, + "p99": 111.10399663448334 + }, + "roundtrip": { + "p50": 144.28800344467163, + "p90": 180.60800433158875, + "p95": 191.13600254058838, + "p99": 236.76800727844238 + }, + "isolatedSum": { + "p50": 169.50400173664093, + "p90": 224.28800910711288, + "p95": 234.14400219917297, + "p99": 255.0079971551895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.02400577068329, + "p90": 132.22399353981018, + "p95": 136.80000603199005, + "p99": 146.62399888038635 + }, + "combine": { + "p50": 110.01600325107574, + "p90": 124.4800016283989, 
+ "p95": 128.83199751377106, + "p99": 138.2399946451187 + }, + "roundtrip": { + "p50": 208.3200067281723, + "p90": 223.87200593948364, + "p95": 232.96000063419342, + "p99": 250.11199712753296 + }, + "isolatedSum": { + "p50": 235.04000902175903, + "p90": 256.7039951682091, + "p95": 265.6320035457611, + "p99": 284.86399352550507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fc6a8f57", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h200_c940d95a", + "comparisonKey": "4e05055ca286954a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:31.258470+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.23999840021133, + "p90": 81.56800270080566, + "p95": 88.25600147247314, + "p99": 101.95200145244598 + }, + "combine": { + "p50": 67.23199784755707, + "p90": 89.66399729251862, + "p95": 94.7519987821579, + "p99": 113.6000007390976 + }, + "roundtrip": { + "p50": 117.5680011510849, + "p90": 136.19199395179749, + "p95": 163.87200355529785, + "p99": 205.21600544452667 + }, + "isolatedSum": { + "p50": 137.4719962477684, + "p90": 171.23199999332428, + "p95": 183.00800025463104, + "p99": 215.55200219154358 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.13599908351898, + "p90": 112.60800063610077, + "p95": 129.15199995040894, + "p99": 141.2159949541092 + }, + "combine": { + "p50": 75.13599842786789, + "p90": 82.8159973025322, + "p95": 89.88799899816513, + "p99": 104.60799932479858 + }, + "roundtrip": { + "p50": 135.29600203037262, + "p90": 164.41600024700165, + "p95": 190.14400243759155, + "p99": 212.54399418830872 + }, + "isolatedSum": { + "p50": 
154.27199751138687, + "p90": 195.42399793863297, + "p95": 219.03999894857407, + "p99": 245.82399427890778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.99999797344208, + "p90": 143.10400187969208, + "p95": 149.82399344444275, + "p99": 161.6320013999939 + }, + "combine": { + "p50": 112.89600282907486, + "p90": 142.07999408245087, + "p95": 144.54400539398193, + "p99": 171.58399522304535 + }, + "roundtrip": { + "p50": 209.9200040102005, + "p90": 249.15200471878052, + "p95": 253.4720003604889, + "p99": 268.095999956131 + }, + "isolatedSum": { + "p50": 236.89600080251694, + "p90": 285.18399596214294, + "p95": 294.3679988384247, + "p99": 333.21599662303925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bb714a8d", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h200_c840d7c7", + "comparisonKey": "321cafabc6d996e5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:48.487130+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · 
alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.98399645090103, + "p90": 89.02399986982346, + "p95": 97.15200215578079, + "p99": 121.08799815177917 + }, + "combine": { + "p50": 66.11199676990509, + "p90": 73.21599870920181, + "p95": 80.73599636554718, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 114.78400230407715, + "p90": 136.6720050573349, + "p95": 146.2399959564209, + "p99": 160.12799739837646 + }, + "isolatedSum": { + "p50": 136.09599322080612, + "p90": 162.23999857902527, + "p95": 177.88799852132797, + "p99": 212.70399540662766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 
3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.8159973025322, + "p90": 134.0479999780655, + "p95": 140.70400595664978, + "p99": 156.38400614261627 + }, + "combine": { + "p50": 75.00799745321274, + "p90": 86.46400272846222, + "p95": 92.32000261545181, + "p99": 103.42399775981903 + }, + "roundtrip": { + "p50": 132.38400220870972, + "p90": 149.6960073709488, + "p95": 158.33599865436554, + "p99": 172.8000044822693 + }, + "isolatedSum": { + "p50": 157.82399475574493, + "p90": 220.5120027065277, + "p95": 233.0240085721016, + "p99": 259.8080039024353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.78399682044983, + "p90": 144.03200149536133, + "p95": 152.79999375343323, + "p99": 165.18400609493256 + }, + "combine": { + "p50": 109.63200032711029, + "p90": 123.48800152540207, + "p95": 129.2800009250641, + "p99": 136.89599931240082 + }, + "roundtrip": { + "p50": 207.58399367332458, + "p90": 221.3120013475418, + "p95": 228.44800353050232, + "p99": 239.6160066127777 + }, + "isolatedSum": { + "p50": 236.41599714756012, + "p90": 267.5200030207634, + "p95": 282.0799946784973, + "p99": 302.0800054073334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5267ba4f", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": 
"h200_c740d634", + "comparisonKey": "8f00aa02a5bf3cfe", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:06.025049+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
68.51200014352798, + "p90": 77.15199887752533, + "p95": 88.95999938249588, + "p99": 112.89600282907486 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 69.05599683523178, + "p95": 71.99999690055847, + "p99": 82.36800134181976 + }, + "roundtrip": { + "p50": 118.43200027942657, + "p90": 130.8480054140091, + "p95": 141.4719969034195, + "p99": 207.58399367332458 + }, + "isolatedSum": { + "p50": 132.35200196504593, + "p90": 146.2079957127571, + "p95": 160.95999628305435, + "p99": 195.26400417089462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.02399855852127, + "p90": 93.9520001411438, + "p95": 108.25599730014801, + "p99": 136.54400408267975 + }, + "combine": { + "p50": 73.2479989528656, + "p90": 77.82399654388428, + "p95": 84.25600081682205, + "p99": 96.99200093746185 + }, + "roundtrip": { + "p50": 131.67999684810638, + "p90": 138.94400000572205, + "p95": 149.1200029850006, + "p99": 175.9359985589981 + }, + "isolatedSum": { + "p50": 154.27199751138687, + "p90": 171.77599668502808, + "p95": 192.51199811697006, + "p99": 233.5360050201416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.31200051307678, + "p90": 136.4479959011078, + "p95": 143.51999759674072, + "p99": 170.04799842834473 + }, + "combine": { + "p50": 109.72800105810165, + "p90": 114.88000303506851, + "p95": 120.86399644613266, + "p99": 130.94399869441986 + }, + "roundtrip": { + "p50": 207.58399367332458, + "p90": 218.23999285697937, + "p95": 226.43199563026428, + "p99": 
248.54399263858795 + }, + "isolatedSum": { + "p50": 235.04000157117844, + "p90": 251.3279989361763, + "p95": 264.3839940428734, + "p99": 300.9919971227646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-17b0fe4b", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h200_09ca428a", + "comparisonKey": "f9875440020c77b7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:53.288904+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.11999797821045, + "p90": 94.17600184679031, + "p95": 102.30399668216705, + "p99": 114.81600254774094 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 81.216000020504, + "p95": 87.39200234413147, + "p99": 99.13600236177444 + }, + "roundtrip": { + "p50": 124.41600114107132, + "p90": 148.19200336933136, + "p95": 156.09599649906158, + "p99": 174.94399845600128 + }, + "isolatedSum": { + "p50": 143.64799857139587, + "p90": 175.3920018672943, + "p95": 189.69599902629852, + "p99": 213.95200490951538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.56800138950348, + "p90": 92.51199662685394, + "p95": 102.81600058078766, + "p99": 119.48800086975098 + }, + "combine": { + "p50": 70.65600156784058, + "p90": 80.79999685287476, + "p95": 90.7839983701706, + "p99": 121.60000205039978 + }, + "roundtrip": { + "p50": 126.0479986667633, + "p90": 148.8640010356903, + "p95": 158.2079976797104, + "p99": 170.6559956073761 + }, + "isolatedSum": { + "p50": 144.22400295734406, + "p90": 173.3119934797287, + "p95": 193.59999895095825, + "p99": 241.08800292015076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + 
"recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.35199958086014, + "p90": 85.05599945783615, + "p95": 95.77599912881851, + "p99": 110.33599823713303 + }, + "combine": { + "p50": 70.43199986219406, + "p90": 80.09599894285202, + "p95": 90.40000289678574, + "p99": 175.35999417304993 + }, + "roundtrip": { + "p50": 126.17599964141846, + "p90": 144.25599575042725, + "p95": 154.33600544929504, + "p99": 175.29599368572235 + }, + "isolatedSum": { + "p50": 142.7839994430542, + "p90": 165.15199840068817, + "p95": 186.17600202560425, + "p99": 285.69599241018295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.60000163316727, + "p90": 92.92799979448318, + "p95": 100.5759984254837, + "p99": 112.03200370073318 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 79.42400127649307, + "p95": 89.79199826717377, + "p99": 98.91200065612793 + }, + "roundtrip": { + "p50": 125.18399953842163, + "p90": 149.4079977273941, + "p95": 158.1760048866272, + "p99": 189.43999707698822 + }, + "isolatedSum": { + "p50": 144.44800466299057, + "p90": 172.35200107097626, + "p95": 190.36799669265747, + "p99": 210.94400435686111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.7040022611618, + "p90": 86.87999844551086, + "p95": 96.76799923181534, + "p99": 112.60800063610077 + }, + "combine": { + "p50": 74.94399696588516, + "p90": 78.91199737787247, + "p95": 
89.05600011348724, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 125.98399817943573, + "p90": 141.12000167369843, + "p95": 147.45600521564484, + "p99": 173.24799299240112 + }, + "isolatedSum": { + "p50": 147.64799922704697, + "p90": 165.79199582338333, + "p95": 185.82399934530258, + "p99": 210.07999777793884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.11200070381165, + "p90": 100.70399940013885, + "p95": 112.12799698114395, + "p99": 124.83199685811996 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 87.99999952316284, + "p95": 97.05600142478943, + "p99": 107.93600231409073 + }, + "roundtrip": { + "p50": 143.71199905872345, + "p90": 160.60799360275269, + "p95": 165.92000424861908, + "p99": 182.46400356292725 + }, + "isolatedSum": { + "p50": 169.6000024676323, + "p90": 188.7039989233017, + "p95": 209.18399840593338, + "p99": 232.7679991722107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 105.31199723482132, + "p90": 115.7120019197464, + "p95": 127.58399546146393, + "p99": 138.0160003900528 + }, + "combine": { + "p50": 94.81599926948547, + "p90": 100.832000374794, + "p95": 107.58399963378906, + "p99": 120.83200365304947 + }, + "roundtrip": { + "p50": 176.03200674057007, + "p90": 186.75200641155243, + "p95": 195.0719952583313, + "p99": 206.59199357032776 + }, + "isolatedSum": { + "p50": 200.1279965043068, + "p90": 216.5440022945404, + "p95": 235.167995095253, + "p99": 258.84800404310226 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.74399733543396, + "p90": 139.29599523544312, + "p95": 147.77599275112152, + "p99": 167.29600727558136 + }, + "combine": { + "p50": 123.23199957609177, + "p90": 130.94399869441986, + "p95": 137.34400272369385, + "p99": 145.53600549697876 + }, + "roundtrip": { + "p50": 228.03199291229248, + "p90": 246.87999486923218, + "p95": 251.8399953842163, + "p99": 270.1759934425354 + }, + "isolatedSum": { + "p50": 254.97599691152573, + "p90": 270.239993929863, + "p95": 285.11999547481537, + "p99": 312.8320127725601 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-64d96dcc", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h200_1ec4b445", + "comparisonKey": "8962a26053bc6233", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:38.319503+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": 
"balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 61.02399900555611, + "p90": 87.36000210046768, + "p95": 95.0080007314682, + "p99": 108.73600095510483 + }, + "combine": { + "p50": 58.848001062870026, + "p90": 64.25599753856659, + "p95": 74.68800246715546, + "p99": 81.88799768686295 + }, + "roundtrip": { + "p50": 110.59200018644333, + "p90": 137.34400272369385, + "p95": 146.65600657463074, + "p99": 159.39199924468994 + }, + "isolatedSum": { + "p50": 119.87200006842613, + "p90": 151.61599963903427, + "p95": 169.69600319862366, + "p99": 190.62399864196777 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 73.08799773454666, + "p90": 101.59999877214432, + "p95": 108.47999900579453, + "p99": 120.25599926710129 + }, + "combine": { + "p50": 59.90400165319443, + "p90": 67.00800359249115, + "p95": 75.26399940252304, + "p99": 86.40000224113464 + }, + "roundtrip": { + "p50": 113.34399878978729, + "p90": 136.28800213336945, + "p95": 145.02400159835815, + "p99": 164.63999450206757 + }, + "isolatedSum": { + "p50": 132.9919993877411, + "p90": 168.60800236463547, + "p95": 183.74399840831757, + "p99": 206.65600150823593 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.03999900817871, + "p90": 96.3200032711029, + "p95": 108.03200304508209, + "p99": 116.95999652147293 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 78.94399762153625, + "p95": 85.31200140714645, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 126.17599964141846, + "p90": 150.94399452209473, + "p95": 159.36000645160675, + "p99": 177.98399925231934 + }, + "isolatedSum": { + "p50": 152.12799608707428, + "p90": 175.26400089263916, + "p95": 193.34400445222855, + "p99": 212.92799711227417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 80.70400357246399, + "p90": 105.02400249242783, + "p95": 115.23199826478958, + "p99": 130.40000200271606 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 79.23199981451035, + "p95": 89.59999680519104, + "p99": 97.6639986038208 + }, + "roundtrip": { + "p50": 130.048006772995, + "p90": 149.88799393177032, + "p95": 
157.31200575828552, + "p99": 182.17599391937256 + }, + "isolatedSum": { + "p50": 150.7520005106926, + "p90": 184.25600230693817, + "p95": 204.83199506998062, + "p99": 228.06400060653687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a0301aad", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h200_ecb98184", + "comparisonKey": "1e6a8546c1cb2678", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:13.352947+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": 
"f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 67.6800012588501, + "p90": 97.43999689817429, + "p95": 107.32799768447876, + "p99": 173.47200214862823 + }, + "combine": { + "p50": 60.5119988322258, + "p90": 66.3359984755516, + "p95": 75.80800354480743, + "p99": 84.6719965338707 + }, + "roundtrip": { + "p50": 112.38399893045425, + "p90": 132.89600610733032, + "p95": 145.31199634075165, + "p99": 172.7360039949417 + }, + "isolatedSum": { + "p50": 128.1920000910759, + "p90": 163.7759953737259, + "p95": 183.1360012292862, + "p99": 258.14399868249893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.96000355482101, + "p90": 90.97599983215332, + "p95": 99.45599734783173, + "p99": 120.54400146007538 + }, + "combine": { + "p50": 60.83200126886368, + "p90": 69.88800317049026, + "p95": 80.6720033288002, + "p99": 120.06399780511856 + }, + "roundtrip": { + "p50": 117.47200042009354, + "p90": 153.05599570274353, + "p95": 161.79199516773224, + "p99": 227.35999524593353 + }, + "isolatedSum": { + "p50": 129.7920048236847, + "p90": 160.86400300264359, + "p95": 180.12800067663193, + "p99": 240.60799926519394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 
458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.29599899053574, + "p90": 89.85599875450134, + "p95": 99.71199929714203, + "p99": 117.0239970088005 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 68.51200014352798, + "p95": 74.62400197982788, + "p99": 82.30400085449219 + }, + "roundtrip": { + "p50": 111.87200248241425, + "p90": 137.1839940547943, + "p95": 145.1520025730133, + "p99": 186.81600689888 + }, + "isolatedSum": { + "p50": 131.96799904108047, + "p90": 158.36799889802933, + "p95": 174.3360012769699, + "p99": 199.3279978632927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.89600372314453, + "p90": 96.79999947547913, + "p95": 107.744000852108, + "p99": 224.57599639892578 + }, + "combine": { + "p50": 61.08799949288368, + "p90": 68.67200136184692, + "p95": 79.64800298213959, + "p99": 88.73599767684937 + }, + "roundtrip": { + "p50": 114.62400108575821, + "p90": 138.59200477600098, + "p95": 149.3760049343109, + "p99": 190.49599766731262 + }, + "isolatedSum": { + "p50": 133.9840032160282, + "p90": 165.47200083732605, + "p95": 187.3920038342476, + "p99": 313.31199407577515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.13599842786789, + "p90": 95.29600292444229, + "p95": 106.81600123643875, + "p99": 117.5680011510849 + }, + "combine": { + "p50": 62.55999952554703, + "p90": 70.56000083684921, + 
"p95": 80.19199967384338, + "p99": 87.00799942016602 + }, + "roundtrip": { + "p50": 116.44800007343292, + "p90": 138.17599415779114, + "p95": 147.90399372577667, + "p99": 182.3039948940277 + }, + "isolatedSum": { + "p50": 137.69599795341492, + "p90": 165.8560037612915, + "p95": 187.00800091028214, + "p99": 204.57600057125092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.0479975938797, + "p90": 95.42399644851685, + "p95": 102.08000242710114, + "p99": 111.68000102043152 + }, + "combine": { + "p50": 65.24799764156342, + "p90": 75.03999769687653, + "p95": 80.99199831485748, + "p99": 91.58399701118469 + }, + "roundtrip": { + "p50": 118.04799735546112, + "p90": 139.80799913406372, + "p95": 150.07999539375305, + "p99": 162.4000072479248 + }, + "isolatedSum": { + "p50": 139.29599523544312, + "p90": 170.46399414539337, + "p95": 183.07200074195862, + "p99": 203.2639980316162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 78.36800068616867, + "p90": 99.58399832248688, + "p95": 108.06400328874588, + "p99": 121.98399752378464 + }, + "combine": { + "p50": 68.89600306749344, + "p90": 76.1599987745285, + "p95": 83.71199667453766, + "p99": 92.79999881982803 + }, + "roundtrip": { + "p50": 125.34399330615997, + "p90": 145.82400023937225, + "p95": 153.08800339698792, + "p99": 166.72000288963318 + }, + "isolatedSum": { + "p50": 147.2640037536621, + "p90": 175.74399709701538, + "p95": 191.77599996328354, + "p99": 214.78399634361267 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.27200257778168, + "p90": 104.8320010304451, + "p95": 110.11199653148651, + "p99": 118.72000247240067 + }, + "combine": { + "p50": 83.5840031504631, + "p90": 96.19200229644775, + "p95": 98.62399846315384, + "p99": 107.87200182676315 + }, + "roundtrip": { + "p50": 153.3759981393814, + "p90": 163.96799683570862, + "p95": 173.8239973783493, + "p99": 185.88800728321075 + }, + "isolatedSum": { + "p50": 177.85600572824478, + "p90": 201.02400332689285, + "p95": 208.73599499464035, + "p99": 226.59200429916382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2146ba1b", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h200_9bd5ea5a", + "comparisonKey": "10c7a860d1793705", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:03.807615+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 
0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.83999633789062, + "p90": 89.40800279378891, + "p95": 97.72799909114838, + "p99": 109.24799740314484 + }, + "combine": { + "p50": 70.49600034952164, + "p90": 79.99999821186066, + "p95": 84.28800106048584, + "p99": 90.68799763917923 + }, + "roundtrip": { + "p50": 125.08800625801086, + "p90": 136.06399297714233, + "p95": 145.02400159835815, + "p99": 178.97599935531616 + }, + "isolatedSum": { + "p50": 146.33599668741226, + "p90": 169.40800100564957, + "p95": 182.01600015163422, + "p99": 199.93599504232407 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + 
"p50": 81.98399841785431, + "p90": 96.92800045013428, + "p95": 108.31999778747559, + "p99": 217.98400580883026 + }, + "combine": { + "p50": 78.49600166082382, + "p90": 86.36800199747086, + "p95": 92.0960009098053, + "p99": 105.85600137710571 + }, + "roundtrip": { + "p50": 140.47999680042267, + "p90": 154.1759967803955, + "p95": 162.432000041008, + "p99": 182.75199830532074 + }, + "isolatedSum": { + "p50": 160.48000007867813, + "p90": 183.29600244760513, + "p95": 200.41599869728088, + "p99": 323.840007185936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.11999905109406, + "p90": 134.17600095272064, + "p95": 139.0399932861328, + "p99": 150.11200308799744 + }, + "combine": { + "p50": 118.72000247240067, + "p90": 124.35200065374374, + "p95": 131.20000064373016, + "p99": 139.8400068283081 + }, + "roundtrip": { + "p50": 222.59199619293213, + "p90": 234.3679964542389, + "p95": 240.12799561023712, + "p99": 259.71201062202454 + }, + "isolatedSum": { + "p50": 243.84000152349472, + "p90": 258.5280016064644, + "p95": 270.239993929863, + "p99": 289.95200991630554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3fccdb51", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h200_c1eeeda2", + "comparisonKey": "ce14e6ce451dd2ad", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:21.120117+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.27999919652939, + "p90": 79.13599908351898, + "p95": 85.34400165081024, + "p99": 95.51999717950821 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 72.83200323581696, + "p95": 76.99199765920639, + "p99": 
83.52000266313553 + }, + "roundtrip": { + "p50": 122.17599898576736, + "p90": 131.32800161838531, + "p95": 135.83999872207642, + "p99": 159.743994474411 + }, + "isolatedSum": { + "p50": 142.68799871206284, + "p90": 151.96800231933594, + "p95": 162.33599931001663, + "p99": 179.03999984264374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.11999928951263, + "p90": 89.6959975361824, + "p95": 94.40000355243683, + "p99": 118.23999881744385 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 81.31200075149536, + "p95": 85.53600311279297, + "p99": 108.06400328874588 + }, + "roundtrip": { + "p50": 138.62399756908417, + "p90": 147.2959965467453, + "p95": 152.22400426864624, + "p99": 161.72799468040466 + }, + "isolatedSum": { + "p50": 158.36799889802933, + "p90": 171.00799828767776, + "p95": 179.9360066652298, + "p99": 226.30400210618973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.87199699878693, + "p90": 132.25600123405457, + "p95": 138.0160003900528, + "p99": 153.4080058336258 + }, + "combine": { + "p50": 117.91999638080597, + "p90": 122.30399996042252, + "p95": 125.56800246238708, + "p99": 133.760005235672 + }, + "roundtrip": { + "p50": 220.2560007572174, + "p90": 227.80799865722656, + "p95": 231.58399760723114, + "p99": 240.7359927892685 + }, + "isolatedSum": { + "p50": 241.7919933795929, + "p90": 254.56000119447708, + "p95": 263.5840028524399, + "p99": 287.1680110692978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e5ec67c", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h200_c0eeec0f", + "comparisonKey": "d73640f9a51a81d6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:39.034538+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.73600250482559, + "p90": 77.63200253248215, + "p95": 83.29600095748901, + "p99": 103.20000350475311 + }, + "combine": { + "p50": 68.86400282382965, + "p90": 71.96799665689468, + "p95": 78.72000336647034, + "p99": 87.10400015115738 + }, + "roundtrip": { + "p50": 118.72000247240067, + "p90": 130.048006772995, + "p95": 139.20000195503235, + "p99": 154.94400262832642 + }, + "isolatedSum": { + "p50": 141.60000532865524, + "p90": 149.59999918937683, + "p95": 162.01600432395935, + "p99": 190.3040036559105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.39200168848038, + "p90": 140.00000059604645, + "p95": 147.32800424098969, + "p99": 174.46400225162506 + }, + "combine": { + "p50": 76.83199644088745, + "p90": 80.38400113582611, + "p95": 84.60800349712372, + "p99": 100.22400319576263 + }, + "roundtrip": { + "p50": 134.49600338935852, + "p90": 143.327996134758, + "p95": 157.6640009880066, + "p99": 222.01600670814514 + }, + "isolatedSum": { + "p50": 160.22399812936783, + "p90": 220.38400173187256, + "p95": 231.9360077381134, + "p99": 274.6880054473877 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.69600278139114, + "p90": 131.20000064373016, + "p95": 141.24800264835358, + "p99": 165.72800278663635 + }, + "combine": { + "p50": 117.0559972524643, + "p90": 122.43200093507767, + "p95": 127.80800461769104, + "p99": 138.91200721263885 + }, + "roundtrip": { + "p50": 219.00799870491028, + "p90": 226.46400332450867, + "p95": 233.37599635124207, + "p99": 245.31200528144836 + }, + "isolatedSum": { + "p50": 238.75200003385544, + "p90": 253.63200157880783, + "p95": 269.0560072660446, + "p99": 304.6400099992752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4ffd9109", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h200_bfeeea7c", + "comparisonKey": "e9533923c651089d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:56.640256+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.35199958086014, + "p90": 82.84799754619598, + "p95": 96.51199728250504, + "p99": 110.91200262308121 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 76.22399926185608, + "p95": 81.85599744319916, + "p99": 91.51999652385712 + }, + "roundtrip": { + "p50": 121.98399752378464, + "p90": 144.44799721240997, + "p95": 150.04800260066986, + "p99": 161.72799468040466 + }, + "isolatedSum": { + "p50": 141.4399966597557, + "p90": 159.07199680805206, + "p95": 178.3679947257042, + "p99": 202.43199914693832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.11199939250946, + "p90": 129.2800009250641, + "p95": 138.2399946451187, + "p99": 266.36800169944763 + }, + "combine": { + "p50": 76.67200267314911, + "p90": 
83.26400071382523, + "p95": 91.96799993515015, + "p99": 104.54399883747101 + }, + "roundtrip": { + "p50": 135.1040005683899, + "p90": 149.82399344444275, + "p95": 158.01599621772766, + "p99": 175.00799894332886 + }, + "isolatedSum": { + "p50": 158.78400206565857, + "p90": 212.5440016388893, + "p95": 230.20799458026886, + "p99": 370.91200053691864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.29600006341934, + "p90": 137.37599551677704, + "p95": 147.64800667762756, + "p99": 168.89600455760956 + }, + "combine": { + "p50": 116.99199676513672, + "p90": 121.8239963054657, + "p95": 127.32799351215363, + "p99": 136.80000603199005 + }, + "roundtrip": { + "p50": 219.90400552749634, + "p90": 230.84799945354462, + "p95": 237.88799345493317, + "p99": 248.99199604988098 + }, + "isolatedSum": { + "p50": 240.28799682855606, + "p90": 259.19999182224274, + "p95": 274.9760001897812, + "p99": 305.6960105895996 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ce98174", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h200_bbacb788", + "comparisonKey": "d66634fbe16a8c1f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:44.592616+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.8480030298233, + "p90": 90.71999788284302, + "p95": 99.7759997844696, + "p99": 106.72000050544739 + }, + "combine": { + "p50": 67.03999638557434, + "p90": 73.11999797821045, + "p95": 81.63200318813324, + "p99": 89.4400030374527 + }, + "roundtrip": { + "p50": 119.03999745845795, + "p90": 140.57600498199463, + "p95": 150.81599354743958, + "p99": 168.83200407028198 + }, + "isolatedSum": { + 
"p50": 137.88799941539764, + "p90": 163.83999586105347, + "p95": 181.40800297260284, + "p99": 196.16000354290009 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.03199714422226, + "p90": 97.18400239944458, + "p95": 105.43999820947647, + "p99": 116.57600104808807 + }, + "combine": { + "p50": 67.90400296449661, + "p90": 77.504001557827, + "p95": 82.49600231647491, + "p99": 90.7839983701706 + }, + "roundtrip": { + "p50": 119.9679970741272, + "p90": 146.08000218868256, + "p95": 154.84799444675446, + "p99": 166.97600483894348 + }, + "isolatedSum": { + "p50": 139.93600010871887, + "p90": 174.68800395727158, + "p95": 187.93600052595139, + "p99": 207.35999941825867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.29599964618683, + "p90": 124.15999919176102, + "p95": 128.89599800109863, + "p99": 147.5519984960556 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 92.57599711418152, + "p95": 94.36800330877304, + "p99": 102.4319976568222 + }, + "roundtrip": { + "p50": 125.11999905109406, + "p90": 175.48799514770508, + "p95": 182.40000307559967, + "p99": 190.72000682353973 + }, + "isolatedSum": { + "p50": 145.21600306034088, + "p90": 216.73599630594254, + "p95": 223.26400130987167, + "p99": 249.9839961528778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, 
+ "globalTokens": 64, + "dispatch": { + "p50": 73.7600028514862, + "p90": 91.32800251245499, + "p95": 100.73599964380264, + "p99": 114.20799791812897 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 74.46400076150894, + "p95": 88.16000074148178, + "p99": 94.62399780750275 + }, + "roundtrip": { + "p50": 121.56800180673599, + "p90": 151.58399939537048, + "p95": 158.49600732326508, + "p99": 173.92000555992126 + }, + "isolatedSum": { + "p50": 142.72000640630722, + "p90": 165.79200327396393, + "p95": 188.89600038528442, + "p99": 208.8319957256317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 69.92000341415405, + "p90": 94.71999853849411, + "p95": 100.47999769449234, + "p99": 118.07999759912491 + }, + "combine": { + "p50": 70.20799815654755, + "p90": 79.32800054550171, + "p95": 87.96799927949905, + "p99": 96.16000205278397 + }, + "roundtrip": { + "p50": 124.54400211572647, + "p90": 151.58399939537048, + "p95": 161.3759994506836, + "p99": 179.48800325393677 + }, + "isolatedSum": { + "p50": 140.1280015707016, + "p90": 174.04799908399582, + "p95": 188.4479969739914, + "p99": 214.23999965190887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.05599880218506, + "p90": 107.71200060844421, + "p95": 117.3119992017746, + "p99": 183.52000415325165 + }, + "combine": { + "p50": 77.47200131416321, + "p90": 87.3280018568039, + "p95": 94.59199756383896, + "p99": 101.85600072145462 + }, + "roundtrip": { + "p50": 138.08000087738037, + "p90": 162.27200627326965, 
+ "p95": 170.27199268341064, + "p99": 189.4720047712326 + }, + "isolatedSum": { + "p50": 158.52800011634827, + "p90": 195.0400024652481, + "p95": 211.90399676561356, + "p99": 285.37600487470627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 101.34399682283401, + "p90": 127.74400413036346, + "p95": 135.26399433612823, + "p99": 162.36799955368042 + }, + "combine": { + "p50": 89.9839997291565, + "p90": 106.08000308275223, + "p95": 113.02399635314941, + "p99": 127.42400169372559 + }, + "roundtrip": { + "p50": 172.7360039949417, + "p90": 199.20000433921814, + "p95": 205.1839977502823, + "p99": 218.33600103855133 + }, + "isolatedSum": { + "p50": 191.3279965519905, + "p90": 233.8240072131157, + "p95": 248.28799068927765, + "p99": 289.792001247406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.03999811410904, + "p90": 141.92000031471252, + "p95": 146.27200365066528, + "p99": 170.71999609470367 + }, + "combine": { + "p50": 117.27999895811081, + "p90": 126.0479986667633, + "p95": 130.97600638866425, + "p99": 140.70400595664978 + }, + "roundtrip": { + "p50": 222.1439927816391, + "p90": 244.54399943351746, + "p95": 250.71999430656433, + "p99": 285.63201427459717 + }, + "isolatedSum": { + "p50": 240.31999707221985, + "p90": 267.96799898147583, + "p95": 277.24801003932953, + "p99": 311.42400205135345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + 
"recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a29c6b7d", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h200_c32b0e66", + "comparisonKey": "e7d3e6604fc3ae2a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:19.063205+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.41600006818771, + "p90": 79.64800298213959, + "p95": 85.4400023818016, + "p99": 108.76800119876862 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 74.40000027418137, + "p95": 79.9039974808693, + "p99": 87.23200112581253 + }, + "roundtrip": { + "p50": 120.99199742078781, + "p90": 134.0160071849823, + "p95": 144.06399428844452, + "p99": 165.43999314308167 + }, + "isolatedSum": { + "p50": 140.03200083971024, + "p90": 154.04800325632095, + "p95": 165.3439998626709, + "p99": 196.00000232458115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.02399724721909, + "p90": 79.1039988398552, + "p95": 83.64800363779068, + "p99": 98.75199943780899 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 71.84000313282013, + "p95": 76.4480009675026, + "p99": 85.7279971241951 + }, + "roundtrip": { + "p50": 119.74400281906128, + "p90": 136.83199882507324, + "p95": 150.2400040626526, + "p99": 162.30399906635284 + }, + "isolatedSum": { + "p50": 140.1599943637848, + "p90": 150.94400197267532, + "p95": 160.09600460529327, + "p99": 184.4799965620041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.65600222349167, + "p90": 86.40000224113464, + "p95": 
98.49599748849869, + "p99": 125.40799379348755 + }, + "combine": { + "p50": 68.92800331115723, + "p90": 78.52800190448761, + "p95": 83.3280012011528, + "p99": 90.71999788284302 + }, + "roundtrip": { + "p50": 122.17599898576736, + "p90": 132.7040046453476, + "p95": 138.5599970817566, + "p99": 186.20799481868744 + }, + "isolatedSum": { + "p50": 143.5840055346489, + "p90": 164.92800414562225, + "p95": 181.8239986896515, + "p99": 216.12799167633057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.55200159549713, + "p90": 100.28800368309021, + "p95": 108.89600217342377, + "p99": 123.00799787044525 + }, + "combine": { + "p50": 69.63200122117996, + "p90": 79.26400005817413, + "p95": 82.43200182914734, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 124.51200187206268, + "p90": 147.8080004453659, + "p95": 153.34400534629822, + "p99": 163.4880006313324 + }, + "isolatedSum": { + "p50": 145.1840028166771, + "p90": 179.55200374126434, + "p95": 191.3280040025711, + "p99": 214.87999707460403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.87200337648392, + "p90": 84.57600325345993, + "p95": 97.05600142478943, + "p99": 114.3999993801117 + }, + "combine": { + "p50": 70.20799815654755, + "p90": 75.29599964618683, + "p95": 78.11199873685837, + "p99": 83.61600339412689 + }, + "roundtrip": { + "p50": 125.791996717453, + "p90": 138.3039951324463, + "p95": 150.4639983177185, + "p99": 187.42400407791138 + }, + "isolatedSum": { + "p50": 142.08000153303146, + 
"p90": 159.87200289964676, + "p95": 175.1680001616478, + "p99": 198.0160027742386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.14399963617325, + "p90": 90.08000046014786, + "p95": 94.71999853849411, + "p99": 105.85600137710571 + }, + "combine": { + "p50": 77.27999985218048, + "p90": 87.93599903583527, + "p95": 92.12800115346909, + "p99": 96.38399630784988 + }, + "roundtrip": { + "p50": 134.2719942331314, + "p90": 143.8400000333786, + "p95": 150.39999783039093, + "p99": 169.98399794101715 + }, + "isolatedSum": { + "p50": 159.42399948835373, + "p90": 178.01599949598312, + "p95": 186.8479996919632, + "p99": 202.2399976849556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.41599655151367, + "p90": 106.88000172376633, + "p95": 111.80800199508667, + "p99": 266.7199969291687 + }, + "combine": { + "p50": 87.42400258779526, + "p90": 93.28000247478485, + "p95": 97.120001912117, + "p99": 107.84000158309937 + }, + "roundtrip": { + "p50": 158.81599485874176, + "p90": 167.39200055599213, + "p95": 175.6799966096878, + "p99": 283.03998708724976 + }, + "isolatedSum": { + "p50": 183.83999913930893, + "p90": 200.16000419855118, + "p95": 208.92800390720367, + "p99": 374.55999851226807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 115.87200313806534, + "p90": 121.72800302505493, + "p95": 124.76799637079239, + "p99": 135.51999628543854 + }, + "combine": { + "p50": 106.78400099277496, + "p90": 111.7120012640953, + "p95": 115.55200070142746, + "p99": 124.28800016641617 + }, + "roundtrip": { + "p50": 198.46400618553162, + "p90": 211.07199788093567, + "p95": 221.343994140625, + "p99": 381.44001364707947 + }, + "isolatedSum": { + "p50": 222.6560041308403, + "p90": 233.44000428915024, + "p95": 240.31999707221985, + "p99": 259.8079964518547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-559dd0dc", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h200_f2036099", + "comparisonKey": "c1032f03c3bc2904", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:42.926713+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": 
{ + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 74.52800124883652, + "p90": 81.37600123882294, + "p95": 90.40000289678574, + "p99": 104.3199971318245 + }, + "combine": { + "p50": 70.30399888753891, + "p90": 75.1039981842041, + "p95": 79.80799674987793, + "p99": 92.6399976015091 + }, + "roundtrip": { + "p50": 127.42400169372559, + "p90": 138.65600526332855, + "p95": 145.05599439144135, + "p99": 156.8319946527481 + }, + "isolatedSum": { + "p50": 144.83200013637543, + "p90": 156.47999942302704, + "p95": 170.20799964666367, + "p99": 196.9599947333336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 84.03199911117554, + "p90": 94.46399658918381, + "p95": 101.47199779748917, + "p99": 138.68799805641174 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 83.20000022649765, + 
"p95": 88.28800171613693, + "p99": 100.96000134944916 + }, + "roundtrip": { + "p50": 136.9280070066452, + "p90": 148.73600006103516, + "p95": 154.7199934720993, + "p99": 253.1839907169342 + }, + "isolatedSum": { + "p50": 161.98399662971497, + "p90": 177.66399681568146, + "p95": 189.7599995136261, + "p99": 239.6479994058609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 119.03999745845795, + "p90": 128.25599312782288, + "p95": 132.7040046453476, + "p99": 181.66400492191315 + }, + "combine": { + "p50": 108.92800241708755, + "p90": 114.94400352239609, + "p95": 118.27199906110764, + "p99": 125.56800246238708 + }, + "roundtrip": { + "p50": 201.63199305534363, + "p90": 209.6959948539734, + "p95": 215.36000072956085, + "p99": 253.63200902938843 + }, + "isolatedSum": { + "p50": 227.9679998755455, + "p90": 243.19999665021896, + "p95": 250.97600370645523, + "p99": 307.23200738430023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-61f394ea", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h200_ca86eac2", + "comparisonKey": "335c3e5827f35dc8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:25.632034+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.03999769687653, + "p90": 91.96799993515015, + "p95": 101.40799731016159, + "p99": 116.44800007343292 + }, + "combine": { + "p50": 70.23999840021133, + "p90": 78.27199995517731, + "p95": 84.73599702119827, + "p99": 91.13600105047226 + }, + "roundtrip": { + "p50": 126.46399438381195, + "p90": 151.90400183200836, + "p95": 159.2639982700348, + "p99": 170.84799706935883 + }, + "isolatedSum": { + "p50": 
145.27999609708786, + "p90": 170.23999989032745, + "p95": 186.14399433135986, + "p99": 207.58400112390518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.36000144481659, + "p90": 100.41599720716476, + "p95": 111.23199760913849, + "p99": 132.09599256515503 + }, + "combine": { + "p50": 78.43200117349625, + "p90": 91.10400080680847, + "p95": 95.61599791049957, + "p99": 104.96000200510025 + }, + "roundtrip": { + "p50": 136.6720050573349, + "p90": 155.39200603961945, + "p95": 163.80800306797028, + "p99": 185.15199422836304 + }, + "isolatedSum": { + "p50": 161.79200261831284, + "p90": 191.51999801397324, + "p95": 206.84799551963806, + "p99": 237.05599457025528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.1999980211258, + "p90": 132.192000746727, + "p95": 140.32000303268433, + "p99": 152.19199657440186 + }, + "combine": { + "p50": 116.12799763679504, + "p90": 123.77600371837616, + "p95": 131.96800649166107, + "p99": 166.78400337696075 + }, + "roundtrip": { + "p50": 209.47200059890747, + "p90": 221.5999960899353, + "p95": 229.08799350261688, + "p99": 338.9439880847931 + }, + "isolatedSum": { + "p50": 231.32799565792084, + "p90": 255.96800446510315, + "p95": 272.2880095243454, + "p99": 318.9759999513626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + 
}, + { + "id": "cx-8544b7aa", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h200_79a4cecc", + "comparisonKey": "7cc928a5cc2141db", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:30.906200+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + 
"createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.66400212049484, + "p90": 99.10400211811066, + "p95": 110.3999987244606, + "p99": 123.58400225639343 + }, + "combine": { + "p50": 67.03999638557434, + "p90": 78.27199995517731, + "p95": 83.45600217580795, + "p99": 97.28000313043594 + }, + "roundtrip": { + "p50": 118.01599711179733, + "p90": 141.34399592876434, + "p95": 150.4639983177185, + "p99": 164.000004529953 + }, + "isolatedSum": { + "p50": 140.70399850606918, + "p90": 177.37600207328796, + "p95": 193.85600090026855, + "p99": 220.86400538682938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.98399710655212, + "p90": 96.54399752616882, + "p95": 105.24799674749374, + "p99": 115.93600362539291 + }, + "combine": { + "p50": 67.77600198984146, + "p90": 76.80000364780426, + "p95": 87.39200234413147, + "p99": 106.175996363163 + }, + "roundtrip": { + "p50": 118.65600198507309, + "p90": 142.97600090503693, + "p95": 149.05600249767303, + "p99": 161.1199975013733 + }, + "isolatedSum": { + "p50": 141.75999909639359, + "p90": 173.34400117397308, + "p95": 192.6399990916252, + "p99": 222.1119999885559 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.37599992752075, + "p90": 96.89600020647049, + "p95": 104.99200224876404, + "p99": 112.96000331640244 + }, + "combine": { + "p50": 67.84000247716904, + "p90": 75.13599842786789, + "p95": 
81.11999928951263, + "p99": 90.71999788284302 + }, + "roundtrip": { + "p50": 120.44800072908401, + "p90": 150.65599977970123, + "p95": 158.4639996290207, + "p99": 171.74400389194489 + }, + "isolatedSum": { + "p50": 141.2160024046898, + "p90": 172.03199863433838, + "p95": 186.11200153827667, + "p99": 203.68000119924545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.49600100517273, + "p90": 98.30400347709656, + "p95": 105.8880016207695, + "p99": 123.9359974861145 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 79.52000200748444, + "p95": 87.93599903583527, + "p99": 99.07200187444687 + }, + "roundtrip": { + "p50": 122.97599762678146, + "p90": 146.36799693107605, + "p95": 157.1200042963028, + "p99": 173.15199971199036 + }, + "isolatedSum": { + "p50": 143.16800236701965, + "p90": 177.824005484581, + "p95": 193.82400065660477, + "p99": 223.00799936056137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.14399832487106, + "p90": 93.53599697351456, + "p95": 102.84800082445145, + "p99": 123.80799651145935 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 78.20799946784973, + "p95": 88.25600147247314, + "p99": 105.34399747848511 + }, + "roundtrip": { + "p50": 126.20800733566284, + "p90": 146.7200070619583, + "p95": 159.5200002193451, + "p99": 174.01599884033203 + }, + "isolatedSum": { + "p50": 144.31999623775482, + "p90": 171.7439964413643, + "p95": 191.1040022969246, + "p99": 229.15199398994446 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.216000020504, + "p90": 98.49599748849869, + "p95": 106.39999806880951, + "p99": 114.656001329422 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 83.3280012011528, + "p95": 90.94399958848953, + "p99": 99.48799759149551 + }, + "roundtrip": { + "p50": 135.26399433612823, + "p90": 156.2879979610443, + "p95": 164.35199975967407, + "p99": 199.16799664497375 + }, + "isolatedSum": { + "p50": 157.60000050067902, + "p90": 181.8239986896515, + "p95": 197.34399765729904, + "p99": 214.1439989209175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.91200000047684, + "p90": 109.69600081443787, + "p95": 118.56000125408173, + "p99": 128.54400277137756 + }, + "combine": { + "p50": 86.30400151014328, + "p90": 99.0080013871193, + "p95": 105.82400113344193, + "p99": 114.72000181674957 + }, + "roundtrip": { + "p50": 156.54399991035461, + "p90": 173.2800006866455, + "p95": 180.25599420070648, + "p99": 193.02399456501007 + }, + "isolatedSum": { + "p50": 181.21600151062012, + "p90": 208.70400220155716, + "p95": 224.38400238752365, + "p99": 243.26400458812714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.99199676513672, + "p90": 130.20800054073334, + "p95": 
139.3280029296875, + "p99": 151.0079950094223 + }, + "combine": { + "p50": 104.12800312042236, + "p90": 113.63200098276138, + "p95": 120.41600048542023, + "p99": 129.15199995040894 + }, + "roundtrip": { + "p50": 195.93599438667297, + "p90": 210.7200026512146, + "p95": 219.4560021162033, + "p99": 228.67199778556824 + }, + "isolatedSum": { + "p50": 221.11999988555908, + "p90": 243.84000152349472, + "p95": 259.7440034151077, + "p99": 280.15999495983124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-534b37cc", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_740192e9", + "comparisonKey": "049133906f61d24a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:14.503506+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.10399752855301, + "p90": 78.97599786520004, + "p95": 83.3280012011528, + "p99": 95.13600170612335 + }, + "combine": { + "p50": 66.52799993753433, + "p90": 70.78400254249573, + "p95": 73.44000041484833, + "p99": 86.01599931716919 + }, + "roundtrip": { + "p50": 117.8240031003952, + "p90": 131.6159963607788, + "p95": 139.8400068283081, + "p99": 158.07999670505524 + }, + "isolatedSum": { + "p50": 137.63199746608734, + "p90": 149.76000040769577, + "p95": 156.76800161600113, + "p99": 181.15200102329254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.23199850320816, + "p90": 81.727996468544, + "p95": 88.06400001049042, + "p99": 106.01600259542465 + }, + "combine": { + "p50": 66.46399945020676, + "p90": 74.14399832487106, + "p95": 80.4160013794899, + "p99": 93.21600198745728 + }, + "roundtrip": { + "p50": 120.2239990234375, + "p90": 130.3360015153885, + "p95": 135.3600025177002, + "p99": 
161.24799847602844 + }, + "isolatedSum": { + "p50": 137.69599795341492, + "p90": 155.87199479341507, + "p95": 168.48000138998032, + "p99": 199.23200458288193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.27999919652939, + "p90": 81.82399719953537, + "p95": 94.55999732017517, + "p99": 109.15199667215347 + }, + "combine": { + "p50": 66.72000139951706, + "p90": 81.28000050783157, + "p95": 84.86399799585342, + "p99": 129.50399518013 + }, + "roundtrip": { + "p50": 120.60800194740295, + "p90": 134.39999520778656, + "p95": 142.59199798107147, + "p99": 153.31199765205383 + }, + "isolatedSum": { + "p50": 140.00000059604645, + "p90": 163.10399770736694, + "p95": 179.4239953160286, + "p99": 238.65599185228348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.5600014925003, + "p90": 85.9839990735054, + "p95": 95.64799815416336, + "p99": 107.4879989027977 + }, + "combine": { + "p50": 68.28799843788147, + "p90": 71.99999690055847, + "p95": 77.85599678754807, + "p99": 87.52000331878662 + }, + "roundtrip": { + "p50": 121.50400131940842, + "p90": 132.192000746727, + "p95": 138.43199610710144, + "p99": 149.98400211334229 + }, + "isolatedSum": { + "p50": 142.84799993038177, + "p90": 157.98399597406387, + "p95": 173.50399494171143, + "p99": 195.00800222158432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 77.18399912118912, + "p90": 97.47199714183807, + "p95": 107.45599865913391, + "p99": 135.42400300502777 + }, + "combine": { + "p50": 70.43199986219406, + "p90": 80.1599994301796, + "p95": 85.82399785518646, + "p99": 93.75999867916107 + }, + "roundtrip": { + "p50": 125.40799379348755, + "p90": 142.20799505710602, + "p95": 148.3840048313141, + "p99": 184.4480037689209 + }, + "isolatedSum": { + "p50": 147.61599898338318, + "p90": 177.63199657201767, + "p95": 193.27999651432037, + "p99": 229.18400168418884 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.63200318813324, + "p90": 87.61599659919739, + "p95": 90.91199934482574, + "p99": 98.94400089979172 + }, + "combine": { + "p50": 76.12799853086472, + "p90": 84.16000008583069, + "p95": 88.67199718952179, + "p99": 92.57599711418152 + }, + "roundtrip": { + "p50": 137.28000223636627, + "p90": 147.5840061903, + "p95": 155.39200603961945, + "p99": 173.95199835300446 + }, + "isolatedSum": { + "p50": 157.76000171899796, + "p90": 171.77599668502808, + "p95": 179.58399653434753, + "p99": 191.51999801397324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.28800302743912, + "p90": 106.84800148010254, + "p95": 113.37599903345108, + "p99": 151.8400013446808 + }, + "combine": { + "p50": 86.7839977145195, + "p90": 96.89600020647049, + "p95": 103.13600301742554, + "p99": 208.5759937763214 + }, + "roundtrip": { + "p50": 
163.455992937088, + "p90": 177.98399925231934, + "p95": 184.1920018196106, + "p99": 233.40800404548645 + }, + "isolatedSum": { + "p50": 183.07200074195862, + "p90": 203.74400168657303, + "p95": 216.51200205087662, + "p99": 360.4159951210022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.2799996137619, + "p90": 132.192000746727, + "p95": 140.8960074186325, + "p99": 179.51999604701996 + }, + "combine": { + "p50": 112.12799698114395, + "p90": 118.04799735546112, + "p95": 122.81599640846252, + "p99": 132.25600123405457 + }, + "roundtrip": { + "p50": 214.56000208854675, + "p90": 223.29600155353546, + "p95": 227.4560034275055, + "p99": 242.5280064344406 + }, + "isolatedSum": { + "p50": 233.40799659490585, + "p90": 250.2399981021881, + "p95": 263.71200382709503, + "p99": 311.7759972810745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fab913c7", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h200_af66e0b3", + "comparisonKey": "3607955e8b7b1ff0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:00.020392+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 66.04799628257751, + "p90": 77.53600180149078, + "p95": 82.40000158548355, + "p99": 99.04000163078308 + }, + "combine": { + "p50": 61.59999966621399, + "p90": 65.08799642324448, + "p95": 69.47200000286102, + "p99": 78.20799946784973 + }, + "roundtrip": { + "p50": 116.41599982976913, + "p90": 128.38399410247803, + "p95": 137.28000223636627, + "p99": 159.61599349975586 + }, + "isolatedSum": { + "p50": 127.6479959487915, + "p90": 142.62399822473526, + "p95": 151.87200158834457, + "p99": 177.2480010986328 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.17599791288376, + "p90": 80.4160013794899, + "p95": 85.9839990735054, + "p99": 101.1200025677681 + }, + "combine": { + "p50": 62.431998550891876, + "p90": 66.39999896287918, + "p95": 69.69600170850754, + "p99": 77.69600301980972 + }, + "roundtrip": { + "p50": 118.81600320339203, + "p90": 129.7599971294403, + "p95": 132.9600065946579, + "p99": 155.39200603961945 + }, + "isolatedSum": { + "p50": 132.60799646377563, + "p90": 146.81600034236908, + "p95": 155.68000078201294, + "p99": 178.81600558757782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.9040036201477, + "p90": 80.09599894285202, + "p95": 84.22400057315826, + "p99": 96.19200229644775 + }, + "combine": { + "p50": 62.6240000128746, + "p90": 66.81600213050842, + "p95": 71.45600020885468, + "p99": 78.78399640321732 + }, + "roundtrip": { + "p50": 118.9119964838028, + "p90": 128.4479945898056, + "p95": 132.28799402713776, + "p99": 162.56000101566315 + }, + "isolatedSum": { + "p50": 134.5280036330223, + "p90": 146.91200107336044, + "p95": 155.68000078201294, + "p99": 174.97599869966507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.20799881219864, + "p90": 84.83199775218964, + "p95": 94.81599926948547, + "p99": 
257.4720084667206 + }, + "combine": { + "p50": 64.60800021886826, + "p90": 68.76800209283829, + "p95": 71.45600020885468, + "p99": 79.39200103282928 + }, + "roundtrip": { + "p50": 122.01599776744843, + "p90": 132.9919993877411, + "p95": 139.74399864673615, + "p99": 192.19200313091278 + }, + "isolatedSum": { + "p50": 138.8159990310669, + "p90": 153.59999984502792, + "p95": 166.27199947834015, + "p99": 336.86400949954987 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.68000257015228, + "p90": 82.78399705886841, + "p95": 87.5839963555336, + "p99": 96.79999947547913 + }, + "combine": { + "p50": 66.04799628257751, + "p90": 70.94399631023407, + "p95": 74.33599978685379, + "p99": 80.76799660921097 + }, + "roundtrip": { + "p50": 117.8240031003952, + "p90": 129.56799566745758, + "p95": 133.98399949073792, + "p99": 143.2960033416748 + }, + "isolatedSum": { + "p50": 141.7279988527298, + "p90": 153.72799336910248, + "p95": 161.9199961423874, + "p99": 177.5679960846901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.23199981451035, + "p90": 88.54400366544724, + "p95": 95.0080007314682, + "p99": 105.76000064611435 + }, + "combine": { + "p50": 73.56800138950348, + "p90": 78.04799824953079, + "p95": 82.07999914884567, + "p99": 121.11999839544296 + }, + "roundtrip": { + "p50": 132.1599930524826, + "p90": 141.12000167369843, + "p95": 147.07200229167938, + "p99": 169.18399930000305 + }, + "isolatedSum": { + "p50": 152.80000120401382, + "p90": 166.59200191497803, + 
"p95": 177.08799988031387, + "p99": 226.8799990415573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.08800166845322, + "p90": 106.36799782514572, + "p95": 110.59200018644333, + "p99": 119.55200135707855 + }, + "combine": { + "p50": 82.33600109815598, + "p90": 87.5839963555336, + "p95": 91.93599969148636, + "p99": 101.9200012087822 + }, + "roundtrip": { + "p50": 156.3200056552887, + "p90": 163.42400014400482, + "p95": 169.5999950170517, + "p99": 186.5600049495697 + }, + "isolatedSum": { + "p50": 179.4240027666092, + "p90": 193.95199418067932, + "p95": 202.5279998779297, + "p99": 221.47200256586075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.35199999809265, + "p90": 131.3599944114685, + "p95": 136.57599687576294, + "p99": 149.4400054216385 + }, + "combine": { + "p50": 107.84000158309937, + "p90": 112.67200112342834, + "p95": 115.9679964184761, + "p99": 123.9359974861145 + }, + "roundtrip": { + "p50": 206.94400370121002, + "p90": 213.95200490951538, + "p95": 217.8560048341751, + "p99": 240.447998046875 + }, + "isolatedSum": { + "p50": 228.19200158119202, + "p90": 244.03199553489685, + "p95": 252.54399329423904, + "p99": 273.376002907753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16635435", + "identity": 
"h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h200_6bff286b", + "comparisonKey": "e2931338c871890f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:35.968611+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.49600034952164, + "p90": 87.93599903583527, + "p95": 98.81599992513657, + "p99": 107.96800255775452 + }, + "combine": { + "p50": 67.03999638557434, + "p90": 72.51200079917908, + "p95": 80.54400235414505, + "p99": 87.90399879217148 + }, + "roundtrip": { + "p50": 118.14399808645248, + "p90": 139.48799669742584, + "p95": 150.04800260066986, + "p99": 162.75200247764587 + }, + "isolatedSum": { + "p50": 137.53599673509598, + "p90": 160.44799983501434, + "p95": 179.36000227928162, + "p99": 195.872001349926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.55200093984604, + "p90": 89.08800035715103, + "p95": 97.50399738550186, + "p99": 115.58400094509125 + }, + "combine": { + "p50": 67.35999882221222, + "p90": 74.52800124883652, + "p95": 81.69600367546082, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 120.7360029220581, + "p90": 139.52000439167023, + "p95": 148.6400067806244, + "p99": 163.10399770736694 + }, + "isolatedSum": { + "p50": 138.91199976205826, + "p90": 163.61600160598755, + "p95": 179.20000106096268, + "p99": 213.82400393486023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.43200051784515, + "p90": 88.16000074148178, + "p95": 102.46399790048599, + "p99": 111.51999980211258 + }, + "combine": { + "p50": 68.57600063085556, + "p90": 75.83999633789062, + "p95": 83.71199667453766, + 
"p99": 118.40000003576279 + }, + "roundtrip": { + "p50": 123.52000176906586, + "p90": 140.09599387645721, + "p95": 151.67999267578125, + "p99": 169.3120002746582 + }, + "isolatedSum": { + "p50": 143.0080011487007, + "p90": 163.9999970793724, + "p95": 186.17599457502365, + "p99": 229.91999983787537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.13599842786789, + "p90": 93.24800223112106, + "p95": 103.80800068378448, + "p99": 125.31200051307678 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 78.78399640321732, + "p95": 85.24800091981888, + "p99": 96.96000069379807 + }, + "roundtrip": { + "p50": 121.15199863910675, + "p90": 149.98400211334229, + "p95": 160.5760008096695, + "p99": 204.12799715995789 + }, + "isolatedSum": { + "p50": 144.54399794340134, + "p90": 172.03199863433838, + "p95": 189.05600160360336, + "p99": 222.27200120687485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.36800003051758, + "p90": 88.22400122880936, + "p95": 101.34399682283401, + "p99": 114.94400352239609 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 81.66400343179703, + "p95": 89.91999924182892, + "p99": 104.63999956846237 + }, + "roundtrip": { + "p50": 124.70400333404541, + "p90": 154.62400019168854, + "p95": 163.13600540161133, + "p99": 192.9280012845993 + }, + "isolatedSum": { + "p50": 146.39999717473984, + "p90": 169.88800466060638, + "p95": 191.26399606466293, + "p99": 219.58400309085846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.23199981451035, + "p90": 98.14400225877762, + "p95": 107.90400207042694, + "p99": 127.9039978981018 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 83.61600339412689, + "p95": 89.9519994854927, + "p99": 105.76000064611435 + }, + "roundtrip": { + "p50": 134.17600095272064, + "p90": 150.11200308799744, + "p95": 158.39999914169312, + "p99": 173.8239973783493 + }, + "isolatedSum": { + "p50": 155.61600029468536, + "p90": 181.7600056529045, + "p95": 197.85600155591965, + "p99": 233.66399854421616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.6959981918335, + "p90": 109.8560020327568, + "p95": 119.13599818944931, + "p99": 135.903999209404 + }, + "combine": { + "p50": 86.94399893283844, + "p90": 95.45599669218063, + "p95": 104.60799932479858, + "p99": 119.77600306272507 + }, + "roundtrip": { + "p50": 159.16800498962402, + "p90": 170.84799706935883, + "p95": 181.08800053596497, + "p99": 194.91200149059296 + }, + "isolatedSum": { + "p50": 180.63999712467194, + "p90": 205.31199872493744, + "p95": 223.7439975142479, + "p99": 255.68000227212906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.60800129175186, + "p90": 126.97599828243256, + "p95": 140.76800644397736, + "p99": 
155.20000457763672 + }, + "combine": { + "p50": 104.12800312042236, + "p90": 112.99200356006622, + "p95": 121.79200351238251, + "p99": 140.3840035200119 + }, + "roundtrip": { + "p50": 196.383997797966, + "p90": 203.8400024175644, + "p95": 212.73599565029144, + "p99": 229.63200509548187 + }, + "isolatedSum": { + "p50": 220.73600441217422, + "p90": 239.96800184249878, + "p95": 262.56000995635986, + "p99": 295.5840080976486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a1e2de6e", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h200_eaca5b26", + "comparisonKey": "264b55134a4b894c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:38.148512+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.29599899053574, + "p90": 95.67999839782715, + "p95": 103.04000228643417, + "p99": 116.31999909877777 + }, + "combine": { + "p50": 67.26399809122086, + "p90": 77.37600058317184, + "p95": 85.28000116348267, + "p99": 95.67999839782715 + }, + "roundtrip": { + "p50": 118.56000125408173, + "p90": 148.00000190734863, + "p95": 155.07200360298157, + "p99": 173.92000555992126 + }, + "isolatedSum": { + "p50": 138.5599970817566, + "p90": 173.055998980999, + "p95": 188.32000344991684, + "p99": 211.99999749660492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.1359977722168, + "p90": 93.37600320577621, + "p95": 101.56799852848053, + "p99": 120.09599804878235 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 74.97599720954895, + "p95": 81.08799904584885, + "p99": 90.71999788284302 + }, + "roundtrip": { + "p50": 119.29599940776825, + "p90": 141.24800264835358, + "p95": 151.8400013446808, + "p99": 
167.13599860668182 + }, + "isolatedSum": { + "p50": 138.43199610710144, + "p90": 168.35200041532516, + "p95": 182.65599757432938, + "p99": 210.81599593162537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.31999933719635, + "p90": 84.89599823951721, + "p95": 98.08000177145004, + "p99": 110.52799969911575 + }, + "combine": { + "p50": 67.52000004053116, + "p90": 71.99999690055847, + "p95": 81.60000294446945, + "p99": 91.71199798583984 + }, + "roundtrip": { + "p50": 119.74400281906128, + "p90": 136.76799833774567, + "p95": 151.39199793338776, + "p99": 167.26399958133698 + }, + "isolatedSum": { + "p50": 139.8399993777275, + "p90": 156.89599514007568, + "p95": 179.6800047159195, + "p99": 202.2399976849556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.06399738788605, + "p90": 90.68799763917923, + "p95": 98.43199700117111, + "p99": 108.67200046777725 + }, + "combine": { + "p50": 67.96800345182419, + "p90": 79.23199981451035, + "p95": 86.87999844551086, + "p99": 100.03200173377991 + }, + "roundtrip": { + "p50": 121.21599912643433, + "p90": 144.99199390411377, + "p95": 150.68799257278442, + "p99": 164.67200219631195 + }, + "isolatedSum": { + "p50": 140.03200083971024, + "p90": 169.91999745368958, + "p95": 185.31199544668198, + "p99": 208.70400220155716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.09599763154984, + "p90": 87.5839963555336, + "p95": 102.4319976568222, + "p99": 117.08799749612808 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 76.19199901819229, + "p95": 83.8719978928566, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 123.36000055074692, + "p90": 140.47999680042267, + "p95": 153.82400155067444, + "p99": 172.2559928894043 + }, + "isolatedSum": { + "p50": 142.11199432611465, + "p90": 163.7759953737259, + "p95": 186.3039955496788, + "p99": 207.19999819993973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.09599894285202, + "p90": 91.67999774217606, + "p95": 103.64799946546555, + "p99": 116.03199690580368 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 87.99999952316284, + "p95": 94.87999975681305, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 135.16800105571747, + "p90": 156.47999942302704, + "p95": 163.64799439907074, + "p99": 174.3679940700531 + }, + "isolatedSum": { + "p50": 156.47999942302704, + "p90": 179.6799972653389, + "p95": 198.5279992222786, + "p99": 217.9199978709221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.21600264310837, + "p90": 110.01600325107574, + "p95": 117.63200163841248, + "p99": 299.8400032520294 + }, + "combine": { + "p50": 86.87999844551086, + "p90": 95.64799815416336, + "p95": 102.9760017991066, + "p99": 112.64000087976456 + }, + "roundtrip": { + 
"p50": 161.9199961423874, + "p90": 174.6560037136078, + "p95": 181.31199479103088, + "p99": 222.46399521827698 + }, + "isolatedSum": { + "p50": 184.09600108861923, + "p90": 205.6640014052391, + "p95": 220.60800343751907, + "p99": 412.480004131794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.87199699878693, + "p90": 131.55199587345123, + "p95": 135.68000495433807, + "p99": 142.71999895572662 + }, + "combine": { + "p50": 113.56800049543381, + "p90": 123.36000055074692, + "p95": 131.1040073633194, + "p99": 138.43199610710144 + }, + "roundtrip": { + "p50": 214.52799439430237, + "p90": 230.6559979915619, + "p95": 237.98400163650513, + "p99": 252.9279887676239 + }, + "isolatedSum": { + "p50": 237.43999749422073, + "p90": 254.91199642419815, + "p95": 266.78401231765747, + "p99": 281.15199506282806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-92d6baca", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h200_e3f779e8", + "comparisonKey": "5e8aa2bd2971d72c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:56.918780+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.80000299215317, + "p90": 79.52000200748444, + "p95": 84.06399935483932, + "p99": 93.72799843549728 + }, + "combine": { + "p50": 68.25599819421768, + "p90": 72.12799787521362, + "p95": 76.73600316047668, + "p99": 81.56800270080566 + }, + "roundtrip": { + "p50": 123.61600250005722, + "p90": 136.9599997997284, + "p95": 144.06399428844452, + "p99": 175.00799894332886 + }, + "isolatedSum": { + "p50": 141.05600118637085, + "p90": 151.64799988269806, + "p95": 160.800002515316, 
+ "p99": 175.29600113630295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.43200051784515, + "p90": 90.65599739551544, + "p95": 99.23200309276581, + "p99": 144.1279947757721 + }, + "combine": { + "p50": 68.1919977068901, + "p90": 72.7040022611618, + "p95": 82.17599987983704, + "p99": 91.32800251245499 + }, + "roundtrip": { + "p50": 122.68800288438797, + "p90": 140.47999680042267, + "p95": 152.41600573062897, + "p99": 181.7920058965683 + }, + "isolatedSum": { + "p50": 142.62399822473526, + "p90": 163.35999965667725, + "p95": 181.40800297260284, + "p99": 235.45599728822708 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.62400197982788, + "p90": 100.00000149011612, + "p95": 110.46399921178818, + "p99": 119.93599683046341 + }, + "combine": { + "p50": 69.05599683523178, + "p90": 73.34399968385696, + "p95": 83.16799998283386, + "p99": 92.32000261545181 + }, + "roundtrip": { + "p50": 123.58400225639343, + "p90": 143.5839980840683, + "p95": 152.41600573062897, + "p99": 171.6800034046173 + }, + "isolatedSum": { + "p50": 143.67999881505966, + "p90": 173.34400117397308, + "p95": 193.63199919462204, + "p99": 212.25599944591522 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.13599842786789, + "p90": 
84.70399677753448, + "p95": 93.44000369310379, + "p99": 113.63200098276138 + }, + "combine": { + "p50": 69.98399645090103, + "p90": 73.95199686288834, + "p95": 79.71200346946716, + "p99": 97.79199957847595 + }, + "roundtrip": { + "p50": 125.34399330615997, + "p90": 139.55199718475342, + "p95": 154.4319987297058, + "p99": 177.91999876499176 + }, + "isolatedSum": { + "p50": 145.11999487876892, + "p90": 158.65599364042282, + "p95": 173.15200716257095, + "p99": 211.42400056123734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.23199915885925, + "p90": 84.25600081682205, + "p95": 88.639996945858, + "p99": 95.48799693584442 + }, + "combine": { + "p50": 72.22399860620499, + "p90": 77.69600301980972, + "p95": 82.30400085449219, + "p99": 93.12000125646591 + }, + "roundtrip": { + "p50": 128.09599936008453, + "p90": 140.44800400733948, + "p95": 146.464005112648, + "p99": 187.0719939470291 + }, + "isolatedSum": { + "p50": 147.45599776506424, + "p90": 161.95200383663177, + "p95": 170.9439978003502, + "p99": 188.60799819231033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.3280012011528, + "p90": 92.41600334644318, + "p95": 101.3759970664978, + "p99": 119.84000355005264 + }, + "combine": { + "p50": 78.04799824953079, + "p90": 84.16000008583069, + "p95": 91.58399701118469, + "p99": 106.49599879980087 + }, + "roundtrip": { + "p50": 137.08800077438354, + "p90": 148.44800531864166, + "p95": 159.93599593639374, + "p99": 179.1680008172989 + }, + "isolatedSum": { 
+ "p50": 161.3759994506836, + "p90": 176.57600343227386, + "p95": 192.9599940776825, + "p99": 226.33600234985352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.3200032711029, + "p90": 109.82400178909302, + "p95": 119.03999745845795, + "p99": 138.40000331401825 + }, + "combine": { + "p50": 87.55200356245041, + "p90": 97.18400239944458, + "p95": 104.38399761915207, + "p99": 114.94400352239609 + }, + "roundtrip": { + "p50": 160.67199409008026, + "p90": 172.70399630069733, + "p95": 184.28799510002136, + "p99": 198.65599274635315 + }, + "isolatedSum": { + "p50": 183.8720068335533, + "p90": 207.0080041885376, + "p95": 223.42399507761002, + "p99": 253.34400683641434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.04799735546112, + "p90": 127.42400169372559, + "p95": 132.83200562000275, + "p99": 145.08800208568573 + }, + "combine": { + "p50": 104.22399640083313, + "p90": 110.46399921178818, + "p95": 115.90400338172913, + "p99": 128.31999361515045 + }, + "roundtrip": { + "p50": 199.0399956703186, + "p90": 212.5760018825531, + "p95": 221.27999365329742, + "p99": 244.1920042037964 + }, + "isolatedSum": { + "p50": 222.27199375629425, + "p90": 237.88800090551376, + "p95": 248.73600900173187, + "p99": 273.4079957008362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + } + ] + }, + { + "id": "cx-b8b6c7b2", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_cc176d35", + "comparisonKey": "f9a9194e0a59db1e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:07.308465+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + 
"createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.80000233650208, + "p90": 76.60800218582153, + "p95": 91.13600105047226, + "p99": 194.87999379634857 + }, + "combine": { + "p50": 62.144000083208084, + "p90": 69.18399780988693, + "p95": 73.44000041484833, + "p99": 83.67999643087387 + }, + "roundtrip": { + "p50": 115.13599753379822, + "p90": 125.72799623012543, + "p95": 137.79200613498688, + "p99": 158.07999670505524 + }, + "isolatedSum": { + "p50": 130.94400241971016, + "p90": 145.79199999570847, + "p95": 164.5760014653206, + "p99": 278.55999022722244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.88800317049026, + "p90": 74.78400319814682, + "p95": 79.03999835252762, + "p99": 92.3520028591156 + }, + "combine": { + "p50": 67.10399687290192, + "p90": 69.24799829721451, + "p95": 71.07199728488922, + "p99": 79.39200103282928 + }, + "roundtrip": { + "p50": 116.2559986114502, + "p90": 123.77600371837616, + "p95": 127.55200266838074, + "p99": 137.472003698349 + }, + "isolatedSum": { + "p50": 136.99200004339218, + "p90": 144.03200149536133, + "p95": 150.11199563741684, + "p99": 171.74400389194489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.71200215816498, + "p90": 77.56800204515457, + "p95": 87.26400136947632, + "p99": 106.6880002617836 + }, + "combine": { + "p50": 66.3359984755516, + "p90": 69.11999732255936, + "p95": 
73.05599749088287, + "p99": 86.68799698352814 + }, + "roundtrip": { + "p50": 117.50400066375732, + "p90": 129.12000715732574, + "p95": 137.92000710964203, + "p99": 152.79999375343323 + }, + "isolatedSum": { + "p50": 138.04800063371658, + "p90": 146.68799936771393, + "p95": 160.3199988603592, + "p99": 193.37599724531174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.06399738788605, + "p90": 81.66400343179703, + "p95": 84.95999872684479, + "p99": 98.33600372076035 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 71.87200337648392, + "p95": 79.23199981451035, + "p99": 92.47999638319016 + }, + "roundtrip": { + "p50": 119.6800023317337, + "p90": 133.85599851608276, + "p95": 144.44799721240997, + "p99": 175.00799894332886 + }, + "isolatedSum": { + "p50": 140.54399728775024, + "p90": 153.53600680828094, + "p95": 164.19199854135513, + "p99": 190.8160001039505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.84000313282013, + "p90": 80.51200211048126, + "p95": 89.88799899816513, + "p99": 140.1280015707016 + }, + "combine": { + "p50": 68.76800209283829, + "p90": 74.33599978685379, + "p95": 76.76800340414047, + "p99": 90.01599997282028 + }, + "roundtrip": { + "p50": 121.88799679279327, + "p90": 133.37600231170654, + "p95": 143.8080072402954, + "p99": 163.10399770736694 + }, + "isolatedSum": { + "p50": 140.60800522565842, + "p90": 154.84800189733505, + "p95": 166.6560024023056, + "p99": 230.14400154352188 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.89599758386612, + "p90": 89.9519994854927, + "p95": 96.19200229644775, + "p99": 110.17599701881409 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 80.6720033288002, + "p95": 86.62399649620056, + "p99": 99.45599734783173 + }, + "roundtrip": { + "p50": 134.2719942331314, + "p90": 143.10400187969208, + "p95": 149.1200029850006, + "p99": 167.58400201797485 + }, + "isolatedSum": { + "p50": 156.47999942302704, + "p90": 170.6240028142929, + "p95": 182.81599879264832, + "p99": 209.6319943666458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.50399738550186, + "p90": 105.53599894046783, + "p95": 108.8000014424324, + "p99": 120.64000219106674 + }, + "combine": { + "p50": 84.99199897050858, + "p90": 89.59999680519104, + "p95": 94.65599805116653, + "p99": 109.0880036354065 + }, + "roundtrip": { + "p50": 159.9999964237213, + "p90": 165.18400609493256, + "p95": 170.17599940299988, + "p99": 181.536003947258 + }, + "isolatedSum": { + "p50": 182.49599635601044, + "p90": 195.13599574565887, + "p95": 203.45599949359894, + "p99": 229.72800582647324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.38400024175644, + "p90": 127.3919939994812, + "p95": 132.28799402713776, + 
"p99": 139.96799290180206 + }, + "combine": { + "p50": 111.29599809646606, + "p90": 117.85600334405899, + "p95": 121.66400253772736, + "p99": 131.1360001564026 + }, + "roundtrip": { + "p50": 211.87199652194977, + "p90": 219.90400552749634, + "p95": 226.59200429916382, + "p99": 246.91200256347656 + }, + "isolatedSum": { + "p50": 231.6799983382225, + "p90": 245.2479973435402, + "p95": 253.9519965648651, + "p99": 271.10399305820465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-65b53e1a", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_6cbbd029", + "comparisonKey": "d26acde69c8c9d6a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:25.646574+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.32799923419952, + "p90": 78.62400263547897, + "p95": 83.10399949550629, + "p99": 96.3520035147667 + }, + "combine": { + "p50": 67.23199784755707, + "p90": 71.16799801588058, + "p95": 73.72800260782242, + "p99": 82.78399705886841 + }, + "roundtrip": { + "p50": 119.84000355005264, + "p90": 128.54400277137756, + "p95": 133.08799266815186, + "p99": 143.10400187969208 + }, + "isolatedSum": { + "p50": 138.5599970817566, + "p90": 149.79200065135956, + "p95": 156.8320021033287, + "p99": 179.1360005736351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.54400104284286, + "p90": 77.85599678754807, + "p95": 81.91999793052673, + "p99": 89.53599631786346 + }, + "combine": { + "p50": 67.48799979686737, + "p90": 70.68800181150436, + "p95": 74.27199929952621, + "p99": 82.43200182914734 + }, + "roundtrip": { + "p50": 120.03199756145477, + "p90": 
130.0799995660782, + "p95": 134.97599959373474, + "p99": 159.64800119400024 + }, + "isolatedSum": { + "p50": 140.03200083971024, + "p90": 148.54399859905243, + "p95": 156.19199723005295, + "p99": 171.9679981470108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.40800017118454, + "p90": 83.48800241947174, + "p95": 88.83199840784073, + "p99": 99.61599856615067 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 72.1919983625412, + "p95": 76.51200145483017, + "p99": 82.78399705886841 + }, + "roundtrip": { + "p50": 122.81599640846252, + "p90": 132.4480026960373, + "p95": 137.05599308013916, + "p99": 155.45600652694702 + }, + "isolatedSum": { + "p50": 142.08000153303146, + "p90": 155.68000078201294, + "p95": 165.3439998626709, + "p99": 182.39999562501907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.62400197982788, + "p90": 80.54400235414505, + "p95": 85.31200140714645, + "p99": 96.44799679517746 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 72.06399738788605, + "p95": 76.38400048017502, + "p99": 87.39200234413147 + }, + "roundtrip": { + "p50": 123.83999675512314, + "p90": 132.35199451446533, + "p95": 136.31999492645264, + "p99": 154.9759954214096 + }, + "isolatedSum": { + "p50": 143.71199905872345, + "p90": 152.6079997420311, + "p95": 161.69600188732147, + "p99": 183.83999913930893 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.83200323581696, + "p90": 80.60800284147263, + "p95": 85.40800213813782, + "p99": 94.87999975681305 + }, + "combine": { + "p50": 70.8480030298233, + "p90": 75.32799988985062, + "p95": 78.46400141716003, + "p99": 84.70399677753448 + }, + "roundtrip": { + "p50": 125.66399574279785, + "p90": 132.9600065946579, + "p95": 138.14400136470795, + "p99": 146.4959979057312 + }, + "isolatedSum": { + "p50": 143.68000626564026, + "p90": 155.93600273132324, + "p95": 163.87200355529785, + "p99": 179.58399653434753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.07999914884567, + "p90": 89.05600011348724, + "p95": 92.76799857616425, + "p99": 101.9200012087822 + }, + "combine": { + "p50": 77.40800082683563, + "p90": 80.54400235414505, + "p95": 84.25600081682205, + "p99": 91.96799993515015 + }, + "roundtrip": { + "p50": 135.19999384880066, + "p90": 140.79999923706055, + "p95": 146.01600170135498, + "p99": 154.65599298477173 + }, + "isolatedSum": { + "p50": 159.4879999756813, + "p90": 169.6000024676323, + "p95": 177.0239993929863, + "p99": 193.88800114393234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.31200271844864, + "p90": 100.67199915647507, + "p95": 105.31199723482132, + "p99": 119.35999989509583 + }, + "combine": { + "p50": 86.91199868917465, + "p90": 91.58399701118469, + "p95": 
93.08800101280212, + "p99": 100.41599720716476 + }, + "roundtrip": { + "p50": 160.64000129699707, + "p90": 168.16000640392303, + "p95": 173.47200214862823, + "p99": 194.240003824234 + }, + "isolatedSum": { + "p50": 180.2240014076233, + "p90": 192.25599616765976, + "p95": 198.39999824762344, + "p99": 219.7759971022606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.23999881744385, + "p90": 122.75200337171555, + "p95": 128.4479945898056, + "p99": 135.04000008106232 + }, + "combine": { + "p50": 103.93600165843964, + "p90": 108.2879975438118, + "p95": 111.61600053310394, + "p99": 120.41600048542023 + }, + "roundtrip": { + "p50": 195.71200013160706, + "p90": 201.9840031862259, + "p95": 205.1839977502823, + "p99": 213.72799575328827 + }, + "isolatedSum": { + "p50": 222.17600047588348, + "p90": 231.04000091552734, + "p95": 240.06399512290955, + "p99": 255.45600056648254 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6c4db8a6", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_0bcc3225", + "comparisonKey": "1e0c0f01c7354aa9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:54.515385+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.12799787521362, + "p90": 79.8719972372055, + "p95": 85.50400286912918, + "p99": 104.09600287675858 + }, + "combine": { + "p50": 67.1359971165657, + "p90": 72.28799909353256, + "p95": 76.99199765920639, + "p99": 84.57600325345993 + }, + "roundtrip": { + "p50": 118.33599954843521, + "p90": 128.48000228405, + "p95": 138.17599415779114, + "p99": 171.64799571037292 + }, + "isolatedSum": { + "p50": 139.26399499177933, + 
"p90": 152.15999633073807, + "p95": 162.49600052833557, + "p99": 188.6720061302185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.20799881219864, + "p90": 97.69599884748459, + "p95": 108.38399827480316, + "p99": 146.62399888038635 + }, + "combine": { + "p50": 67.71200150251389, + "p90": 77.44000107049942, + "p95": 83.52000266313553, + "p99": 108.25599730014801 + }, + "roundtrip": { + "p50": 120.57600170373917, + "p90": 142.68800616264343, + "p95": 151.48800611495972, + "p99": 185.88800728321075 + }, + "isolatedSum": { + "p50": 141.92000031471252, + "p90": 175.135999917984, + "p95": 191.9040009379387, + "p99": 254.87999618053436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.68800246715546, + "p90": 84.3840017914772, + "p95": 91.36000275611877, + "p99": 105.53599894046783 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 72.80000299215317, + "p95": 77.95199751853943, + "p99": 89.34400230646133 + }, + "roundtrip": { + "p50": 121.37600034475327, + "p90": 133.53599607944489, + "p95": 141.63200557231903, + "p99": 163.7759953737259 + }, + "isolatedSum": { + "p50": 143.23200285434723, + "p90": 157.18400478363037, + "p95": 169.3120002746582, + "p99": 194.88000124692917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + 
"dispatch": { + "p50": 74.94399696588516, + "p90": 86.01599931716919, + "p95": 98.94400089979172, + "p99": 114.01599645614624 + }, + "combine": { + "p50": 69.05599683523178, + "p90": 74.91199672222137, + "p95": 82.91199803352356, + "p99": 92.12800115346909 + }, + "roundtrip": { + "p50": 123.4240010380745, + "p90": 141.02399349212646, + "p95": 152.67199277877808, + "p99": 167.39200055599213 + }, + "isolatedSum": { + "p50": 143.99999380111694, + "p90": 160.92799603939056, + "p95": 181.85599893331528, + "p99": 206.14399760961533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.25599884986877, + "p90": 82.5280025601387, + "p95": 91.87199920415878, + "p99": 112.41599917411804 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 75.16799867153168, + "p95": 81.44000172615051, + "p99": 92.06400066614151 + }, + "roundtrip": { + "p50": 124.60800260305405, + "p90": 139.71200585365295, + "p95": 148.95999431610107, + "p99": 162.81600296497345 + }, + "isolatedSum": { + "p50": 142.43199676275253, + "p90": 157.69600123167038, + "p95": 173.3120009303093, + "p99": 204.47999984025955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.77599716186523, + "p90": 95.07200121879578, + "p95": 106.4319983124733, + "p99": 168.7680035829544 + }, + "combine": { + "p50": 77.31200009584427, + "p90": 85.4400023818016, + "p95": 91.26400202512741, + "p99": 105.05600273609161 + }, + "roundtrip": { + "p50": 135.04000008106232, + "p90": 148.5760062932968, + "p95": 
161.56800091266632, + "p99": 177.21599340438843 + }, + "isolatedSum": { + "p50": 161.0879972577095, + "p90": 180.51200360059738, + "p95": 197.6960003376007, + "p99": 273.824006319046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.75999867916107, + "p90": 102.81600058078766, + "p95": 108.70400071144104, + "p99": 119.52000111341476 + }, + "combine": { + "p50": 86.496002972126, + "p90": 91.36000275611877, + "p95": 94.27200257778168, + "p99": 104.25599664449692 + }, + "roundtrip": { + "p50": 158.49600732326508, + "p90": 168.5439944267273, + "p95": 175.99999904632568, + "p99": 189.88800048828125 + }, + "isolatedSum": { + "p50": 180.25600165128708, + "p90": 194.17600333690643, + "p95": 202.97600328922272, + "p99": 223.77599775791168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.49600076675415, + "p90": 130.11200726032257, + "p95": 137.53600418567657, + "p99": 163.35999965667725 + }, + "combine": { + "p50": 103.80800068378448, + "p90": 112.44799941778183, + "p95": 120.44800072908401, + "p99": 134.75200533866882 + }, + "roundtrip": { + "p50": 197.34400510787964, + "p90": 208.0959975719452, + "p95": 215.7759964466095, + "p99": 242.3039972782135 + }, + "isolatedSum": { + "p50": 222.30400145053864, + "p90": 242.5600066781044, + "p95": 257.9840049147606, + "p99": 298.11200499534607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e5f549c8", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_d1bd20fa", + "comparisonKey": "bcf005c1368d470b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:57.095964+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 62.30400130152702, + "p90": 71.42399996519089, + "p95": 75.29599964618683, + "p99": 84.70399677753448 + }, + "combine": { + "p50": 68.89600306749344, + "p90": 73.15199822187424, + "p95": 78.14399898052216, + "p99": 99.42399710416794 + }, + "roundtrip": { + "p50": 109.66400057077408, + "p90": 119.13599818944931, + "p95": 124.25599992275238, + "p99": 134.24000144004822 + }, + "isolatedSum": { + "p50": 131.20000436902046, + "p90": 144.57599818706512, + "p95": 153.43999862670898, + "p99": 184.12799388170242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 62.72000074386597, + "p90": 67.16799736022949, + "p95": 71.96799665689468, + "p99": 87.20000088214874 + }, + "combine": { + "p50": 68.80000233650208, + "p90": 76.4480009675026, + "p95": 83.8719978928566, + "p99": 99.61599856615067 + }, + "roundtrip": { + "p50": 109.76000130176544, + "p90": 119.45600062608719, + "p95": 123.77600371837616, + "p99": 135.93600690364838 + }, + "isolatedSum": { + "p50": 131.52000308036804, + "p90": 143.6159983277321, + "p95": 155.83999454975128, + "p99": 186.8159994482994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 62.07999959588051, + "p90": 75.68000257015228, + "p95": 88.03199976682663, + "p99": 109.95200276374817 + }, + "combine": { + 
"p50": 69.24799829721451, + "p90": 72.60800153017044, + "p95": 77.18399912118912, + "p99": 85.7279971241951 + }, + "roundtrip": { + "p50": 110.944002866745, + "p90": 121.18399888277054, + "p95": 132.54399597644806, + "p99": 166.36799275875092 + }, + "isolatedSum": { + "p50": 131.32799789309502, + "p90": 148.28800410032272, + "p95": 165.21599888801575, + "p99": 195.67999988794327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 62.33600154519081, + "p90": 71.07199728488922, + "p95": 78.015998005867, + "p99": 102.27199643850327 + }, + "combine": { + "p50": 70.17599791288376, + "p90": 79.8719972372055, + "p95": 87.87199854850769, + "p99": 104.25599664449692 + }, + "roundtrip": { + "p50": 112.35199868679047, + "p90": 121.66400253772736, + "p95": 131.84000551700592, + "p99": 145.82400023937225 + }, + "isolatedSum": { + "p50": 132.51199945807457, + "p90": 150.94399452209473, + "p95": 165.8879965543747, + "p99": 206.52799308300018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 58.78400057554245, + "p90": 69.85600292682648, + "p95": 75.99999755620956, + "p99": 85.95199882984161 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 75.99999755620956, + "p95": 80.48000186681747, + "p99": 94.01600062847137 + }, + "roundtrip": { + "p50": 111.10399663448334, + "p90": 122.91199713945389, + "p95": 130.23999333381653, + "p99": 145.82400023937225 + }, + "isolatedSum": { + "p50": 129.85599786043167, + "p90": 145.85600048303604, + "p95": 156.47999942302704, + "p99": 
179.967999458313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 68.70400160551071, + "p90": 76.9599974155426, + "p95": 82.91199803352356, + "p99": 103.5199984908104 + }, + "combine": { + "p50": 77.85599678754807, + "p90": 83.42400193214417, + "p95": 89.31200206279755, + "p99": 100.76799988746643 + }, + "roundtrip": { + "p50": 124.41600114107132, + "p90": 150.78400075435638, + "p95": 169.15200650691986, + "p99": 186.81600689888 + }, + "isolatedSum": { + "p50": 146.55999839305878, + "p90": 160.38399934768677, + "p95": 172.2240000963211, + "p99": 204.28799837827682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 86.7839977145195, + "p90": 102.91200131177902, + "p95": 111.32799834012985, + "p99": 121.66400253772736 + }, + "combine": { + "p50": 87.20000088214874, + "p90": 93.12000125646591, + "p95": 102.39999741315842, + "p99": 114.01599645614624 + }, + "roundtrip": { + "p50": 145.37599682807922, + "p90": 158.2079976797104, + "p95": 174.40000176429749, + "p99": 192.06400215625763 + }, + "isolatedSum": { + "p50": 173.98399859666824, + "p90": 196.03200256824493, + "p95": 213.72799575328827, + "p99": 235.6799989938736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.2879968881607, + "p90": 
119.48800086975098, + "p95": 123.3920007944107, + "p99": 159.7760021686554 + }, + "combine": { + "p50": 105.40799796581268, + "p90": 111.93600296974182, + "p95": 117.0559972524643, + "p99": 129.05600666999817 + }, + "roundtrip": { + "p50": 186.68800592422485, + "p90": 192.3840045928955, + "p95": 198.7520009279251, + "p99": 211.04000508785248 + }, + "isolatedSum": { + "p50": 209.6959948539734, + "p90": 231.4240038394928, + "p95": 240.447998046875, + "p99": 288.83200883865356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cd75bc03", + "identity": "h200|deepep|v1|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_1a10f0af", + "comparisonKey": "ec8691dd7160f8c3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:43.676950+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": 
"fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 37.66399994492531, + "p90": 46.23999819159508, + "p95": 51.872000098228455, + "p99": 63.64800035953522 + }, + "combine": { + "p50": 33.59999880194664, + "p90": 41.88799858093262, + "p95": 44.51199993491173, + "p99": 49.31199923157692 + }, + "roundtrip": { + "p50": 51.19999870657921, + "p90": 63.93600255250931, + "p95": 70.91200351715088, + "p99": 78.62400263547897 + }, + "isolatedSum": { + "p50": 71.26399874687195, + "p90": 88.1279967725277, + "p95": 96.38400003314018, + "p99": 112.95999959111214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 36.67199984192848, + "p90": 44.47999969124794, + "p95": 47.807998955249786, + "p99": 55.456001311540604 + }, + "combine": { + "p50": 33.15199911594391, + "p90": 37.50399872660637, + "p95": 42.97599941492081, + "p99": 46.84799909591675 + }, + "roundtrip": { + "p50": 52.41600051522255, + "p90": 67.52000004053116, + "p95": 
74.94399696588516, + "p99": 83.5840031504631 + }, + "isolatedSum": { + "p50": 69.82399895787239, + "p90": 81.98399841785431, + "p95": 90.7839983701706, + "p99": 102.30400040745735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 36.51199862360954, + "p90": 44.415999203920364, + "p95": 49.50400069355965, + "p99": 59.328000992536545 + }, + "combine": { + "p50": 33.08799862861633, + "p90": 37.696000188589096, + "p95": 42.91199892759323, + "p99": 47.16800153255463 + }, + "roundtrip": { + "p50": 51.711998879909515, + "p90": 63.74400109052658, + "p95": 69.023996591568, + "p99": 78.91199737787247 + }, + "isolatedSum": { + "p50": 69.59999725222588, + "p90": 82.11199939250946, + "p95": 92.41599962115288, + "p99": 106.49600252509117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 36.959998309612274, + "p90": 44.544000178575516, + "p95": 53.37600037455559, + "p99": 59.58399921655655 + }, + "combine": { + "p50": 33.504001796245575, + "p90": 41.50399938225746, + "p95": 45.31199857592583, + "p99": 49.44000020623207 + }, + "roundtrip": { + "p50": 54.1439987719059, + "p90": 64.38399851322174, + "p95": 70.27199864387512, + "p99": 81.79199695587158 + }, + "isolatedSum": { + "p50": 70.46400010585785, + "p90": 86.04799956083298, + "p95": 98.68799895048141, + "p99": 109.02399942278862 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 38.975998759269714, + "p90": 49.50400069355965, + "p95": 54.9440011382103, + "p99": 62.20800057053566 + }, + "combine": { + "p50": 35.96799820661545, + "p90": 45.024000108242035, + "p95": 47.32799902558327, + "p99": 54.496001452207565 + }, + "roundtrip": { + "p50": 60.416001826524734, + "p90": 70.30399888753891, + "p95": 74.5600014925003, + "p99": 82.2720006108284 + }, + "isolatedSum": { + "p50": 74.94399696588516, + "p90": 94.52800080180168, + "p95": 102.27200016379356, + "p99": 116.70400202274323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.184001326560974, + "p90": 51.00800096988678, + "p95": 56.73599988222122, + "p99": 66.81600213050842 + }, + "combine": { + "p50": 43.23200136423111, + "p90": 47.200001776218414, + "p95": 53.15199866890907, + "p99": 58.36800113320351 + }, + "roundtrip": { + "p50": 71.55200093984604, + "p90": 79.8719972372055, + "p95": 84.86399799585342, + "p99": 99.58399832248688 + }, + "isolatedSum": { + "p50": 88.41600269079208, + "p90": 98.2080027461052, + "p95": 109.8879985511303, + "p99": 125.18400326371193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 55.93600124120712, + "p90": 64.31999802589417, + "p95": 68.70400160551071, + "p99": 78.78399640321732 + }, + "combine": { + "p50": 57.50399827957153, + "p90": 69.40799951553345, + "p95": 71.19999825954437, + "p99": 100.5759984254837 + }, + "roundtrip": { 
+ "p50": 96.12800180912018, + "p90": 108.22399705648422, + "p95": 112.15999722480774, + "p99": 116.2559986114502 + }, + "isolatedSum": { + "p50": 113.43999952077866, + "p90": 133.7279975414276, + "p95": 139.90399986505508, + "p99": 179.35999482870102 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 78.91199737787247, + "p90": 84.16000008583069, + "p95": 88.8959988951683, + "p99": 93.88799965381622 + }, + "combine": { + "p50": 85.02399921417236, + "p90": 90.08000046014786, + "p95": 94.33600306510925, + "p99": 96.96000069379807 + }, + "roundtrip": { + "p50": 146.7200070619583, + "p90": 155.42399883270264, + "p95": 158.87999534606934, + "p99": 174.46400225162506 + }, + "isolatedSum": { + "p50": 163.93599659204483, + "p90": 174.24000054597855, + "p95": 183.23200196027756, + "p99": 190.8480003476143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3550a297", + "identity": "h200|deepep|v1|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_ad73e1fb", + "comparisonKey": "f8d2a5672a7323ee", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:06.088380+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 37.567999213933945, + "p90": 46.23999819159508, + "p95": 47.58400097489357, + "p99": 55.03999814391136 + }, + "combine": { + "p50": 32.06399828195572, + "p90": 33.824000507593155, + "p95": 36.768000572919846, + "p99": 42.11200028657913 + }, + "roundtrip": { + "p50": 49.695998430252075, + "p90": 54.43200096487999, + "p95": 58.36800113320351, + "p99": 63.231997191905975 + }, + "isolatedSum": { + "p50": 69.63199749588966, + "p90": 80.06399869918823, + "p95": 84.35200154781342, + "p99": 97.1519984304905 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 36.57599911093712, + "p90": 38.84800150990486, + "p95": 43.935999274253845, + "p99": 49.984000623226166 + }, + "combine": { + "p50": 33.215999603271484, + "p90": 35.16799956560135, + "p95": 38.52799907326698, + "p99": 44.60800066590309 + }, + "roundtrip": { + "p50": 52.12799832224846, + "p90": 55.16799911856651, + "p95": 60.19200012087822, + "p99": 70.43199986219406 + }, + "isolatedSum": { + "p50": 69.7919987142086, + "p90": 74.01600107550621, + "p95": 82.46399834752083, + "p99": 94.59200128912926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 36.83200106024742, + "p90": 40.41599854826927, + "p95": 48.287998884916306, + "p99": 56.09599873423576 + }, + "combine": { + "p50": 33.31200033426285, + "p90": 35.679999738931656, + "p95": 41.69600084424019, + "p99": 44.19200122356415 + }, + "roundtrip": { + "p50": 52.928000688552856, + "p90": 61.63199990987778, + "p95": 69.15199756622314, + "p99": 73.85600358247757 + }, + "isolatedSum": { + "p50": 70.14400139451027, + "p90": 76.09599828720093, + "p95": 89.9839997291565, + "p99": 100.28799995779991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 37.408001720905304, + "p90": 44.415999203920364, + "p95": 48.128001391887665, + "p99": 55.10399863123894 + }, + 
"combine": { + "p50": 33.663999289274216, + "p90": 40.192000567913055, + "p95": 42.81599819660187, + "p99": 50.65599828958511 + }, + "roundtrip": { + "p50": 54.17599901556969, + "p90": 62.463998794555664, + "p95": 66.17599725723267, + "p99": 77.88799703121185 + }, + "isolatedSum": { + "p50": 71.07200101017952, + "p90": 84.60799977183342, + "p95": 90.94399958848953, + "p99": 105.75999692082405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 38.176000118255615, + "p90": 44.319998472929, + "p95": 47.74399846792221, + "p99": 54.84800040721893 + }, + "combine": { + "p50": 35.2960005402565, + "p90": 38.975998759269714, + "p95": 42.7200011909008, + "p99": 45.3759990632534 + }, + "roundtrip": { + "p50": 58.75200033187866, + "p90": 63.231997191905975, + "p95": 66.39999896287918, + "p99": 71.52000069618225 + }, + "isolatedSum": { + "p50": 73.47200065851212, + "p90": 83.29599723219872, + "p95": 90.46399965882301, + "p99": 100.22399947047234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.3759990632534, + "p90": 49.66399818658829, + "p95": 54.1439987719059, + "p99": 63.71200084686279 + }, + "combine": { + "p50": 43.296001851558685, + "p90": 44.92799937725067, + "p95": 45.9199994802475, + "p99": 51.67999863624573 + }, + "roundtrip": { + "p50": 71.55200093984604, + "p90": 78.11199873685837, + "p95": 82.04799890518188, + "p99": 89.15200084447861 + }, + "isolatedSum": { + "p50": 88.67200091481209, + "p90": 94.59199756383896, + "p95": 100.0639982521534, + "p99": 
115.39199948310852 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 54.75199967622757, + "p90": 62.55999952554703, + "p95": 68.67200136184692, + "p99": 79.64800298213959 + }, + "combine": { + "p50": 57.40800127387047, + "p90": 59.487998485565186, + "p95": 63.13599646091461, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 95.2640026807785, + "p90": 98.52799773216248, + "p95": 100.09600222110748, + "p99": 108.0000028014183 + }, + "isolatedSum": { + "p50": 112.16000095009804, + "p90": 122.04799801111221, + "p95": 131.80799782276154, + "p99": 153.76000106334686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 81.216000020504, + "p90": 84.25600081682205, + "p95": 87.77599781751633, + "p99": 94.08000111579895 + }, + "combine": { + "p50": 84.76799726486206, + "p90": 88.06400001049042, + "p95": 91.2960022687912, + "p99": 95.83999961614609 + }, + "roundtrip": { + "p50": 146.43199741840363, + "p90": 154.9759954214096, + "p95": 159.07199680805206, + "p99": 166.81599617004395 + }, + "isolatedSum": { + "p50": 165.98399728536606, + "p90": 172.32000082731247, + "p95": 179.07200008630753, + "p99": 189.92000073194504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d91b0686", + "identity": 
"h200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_76b40c99", + "comparisonKey": "82e9b1a652ab3547", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:32.477291+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 210.78400313854218, + "p90": 263.2319927215576, + "p95": 271.07200026512146, + "p99": 739.90398645401 + }, + "combine": { + "p50": 47.648001462221146, + "p90": 56.671999394893646, + "p95": 66.59200042486191, + "p99": 74.75200295448303 + }, + "roundtrip": { + "p50": 243.42399835586548, + "p90": 291.7119860649109, + "p95": 300.2240061759949, + "p99": 340.64000844955444 + }, + "isolatedSum": { + "p50": 258.4320046007633, + "p90": 319.90399211645126, + "p95": 337.66400068998337, + "p99": 814.655989408493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 212.25599944591522, + "p90": 265.76000452041626, + "p95": 277.69601345062256, + "p99": 295.77600955963135 + }, + "combine": { + "p50": 47.68000170588493, + "p90": 54.976001381874084, + "p95": 67.58400052785873, + "p99": 77.91999727487564 + }, + "roundtrip": { + "p50": 241.98399484157562, + "p90": 291.5840148925781, + "p95": 300.8959889411926, + "p99": 322.9120075702667 + }, + "isolatedSum": { + "p50": 259.93600115180016, + "p90": 320.73600590229034, + "p95": 345.2800139784813, + "p99": 373.696006834507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 206.94400370121002, + "p90": 270.4960107803345, + "p95": 282.49600529670715, + "p99": 323.68001341819763 + }, + "combine": { + "p50": 48.09600114822388, + "p90": 53.50400134921074, + "p95": 61.69600039720535, + "p99": 74.33599978685379 + }, + "roundtrip": { + "p50": 235.10399460792542, + "p90": 
287.1040105819702, + "p95": 297.5359857082367, + "p99": 419.51999068260193 + }, + "isolatedSum": { + "p50": 255.0400048494339, + "p90": 324.0000121295452, + "p95": 344.1920056939125, + "p99": 398.0160132050514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 226.01599991321564, + "p90": 287.200003862381, + "p95": 310.3039860725403, + "p99": 384.0000033378601 + }, + "combine": { + "p50": 49.31199923157692, + "p90": 62.30400130152702, + "p95": 68.57600063085556, + "p99": 78.78399640321732 + }, + "roundtrip": { + "p50": 246.36800587177277, + "p90": 301.08800530433655, + "p95": 314.65598940849304, + "p99": 352.06401348114014 + }, + "isolatedSum": { + "p50": 275.32799914479256, + "p90": 349.504005163908, + "p95": 378.87998670339584, + "p99": 462.7839997410774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 201.9840031862259, + "p90": 248.19199740886688, + "p95": 261.1519992351532, + "p99": 297.91998863220215 + }, + "combine": { + "p50": 50.52800104022026, + "p90": 56.89600110054016, + "p95": 63.64800035953522, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 239.1359955072403, + "p90": 289.66400027275085, + "p95": 299.1040050983429, + "p99": 316.6719973087311 + }, + "isolatedSum": { + "p50": 252.51200422644615, + "p90": 305.08799850940704, + "p95": 324.7999995946884, + "p99": 372.0319867134094 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.1280039548874, + "p90": 250.71999430656433, + "p95": 261.1840069293976, + "p99": 286.9440019130707 + }, + "combine": { + "p50": 53.37600037455559, + "p90": 58.43200162053108, + "p95": 68.09599697589874, + "p99": 81.11999928951263 + }, + "roundtrip": { + "p50": 250.17601251602173, + "p90": 295.77600955963135, + "p95": 312.99200654029846, + "p99": 350.40000081062317 + }, + "isolatedSum": { + "p50": 253.50400432944298, + "p90": 309.1519959270954, + "p95": 329.2800039052963, + "p99": 368.0640012025833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 195.360004901886, + "p90": 236.92800104618073, + "p95": 255.96800446510315, + "p99": 275.58401226997375 + }, + "combine": { + "p50": 60.95999851822853, + "p90": 65.08799642324448, + "p95": 70.91200351715088, + "p99": 82.33600109815598 + }, + "roundtrip": { + "p50": 247.13599681854248, + "p90": 293.66400837898254, + "p95": 312.73600459098816, + "p99": 685.7600212097168 + }, + "isolatedSum": { + "p50": 256.3200034201145, + "p90": 302.0159974694252, + "p95": 326.880007982254, + "p99": 357.92001336812973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 206.2399983406067, + "p90": 277.9519855976105, + "p95": 294.14400458335876, + "p99": 433.56800079345703 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 75.26399940252304, + "p95": 
83.64800363779068, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 264.67201113700867, + "p90": 319.2000091075897, + "p95": 330.0800025463104, + "p99": 385.0559890270233 + }, + "isolatedSum": { + "p50": 277.8559997677803, + "p90": 353.2159850001335, + "p95": 377.79200822114944, + "p99": 531.0399979352951 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1f8c9a45", + "identity": "h200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_76b40c99", + "comparisonKey": "048e3ce8878455ea", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:58.180551+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 207.0399969816208, + "p90": 250.0160038471222, + "p95": 260.3519856929779, + "p99": 313.1200075149536 + }, + "combine": { + "p50": 50.36799982190132, + "p90": 58.848001062870026, + "p95": 67.32799857854843, + "p99": 74.97599720954895 + }, + "roundtrip": { + "p50": 240.7359927892685, + "p90": 285.8879864215851, + "p95": 298.43199253082275, + "p99": 362.4959886074066 + }, + "isolatedSum": { + "p50": 257.4079968035221, + "p90": 308.8640049099922, + "p95": 327.67998427152634, + "p99": 388.09600472450256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 194.17600333690643, + "p90": 244.35199797153473, + "p95": 250.43201446533203, + "p99": 275.39199590682983 + }, + "combine": { + "p50": 51.10400170087814, + "p90": 63.45599889755249, + "p95": 70.49600034952164, + "p99": 90.33600240945816 + }, + "roundtrip": { + "p50": 247.45599925518036, + "p90": 295.9040105342865, + "p95": 308.1279993057251, + "p99": 322.6880133152008 + }, + "isolatedSum": { + "p50": 245.28000503778458, + "p90": 307.8079968690872, + "p95": 320.92801481485367, + "p99": 365.727998316288 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 198.7520009279251, + "p90": 256.8640112876892, + "p95": 271.2959945201874, + "p99": 434.81600284576416 + }, + "combine": { + "p50": 51.83999985456467, + "p90": 61.40799820423126, + "p95": 68.28799843788147, + "p99": 77.02399790287018 + }, + "roundtrip": { + "p50": 238.87999355793, + "p90": 286.20800375938416, + "p95": 298.6240088939667, + "p99": 325.98400115966797 + }, + "isolatedSum": { + "p50": 250.59200078248978, + "p90": 318.27200949192047, + "p95": 339.58399295806885, + "p99": 511.84000074863434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 194.30400431156158, + "p90": 237.56800591945648, + "p95": 247.93599545955658, + "p99": 263.7439966201782 + }, + "combine": { + "p50": 52.41600051522255, + "p90": 59.87200140953064, + "p95": 67.61600077152252, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 233.11999440193176, + "p90": 272.7679908275604, + "p95": 279.2640030384064, + "p99": 299.80799555778503 + }, + "isolatedSum": { + "p50": 246.72000482678413, + "p90": 297.4400073289871, + "p95": 315.5519962310791, + "p99": 342.399999499321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 199.93600249290466, + "p90": 244.4480061531067, + "p95": 
252.19199061393738, + "p99": 277.6319980621338 + }, + "combine": { + "p50": 53.53600159287453, + "p90": 66.65600091218948, + "p95": 71.19999825954437, + "p99": 81.95199817419052 + }, + "roundtrip": { + "p50": 236.41599714756012, + "p90": 281.5679907798767, + "p95": 292.32001304626465, + "p99": 314.07999992370605 + }, + "isolatedSum": { + "p50": 253.4720040857792, + "p90": 311.1040070652962, + "p95": 323.39198887348175, + "p99": 359.5839962363243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 194.84800100326538, + "p90": 243.3599978685379, + "p95": 255.23200631141663, + "p99": 289.5680069923401 + }, + "combine": { + "p50": 57.5999990105629, + "p90": 65.0240033864975, + "p95": 73.40800017118454, + "p99": 79.58400249481201 + }, + "roundtrip": { + "p50": 239.42400515079498, + "p90": 281.792014837265, + "p95": 293.4400141239166, + "p99": 328.7679851055145 + }, + "isolatedSum": { + "p50": 252.44800001382828, + "p90": 308.3840012550354, + "p95": 328.64000648260117, + "p99": 369.1520094871521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 205.47200739383698, + "p90": 257.7280104160309, + "p95": 265.79201221466064, + "p99": 285.37601232528687 + }, + "combine": { + "p50": 66.27199798822403, + "p90": 78.43200117349625, + "p95": 84.28800106048584, + "p99": 96.44799679517746 + }, + "roundtrip": { + "p50": 256.1599910259247, + "p90": 306.5919876098633, + "p95": 318.4640109539032, + "p99": 388.63998651504517 + }, + "isolatedSum": { + "p50": 
271.744005382061, + "p90": 336.16001158952713, + "p95": 350.0800132751465, + "p99": 381.8240091204643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 207.61600136756897, + "p90": 252.00000405311584, + "p95": 262.30400800704956, + "p99": 291.04000329971313 + }, + "combine": { + "p50": 80.83199709653854, + "p90": 92.67199784517288, + "p95": 96.67199850082397, + "p99": 109.56799983978271 + }, + "roundtrip": { + "p50": 273.72801303863525, + "p90": 312.79999017715454, + "p95": 323.168009519577, + "p99": 343.6799943447113 + }, + "isolatedSum": { + "p50": 288.4479984641075, + "p90": 344.6720018982887, + "p95": 358.97600650787354, + "p99": 400.60800313949585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f761a9d6", + "identity": "h200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_76b40c99", + "comparisonKey": "2f8862586aa9fc78", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:24.796689+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 
6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 198.65599274635315, + "p90": 252.9599964618683, + "p95": 264.22399282455444, + "p99": 298.5599935054779 + }, + "combine": { + "p50": 54.55999821424484, + "p90": 65.50399959087372, + "p95": 75.00799745321274, + "p99": 83.61600339412689 + }, + "roundtrip": { + "p50": 244.9920028448105, + "p90": 296.86400294303894, + "p95": 305.9200048446655, + "p99": 331.29599690437317 + }, + "isolatedSum": { + "p50": 253.215990960598, + "p90": 318.463996052742, + "p95": 339.2319902777672, + "p99": 382.1759968996048 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 198.68800044059753, + "p90": 249.08800423145294, + "p95": 257.02399015426636, + "p99": 273.69600534439087 + }, + "combine": { + "p50": 55.10399863123894, + "p90": 68.28799843788147, + "p95": 75.42400062084198, + "p99": 86.36800199747086 + }, + "roundtrip": { + "p50": 241.31199717521667, + "p90": 290.8479869365692, + "p95": 302.14399099349976, + "p99": 324.7680068016052 + }, + "isolatedSum": { + "p50": 253.79199907183647, + "p90": 317.3760026693344, + "p95": 332.44799077510834, + "p99": 360.0640073418617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 198.2080042362213, + "p90": 250.97599625587463, + "p95": 269.567996263504, + "p99": 300.7679879665375 + }, + "combine": { + "p50": 55.743999779224396, + "p90": 65.50399959087372, + "p95": 72.35199958086014, + "p99": 79.32800054550171 + }, + "roundtrip": { + "p50": 242.88000166416168, + "p90": 301.05599761009216, + "p95": 314.36800956726074, + "p99": 337.8239870071411 + }, + "isolatedSum": { + "p50": 253.9520040154457, + "p90": 316.47999584674835, + "p95": 341.91999584436417, + "p99": 380.0959885120392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 198.5280066728592, + "p90": 256.1599910259247, + "p95": 264.19198513031006, + "p99": 279.6480059623718 + }, + "combine": { + "p50": 56.44800141453743, + "p90": 67.03999638557434, + "p95": 76.12799853086472, + "p99": 85.95199882984161 + }, + "roundtrip": { + "p50": 
243.3280050754547, + "p90": 298.8480031490326, + "p95": 315.4880106449127, + "p99": 659.2640280723572 + }, + "isolatedSum": { + "p50": 254.97600808739662, + "p90": 323.199987411499, + "p95": 340.3199836611748, + "p99": 365.60000479221344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 199.8720020055771, + "p90": 252.6400089263916, + "p95": 263.4879946708679, + "p99": 306.7519962787628 + }, + "combine": { + "p50": 58.52799862623215, + "p90": 70.56000083684921, + "p95": 76.25599950551987, + "p99": 82.2720006108284 + }, + "roundtrip": { + "p50": 245.85600197315216, + "p90": 300.8959889411926, + "p95": 310.36800146102905, + "p99": 338.0480110645294 + }, + "isolatedSum": { + "p50": 258.40000063180923, + "p90": 323.2000097632408, + "p95": 339.7439941763878, + "p99": 389.0239968895912 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 194.240003824234, + "p90": 250.5599856376648, + "p95": 264.8960053920746, + "p99": 427.8079867362976 + }, + "combine": { + "p50": 62.6240000128746, + "p90": 74.0479975938797, + "p95": 81.7599967122078, + "p99": 93.12000125646591 + }, + "roundtrip": { + "p50": 243.71199309825897, + "p90": 294.048011302948, + "p95": 303.8400113582611, + "p99": 335.9360098838806 + }, + "isolatedSum": { + "p50": 256.8640038371086, + "p90": 324.6079832315445, + "p95": 346.6560021042824, + "p99": 520.9279879927635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + 
"recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 192.51200556755066, + "p90": 244.60799992084503, + "p95": 256.19199872016907, + "p99": 282.8480005264282 + }, + "combine": { + "p50": 71.58400118350983, + "p90": 77.34400033950806, + "p95": 86.27200126647949, + "p99": 95.48799693584442 + }, + "roundtrip": { + "p50": 251.10399723052979, + "p90": 296.4800000190735, + "p95": 305.9520125389099, + "p99": 344.2879915237427 + }, + "isolatedSum": { + "p50": 264.0960067510605, + "p90": 321.9520002603531, + "p95": 342.46399998664856, + "p99": 378.33599746227264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 207.74400234222412, + "p90": 253.12000513076782, + "p95": 264.8000121116638, + "p99": 287.55199909210205 + }, + "combine": { + "p50": 86.33600175380707, + "p90": 96.70399874448776, + "p95": 104.12800312042236, + "p99": 121.56800180673599 + }, + "roundtrip": { + "p50": 283.3920121192932, + "p90": 329.79199290275574, + "p95": 338.4000062942505, + "p99": 352.35199332237244 + }, + "isolatedSum": { + "p50": 294.0800040960312, + "p90": 349.8240038752556, + "p95": 368.9280152320862, + "p99": 409.12000089883804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6eee1f1b", + "identity": "h200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_aa333d39", + "comparisonKey": "1d3b4cbbe50c1ded", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:24.532124+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 61.69600039720535, + "p90": 68.57600063085556, + "p95": 73.95199686288834, + "p99": 81.82399719953537 + }, + 
"combine": { + "p50": 58.46399813890457, + "p90": 62.111999839544296, + "p95": 66.68800115585327, + "p99": 83.74399691820145 + }, + "roundtrip": { + "p50": 145.34400403499603, + "p90": 158.33599865436554, + "p95": 163.7759953737259, + "p99": 174.27200078964233 + }, + "isolatedSum": { + "p50": 120.15999853610992, + "p90": 130.68800047039986, + "p95": 140.6399980187416, + "p99": 165.56799411773682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 63.58399987220764, + "p90": 101.43999755382538, + "p95": 107.39199817180634, + "p99": 122.6240023970604 + }, + "combine": { + "p50": 59.07199904322624, + "p90": 62.49599903821945, + "p95": 66.59200042486191, + "p99": 80.28800040483475 + }, + "roundtrip": { + "p50": 145.85599303245544, + "p90": 159.71200168132782, + "p95": 165.0879979133606, + "p99": 192.9280012845993 + }, + "isolatedSum": { + "p50": 122.65599891543388, + "p90": 163.93599659204483, + "p95": 173.98399859666824, + "p99": 202.91200280189514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 63.26399743556976, + "p90": 72.15999811887741, + "p95": 76.22399926185608, + "p99": 83.99999886751175 + }, + "combine": { + "p50": 58.079998940229416, + "p90": 61.91999837756157, + "p95": 64.4799992442131, + "p99": 75.42400062084198 + }, + "roundtrip": { + "p50": 150.68799257278442, + "p90": 166.01599752902985, + "p95": 171.23199999332428, + "p99": 184.76800620555878 + }, + "isolatedSum": { + "p50": 121.34399637579918, + "p90": 134.07999649643898, + "p95": 140.70399850606918, + 
"p99": 159.42399948835373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 61.88800185918808, + "p90": 69.5360004901886, + "p95": 73.5040009021759, + "p99": 85.15200018882751 + }, + "combine": { + "p50": 59.647999703884125, + "p90": 64.25599753856659, + "p95": 68.09599697589874, + "p99": 75.55200159549713 + }, + "roundtrip": { + "p50": 148.99200201034546, + "p90": 167.58400201797485, + "p95": 178.24000120162964, + "p99": 204.73599433898926 + }, + "isolatedSum": { + "p50": 121.5360015630722, + "p90": 133.7919980287552, + "p95": 141.59999787807465, + "p99": 160.70400178432465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 63.90400230884552, + "p90": 71.61600142717361, + "p95": 76.99199765920639, + "p99": 84.51200276613235 + }, + "combine": { + "p50": 61.344001442193985, + "p90": 64.57599997520447, + "p95": 67.55200028419495, + "p99": 78.65600287914276 + }, + "roundtrip": { + "p50": 152.51199901103973, + "p90": 170.27199268341064, + "p95": 184.76800620555878, + "p99": 245.56800723075867 + }, + "isolatedSum": { + "p50": 125.2480037510395, + "p90": 136.19200140237808, + "p95": 144.54399794340134, + "p99": 163.16800564527512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 67.1359971165657, + "p90": 
74.5600014925003, + "p95": 77.18399912118912, + "p99": 89.1840010881424 + }, + "combine": { + "p50": 67.4239993095398, + "p90": 70.36799937486649, + "p95": 72.28799909353256, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 163.90399634838104, + "p90": 187.6479983329773, + "p95": 202.87999510765076, + "p99": 260.6399953365326 + }, + "isolatedSum": { + "p50": 134.5599964261055, + "p90": 144.9280008673668, + "p95": 149.47199821472168, + "p99": 168.92800480127335 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 76.92799717187881, + "p90": 83.39200168848038, + "p95": 86.87999844551086, + "p99": 93.18400174379349 + }, + "combine": { + "p50": 77.82399654388428, + "p90": 82.71999657154083, + "p95": 90.27200192213058, + "p99": 96.54399752616882 + }, + "roundtrip": { + "p50": 172.41600155830383, + "p90": 187.99999356269836, + "p95": 194.84800100326538, + "p99": 211.64800226688385 + }, + "isolatedSum": { + "p50": 154.7519937157631, + "p90": 166.1119982600212, + "p95": 177.15200036764145, + "p99": 189.7279992699623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.927998483181, + "p90": 89.56799656152725, + "p95": 93.1520015001297, + "p99": 104.47999835014343 + }, + "combine": { + "p50": 94.59199756383896, + "p90": 98.04800152778625, + "p95": 100.35199671983719, + "p99": 109.69600081443787 + }, + "roundtrip": { + "p50": 208.99200439453125, + "p90": 217.50399470329285, + "p95": 222.1119999885559, + "p99": 230.30400276184082 + }, + "isolatedSum": { + 
"p50": 179.51999604701996, + "p90": 187.6159980893135, + "p95": 193.5039982199669, + "p99": 214.1759991645813 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d4738edf", + "identity": "h200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_76b40c99", + "comparisonKey": "80dab888a7520ff2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:55.064780+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 206.01600408554077, + "p90": 251.13600492477417, + "p95": 262.4000012874603, + "p99": 286.46400570869446 + }, + "combine": { + "p50": 58.27200040221214, + "p90": 66.68800115585327, + "p95": 74.11199808120728, + "p99": 81.31200075149536 + }, + "roundtrip": { + "p50": 246.7840015888214, + "p90": 287.6800000667572, + "p95": 299.8400032520294, + "p99": 386.9439959526062 + }, + "isolatedSum": { + "p50": 264.2880044877529, + "p90": 317.82400608062744, + "p95": 336.5119993686676, + "p99": 367.7760064601898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 204.28800582885742, + "p90": 249.08800423145294, + "p95": 262.9759907722473, + "p99": 361.37598752975464 + }, + "combine": { + "p50": 59.20000001788139, + "p90": 71.71200215816498, + "p95": 78.11199873685837, + "p99": 86.97599917650223 + }, + "roundtrip": { + "p50": 245.02399563789368, + "p90": 286.20800375938416, + "p95": 297.1839904785156, + "p99": 318.7839984893799 + }, + "isolatedSum": { + "p50": 263.4880058467388, + "p90": 320.8000063896179, + "p95": 341.0879895091057, + "p99": 448.35198670625687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 196.6720074415207, + "p90": 239.3600046634674, + "p95": 248.9600032567978, + "p99": 350.94401240348816 + }, + "combine": { + "p50": 58.97599831223488, + "p90": 65.11999666690826, + "p95": 75.42400062084198, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 254.72000241279602, + "p90": 292.4480140209198, + "p95": 310.5280101299286, + "p99": 346.560001373291 + }, + "isolatedSum": { + "p50": 255.64800575375557, + "p90": 304.48000133037567, + "p95": 324.38400387763977, + "p99": 434.88001078367233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 199.39200580120087, + "p90": 236.12800240516663, + "p95": 248.19199740886688, + "p99": 269.6639895439148 + }, + "combine": { + "p50": 60.03199890255928, + "p90": 74.75200295448303, + "p95": 80.57600259780884, + "p99": 100.09600222110748 + }, + "roundtrip": { + "p50": 247.74399399757385, + "p90": 292.1920120716095, + "p95": 308.03200602531433, + "p99": 435.93600392341614 + }, + "isolatedSum": { + "p50": 259.42400470376015, + "p90": 310.88000535964966, + "p95": 328.7680000066757, + "p99": 369.7599917650223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 200.54399967193604, + "p90": 239.1040027141571, + "p95": 248.06399643421173, + "p99": 263.93601298332214 + }, + "combine": { + "p50": 61.88800185918808, + "p90": 79.42400127649307, + "p95": 83.26400071382523, + "p99": 95.51999717950821 + }, + 
"roundtrip": { + "p50": 254.65598702430725, + "p90": 299.6160089969635, + "p95": 311.6160035133362, + "p99": 341.8560028076172 + }, + "isolatedSum": { + "p50": 262.4320015311241, + "p90": 318.5280039906502, + "p95": 331.32799714803696, + "p99": 359.45601016283035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 201.82399451732635, + "p90": 241.34400486946106, + "p95": 251.8399953842163, + "p99": 290.75199365615845 + }, + "combine": { + "p50": 67.58400052785873, + "p90": 77.40800082683563, + "p95": 85.34400165081024, + "p99": 93.72799843549728 + }, + "roundtrip": { + "p50": 255.67999482154846, + "p90": 308.6400032043457, + "p95": 365.4400110244751, + "p99": 421.6639995574951 + }, + "isolatedSum": { + "p50": 269.4079950451851, + "p90": 318.7520056962967, + "p95": 337.18399703502655, + "p99": 384.47999209165573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 206.91199600696564, + "p90": 247.6159930229187, + "p95": 263.68001103401184, + "p99": 373.6959993839264 + }, + "combine": { + "p50": 77.53600180149078, + "p90": 86.84799820184708, + "p95": 96.12800180912018, + "p99": 107.90400207042694 + }, + "roundtrip": { + "p50": 266.52801036834717, + "p90": 300.7360100746155, + "p95": 310.5599880218506, + "p99": 331.5199911594391 + }, + "isolatedSum": { + "p50": 284.4479978084564, + "p90": 334.4639912247658, + "p95": 359.808012843132, + "p99": 481.60000145435333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 
38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.66400337219238, + "p90": 251.74400210380554, + "p95": 261.53600215911865, + "p99": 271.64798974990845 + }, + "combine": { + "p50": 95.96800059080124, + "p90": 107.42399841547012, + "p95": 113.82400244474411, + "p99": 121.18399888277054 + }, + "roundtrip": { + "p50": 306.87999725341797, + "p90": 339.35999870300293, + "p95": 346.94400429725647, + "p99": 404.57600355148315 + }, + "isolatedSum": { + "p50": 313.6320039629936, + "p90": 359.16800051927567, + "p95": 375.36000460386276, + "p99": 392.831988632679 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-147b38e2", + "identity": "h200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_76b40c99", + "comparisonKey": "4cfc753c874e2370", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:58.130759+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + 
"kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 213.6639952659607, + "p90": 256.1599910259247, + "p95": 271.7440128326416, + "p99": 333.21601152420044 + }, + "combine": { + "p50": 56.73599988222122, + "p90": 67.96800345182419, + "p95": 75.48800110816956, + "p99": 89.47200328111649 + }, + "roundtrip": { + "p50": 260.0319981575012, + "p90": 306.8160116672516, + "p95": 318.59201192855835, + "p99": 405.7280123233795 + }, + "isolatedSum": { + "p50": 270.3999951481819, + "p90": 324.12799447774887, + "p95": 347.23201394081116, + "p99": 422.6880148053169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 206.56000077724457, + "p90": 247.74399399757385, + "p95": 256.73601031303406, + "p99": 
281.21599555015564 + }, + "combine": { + "p50": 58.04799869656563, + "p90": 67.77600198984146, + "p95": 73.98399710655212, + "p99": 78.68800312280655 + }, + "roundtrip": { + "p50": 255.295991897583, + "p90": 297.88801074028015, + "p95": 308.76800417900085, + "p99": 564.0320181846619 + }, + "isolatedSum": { + "p50": 264.6079994738102, + "p90": 315.5199959874153, + "p95": 330.7200074195862, + "p99": 359.9039986729622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 201.75999402999878, + "p90": 245.79200148582458, + "p95": 261.24799251556396, + "p99": 326.55999064445496 + }, + "combine": { + "p50": 58.04799869656563, + "p90": 67.52000004053116, + "p95": 73.5040009021759, + "p99": 81.216000020504 + }, + "roundtrip": { + "p50": 259.552001953125, + "p90": 297.69599437713623, + "p95": 310.2079927921295, + "p99": 330.59200644493103 + }, + "isolatedSum": { + "p50": 259.8079927265644, + "p90": 313.31200152635574, + "p95": 334.75199341773987, + "p99": 407.77599066495895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 220.12799978256226, + "p90": 265.82399010658264, + "p95": 283.1679880619049, + "p99": 402.43199467658997 + }, + "combine": { + "p50": 59.84000116586685, + "p90": 71.99999690055847, + "p95": 79.55200225114822, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 251.93598866462708, + "p90": 285.2799892425537, + "p95": 293.11999678611755, + "p99": 336.64000034332275 + }, + "isolatedSum": { + "p50": 279.9680009484291, + "p90": 337.8239870071411, + "p95": 
362.71999031305313, + "p99": 493.98399144411087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 212.47999370098114, + "p90": 255.0399899482727, + "p95": 262.56000995635986, + "p99": 292.4799919128418 + }, + "combine": { + "p50": 62.04799935221672, + "p90": 73.79200309515, + "p95": 79.52000200748444, + "p99": 94.46399658918381 + }, + "roundtrip": { + "p50": 257.9199969768524, + "p90": 292.2559976577759, + "p95": 302.8480112552643, + "p99": 322.2399950027466 + }, + "isolatedSum": { + "p50": 274.52799305319786, + "p90": 328.8319930434227, + "p95": 342.0800119638443, + "p99": 386.9439885020256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 208.51199328899384, + "p90": 249.40800666809082, + "p95": 258.5600018501282, + "p99": 273.24798703193665 + }, + "combine": { + "p50": 67.29599833488464, + "p90": 75.32799988985062, + "p95": 84.25600081682205, + "p99": 93.9520001411438 + }, + "roundtrip": { + "p50": 264.5440101623535, + "p90": 297.91998863220215, + "p95": 313.4399950504303, + "p99": 1018.0480480194092 + }, + "isolatedSum": { + "p50": 275.8079916238785, + "p90": 324.73600655794144, + "p95": 342.8160026669502, + "p99": 367.19998717308044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 
206.68800175189972, + "p90": 241.88800156116486, + "p95": 252.41601467132568, + "p99": 321.1840093135834 + }, + "combine": { + "p50": 77.85599678754807, + "p90": 88.54400366544724, + "p95": 93.63199770450592, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 267.93599128723145, + "p90": 301.60000920295715, + "p95": 307.9040050506592, + "p99": 327.1999955177307 + }, + "isolatedSum": { + "p50": 284.5439985394478, + "p90": 330.4320052266121, + "p95": 346.0480123758316, + "p99": 422.40001261234283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 225.24799406528473, + "p90": 261.8879973888397, + "p95": 270.52798867225647, + "p99": 319.7439908981323 + }, + "combine": { + "p50": 95.13600170612335, + "p90": 109.53599959611893, + "p95": 113.47199976444244, + "p99": 119.80800330638885 + }, + "roundtrip": { + "p50": 309.05601382255554, + "p90": 341.95199608802795, + "p95": 348.4799861907959, + "p99": 416.159987449646 + }, + "isolatedSum": { + "p50": 320.3839957714081, + "p90": 371.42399698495865, + "p95": 383.9999884366989, + "p99": 439.5519942045212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2209c673", + "identity": "h200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_8701b74d", + "comparisonKey": "78630685892e3edc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:09.456332+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 49.18399825692177, + "p90": 67.35999882221222, + "p95": 79.71200346946716, + "p99": 109.24799740314484 + }, + "combine": { + "p50": 57.66399949789047, + "p90": 67.19999760389328, + "p95": 72.12799787521362, + "p99": 78.65600287914276 + }, + "roundtrip": { + 
"p50": 134.65599715709686, + "p90": 174.94399845600128, + "p95": 185.2159947156906, + "p99": 203.2960057258606 + }, + "isolatedSum": { + "p50": 106.84799775481224, + "p90": 134.5599964261055, + "p95": 151.8400013446808, + "p99": 187.9040002822876 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 50.71999877691269, + "p90": 73.79200309515, + "p95": 82.0159986615181, + "p99": 90.36800265312195 + }, + "combine": { + "p50": 58.52799862623215, + "p90": 70.36799937486649, + "p95": 76.80000364780426, + "p99": 83.36000144481659 + }, + "roundtrip": { + "p50": 136.09600067138672, + "p90": 181.31199479103088, + "p95": 189.11999464035034, + "p99": 208.15999805927277 + }, + "isolatedSum": { + "p50": 109.24799740314484, + "p90": 144.16000247001648, + "p95": 158.81600230932236, + "p99": 173.72800409793854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 49.40799996256828, + "p90": 61.95199862122536, + "p95": 75.00799745321274, + "p99": 86.62399649620056 + }, + "combine": { + "p50": 58.079998940229416, + "p90": 61.344001442193985, + "p95": 73.98399710655212, + "p99": 83.52000266313553 + }, + "roundtrip": { + "p50": 132.57600367069244, + "p90": 160.0639969110489, + "p95": 171.36000096797943, + "p99": 184.03199315071106 + }, + "isolatedSum": { + "p50": 107.4879989027977, + "p90": 123.29600006341934, + "p95": 148.99199455976486, + "p99": 170.1439991593361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + 
"recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 49.536000937223434, + "p90": 63.040003180503845, + "p95": 75.13599842786789, + "p99": 84.83199775218964 + }, + "combine": { + "p50": 58.720000088214874, + "p90": 64.38399851322174, + "p95": 76.57600194215775, + "p99": 84.16000008583069 + }, + "roundtrip": { + "p50": 133.98399949073792, + "p90": 164.86400365829468, + "p95": 180.09600043296814, + "p99": 203.67999374866486 + }, + "isolatedSum": { + "p50": 108.25600102543831, + "p90": 127.42400169372559, + "p95": 151.71200037002563, + "p99": 168.99199783802032 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 51.96800082921982, + "p90": 75.32799988985062, + "p95": 82.40000158548355, + "p99": 95.74399888515472 + }, + "combine": { + "p50": 61.40799820423126, + "p90": 69.7920024394989, + "p95": 78.65600287914276, + "p99": 90.65599739551544 + }, + "roundtrip": { + "p50": 135.5839967727661, + "p90": 169.27999258041382, + "p95": 181.40800297260284, + "p99": 211.0079973936081 + }, + "isolatedSum": { + "p50": 113.37599903345108, + "p90": 145.12000232934952, + "p95": 161.0560044646263, + "p99": 186.39999628067017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 55.67999929189682, + "p90": 68.76800209283829, + "p95": 84.927998483181, + "p99": 96.0640013217926 + }, + "combine": { + "p50": 67.16799736022949, + "p90": 73.2479989528656, + "p95": 
83.93599838018417, + "p99": 90.08000046014786 + }, + "roundtrip": { + "p50": 141.59999787807465, + "p90": 174.49599504470825, + "p95": 183.67999792099, + "p99": 196.3520050048828 + }, + "isolatedSum": { + "p50": 122.84799665212631, + "p90": 142.0160010457039, + "p95": 168.86399686336517, + "p99": 186.14400178194046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 64.54399973154068, + "p90": 74.8480036854744, + "p95": 85.79199761152267, + "p99": 95.74399888515472 + }, + "combine": { + "p50": 76.86399668455124, + "p90": 85.28000116348267, + "p95": 96.12800180912018, + "p99": 117.08799749612808 + }, + "roundtrip": { + "p50": 153.60000729560852, + "p90": 182.94399976730347, + "p95": 193.15199553966522, + "p99": 207.42399990558624 + }, + "isolatedSum": { + "p50": 141.40799641609192, + "p90": 160.12800484895706, + "p95": 181.91999942064285, + "p99": 212.8319963812828 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 72.22399860620499, + "p90": 79.03999835252762, + "p95": 91.74399822950363, + "p99": 102.11200267076492 + }, + "combine": { + "p50": 95.45599669218063, + "p90": 102.52799838781357, + "p95": 111.04000359773636, + "p99": 117.0239970088005 + }, + "roundtrip": { + "p50": 197.9839950799942, + "p90": 216.41600131988525, + "p95": 224.38399493694305, + "p99": 243.96799504756927 + }, + "isolatedSum": { + "p50": 167.67999529838562, + "p90": 181.5679967403412, + "p95": 202.78400182724, + "p99": 219.13599967956543 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-45a8518c", + "identity": "h200|deepep|v1|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_e7a79a34", + "comparisonKey": "3dff4c770e61a66e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:50.801707+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 27.807999402284622, + "p90": 38.55999931693077, + "p95": 51.52000114321709, + "p99": 62.55999952554703 + }, + "combine": { + "p50": 51.552001386880875, + "p90": 62.3680017888546, + "p95": 67.55200028419495, + "p99": 112.86400258541107 + }, + "roundtrip": { + "p50": 1862.6559972763062, + "p90": 1877.5999546051025, + "p95": 1885.632038116455, + "p99": 2049.567937850952 + }, + "isolatedSum": { + "p50": 79.3600007891655, + "p90": 100.92800110578537, + "p95": 119.07200142741203, + "p99": 175.4240021109581 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 28.60799990594387, + "p90": 45.504000037908554, + "p95": 55.743999779224396, + "p99": 68.00000369548798 + }, + "combine": { + "p50": 52.44800075888634, + "p90": 64.38399851322174, + "p95": 74.01599735021591, + "p99": 138.8159990310669 + }, + "roundtrip": { + "p50": 1864.7680282592773, + "p90": 1881.1839818954468, + "p95": 1887.8719806671143, + "p99": 1907.4879884719849 + }, + "isolatedSum": { + "p50": 81.05600066483021, + "p90": 109.8879985511303, + "p95": 129.7599971294403, + "p99": 206.81600272655487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 29.055999591946602, + "p90": 49.984000623226166, + 
"p95": 55.87200075387955, + "p99": 79.64800298213959 + }, + "combine": { + "p50": 53.727999329566956, + "p90": 66.01600348949432, + "p95": 73.5040009021759, + "p99": 133.215993642807 + }, + "roundtrip": { + "p50": 1864.9920225143433, + "p90": 1882.2400569915771, + "p95": 1889.1520500183105, + "p99": 1926.2399673461914 + }, + "isolatedSum": { + "p50": 82.78399892151356, + "p90": 116.00000411272049, + "p95": 129.37600165605545, + "p99": 212.8639966249466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 29.472000896930695, + "p90": 40.47999903559685, + "p95": 54.62399870157242, + "p99": 64.4799992442131 + }, + "combine": { + "p50": 54.11199852824211, + "p90": 65.21599739789963, + "p95": 69.92000341415405, + "p99": 109.98400300741196 + }, + "roundtrip": { + "p50": 1865.1200532913208, + "p90": 1882.464051246643, + "p95": 1889.0880346298218, + "p99": 1980.4160594940186 + }, + "isolatedSum": { + "p50": 83.5839994251728, + "p90": 105.69599643349648, + "p95": 124.54400211572647, + "p99": 174.46400225162506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.55200183391571, + "p90": 43.87199878692627, + "p95": 55.71199953556061, + "p99": 72.1919983625412 + }, + "combine": { + "p50": 54.43200096487999, + "p90": 63.87200206518173, + "p95": 68.60800087451935, + "p99": 96.57599776983261 + }, + "roundtrip": { + "p50": 1869.3759441375732, + "p90": 1885.4080438613892, + "p95": 1891.7440176010132, + "p99": 1912.6720428466797 + }, + "isolatedSum": { + "p50": 
85.9840027987957, + "p90": 107.744000852108, + "p95": 124.32000041007996, + "p99": 168.7679961323738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 33.440001308918, + "p90": 46.49600014090538, + "p95": 56.96000158786774, + "p99": 67.29599833488464 + }, + "combine": { + "p50": 62.912002205848694, + "p90": 95.16800194978714, + "p95": 131.99999928474426, + "p99": 172.57599532604218 + }, + "roundtrip": { + "p50": 1880.7679414749146, + "p90": 1900.1599550247192, + "p95": 1908.2880020141602, + "p99": 1969.8560237884521 + }, + "isolatedSum": { + "p50": 96.3520035147667, + "p90": 141.66400209069252, + "p95": 188.960000872612, + "p99": 239.87199366092682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 39.68000039458275, + "p90": 52.51200124621391, + "p95": 58.9120015501976, + "p99": 73.47200065851212 + }, + "combine": { + "p50": 72.06399738788605, + "p90": 83.36000144481659, + "p95": 89.31200206279755, + "p99": 127.80800461769104 + }, + "roundtrip": { + "p50": 1895.1040506362915, + "p90": 1914.84797000885, + "p95": 1939.520001411438, + "p99": 1996.1600303649902 + }, + "isolatedSum": { + "p50": 111.7439977824688, + "p90": 135.8720026910305, + "p95": 148.22400361299515, + "p99": 201.28000527620316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + 
"globalTokens": 1024, + "dispatch": { + "p50": 52.5440014898777, + "p90": 62.94400244951248, + "p95": 69.37599927186966, + "p99": 76.73600316047668 + }, + "combine": { + "p50": 98.68799895048141, + "p90": 109.63200032711029, + "p95": 114.656001329422, + "p99": 166.75199568271637 + }, + "roundtrip": { + "p50": 1936.8640184402466, + "p90": 1963.3599519729614, + "p95": 1990.272045135498, + "p99": 2012.6080513000488 + }, + "isolatedSum": { + "p50": 151.23200044035912, + "p90": 172.57600277662277, + "p95": 184.03200060129166, + "p99": 243.48799884319305 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-55316ef8", + "identity": "h200|deepep|v1|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_4becf8b4", + "comparisonKey": "5fc3d93524f8ba5e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:35.240802+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + 
"deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 28.672000393271446, + "p90": 31.808000057935715, + "p95": 36.51199862360954, + "p99": 42.94399917125702 + }, + "combine": { + "p50": 35.87200120091438, + "p90": 41.95199906826019, + "p95": 45.632001012563705, + "p99": 77.44000107049942 + }, + "roundtrip": { + "p50": 1847.4880456924438, + "p90": 1854.5600175857544, + "p95": 1860.1280450820923, + "p99": 1911.8080139160156 + }, + "isolatedSum": { + "p50": 64.54400159418583, + "p90": 73.75999912619591, + "p95": 82.14399963617325, + "p99": 120.38400024175644 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 30.400000512599945, + "p90": 71.55200093984604, + "p95": 74.40000027418137, + "p99": 83.39200168848038 + }, + "combine": { + "p50": 37.151999771595, + "p90": 46.65600135922432, + "p95": 60.15999987721443, + "p99": 81.31200075149536 + }, + "roundtrip": { + "p50": 1846.9760417938232, + "p90": 
1854.0159463882446, + "p95": 1861.2799644470215, + "p99": 1912.12797164917 + }, + "isolatedSum": { + "p50": 67.55200028419495, + "p90": 118.20800229907036, + "p95": 134.5600001513958, + "p99": 164.70400243997574 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 29.184000566601753, + "p90": 34.272000193595886, + "p95": 40.28800129890442, + "p99": 75.3600001335144 + }, + "combine": { + "p50": 36.639999598264694, + "p90": 44.89599913358688, + "p95": 49.18399825692177, + "p99": 91.74399822950363 + }, + "roundtrip": { + "p50": 1846.783995628357, + "p90": 1854.1439771652222, + "p95": 1858.5280179977417, + "p99": 1910.6240272521973 + }, + "isolatedSum": { + "p50": 65.82400016486645, + "p90": 79.16799932718277, + "p95": 89.47199955582619, + "p99": 167.10399836301804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 30.047999694943428, + "p90": 32.86400064826012, + "p95": 36.73600032925606, + "p99": 45.823998749256134 + }, + "combine": { + "p50": 37.88800165057182, + "p90": 45.31199857592583, + "p95": 50.464000552892685, + "p99": 110.97600311040878 + }, + "roundtrip": { + "p50": 1849.5680093765259, + "p90": 1855.5200099945068, + "p95": 1863.584041595459, + "p99": 1915.1359796524048 + }, + "isolatedSum": { + "p50": 67.93600134551525, + "p90": 78.17599922418594, + "p95": 87.20000088214874, + "p99": 156.80000185966492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.26399964094162, + "p90": 33.85600075125694, + "p95": 37.376001477241516, + "p99": 43.68000105023384 + }, + "combine": { + "p50": 40.672000497579575, + "p90": 47.90399968624115, + "p95": 54.75199967622757, + "p99": 117.63200163841248 + }, + "roundtrip": { + "p50": 1853.4719944000244, + "p90": 1859.6160411834717, + "p95": 1864.2560243606567, + "p99": 1971.7119932174683 + }, + "isolatedSum": { + "p50": 71.9360001385212, + "p90": 81.76000043749809, + "p95": 92.12800115346909, + "p99": 161.31200268864632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 33.47200155258179, + "p90": 35.45600175857544, + "p95": 37.28000074625015, + "p99": 46.01600021123886 + }, + "combine": { + "p50": 45.9199994802475, + "p90": 50.49600079655647, + "p95": 54.71999943256378, + "p99": 99.13600236177444 + }, + "roundtrip": { + "p50": 1862.7519607543945, + "p90": 1867.7120208740234, + "p95": 1873.4400272369385, + "p99": 1989.9519681930542 + }, + "isolatedSum": { + "p50": 79.39200103282928, + "p90": 85.95200255513191, + "p95": 92.00000017881393, + "p99": 145.1520025730133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 39.96799886226654, + "p90": 42.97599941492081, + "p95": 45.471999794244766, + "p99": 52.000001072883606 + }, + "combine": { + "p50": 58.75200033187866, + "p90": 63.391998410224915, + "p95": 68.70400160551071, 
+ "p99": 180.4800033569336 + }, + "roundtrip": { + "p50": 1881.2479972839355, + "p90": 1887.712001800537, + "p95": 1895.4880237579346, + "p99": 2028.9599895477295 + }, + "isolatedSum": { + "p50": 98.7199991941452, + "p90": 106.36799782514572, + "p95": 114.17600139975548, + "p99": 232.4800044298172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 52.38400027155876, + "p90": 55.55199831724167, + "p95": 60.5119988322258, + "p99": 67.32799857854843 + }, + "combine": { + "p50": 86.40000224113464, + "p90": 92.38400310277939, + "p95": 99.2640033364296, + "p99": 137.92000710964203 + }, + "roundtrip": { + "p50": 1920.7040071487427, + "p90": 1926.4960289001465, + "p95": 1932.800054550171, + "p99": 1999.6800422668457 + }, + "isolatedSum": { + "p50": 138.7840025126934, + "p90": 147.93600142002106, + "p95": 159.7760021686554, + "p99": 205.24800568819046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1846871c", + "identity": "h200|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_b8782b41", + "comparisonKey": "cb787ed94f904f30", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:19.685708+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", 
+ "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 65.72800129652023, + "p90": 81.727996468544, + "p95": 91.45600348711014, + "p99": 104.00000214576721 + }, + "combine": { + "p50": 59.967998415231705, + "p90": 63.19999694824219, + "p95": 72.03199714422226, + "p99": 81.18399977684021 + }, + "roundtrip": { + "p50": 110.49599945545197, + "p90": 129.69599664211273, + "p95": 141.59999787807465, + "p99": 156.3200056552887 + }, + "isolatedSum": { + "p50": 125.69599971175194, + "p90": 144.9279934167862, + "p95": 163.4880006313324, + "p99": 185.18400192260742 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.56000083684921, + "p90": 95.36000341176987, + "p95": 102.33599692583084, + "p99": 116.15999788045883 + }, + "combine": { + "p50": 60.70400029420853, + "p90": 69.56800073385239, + "p95": 81.11999928951263, + "p99": 87.16800063848495 + }, + "roundtrip": { + "p50": 114.14399743080139, + "p90": 143.51999759674072, + "p95": 152.28800475597382, + "p99": 167.23200678825378 + }, + "isolatedSum": { + "p50": 131.26400113105774, + "p90": 164.92800414562225, + "p95": 183.45599621534348, + "p99": 203.3279985189438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 68.83200258016586, + "p90": 79.58400249481201, + "p95": 92.38400310277939, + "p99": 103.07200253009796 + }, + "combine": { + "p50": 60.15999987721443, + "p90": 64.4799992442131, + "p95": 71.19999825954437, + "p99": 81.82399719953537 + }, + "roundtrip": { + "p50": 113.63200098276138, + "p90": 130.65600395202637, + "p95": 138.8159990310669, + "p99": 147.96799421310425 + }, + "isolatedSum": { + "p50": 128.9920024573803, + "p90": 144.06400173902512, + "p95": 163.58400136232376, + "p99": 184.89599972963333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.0799971818924, + "p90": 81.60000294446945, + "p95": 96.0640013217926, + 
"p99": 106.65600001811981 + }, + "combine": { + "p50": 60.5119988322258, + "p90": 66.23999774456024, + "p95": 73.98399710655212, + "p99": 83.52000266313553 + }, + "roundtrip": { + "p50": 113.0559965968132, + "p90": 127.80800461769104, + "p95": 141.82400703430176, + "p99": 155.10399639606476 + }, + "isolatedSum": { + "p50": 130.5919960141182, + "p90": 147.8400006890297, + "p95": 170.04799842834473, + "p99": 190.17600268125534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.37599992752075, + "p90": 83.74399691820145, + "p95": 95.74399888515472, + "p99": 107.80800133943558 + }, + "combine": { + "p50": 61.59999966621399, + "p90": 68.67200136184692, + "p95": 75.6480023264885, + "p99": 83.55200290679932 + }, + "roundtrip": { + "p50": 113.53600025177002, + "p90": 126.94400548934937, + "p95": 139.26400244235992, + "p99": 153.08800339698792 + }, + "isolatedSum": { + "p50": 134.97599959373474, + "p90": 152.41599828004837, + "p95": 171.39200121164322, + "p99": 191.3600042462349 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 72.83200323581696, + "p90": 88.3840024471283, + "p95": 100.63999891281128, + "p99": 115.29599875211716 + }, + "combine": { + "p50": 64.12799656391144, + "p90": 72.06399738788605, + "p95": 78.78399640321732, + "p99": 97.69599884748459 + }, + "roundtrip": { + "p50": 117.63200163841248, + "p90": 134.5600038766861, + "p95": 142.94399321079254, + "p99": 157.98400342464447 + }, + "isolatedSum": { + "p50": 136.9599997997284, + "p90": 
160.44799983501434, + "p95": 179.4239953160286, + "p99": 212.99199759960175 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.0719985961914, + "p90": 90.68799763917923, + "p95": 101.02400183677673, + "p99": 109.31199789047241 + }, + "combine": { + "p50": 70.88000327348709, + "p90": 77.504001557827, + "p95": 84.57600325345993, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 129.56799566745758, + "p90": 140.22399485111237, + "p95": 149.6960073709488, + "p99": 161.50400042533875 + }, + "isolatedSum": { + "p50": 149.9520018696785, + "p90": 168.19199919700623, + "p95": 185.60000509023666, + "p99": 200.86399465799332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.24800223112106, + "p90": 110.81600189208984, + "p95": 124.1919994354248, + "p99": 141.63200557231903 + }, + "combine": { + "p50": 84.22400057315826, + "p90": 92.8959995508194, + "p95": 99.39199686050415, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 154.23999726772308, + "p90": 166.72000288963318, + "p95": 177.0240068435669, + "p99": 190.62399864196777 + }, + "isolatedSum": { + "p50": 177.47200280427933, + "p90": 203.71200144290924, + "p95": 223.58399629592896, + "p99": 254.59200888872147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4e9652a1", + 
"identity": "h200|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_b8782b41", + "comparisonKey": "99619e4b7680e5e1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:15.089158+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 68.1919977068901, + "p90": 83.96799862384796, + "p95": 92.70399808883667, + "p99": 118.40000003576279 + }, + "combine": { + "p50": 61.37600168585777, + "p90": 67.87200272083282, + "p95": 74.62400197982788, + "p99": 83.23200047016144 + }, + "roundtrip": { + "p50": 114.81600254774094, + "p90": 137.63199746608734, + "p95": 142.752006649971, + "p99": 157.85600244998932 + }, + "isolatedSum": { + "p50": 129.56799939274788, + "p90": 151.8400013446808, + "p95": 167.32800006866455, + "p99": 201.63200050592422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.09599697589874, + "p90": 84.03199911117554, + "p95": 90.59199690818787, + "p99": 97.63199836015701 + }, + "combine": { + "p50": 61.85600161552429, + "p90": 69.63200122117996, + "p95": 75.19999891519547, + "p99": 88.92799913883209 + }, + "roundtrip": { + "p50": 113.6000007390976, + "p90": 132.25600123405457, + "p95": 139.3280029296875, + "p99": 152.0320028066635 + }, + "isolatedSum": { + "p50": 129.95199859142303, + "p90": 153.6640003323555, + "p95": 165.79199582338333, + "p99": 186.5599974989891 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.29599899053574, + "p90": 95.77599912881851, + "p95": 104.2879968881607, + "p99": 161.53599321842194 + }, + "combine": { + "p50": 63.391998410224915, + "p90": 74.46400076150894, + "p95": 80.6720033288002, + "p99": 89.08800035715103 + }, + 
"roundtrip": { + "p50": 117.85600334405899, + "p90": 138.8159990310669, + "p95": 147.8399932384491, + "p99": 174.49599504470825 + }, + "isolatedSum": { + "p50": 134.68799740076065, + "p90": 170.23999989032745, + "p95": 184.9600002169609, + "p99": 250.62399357557297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.31999933719635, + "p90": 87.36000210046768, + "p95": 95.87199985980988, + "p99": 104.44799810647964 + }, + "combine": { + "p50": 62.94400244951248, + "p90": 74.81600344181061, + "p95": 82.30400085449219, + "p99": 103.84000092744827 + }, + "roundtrip": { + "p50": 118.56000125408173, + "p90": 136.89599931240082, + "p95": 145.88800072669983, + "p99": 158.4320068359375 + }, + "isolatedSum": { + "p50": 135.26400178670883, + "p90": 162.1760055422783, + "p95": 178.17600071430206, + "p99": 208.28799903392792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.56800138950348, + "p90": 89.66399729251862, + "p95": 96.25600278377533, + "p99": 107.68000036478043 + }, + "combine": { + "p50": 63.07200342416763, + "p90": 72.06399738788605, + "p95": 80.60800284147263, + "p99": 85.40800213813782 + }, + "roundtrip": { + "p50": 118.8800036907196, + "p90": 140.79999923706055, + "p95": 146.30399644374847, + "p99": 168.60799491405487 + }, + "isolatedSum": { + "p50": 136.6400048136711, + "p90": 161.72799468040466, + "p95": 176.86400562524796, + "p99": 193.08800250291824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 
6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.0479975938797, + "p90": 88.639996945858, + "p95": 95.93600034713745, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 69.34399902820587, + "p90": 76.25599950551987, + "p95": 83.39200168848038, + "p99": 90.01599997282028 + }, + "roundtrip": { + "p50": 121.0239976644516, + "p90": 140.54399728775024, + "p95": 147.07200229167938, + "p99": 158.30400586128235 + }, + "isolatedSum": { + "p50": 143.39199662208557, + "p90": 164.89599645137787, + "p95": 179.32800203561783, + "p99": 201.05600357055664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 90.84799885749817, + "p90": 105.05600273609161, + "p95": 113.3119985461235, + "p99": 122.6240023970604 + }, + "combine": { + "p50": 77.95199751853943, + "p90": 89.02399986982346, + "p95": 92.76799857616425, + "p99": 99.39199686050415 + }, + "roundtrip": { + "p50": 138.7840062379837, + "p90": 155.93600273132324, + "p95": 160.5439931154251, + "p99": 170.49600183963776 + }, + "isolatedSum": { + "p50": 168.7999963760376, + "p90": 194.08000260591507, + "p95": 206.07999712228775, + "p99": 222.01599925756454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.3200032711029, + "p90": 111.87200248241425, + "p95": 119.00799721479416, + "p99": 126.14400684833527 + }, + "combine": { + "p50": 
92.79999881982803, + "p90": 101.95200145244598, + "p95": 107.744000852108, + "p99": 117.21599847078323 + }, + "roundtrip": { + "p50": 165.75999557971954, + "p90": 179.83999848365784, + "p95": 185.18400192260742, + "p99": 193.50400567054749 + }, + "isolatedSum": { + "p50": 189.12000209093094, + "p90": 213.82400393486023, + "p95": 226.75199806690216, + "p99": 243.3600053191185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9090e7ec", + "identity": "h200|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_b8782b41", + "comparisonKey": "8f77bea5d0b67f18", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:13.481183+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.49600034952164, + "p90": 90.52799642086029, + "p95": 103.20000350475311, + "p99": 178.24000120162964 + }, + "combine": { + "p50": 64.41599875688553, + "p90": 71.35999947786331, + "p95": 76.54400169849396, + "p99": 85.95199882984161 + }, + "roundtrip": { + "p50": 120.99199742078781, + "p90": 139.80799913406372, + "p95": 155.68000078201294, + "p99": 176.1920005083084 + }, + "isolatedSum": { + "p50": 134.91199910640717, + "p90": 161.8879958987236, + "p95": 179.74400520324707, + "p99": 264.19200003147125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.4480003118515, + "p90": 94.55999732017517, + "p95": 103.20000350475311, + "p99": 116.73600226640701 + }, + "combine": { + "p50": 66.97600334882736, + "p90": 76.1599987745285, + "p95": 83.00799876451492, + "p99": 90.33600240945816 + }, + "roundtrip": { + "p50": 120.31999975442886, + "p90": 143.39199662208557, + "p95": 154.81600165367126, + "p99": 175.29599368572235 + }, + "isolatedSum": { + "p50": 139.42400366067886, + "p90": 
170.71999609470367, + "p95": 186.20800226926804, + "p99": 207.07200467586517 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.9040036201477, + "p90": 79.8719972372055, + "p95": 85.08799970149994, + "p99": 108.22399705648422 + }, + "combine": { + "p50": 67.84000247716904, + "p90": 70.30399888753891, + "p95": 75.32799988985062, + "p99": 91.61599725484848 + }, + "roundtrip": { + "p50": 120.67200243473053, + "p90": 130.17599284648895, + "p95": 138.14400136470795, + "p99": 154.4640064239502 + }, + "isolatedSum": { + "p50": 139.74400609731674, + "p90": 150.17599612474442, + "p95": 160.41599959135056, + "p99": 199.8399943113327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.5040009021759, + "p90": 97.31200337409973, + "p95": 106.49599879980087, + "p99": 122.81599640846252 + }, + "combine": { + "p50": 69.24799829721451, + "p90": 80.19199967384338, + "p95": 89.59999680519104, + "p99": 100.0640019774437 + }, + "roundtrip": { + "p50": 123.1359988451004, + "p90": 152.12799608707428, + "p95": 163.07200491428375, + "p99": 186.62400543689728 + }, + "isolatedSum": { + "p50": 142.7519991993904, + "p90": 177.50400304794312, + "p95": 196.0959956049919, + "p99": 222.87999838590622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + 
"dispatch": { + "p50": 73.47200065851212, + "p90": 87.3280018568039, + "p95": 101.24800354242325, + "p99": 113.27999830245972 + }, + "combine": { + "p50": 69.37599927186966, + "p90": 77.79199630022049, + "p95": 84.48000252246857, + "p99": 93.6959981918335 + }, + "roundtrip": { + "p50": 123.3920007944107, + "p90": 143.26399564743042, + "p95": 156.2879979610443, + "p99": 175.10400712490082 + }, + "isolatedSum": { + "p50": 142.84799993038177, + "p90": 165.11999815702438, + "p95": 185.72800606489182, + "p99": 206.9759964942932 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.2720006108284, + "p90": 94.87999975681305, + "p95": 103.04000228643417, + "p99": 114.3999993801117 + }, + "combine": { + "p50": 74.5920017361641, + "p90": 81.34400099515915, + "p95": 90.81599861383438, + "p99": 106.175996363163 + }, + "roundtrip": { + "p50": 129.7920048236847, + "p90": 147.07200229167938, + "p95": 160.16000509262085, + "p99": 182.78400599956512 + }, + "isolatedSum": { + "p50": 156.8640023469925, + "p90": 176.2240007519722, + "p95": 193.85600090026855, + "p99": 220.5759957432747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 90.87999910116196, + "p90": 106.20799660682678, + "p95": 114.94400352239609, + "p99": 123.77600371837616 + }, + "combine": { + "p50": 84.51200276613235, + "p90": 90.30400216579437, + "p95": 99.71199929714203, + "p99": 107.19999670982361 + }, + "roundtrip": { + "p50": 147.71200716495514, + "p90": 165.75999557971954, + "p95": 173.08799922466278, 
+ "p99": 187.1040016412735 + }, + "isolatedSum": { + "p50": 175.3920018672943, + "p90": 196.51199877262115, + "p95": 214.65600281953812, + "p99": 230.97600042819977 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 108.15999656915665, + "p90": 121.79200351238251, + "p95": 133.15199315547943, + "p99": 143.51999759674072 + }, + "combine": { + "p50": 96.57599776983261, + "p90": 105.05600273609161, + "p95": 114.75200206041336, + "p99": 123.55200201272964 + }, + "roundtrip": { + "p50": 179.967999458313, + "p90": 201.4400064945221, + "p95": 205.9520035982132, + "p99": 213.18399906158447 + }, + "isolatedSum": { + "p50": 204.73599433898926, + "p90": 226.84800624847412, + "p95": 247.9039952158928, + "p99": 267.07199960947037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3e9467f7", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_d7895571", + "comparisonKey": "a8d7aa1ea70e9702", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:38.044110+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 
· deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.68800181150436, + "p90": 76.99199765920639, + "p95": 81.31200075149536, + "p99": 91.90399944782257 + }, + "combine": { + "p50": 69.023996591568, + "p90": 73.37599992752075, + "p95": 76.22399926185608, + "p99": 83.39200168848038 + }, + "roundtrip": { + "p50": 120.60800194740295, + "p90": 129.66400384902954, + "p95": 131.6159963607788, + "p99": 141.40799641609192 + }, + "isolatedSum": { + "p50": 139.71199840307236, + "p90": 150.36799758672714, + "p95": 157.53600001335144, + "p99": 175.29600113630295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 
5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.07199728488922, + "p90": 77.7600035071373, + "p95": 81.40800148248672, + "p99": 98.88000041246414 + }, + "combine": { + "p50": 69.56800073385239, + "p90": 73.69600236415863, + "p95": 76.48000121116638, + "p99": 83.20000022649765 + }, + "roundtrip": { + "p50": 122.94399738311768, + "p90": 130.36799430847168, + "p95": 133.56800377368927, + "p99": 151.07199549674988 + }, + "isolatedSum": { + "p50": 140.6399980187416, + "p90": 151.45600587129593, + "p95": 157.8880026936531, + "p99": 182.0800006389618 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.29599899053574, + "p90": 77.27999985218048, + "p95": 79.6160027384758, + "p99": 86.68799698352814 + }, + "combine": { + "p50": 69.50400024652481, + "p90": 73.27999919652939, + "p95": 76.28799974918365, + "p99": 84.89599823951721 + }, + "roundtrip": { + "p50": 124.4800016283989, + "p90": 135.29600203037262, + "p95": 140.3840035200119, + "p99": 213.72799575328827 + }, + "isolatedSum": { + "p50": 140.79999923706055, + "p90": 150.55999904870987, + "p95": 155.90400248765945, + "p99": 171.58399522304535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.74400240182877, + "p90": 75.19999891519547, + "p95": 79.0719985961914, + "p99": 88.41600269079208 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 74.72000271081924, + "p95": 
77.82399654388428, + "p99": 86.27200126647949 + }, + "roundtrip": { + "p50": 121.56800180673599, + "p90": 132.79999792575836, + "p95": 136.22400164604187, + "p99": 177.91999876499176 + }, + "isolatedSum": { + "p50": 141.66400581598282, + "p90": 149.9200016260147, + "p95": 156.89599514007568, + "p99": 174.68800395727158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.9359964132309, + "p90": 78.36800068616867, + "p95": 81.95199817419052, + "p99": 87.45600283145905 + }, + "combine": { + "p50": 71.23199850320816, + "p90": 76.4160007238388, + "p95": 78.97599786520004, + "p99": 87.93599903583527 + }, + "roundtrip": { + "p50": 125.72799623012543, + "p90": 132.4159950017929, + "p95": 135.26399433612823, + "p99": 146.94400131702423 + }, + "isolatedSum": { + "p50": 143.16799491643906, + "p90": 154.78400141000748, + "p95": 160.92799603939056, + "p99": 175.3920018672943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.9039974808693, + "p90": 87.42400258779526, + "p95": 91.67999774217606, + "p99": 103.00800204277039 + }, + "combine": { + "p50": 77.63200253248215, + "p90": 81.08799904584885, + "p95": 83.64800363779068, + "p99": 91.71199798583984 + }, + "roundtrip": { + "p50": 134.88000631332397, + "p90": 140.00000059604645, + "p95": 144.28800344467163, + "p99": 161.3440066576004 + }, + "isolatedSum": { + "p50": 157.53600001335144, + "p90": 168.5120016336441, + "p95": 175.32800137996674, + "p99": 194.72000002861023 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.19200229644775, + "p90": 102.33599692583084, + "p95": 105.27999699115753, + "p99": 118.367999792099 + }, + "combine": { + "p50": 88.06400001049042, + "p90": 93.44000369310379, + "p95": 96.67199850082397, + "p99": 108.25599730014801 + }, + "roundtrip": { + "p50": 158.39999914169312, + "p90": 163.96799683570862, + "p95": 168.12799870967865, + "p99": 173.15199971199036 + }, + "isolatedSum": { + "p50": 184.25600230693817, + "p90": 195.77600061893463, + "p95": 201.9519954919815, + "p99": 226.623997092247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.98400366306305, + "p90": 120.89599668979645, + "p95": 123.52000176906586, + "p99": 129.18399274349213 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 110.23999750614166, + "p95": 113.02399635314941, + "p99": 120.44800072908401 + }, + "roundtrip": { + "p50": 196.31999731063843, + "p90": 202.2079974412918, + "p95": 205.85599541664124, + "p99": 210.94399690628052 + }, + "isolatedSum": { + "p50": 218.33600103855133, + "p90": 231.1359941959381, + "p95": 236.54399812221527, + "p99": 249.63199347257614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dc162bb3", + "identity": 
"h200|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_b8782b41", + "comparisonKey": "8e2aa47d4077d181", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:34.185069+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, 
+ "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.80000299215317, + "p90": 98.59199821949005, + "p95": 106.84800148010254, + "p99": 120.03199756145477 + }, + "combine": { + "p50": 70.75200229883194, + "p90": 86.20800077915192, + "p95": 90.4960036277771, + "p99": 97.85600006580353 + }, + "roundtrip": { + "p50": 127.26399302482605, + "p90": 153.18399667739868, + "p95": 161.79199516773224, + "p99": 175.77600479125977 + }, + "isolatedSum": { + "p50": 143.5520052909851, + "p90": 184.79999899864197, + "p95": 197.34400510787964, + "p99": 217.8879976272583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.08799773454666, + "p90": 97.24800288677216, + "p95": 103.4879982471466, + "p99": 112.73600161075592 + }, + "combine": { + "p50": 69.43999975919724, + "p90": 79.74400371313095, + "p95": 88.73599767684937, + "p99": 99.04000163078308 + }, + "roundtrip": { + "p50": 121.0239976644516, + "p90": 148.80000054836273, + "p95": 156.3519984483719, + "p99": 169.5999950170517 + }, + "isolatedSum": { + "p50": 142.5279974937439, + "p90": 176.9920065999031, + "p95": 192.22399592399597, + "p99": 211.776003241539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.44000041484833, + "p90": 98.1760025024414, + "p95": 104.63999956846237, + "p99": 113.3119985461235 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 81.53600245714188, + "p95": 89.75999802350998, + "p99": 99.29600358009338 + }, + "roundtrip": { + "p50": 123.07199835777283, + "p90": 
150.84800124168396, + "p95": 156.95999562740326, + "p99": 175.35999417304993 + }, + "isolatedSum": { + "p50": 143.45599710941315, + "p90": 179.71200495958328, + "p95": 194.39999759197235, + "p99": 212.6080021262169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.01599735021591, + "p90": 96.73599898815155, + "p95": 104.86400127410889, + "p99": 119.45600062608719 + }, + "combine": { + "p50": 70.65600156784058, + "p90": 88.76799792051315, + "p95": 91.16800129413605, + "p99": 101.31199657917023 + }, + "roundtrip": { + "p50": 126.01600587368011, + "p90": 153.31199765205383, + "p95": 161.28000617027283, + "p99": 177.37600207328796 + }, + "isolatedSum": { + "p50": 144.6719989180565, + "p90": 185.5039969086647, + "p95": 196.03200256824493, + "p99": 220.76799720525742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.48000121116638, + "p90": 102.30399668216705, + "p95": 111.13599687814713, + "p99": 128.1919926404953 + }, + "combine": { + "p50": 73.34399968385696, + "p90": 84.57600325345993, + "p95": 96.54399752616882, + "p99": 103.5199984908104 + }, + "roundtrip": { + "p50": 130.20800054073334, + "p90": 154.2080044746399, + "p95": 163.90399634838104, + "p99": 183.58400464057922 + }, + "isolatedSum": { + "p50": 149.82400089502335, + "p90": 186.87999993562698, + "p95": 207.67999440431595, + "p99": 231.7119911313057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 
92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.10399949550629, + "p90": 107.2319969534874, + "p95": 113.24799805879593, + "p99": 125.82400441169739 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 89.47200328111649, + "p95": 97.85600006580353, + "p99": 100.00000149011612 + }, + "roundtrip": { + "p50": 137.37599551677704, + "p90": 161.21600568294525, + "p95": 171.9360053539276, + "p99": 187.3600035905838 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 196.70400023460388, + "p95": 211.10399812459946, + "p99": 225.8240059018135 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.88800030946732, + "p90": 119.23199892044067, + "p95": 128.31999361515045, + "p99": 156.031996011734 + }, + "combine": { + "p50": 90.08000046014786, + "p90": 105.21599650382996, + "p95": 112.31999844312668, + "p99": 119.71200257539749 + }, + "roundtrip": { + "p50": 162.52799332141876, + "p90": 183.80799889564514, + "p95": 190.20800292491913, + "p99": 211.74399554729462 + }, + "isolatedSum": { + "p50": 187.96800076961517, + "p90": 224.44799542427063, + "p95": 240.63999205827713, + "p99": 275.7439985871315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.9120032787323, + "p90": 134.8479986190796, + "p95": 142.68800616264343, + "p99": 148.15999567508698 + }, + "combine": { + "p50": 104.96000200510025, + "p90": 121.56800180673599, + "p95": 
124.54400211572647, + "p99": 131.67999684810638 + }, + "roundtrip": { + "p50": 200.32000541687012, + "p90": 222.08000719547272, + "p95": 229.5999974012375, + "p99": 242.33600497245789 + }, + "isolatedSum": { + "p50": 219.87200528383255, + "p90": 256.4160004258156, + "p95": 267.2320082783699, + "p99": 279.83999252319336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1d12f2b2", + "identity": "h200|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_b8782b41", + "comparisonKey": "4304527bc3c80284", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:20.824155+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.86400347948074, + "p90": 89.31200206279755, + "p95": 97.15200215578079, + "p99": 109.47199910879135 + }, + "combine": { + "p50": 68.15999746322632, + "p90": 73.44000041484833, + "p95": 82.36800134181976, + "p99": 89.1840010881424 + }, + "roundtrip": { + "p50": 122.81599640846252, + "p90": 144.22400295734406, + "p95": 152.28800475597382, + "p99": 170.49600183963776 + }, + "isolatedSum": { + "p50": 141.02400094270706, + "p90": 162.75200247764587, + "p95": 179.52000349760056, + "p99": 198.65600019693375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.44000041484833, + "p90": 95.07200121879578, + "p95": 102.33599692583084, + "p99": 112.5119999051094 + }, + "combine": { + "p50": 68.89600306749344, + "p90": 79.68000322580338, + "p95": 84.70399677753448, + "p99": 90.7839983701706 + }, + "roundtrip": { + "p50": 123.96799772977829, + "p90": 149.02399480342865, + "p95": 155.5200070142746, + "p99": 161.98399662971497 + }, + "isolatedSum": { + "p50": 142.33600348234177, + "p90": 174.75200444459915, + "p95": 187.03999370336533, + 
"p99": 203.29599827528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.20799881219864, + "p90": 99.61599856615067, + "p95": 109.66400057077408, + "p99": 132.03200697898865 + }, + "combine": { + "p50": 70.81600278615952, + "p90": 84.54400300979614, + "p95": 89.21600133180618, + "p99": 96.8639999628067 + }, + "roundtrip": { + "p50": 128.76799702644348, + "p90": 153.3759981393814, + "p95": 163.96799683570862, + "p99": 259.552001953125 + }, + "isolatedSum": { + "p50": 145.02400159835815, + "p90": 184.1600015759468, + "p95": 198.88000190258026, + "p99": 228.89600694179535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.06399804353714, + "p90": 95.42399644851685, + "p95": 101.27999633550644, + "p99": 111.1999973654747 + }, + "combine": { + "p50": 71.03999704122543, + "p90": 81.216000020504, + "p95": 88.28800171613693, + "p99": 93.9520001411438 + }, + "roundtrip": { + "p50": 127.10399925708771, + "p90": 149.47199821472168, + "p95": 155.7759940624237, + "p99": 169.5999950170517 + }, + "isolatedSum": { + "p50": 147.10399508476257, + "p90": 176.63999646902084, + "p95": 189.56799805164337, + "p99": 205.1519975066185 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.57600194215775, + "p90": 
105.18400371074677, + "p95": 111.13599687814713, + "p99": 125.15200674533844 + }, + "combine": { + "p50": 72.28799909353256, + "p90": 84.89599823951721, + "p95": 90.84799885749817, + "p99": 98.62399846315384 + }, + "roundtrip": { + "p50": 129.43999469280243, + "p90": 152.19199657440186, + "p95": 158.62399339675903, + "p99": 181.536003947258 + }, + "isolatedSum": { + "p50": 148.8640010356903, + "p90": 190.08000195026398, + "p95": 201.9839957356453, + "p99": 223.77600520849228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.55200290679932, + "p90": 101.31199657917023, + "p95": 110.17599701881409, + "p99": 118.72000247240067 + }, + "combine": { + "p50": 79.19999957084656, + "p90": 91.71199798583984, + "p95": 97.88800030946732, + "p99": 106.36799782514572 + }, + "roundtrip": { + "p50": 138.5599970817566, + "p90": 158.62399339675903, + "p95": 164.73600268363953, + "p99": 178.0800074338913 + }, + "isolatedSum": { + "p50": 162.75200247764587, + "p90": 193.02399456501007, + "p95": 208.0639973282814, + "p99": 225.0880002975464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 97.82399982213974, + "p90": 111.61600053310394, + "p95": 115.87200313806534, + "p99": 132.9279989004135 + }, + "combine": { + "p50": 88.22400122880936, + "p90": 98.30400347709656, + "p95": 105.0880029797554, + "p99": 110.23999750614166 + }, + "roundtrip": { + "p50": 160.863995552063, + "p90": 175.80799758434296, + "p95": 182.8480064868927, + "p99": 195.2960044145584 + }, + 
"isolatedSum": { + "p50": 186.0480010509491, + "p90": 209.9200040102005, + "p95": 220.96000611782074, + "p99": 243.16799640655518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.74400281906128, + "p90": 134.62400436401367, + "p95": 141.4719969034195, + "p99": 152.19199657440186 + }, + "combine": { + "p50": 104.8320010304451, + "p90": 116.15999788045883, + "p95": 121.8239963054657, + "p99": 125.76000392436981 + }, + "roundtrip": { + "p50": 196.83200120925903, + "p90": 210.7519954442978, + "p95": 219.00799870491028, + "p99": 226.23999416828156 + }, + "isolatedSum": { + "p50": 224.57600384950638, + "p90": 250.7840022444725, + "p95": 263.2959932088852, + "p99": 277.95200049877167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4dbc7b23", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h200_5b7726a3", + "comparisonKey": "2f4764150efe25a5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:08:21.411532+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 
· alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.80000299215317, + "p90": 90.7519981265068, + "p95": 98.88000041246414, + "p99": 109.21599715948105 + }, + "combine": { + "p50": 68.1919977068901, + "p90": 78.015998005867, + "p95": 83.52000266313553, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 120.28799951076508, + "p90": 143.51999759674072, + "p95": 149.05600249767303, + "p99": 161.0880047082901 + }, + "isolatedSum": { + "p50": 140.99200069904327, + "p90": 168.7679961323738, + "p95": 182.40000307559967, + "p99": 199.0719959139824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 
3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.1599994301796, + "p90": 98.01600128412247, + "p95": 103.45599800348282, + "p99": 114.9120032787323 + }, + "combine": { + "p50": 76.51200145483017, + "p90": 84.79999750852585, + "p95": 91.51999652385712, + "p99": 99.2640033364296 + }, + "roundtrip": { + "p50": 135.68000495433807, + "p90": 158.33599865436554, + "p95": 168.09600591659546, + "p99": 202.55999267101288 + }, + "isolatedSum": { + "p50": 156.67200088500977, + "p90": 182.81599879264832, + "p95": 194.97599452733994, + "p99": 214.1760066151619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.2799928188324, + "p90": 134.43200290203094, + "p95": 137.69599795341492, + "p99": 149.1840034723282 + }, + "combine": { + "p50": 111.16799712181091, + "p90": 126.62400305271149, + "p95": 131.77600502967834, + "p99": 144.03200149536133 + }, + "roundtrip": { + "p50": 210.01599729061127, + "p90": 226.59200429916382, + "p95": 231.9359928369522, + "p99": 251.2960135936737 + }, + "isolatedSum": { + "p50": 236.4479899406433, + "p90": 261.05600595474243, + "p95": 269.47200298309326, + "p99": 293.2160049676895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-27c9a028", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": 
"h200_1c96befd", + "comparisonKey": "415ba3f78080f657", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:08:45.993231+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + 
"p50": 70.68800181150436, + "p90": 103.87200117111206, + "p95": 109.8560020327568, + "p99": 124.51200187206268 + }, + "combine": { + "p50": 68.35199892520905, + "p90": 82.17599987983704, + "p95": 88.76799792051315, + "p99": 97.02400118112564 + }, + "roundtrip": { + "p50": 122.81599640846252, + "p90": 154.01600301265717, + "p95": 163.10399770736694, + "p99": 176.06399953365326 + }, + "isolatedSum": { + "p50": 139.0400007367134, + "p90": 186.0480010509491, + "p95": 198.62399995326996, + "p99": 221.53600305318832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.70399677753448, + "p90": 108.8000014424324, + "p95": 114.75200206041336, + "p99": 127.13600695133209 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 88.57599645853043, + "p95": 93.66399794816971, + "p99": 100.3199964761734 + }, + "roundtrip": { + "p50": 136.31999492645264, + "p90": 162.88000345230103, + "p95": 169.88800466060638, + "p99": 190.33600389957428 + }, + "isolatedSum": { + "p50": 161.02399677038193, + "p90": 197.37599790096283, + "p95": 208.41600000858307, + "p99": 227.4560034275055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.8479973077774, + "p90": 144.76799964904785, + "p95": 152.63999998569489, + "p99": 162.20800578594208 + }, + "combine": { + "p50": 111.10399663448334, + "p90": 124.38400089740753, + "p95": 130.0159990787506, + "p99": 147.45600521564484 + }, + "roundtrip": { + "p50": 209.88799631595612, + "p90": 229.08799350261688, + "p95": 
234.27200317382812, + "p99": 253.50400805473328 + }, + "isolatedSum": { + "p50": 237.95199394226074, + "p90": 269.1520005464554, + "p95": 282.6559990644455, + "p99": 309.6640110015869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-22b28d1a", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h200_1996ba44", + "comparisonKey": "6e2498ded154f118", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:09:10.607705+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": 
true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 71.58400118350983, + "p90": 91.80799871683121, + "p95": 100.3199964761734, + "p99": 109.56799983978271 + }, + "combine": { + "p50": 67.45599955320358, + "p90": 77.02399790287018, + "p95": 83.96799862384796, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 120.2239990234375, + "p90": 145.28000354766846, + "p95": 153.3759981393814, + "p99": 183.80799889564514 + }, + "isolatedSum": { + "p50": 139.0400007367134, + "p90": 168.83199661970139, + "p95": 184.28799510002136, + "p99": 199.68000054359436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.77599650621414, + "p90": 99.74399954080582, + "p95": 107.04000294208527, + "p99": 120.70400267839432 + }, + "combine": { + "p50": 76.25599950551987, + "p90": 85.63199639320374, + "p95": 91.0400003194809, + "p99": 99.48799759149551 + }, + "roundtrip": { + "p50": 133.69600474834442, + "p90": 152.28800475597382, + "p95": 160.70400178432465, + "p99": 179.61600422859192 + }, + "isolatedSum": { + "p50": 156.031996011734, + "p90": 185.37599593400955, + "p95": 198.08000326156616, + "p99": 220.19200026988983 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.7360035777092, + "p90": 132.28799402713776, + "p95": 135.45599579811096, + "p99": 147.23199605941772 + }, + "combine": { + "p50": 110.17599701881409, + "p90": 122.27199971675873, + "p95": 128.48000228405, + "p99": 137.31199502944946 + }, + "roundtrip": { + "p50": 209.98400449752808, + "p90": 224.57599639892578, + "p95": 231.61600530147552, + "p99": 243.6159998178482 + }, + "isolatedSum": { + "p50": 234.91200059652328, + "p90": 254.55999374389648, + "p95": 263.93599808216095, + "p99": 284.5439910888672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-068ed780", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h200_1a96bbd7", + "comparisonKey": "e91588b8684e74f9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:09:35.079301+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + 
"routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.98399645090103, + "p90": 95.16800194978714, + "p95": 101.08800232410431, + "p99": 110.97600311040878 + }, + "combine": { + "p50": 67.48799979686737, + "p90": 78.11199873685837, + "p95": 83.16799998283386, + "p99": 90.62399715185165 + }, + "roundtrip": { + "p50": 121.56800180673599, + "p90": 151.5520066022873, + "p95": 161.40800714492798, + "p99": 174.0799993276596 + }, + "isolatedSum": { + "p50": 137.4719962477684, + "p90": 173.2800006866455, + "p95": 184.25600230693817, + "p99": 201.60000026226044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.24000036716461, + "p90": 103.13600301742554, + "p95": 110.84800213575363, + "p99": 119.99999731779099 + }, + "combine": { + "p50": 76.31999999284744, + "p90": 88.54400366544724, + "p95": 92.92799979448318, + "p99": 101.21600329875946 + }, + "roundtrip": { + "p50": 132.9279989004135, + "p90": 154.6880006790161, + "p95": 159.4880074262619, + "p99": 174.112007021904 + }, + "isolatedSum": { + "p50": 158.56000036001205, + "p90": 191.68000668287277, + "p95": 203.77600193023682, + "p99": 221.21600061655045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.5919953584671, + "p90": 147.74399995803833, + "p95": 152.19199657440186, + "p99": 163.90399634838104 + }, + "combine": { + "p50": 110.55999994277954, + "p90": 123.00799787044525, + "p95": 129.88799810409546, + "p99": 132.83200562000275 + }, + "roundtrip": { + "p50": 211.35999262332916, + "p90": 227.52000391483307, + "p95": 231.9680005311966, + "p99": 239.6160066127777 + }, + "isolatedSum": { + "p50": 237.15199530124664, + "p90": 270.7519978284836, + "p95": 282.0799946784973, + "p99": 296.7360019683838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-063be61e", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h200_fa12a3e3", + "comparisonKey": "4f57655f825bd262", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:12.548918+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.52800124883652, + "p90": 102.4319976568222, + "p95": 106.6880002617836, + "p99": 131.99999928474426 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 83.48800241947174, + 
"p95": 90.08000046014786, + "p99": 95.83999961614609 + }, + "roundtrip": { + "p50": 123.9359974861145, + "p90": 150.94399452209473, + "p95": 159.16800498962402, + "p99": 172.92800545692444 + }, + "isolatedSum": { + "p50": 145.7279995083809, + "p90": 185.92000007629395, + "p95": 196.76800072193146, + "p99": 227.83999890089035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 74.17599856853485, + "p90": 90.14400094747543, + "p95": 99.45599734783173, + "p99": 112.60800063610077 + }, + "combine": { + "p50": 71.07199728488922, + "p90": 82.33600109815598, + "p95": 90.36800265312195, + "p99": 97.75999933481216 + }, + "roundtrip": { + "p50": 125.31200051307678, + "p90": 154.23999726772308, + "p95": 160.89600324630737, + "p99": 189.69599902629852 + }, + "isolatedSum": { + "p50": 145.24799585342407, + "p90": 172.4800020456314, + "p95": 189.82400000095367, + "p99": 210.36799997091293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.71200281381607, + "p90": 107.71200060844421, + "p95": 118.68800222873688, + "p99": 132.57600367069244 + }, + "combine": { + "p50": 72.57600128650665, + "p90": 88.83199840784073, + "p95": 91.45600348711014, + "p99": 103.7760004401207 + }, + "roundtrip": { + "p50": 124.09599870443344, + "p90": 154.55999970436096, + "p95": 161.82400286197662, + "p99": 179.1040003299713 + }, + "isolatedSum": { + "p50": 148.28800410032272, + "p90": 196.54399901628494, + "p95": 210.14400571584702, + "p99": 236.35200411081314 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.23999905586243, + "p90": 96.16000205278397, + "p95": 103.71199995279312, + "p99": 115.1999980211258 + }, + "combine": { + "p50": 72.35199958086014, + "p90": 84.79999750852585, + "p95": 91.16800129413605, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 125.15200674533844, + "p90": 150.84800124168396, + "p95": 157.50400722026825, + "p99": 166.72000288963318 + }, + "isolatedSum": { + "p50": 146.59199863672256, + "p90": 180.95999956130981, + "p95": 194.88000124692917, + "p99": 213.44000101089478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.1359977722168, + "p90": 93.02400052547455, + "p95": 100.99200159311295, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 87.10400015115738, + "p95": 92.44800359010696, + "p99": 102.49599814414978 + }, + "roundtrip": { + "p50": 131.67999684810638, + "p90": 155.4879993200302, + "p95": 162.9440039396286, + "p99": 172.44799435138702 + }, + "isolatedSum": { + "p50": 147.74399995803833, + "p90": 180.12800067663193, + "p95": 193.4400051832199, + "p99": 212.48000115156174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 90.59199690818787, + "p90": 104.06400263309479, + "p95": 112.5119999051094, + "p99": 128.7039965391159 + 
}, + "combine": { + "p50": 80.4160013794899, + "p90": 91.36000275611877, + "p95": 98.08000177145004, + "p99": 105.76000064611435 + }, + "roundtrip": { + "p50": 145.05599439144135, + "p90": 164.41600024700165, + "p95": 170.9119975566864, + "p99": 186.49600446224213 + }, + "isolatedSum": { + "p50": 171.00799828767776, + "p90": 195.42400538921356, + "p95": 210.59200167655945, + "p99": 234.46399718523026 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 108.64000022411346, + "p90": 146.4959979057312, + "p95": 149.79200065135956, + "p99": 163.42400014400482 + }, + "combine": { + "p50": 95.13600170612335, + "p90": 110.01600325107574, + "p95": 115.42399972677231, + "p99": 127.00800597667694 + }, + "roundtrip": { + "p50": 176.92799866199493, + "p90": 195.74399292469025, + "p95": 201.88799500465393, + "p99": 405.69600462913513 + }, + "isolatedSum": { + "p50": 203.77600193023682, + "p90": 256.51200115680695, + "p95": 265.21600037813187, + "p99": 290.43200612068176 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.63200426101685, + "p90": 151.42400562763214, + "p95": 159.29600596427917, + "p99": 170.3999936580658 + }, + "combine": { + "p50": 125.37600100040436, + "p90": 139.13600146770477, + "p95": 141.66399836540222, + "p99": 148.3519971370697 + }, + "roundtrip": { + "p50": 229.88800704479218, + "p90": 252.128005027771, + "p95": 255.13601303100586, + "p99": 260.19200682640076 + }, + "isolatedSum": { + "p50": 259.0080052614212, + "p90": 290.5600070953369, + "p95": 
300.9600043296814, + "p99": 318.7519907951355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cc5c4ebf", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h200_de57b2b2", + "comparisonKey": "e2570de89e5a2535", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:10.685179+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 64.83200192451477, + "p90": 85.69599688053131, + "p95": 92.57599711418152, + "p99": 102.1760031580925 + }, + "combine": { + "p50": 59.84000116586685, + "p90": 69.40799951553345, + "p95": 75.52000135183334, + "p99": 83.5840031504631 + }, + "roundtrip": { + "p50": 110.72000116109848, + "p90": 130.40000200271606, + "p95": 140.1280015707016, + "p99": 152.6080071926117 + }, + "isolatedSum": { + "p50": 124.67200309038162, + "p90": 155.10399639606476, + "p95": 168.09599846601486, + "p99": 185.7600063085556 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.49600034952164, + "p90": 86.65599673986435, + "p95": 94.40000355243683, + "p99": 104.99200224876404 + }, + "combine": { + "p50": 60.7680007815361, + "p90": 67.58400052785873, + "p95": 74.43200051784515, + "p99": 79.71200346946716 + }, + "roundtrip": { + "p50": 113.95200341939926, + "p90": 131.26400113105774, + "p95": 139.48799669742584, + "p99": 147.61599898338318 + }, + "isolatedSum": { + "p50": 131.26400113105774, + "p90": 154.23999726772308, + "p95": 168.83200407028198, + "p99": 184.7040057182312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.97599852085114, + "p90": 97.88800030946732, + "p95": 103.67999970912933, + "p99": 111.04000359773636 + }, + "combine": { + "p50": 69.76000219583511, + "p90": 77.44000107049942, + "p95": 84.41600203514099, + "p99": 95.551997423172 + }, + "roundtrip": { + "p50": 123.77600371837616, + "p90": 143.8719928264618, + "p95": 149.75999295711517, + "p99": 159.32799875736237 + }, + "isolatedSum": { + "p50": 152.73600071668625, + "p90": 175.32800137996674, + "p95": 188.09600174427032, + "p99": 206.59200102090836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 80.57600259780884, + "p90": 96.99200093746185, + "p95": 103.67999970912933, + "p99": 112.70400136709213 + }, + "combine": { + "p50": 71.68000191450119, + "p90": 80.1599994301796, + "p95": 88.639996945858, + "p99": 92.99200028181076 + }, + "roundtrip": { + "p50": 130.2720010280609, + "p90": 146.62399888038635, + "p95": 153.24799716472626, + "p99": 170.43200135231018 + }, + "isolatedSum": { + "p50": 152.25600451231003, + "p90": 177.15200036764145, + "p95": 192.31999665498734, + "p99": 205.6960016489029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-28abf0fa", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h200_3515cf1b", + "comparisonKey": "8693865d7cc00429", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:57.883574+00:00", + 
"status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 65.95200300216675, + "p90": 91.74399822950363, + "p95": 99.04000163078308, + "p99": 147.87200093269348 + }, + "combine": { + "p50": 
61.85600161552429, + "p90": 68.96000355482101, + "p95": 78.43200117349625, + "p99": 84.25600081682205 + }, + "roundtrip": { + "p50": 116.54400080442429, + "p90": 144.31999623775482, + "p95": 151.36000514030457, + "p99": 162.4000072479248 + }, + "isolatedSum": { + "p50": 127.80800461769104, + "p90": 160.70400178432465, + "p95": 177.47200280427933, + "p99": 232.12800174951553 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.08799707889557, + "p90": 92.3520028591156, + "p95": 99.61599856615067, + "p99": 105.82400113344193 + }, + "combine": { + "p50": 61.72800064086914, + "p90": 69.08799707889557, + "p95": 78.23999971151352, + "p99": 89.9519994854927 + }, + "roundtrip": { + "p50": 116.99199676513672, + "p90": 138.75199854373932, + "p95": 147.8399932384491, + "p99": 161.72799468040466 + }, + "isolatedSum": { + "p50": 130.8159977197647, + "p90": 161.43999993801117, + "p95": 177.85599827766418, + "p99": 195.77600061893463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.47200065851212, + "p90": 94.17600184679031, + "p95": 101.72799974679947, + "p99": 114.23999816179276 + }, + "combine": { + "p50": 62.07999959588051, + "p90": 70.01599669456482, + "p95": 80.48000186681747, + "p99": 84.83199775218964 + }, + "roundtrip": { + "p50": 118.367999792099, + "p90": 147.74399995803833, + "p95": 158.01599621772766, + "p99": 169.44000124931335 + }, + "isolatedSum": { + "p50": 135.55200025439262, + "p90": 164.19199854135513, + "p95": 182.20800161361694, + "p99": 199.0719959139824 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.66400212049484, + "p90": 91.90399944782257, + "p95": 100.28800368309021, + "p99": 113.53600025177002 + }, + "combine": { + "p50": 62.81600147485733, + "p90": 71.07199728488922, + "p95": 77.7600035071373, + "p99": 88.76799792051315 + }, + "roundtrip": { + "p50": 118.07999759912491, + "p90": 140.57600498199463, + "p95": 151.96800231933594, + "p99": 163.96799683570862 + }, + "isolatedSum": { + "p50": 136.48000359535217, + "p90": 162.9759967327118, + "p95": 178.0480071902275, + "p99": 202.30399817228317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.86399668455124, + "p90": 95.07200121879578, + "p95": 106.52799904346466, + "p99": 120.99199742078781 + }, + "combine": { + "p50": 64.09599632024765, + "p90": 74.11199808120728, + "p95": 83.20000022649765, + "p99": 90.94399958848953 + }, + "roundtrip": { + "p50": 119.74400281906128, + "p90": 142.68800616264343, + "p95": 149.85600113868713, + "p99": 156.09599649906158 + }, + "isolatedSum": { + "p50": 140.9599930047989, + "p90": 169.18399930000305, + "p95": 189.7279992699623, + "p99": 211.93599700927734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.72000271081924, + "p90": 96.54399752616882, + "p95": 104.76800054311752, + 
"p99": 114.94400352239609 + }, + "combine": { + "p50": 64.83200192451477, + "p90": 74.49600100517273, + "p95": 84.25600081682205, + "p99": 91.00800007581711 + }, + "roundtrip": { + "p50": 120.92799693346024, + "p90": 144.31999623775482, + "p95": 151.96800231933594, + "p99": 161.0880047082901 + }, + "isolatedSum": { + "p50": 139.55200463533401, + "p90": 171.03999853134155, + "p95": 189.02400135993958, + "p99": 205.9520035982132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 84.06399935483932, + "p90": 102.33599692583084, + "p95": 108.96000266075134, + "p99": 118.20799857378006 + }, + "combine": { + "p50": 70.592001080513, + "p90": 80.44800162315369, + "p95": 89.9519994854927, + "p99": 95.13600170612335 + }, + "roundtrip": { + "p50": 125.66399574279785, + "p90": 145.28000354766846, + "p95": 152.67199277877808, + "p99": 173.34400117397308 + }, + "isolatedSum": { + "p50": 154.65600043535233, + "p90": 182.78399854898453, + "p95": 198.91200214624405, + "p99": 213.3440002799034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 96.28800302743912, + "p90": 113.11999708414078, + "p95": 120.60800194740295, + "p99": 139.0720009803772 + }, + "combine": { + "p50": 85.15200018882751, + "p90": 98.9760011434555, + "p95": 105.85600137710571, + "p99": 112.5440001487732 + }, + "roundtrip": { + "p50": 155.29599785804749, + "p90": 168.16000640392303, + "p95": 175.9680062532425, + "p99": 186.68800592422485 + }, + "isolatedSum": { + "p50": 181.44000321626663, + "p90": 
212.09599822759628, + "p95": 226.46400332450867, + "p99": 251.6160011291504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e74ffbf6", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h200_1065180b", + "comparisonKey": "166d33514d3eccfc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:42.848014+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.32799988985062, + "p90": 80.44800162315369, + "p95": 90.36800265312195, + "p99": 106.55999928712845 + }, + "combine": { + "p50": 71.16799801588058, + "p90": 75.45600086450577, + "p95": 79.00799810886383, + "p99": 89.53599631786346 + }, + "roundtrip": { + "p50": 125.76000392436981, + "p90": 137.11999356746674, + "p95": 142.30400323867798, + "p99": 154.2080044746399 + }, + "isolatedSum": { + "p50": 146.4959979057312, + "p90": 155.90400248765945, + "p95": 169.37600076198578, + "p99": 196.0959956049919 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.51200211048126, + "p90": 90.81599861383438, + "p95": 97.56799787282944, + "p99": 108.06400328874588 + }, + "combine": { + "p50": 79.3600007891655, + "p90": 83.5840031504631, + "p95": 87.96799927949905, + "p99": 99.87200051546097 + }, + "roundtrip": { + "p50": 140.60799777507782, + "p90": 150.2079963684082, + "p95": 158.1760048866272, + "p99": 201.92000269889832 + }, + "isolatedSum": { + "p50": 159.87200289964676, + "p90": 174.40000176429749, + "p95": 185.5359971523285, + "p99": 207.93600380420685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, 
+ "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.54400211572647, + "p90": 140.09599387645721, + "p95": 148.83199334144592, + "p99": 167.26399958133698 + }, + "combine": { + "p50": 118.65600198507309, + "p90": 123.71200323104858, + "p95": 130.62399625778198, + "p99": 133.59999656677246 + }, + "roundtrip": { + "p50": 221.82400524616241, + "p90": 230.880007147789, + "p95": 234.8479926586151, + "p99": 241.37599766254425 + }, + "isolatedSum": { + "p50": 243.20000410079956, + "p90": 263.8079971075058, + "p95": 279.4559895992279, + "p99": 300.86399614810944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16aeedda", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h200_b2c59755", + "comparisonKey": "21e26acefbd7814e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:07.354210+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.46400076150894, + "p90": 80.48000186681747, + "p95": 83.90399813652039, + "p99": 96.96000069379807 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 73.69600236415863, + "p95": 75.55200159549713, + "p99": 80.4160013794899 + }, + "roundtrip": { + "p50": 126.71999633312225, + "p90": 135.48800349235535, + "p95": 138.0160003900528, + "p99": 150.04800260066986 + }, + "isolatedSum": { + "p50": 145.37600427865982, + "p90": 154.1760042309761, + "p95": 159.45599973201752, + "p99": 177.37600207328796 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.34400165081024, + "p90": 92.67199784517288, + "p95": 96.6079980134964, + "p99": 
106.08000308275223 + }, + "combine": { + "p50": 79.45600152015686, + "p90": 83.5840031504631, + "p95": 88.73599767684937, + "p99": 167.07199811935425 + }, + "roundtrip": { + "p50": 140.4159963130951, + "p90": 147.77599275112152, + "p95": 154.08000349998474, + "p99": 160.64000129699707 + }, + "isolatedSum": { + "p50": 164.8000031709671, + "p90": 176.256000995636, + "p95": 185.34399569034576, + "p99": 273.1520012021065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.70400333404541, + "p90": 133.5040032863617, + "p95": 141.53599739074707, + "p99": 160.60799360275269 + }, + "combine": { + "p50": 119.45600062608719, + "p90": 124.54400211572647, + "p95": 130.36799430847168, + "p99": 151.5199989080429 + }, + "roundtrip": { + "p50": 223.87200593948364, + "p90": 231.74400627613068, + "p95": 236.2239956855774, + "p99": 247.80799448490143 + }, + "isolatedSum": { + "p50": 244.1600039601326, + "p90": 258.04800540208817, + "p95": 271.90399169921875, + "p99": 312.1279925107956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fd6fae37", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h200_afc5929c", + "comparisonKey": "63b75b96b43908fa", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:31.697488+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": 
"normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.14399832487106, + "p90": 95.83999961614609, + "p95": 105.18400371074677, + "p99": 118.07999759912491 + }, + "combine": { + "p50": 71.61600142717361, + "p90": 83.64800363779068, + "p95": 90.4960036277771, + "p99": 101.9200012087822 + }, + "roundtrip": { + "p50": 123.87199699878693, + "p90": 
153.79199385643005, + "p95": 167.93599724769592, + "p99": 195.8719938993454 + }, + "isolatedSum": { + "p50": 145.75999975204468, + "p90": 179.48800325393677, + "p95": 195.68000733852386, + "p99": 219.9999988079071 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.64800298213959, + "p90": 106.33599758148193, + "p95": 125.63200294971466, + "p99": 146.65600657463074 + }, + "combine": { + "p50": 78.3040001988411, + "p90": 91.13600105047226, + "p95": 98.01600128412247, + "p99": 106.65600001811981 + }, + "roundtrip": { + "p50": 138.84800672531128, + "p90": 161.82400286197662, + "p95": 166.81599617004395, + "p99": 209.34399962425232 + }, + "isolatedSum": { + "p50": 157.95200318098068, + "p90": 197.4719986319542, + "p95": 223.64800423383713, + "p99": 253.31200659275055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.90399724245071, + "p90": 138.8159990310669, + "p95": 145.4080045223236, + "p99": 164.22399878501892 + }, + "combine": { + "p50": 119.00799721479416, + "p90": 131.80799782276154, + "p95": 139.1039937734604, + "p99": 151.2320041656494 + }, + "roundtrip": { + "p50": 223.29600155353546, + "p90": 252.79998779296875, + "p95": 258.91199707984924, + "p99": 273.0880081653595 + }, + "isolatedSum": { + "p50": 242.91199445724487, + "p90": 270.62399685382843, + "p95": 284.511998295784, + "p99": 315.45600295066833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + 
"recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fe8ad28a", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h200_b0c5942f", + "comparisonKey": "4486f638770c7b2c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:55.887324+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", 
+ "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.56800138950348, + "p90": 95.0080007314682, + "p95": 103.87200117111206, + "p99": 112.22399771213531 + }, + "combine": { + "p50": 69.95200365781784, + "p90": 81.56800270080566, + "p95": 90.17600119113922, + "p99": 98.78399968147278 + }, + "roundtrip": { + "p50": 124.03199821710587, + "p90": 150.751993060112, + "p95": 160.19199788570404, + "p99": 185.44000387191772 + }, + "isolatedSum": { + "p50": 143.52000504732132, + "p90": 176.57600343227386, + "p95": 194.04800236225128, + "p99": 211.0079973936081 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.36800134181976, + "p90": 100.89600086212158, + "p95": 109.15199667215347, + "p99": 147.42399752140045 + }, + "combine": { + "p50": 78.68800312280655, + "p90": 91.45600348711014, + "p95": 97.69599884748459, + "p99": 107.16799646615982 + }, + "roundtrip": { + "p50": 137.56799697875977, + "p90": 159.0079963207245, + "p95": 166.07999801635742, + "p99": 182.97599256038666 + }, + "isolatedSum": { + "p50": 161.0560044646263, + "p90": 192.35200434923172, + "p95": 206.84799551963806, + "p99": 254.59199398756027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.36000055074692, + "p90": 136.51199638843536, + 
"p95": 142.11200177669525, + "p99": 148.92800152301788 + }, + "combine": { + "p50": 118.14399808645248, + "p90": 129.72800433635712, + "p95": 137.9839926958084, + "p99": 140.73599874973297 + }, + "roundtrip": { + "p50": 222.84799814224243, + "p90": 243.74400079250336, + "p95": 250.20799040794373, + "p99": 259.2320144176483 + }, + "isolatedSum": { + "p50": 241.5039986371994, + "p90": 266.2400007247925, + "p95": 280.09599447250366, + "p99": 289.66400027275085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a823a335", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h200_f933a4f5", + "comparisonKey": "795c97ef85458a0e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:58.511205+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + 
"resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.7040022611618, + "p90": 94.91200000047684, + "p95": 103.5199984908104, + "p99": 111.55200004577637 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 77.31200009584427, + "p95": 82.65600353479385, + "p99": 89.4400030374527 + }, + "roundtrip": { + "p50": 121.2799996137619, + "p90": 148.12800288200378, + "p95": 153.9199948310852, + "p99": 162.1759980916977 + }, + "isolatedSum": { + "p50": 141.12000167369843, + "p90": 172.2240000963211, + "p95": 186.17600202560425, + "p99": 200.99200308322906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.89600372314453, + "p90": 99.10400211811066, + "p95": 105.12000322341919, + "p99": 120.41600048542023 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 81.50400221347809, + "p95": 87.48800307512283, + "p99": 94.91200000047684 + }, + "roundtrip": { + "p50": 125.59999525547028, + "p90": 
155.93600273132324, + "p95": 164.0319973230362, + "p99": 177.18400061130524 + }, + "isolatedSum": { + "p50": 142.91200041770935, + "p90": 180.60800433158875, + "p95": 192.60800629854202, + "p99": 215.32800048589706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.01599735021591, + "p90": 93.6959981918335, + "p95": 100.51199793815613, + "p99": 117.0559972524643 + }, + "combine": { + "p50": 69.85600292682648, + "p90": 80.03199845552444, + "p95": 88.16000074148178, + "p99": 92.99200028181076 + }, + "roundtrip": { + "p50": 125.44000148773193, + "p90": 154.01600301265717, + "p95": 159.16800498962402, + "p99": 170.23999989032745 + }, + "isolatedSum": { + "p50": 143.8720002770424, + "p90": 173.72799664735794, + "p95": 188.6719986796379, + "p99": 210.04799753427505 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.46400076150894, + "p90": 93.9520001411438, + "p95": 101.15200281143188, + "p99": 109.98400300741196 + }, + "combine": { + "p50": 70.52800059318542, + "p90": 79.77599650621414, + "p95": 87.39200234413147, + "p99": 93.50399672985077 + }, + "roundtrip": { + "p50": 126.8479973077774, + "p90": 150.9760022163391, + "p95": 157.9200029373169, + "p99": 173.63199591636658 + }, + "isolatedSum": { + "p50": 144.99200135469437, + "p90": 173.72799664735794, + "p95": 188.54400515556335, + "p99": 203.48799973726273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.82400333881378, + "p90": 94.04800087213516, + "p95": 102.78400033712387, + "p99": 113.37599903345108 + }, + "combine": { + "p50": 73.08799773454666, + "p90": 82.14399963617325, + "p95": 90.4960036277771, + "p99": 98.43199700117111 + }, + "roundtrip": { + "p50": 129.63199615478516, + "p90": 150.33599734306335, + "p95": 156.19200468063354, + "p99": 164.70399498939514 + }, + "isolatedSum": { + "p50": 146.91200107336044, + "p90": 176.1920005083084, + "p95": 193.28000396490097, + "p99": 211.8079960346222 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.96799862384796, + "p90": 106.30399733781815, + "p95": 112.64000087976456, + "p99": 119.4240003824234 + }, + "combine": { + "p50": 79.6160027384758, + "p90": 93.79199892282486, + "p95": 98.75199943780899, + "p99": 104.73600029945374 + }, + "roundtrip": { + "p50": 140.60799777507782, + "p90": 163.13600540161133, + "p95": 169.5999950170517, + "p99": 183.9359998703003 + }, + "isolatedSum": { + "p50": 163.58400136232376, + "p90": 200.095996260643, + "p95": 211.39200031757355, + "p99": 224.16000068187714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.71199929714203, + "p90": 112.22399771213531, + "p95": 118.97599697113037, + "p99": 124.38400089740753 + }, + "combine": { + "p50": 90.43200314044952, + "p90": 103.96800190210342, + "p95": 
110.01600325107574, + "p99": 116.44800007343292 + }, + "roundtrip": { + "p50": 166.78400337696075, + "p90": 184.09599363803864, + "p95": 191.48799777030945, + "p99": 199.16799664497375 + }, + "isolatedSum": { + "p50": 190.14400243759155, + "p90": 216.19199961423874, + "p95": 228.99200022220612, + "p99": 240.83200097084045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.30400061607361, + "p90": 146.84799313545227, + "p95": 151.32799744606018, + "p99": 166.30400717258453 + }, + "combine": { + "p50": 119.32799965143204, + "p90": 134.17600095272064, + "p95": 138.87999951839447, + "p99": 144.22400295734406 + }, + "roundtrip": { + "p50": 224.48000311851501, + "p90": 247.45599925518036, + "p95": 253.24800610542297, + "p99": 267.16798543930054 + }, + "isolatedSum": { + "p50": 245.63200026750565, + "p90": 281.0239940881729, + "p95": 290.20799696445465, + "p99": 310.5280101299286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eade2006", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h200_72b3c469", + "comparisonKey": "0936bffd50410fb9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:44.852000+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.24799829721451, + "p90": 79.1039988398552, + "p95": 90.20800143480301, + "p99": 108.89600217342377 + }, + "combine": { + "p50": 68.31999868154526, + "p90": 71.55200093984604, + "p95": 76.57600194215775, + "p99": 91.26400202512741 + }, + "roundtrip": { + "p50": 121.63200229406357, + "p90": 131.71200454235077, + "p95": 141.56800508499146, + "p99": 
158.4639996290207 + }, + "isolatedSum": { + "p50": 137.56799697875977, + "p90": 150.65599977970123, + "p95": 166.78400337696075, + "p99": 200.16000419855118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.54400104284286, + "p90": 85.60000360012054, + "p95": 99.48799759149551, + "p99": 112.47999966144562 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 76.4480009675026, + "p95": 82.84799754619598, + "p99": 92.83199906349182 + }, + "roundtrip": { + "p50": 122.30399996042252, + "p90": 142.7839994430542, + "p95": 156.76799416542053, + "p99": 165.3759926557541 + }, + "isolatedSum": { + "p50": 141.50400459766388, + "p90": 162.04800456762314, + "p95": 182.3359951376915, + "p99": 205.31199872493744 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.17599856853485, + "p90": 87.77599781751633, + "p95": 99.23200309276581, + "p99": 108.51199924945831 + }, + "combine": { + "p50": 69.60000097751617, + "p90": 75.93599706888199, + "p95": 82.40000158548355, + "p99": 91.07200056314468 + }, + "roundtrip": { + "p50": 124.38400089740753, + "p90": 141.02399349212646, + "p95": 151.36000514030457, + "p99": 164.8319959640503 + }, + "isolatedSum": { + "p50": 143.77599954605103, + "p90": 163.71199488639832, + "p95": 181.63200467824936, + "p99": 199.583999812603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.87999647855759, + "p90": 92.54399687051773, + "p95": 103.00800204277039, + "p99": 119.39200013875961 + }, + "combine": { + "p50": 70.36799937486649, + "p90": 83.61600339412689, + "p95": 88.76799792051315, + "p99": 94.87999975681305 + }, + "roundtrip": { + "p50": 122.36800044775009, + "p90": 140.6719982624054, + "p95": 153.02400290966034, + "p99": 172.992005944252 + }, + "isolatedSum": { + "p50": 145.24799585342407, + "p90": 176.16000026464462, + "p95": 191.77599996328354, + "p99": 214.27199989557266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.55200093984604, + "p90": 80.92799782752991, + "p95": 93.05600076913834, + "p99": 110.52799969911575 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 78.11199873685837, + "p95": 88.8959988951683, + "p99": 101.05600208044052 + }, + "roundtrip": { + "p50": 125.40799379348755, + "p90": 141.34399592876434, + "p95": 149.6960073709488, + "p99": 164.70399498939514 + }, + "isolatedSum": { + "p50": 142.88000017404556, + "p90": 159.03999656438828, + "p95": 181.95199966430664, + "p99": 211.58400177955627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.20800012350082, + "p90": 97.43999689817429, + "p95": 109.0560033917427, + "p99": 123.10399860143661 + }, + "combine": { + "p50": 78.23999971151352, + "p90": 84.57600325345993, + "p95": 91.71199798583984, + "p99": 100.96000134944916 + }, + "roundtrip": { + "p50": 
137.11999356746674, + "p90": 154.84799444675446, + "p95": 165.95199704170227, + "p99": 187.71199882030487 + }, + "isolatedSum": { + "p50": 160.44799983501434, + "p90": 182.01600015163422, + "p95": 200.76800137758255, + "p99": 224.06399995088577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.45599669218063, + "p90": 107.744000852108, + "p95": 114.9120032787323, + "p99": 199.072003364563 + }, + "combine": { + "p50": 88.03199976682663, + "p90": 96.44799679517746, + "p95": 103.67999970912933, + "p99": 109.69600081443787 + }, + "roundtrip": { + "p50": 158.75199437141418, + "p90": 167.71200299263, + "p95": 178.01600694656372, + "p99": 188.76799941062927 + }, + "isolatedSum": { + "p50": 183.48799645900726, + "p90": 204.19199764728546, + "p95": 218.59200298786163, + "p99": 308.76800417900085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.34399944543839, + "p90": 126.36800110340118, + "p95": 136.54400408267975, + "p99": 145.1520025730133 + }, + "combine": { + "p50": 108.96000266075134, + "p90": 116.03199690580368, + "p95": 121.85599654912949, + "p99": 135.48800349235535 + }, + "roundtrip": { + "p50": 199.64799284934998, + "p90": 210.65600216388702, + "p95": 219.13599967956543, + "p99": 232.41600394248962 + }, + "isolatedSum": { + "p50": 226.30400210618973, + "p90": 242.39999800920486, + "p95": 258.40000063180923, + "p99": 280.64000606536865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 
77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4f1c55f9", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h200_3520c448", + "comparisonKey": "25fe0c98b43fdf7c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:10:23.936929+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 73.2479989528656, + "p90": 79.45600152015686, + "p95": 83.26400071382523, + "p99": 91.80799871683121 + }, + "combine": { + "p50": 71.10399752855301, + "p90": 75.77600330114365, + "p95": 80.1599994301796, + "p99": 90.62399715185165 + }, + "roundtrip": { + "p50": 125.85599720478058, + "p90": 139.16799426078796, + "p95": 153.9199948310852, + "p99": 169.08800601959229 + }, + "isolatedSum": { + "p50": 144.3519964814186, + "p90": 155.2320048213005, + "p95": 163.42400014400482, + "p99": 182.43199586868286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 46, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 80.19199967384338, + "p90": 91.90399944782257, + "p95": 102.46399790048599, + "p99": 122.17599898576736 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 84.44800227880478, + "p95": 89.02399986982346, + "p99": 163.83999586105347 + }, + "roundtrip": { + "p50": 135.13599336147308, + "p90": 144.67200636863708, + "p95": 153.18399667739868, + "p99": 198.71999323368073 + }, + "isolatedSum": { + "p50": 157.98399597406387, + "p90": 176.35200172662735, + "p95": 191.48799777030945, + "p99": 286.01599484682083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 180, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 114.656001329422, + "p90": 129.88799810409546, + "p95": 147.90399372577667, + "p99": 340.1919901371002 + }, + "combine": { + "p50": 109.31199789047241, + "p90": 113.43999952077866, + "p95": 117.8240031003952, + "p99": 126.78399682044983 + }, + "roundtrip": { + "p50": 200.32000541687012, + "p90": 208.22399854660034, + "p95": 212.2880071401596, + "p99": 224.09600019454956 + }, + "isolatedSum": { + "p50": 223.9679992198944, + "p90": 243.32799762487411, + "p95": 265.7279968261719, + "p99": 466.97598695755005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 722, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-28647697", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h200_b2141073", + "comparisonKey": "f3223f156e0f187e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:09:59.710551+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.20799881219864, + "p90": 84.16000008583069, + "p95": 100.19200295209885, + "p99": 121.63200229406357 + }, + "combine": { + "p50": 70.88000327348709, + "p90": 75.83999633789062, + "p95": 81.66400343179703, + "p99": 90.52799642086029 + }, + "roundtrip": { + "p50": 122.75200337171555, + "p90": 134.14399325847626, + "p95": 142.7839994430542, + "p99": 163.4880006313324 + }, + "isolatedSum": { + "p50": 145.08800208568573, + "p90": 159.9999964237213, + "p95": 181.85600638389587, + "p99": 212.15999871492386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.87999778985977, + "p90": 133.98399949073792, + "p95": 138.17599415779114, + "p99": 157.4079990386963 + }, + "combine": { + "p50": 78.17599922418594, + "p90": 
82.43200182914734, + "p95": 86.33600175380707, + "p99": 97.47199714183807 + }, + "roundtrip": { + "p50": 136.4160031080246, + "p90": 148.25600385665894, + "p95": 154.9759954214096, + "p99": 170.0800061225891 + }, + "isolatedSum": { + "p50": 161.05599701404572, + "p90": 216.41600131988525, + "p95": 224.5119959115982, + "p99": 254.87999618053436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 113.08799684047699, + "p90": 122.6240023970604, + "p95": 129.40800189971924, + "p99": 144.73600685596466 + }, + "combine": { + "p50": 115.87200313806534, + "p90": 122.78400361537933, + "p95": 125.34399330615997, + "p99": 137.85600662231445 + }, + "roundtrip": { + "p50": 209.9200040102005, + "p90": 218.33600103855133, + "p95": 222.3999947309494, + "p99": 233.72800648212433 + }, + "isolatedSum": { + "p50": 228.95999997854233, + "p90": 245.40800601243973, + "p95": 254.7519952058792, + "p99": 282.5920134782791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7e7a48f3", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h200_3586ca3d", + "comparisonKey": "02d53e775757b018", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:58.697479+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.95999675989151, + "p90": 89.28000181913376, + "p95": 98.62399846315384, + "p99": 106.97600245475769 + }, + "combine": { + "p50": 67.55200028419495, + "p90": 75.29599964618683, + "p95": 79.74400371313095, + "p99": 86.30400151014328 + }, + "roundtrip": { + "p50": 122.20799922943115, + "p90": 140.22399485111237, + "p95": 146.7200070619583, + "p99": 154.7199934720993 + }, + "isolatedSum": { 
+ "p50": 140.51199704408646, + "p90": 164.5760014653206, + "p95": 178.3680021762848, + "p99": 193.28000396490097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.26399940252304, + "p90": 96.89600020647049, + "p95": 106.04800283908844, + "p99": 120.38400024175644 + }, + "combine": { + "p50": 69.21599805355072, + "p90": 80.03199845552444, + "p95": 86.27200126647949, + "p99": 92.19200164079666 + }, + "roundtrip": { + "p50": 125.40799379348755, + "p90": 148.92800152301788, + "p95": 157.72800147533417, + "p99": 169.855996966362 + }, + "isolatedSum": { + "p50": 144.47999745607376, + "p90": 176.92799866199493, + "p95": 192.32000410556793, + "p99": 212.5760018825531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.16799867153168, + "p90": 95.58399766683578, + "p95": 101.1200025677681, + "p99": 111.96800321340561 + }, + "combine": { + "p50": 70.592001080513, + "p90": 80.64000308513641, + "p95": 89.08800035715103, + "p99": 97.37599641084671 + }, + "roundtrip": { + "p50": 125.50400197505951, + "p90": 150.65599977970123, + "p95": 157.47199952602386, + "p99": 166.55999422073364 + }, + "isolatedSum": { + "p50": 145.75999975204468, + "p90": 176.2240007519722, + "p95": 190.20800292491913, + "p99": 209.34399962425232 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 76.64000242948532, + "p90": 96.67199850082397, + "p95": 104.5759990811348, + "p99": 124.03199821710587 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 79.23199981451035, + "p95": 85.56800335645676, + "p99": 93.50399672985077 + }, + "roundtrip": { + "p50": 129.05600666999817, + "p90": 151.64799988269806, + "p95": 157.82399475574493, + "p99": 173.72800409793854 + }, + "isolatedSum": { + "p50": 148.09600263834, + "p90": 175.90399831533432, + "p95": 190.14400243759155, + "p99": 217.53599494695663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.07199794054031, + "p90": 92.96000003814697, + "p95": 98.7199991941452, + "p99": 107.84000158309937 + }, + "combine": { + "p50": 71.84000313282013, + "p90": 80.79999685287476, + "p95": 87.23200112581253, + "p99": 93.12000125646591 + }, + "roundtrip": { + "p50": 128.1599998474121, + "p90": 144.48000490665436, + "p95": 151.16800367832184, + "p99": 159.93599593639374 + }, + "isolatedSum": { + "p50": 146.91200107336044, + "p90": 173.75999689102173, + "p95": 185.95200031995773, + "p99": 200.96000283956528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 85.7279971241951, + "p90": 100.38399696350098, + "p95": 105.95200210809708, + "p99": 119.87199634313583 + }, + "combine": { + "p50": 79.48800176382065, + "p90": 90.20800143480301, + "p95": 94.36800330877304, + "p99": 100.47999769449234 + }, + "roundtrip": { + "p50": 138.94400000572205, + "p90": 160.92799603939056, + 
"p95": 171.77599668502808, + "p99": 187.1359944343567 + }, + "isolatedSum": { + "p50": 165.21599888801575, + "p90": 190.59199839830399, + "p95": 200.32000541687012, + "p99": 220.35199403762817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.59199756383896, + "p90": 106.04800283908844, + "p95": 112.89600282907486, + "p99": 119.10399794578552 + }, + "combine": { + "p50": 87.61599659919739, + "p90": 95.93600034713745, + "p95": 100.67199915647507, + "p99": 108.09600353240967 + }, + "roundtrip": { + "p50": 159.4880074262619, + "p90": 173.3119934797287, + "p95": 178.0800074338913, + "p99": 190.8160001039505 + }, + "isolatedSum": { + "p50": 182.20799416303635, + "p90": 201.9840031862259, + "p95": 213.56800198554993, + "p99": 227.2000014781952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.52000111341476, + "p90": 131.71200454235077, + "p95": 139.5840048789978, + "p99": 146.11199498176575 + }, + "combine": { + "p50": 105.72800040245056, + "p90": 116.89600348472595, + "p95": 122.5920021533966, + "p99": 126.75200402736664 + }, + "roundtrip": { + "p50": 199.0399956703186, + "p90": 209.08799767494202, + "p95": 215.00800549983978, + "p99": 222.56000339984894 + }, + "isolatedSum": { + "p50": 225.24800151586533, + "p90": 248.60800802707672, + "p95": 262.1760070323944, + "p99": 272.8639990091324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + 
"recvTokensMax": 685, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aed13c11", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_37ab84c8", + "comparisonKey": "7c29427de9a8773f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:01.495662+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.68000191450119, + "p90": 90.55999666452408, + "p95": 97.37599641084671, + "p99": 108.22399705648422 + }, + "combine": { + "p50": 64.96000289916992, + "p90": 78.40000092983246, + "p95": 84.86399799585342, + "p99": 95.74399888515472 + }, + "roundtrip": { + "p50": 121.18399888277054, + "p90": 144.06399428844452, + "p95": 152.79999375343323, + "p99": 166.6879951953888 + }, + "isolatedSum": { + "p50": 136.6400048136711, + "p90": 168.95999759435654, + "p95": 182.23999440670013, + "p99": 203.96799594163895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.60800153017044, + "p90": 94.68799829483032, + "p95": 107.51999914646149, + "p99": 169.18399930000305 + }, + "combine": { + "p50": 64.83200192451477, + "p90": 73.98399710655212, + "p95": 79.39200103282928, + "p99": 165.72800278663635 + }, + "roundtrip": { + "p50": 119.90399658679962, + "p90": 138.75199854373932, + "p95": 146.7519998550415, + "p99": 158.4320068359375 + }, + "isolatedSum": { + "p50": 137.4400034546852, + "p90": 168.67199540138245, + "p95": 186.91200017929077, + "p99": 334.9120020866394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.33599978685379, + "p90": 81.60000294446945, + "p95": 89.40800279378891, + "p99": 99.16800260543823 + }, + "combine": { 
+ "p50": 67.6800012588501, + "p90": 72.03199714422226, + "p95": 76.60800218582153, + "p99": 83.67999643087387 + }, + "roundtrip": { + "p50": 121.98399752378464, + "p90": 133.05599987506866, + "p95": 140.9280002117157, + "p99": 156.73600137233734 + }, + "isolatedSum": { + "p50": 142.0160010457039, + "p90": 153.6320000886917, + "p95": 166.01600497961044, + "p99": 182.8479990363121 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.87999647855759, + "p90": 90.11200070381165, + "p95": 96.76799923181534, + "p99": 116.2559986114502 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 74.97599720954895, + "p95": 82.71999657154083, + "p99": 89.85599875450134 + }, + "roundtrip": { + "p50": 123.4240010380745, + "p90": 138.5280042886734, + "p95": 145.11999487876892, + "p99": 157.79200196266174 + }, + "isolatedSum": { + "p50": 144.79999989271164, + "p90": 165.0879979133606, + "p95": 179.48799580335617, + "p99": 206.11199736595154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.3600001335144, + "p90": 90.81599861383438, + "p95": 100.28800368309021, + "p99": 109.63200032711029 + }, + "combine": { + "p50": 71.3919997215271, + "p90": 81.98399841785431, + "p95": 85.63199639320374, + "p99": 92.03200042247772 + }, + "roundtrip": { + "p50": 125.15200674533844, + "p90": 145.60000598430634, + "p95": 151.32799744606018, + "p99": 159.7760021686554 + }, + "isolatedSum": { + "p50": 146.7519998550415, + "p90": 172.7999970316887, + "p95": 185.92000007629395, + "p99": 
201.664000749588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.97599852085114, + "p90": 90.7839983701706, + "p95": 97.63199836015701, + "p99": 107.10400342941284 + }, + "combine": { + "p50": 78.23999971151352, + "p90": 83.29600095748901, + "p95": 91.42400324344635, + "p99": 99.64799880981445 + }, + "roundtrip": { + "p50": 136.83199882507324, + "p90": 148.0959951877594, + "p95": 158.33599865436554, + "p99": 166.30400717258453 + }, + "isolatedSum": { + "p50": 161.21599823236465, + "p90": 174.0799993276596, + "p95": 189.05600160360336, + "p99": 206.7520022392273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.81599992513657, + "p90": 105.40799796581268, + "p95": 109.95200276374817, + "p99": 116.44800007343292 + }, + "combine": { + "p50": 87.71199733018875, + "p90": 92.6399976015091, + "p95": 97.24800288677216, + "p99": 109.95200276374817 + }, + "roundtrip": { + "p50": 163.71199488639832, + "p90": 171.23199999332428, + "p95": 175.6799966096878, + "p99": 185.2799952030182 + }, + "isolatedSum": { + "p50": 186.52799725532532, + "p90": 198.04799556732178, + "p95": 207.20000565052032, + "p99": 226.4000028371811 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 123.19999933242798, + 
"p90": 130.14400005340576, + "p95": 134.24000144004822, + "p99": 138.08000087738037 + }, + "combine": { + "p50": 112.76800185441971, + "p90": 119.39200013875961, + "p95": 123.87199699878693, + "p99": 132.7359974384308 + }, + "roundtrip": { + "p50": 214.7199958562851, + "p90": 221.76000475883484, + "p95": 225.75999796390533, + "p99": 269.6639895439148 + }, + "isolatedSum": { + "p50": 235.9680011868477, + "p90": 249.53600019216537, + "p95": 258.11199843883514, + "p99": 270.81599831581116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-12016ffa", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h200_2b0beea2", + "comparisonKey": "0b3a7f9586e75ff4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:59.789492+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + 
"deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 63.26399743556976, + "p90": 76.60800218582153, + "p95": 84.95999872684479, + "p99": 98.1760025024414 + }, + "combine": { + "p50": 61.15199998021126, + "p90": 65.11999666690826, + "p95": 72.25599884986877, + "p99": 82.2720006108284 + }, + "roundtrip": { + "p50": 114.75200206041336, + "p90": 127.58399546146393, + "p95": 132.9279989004135, + "p99": 197.4399983882904 + }, + "isolatedSum": { + "p50": 124.41599741578102, + "p90": 141.7279988527298, + "p95": 157.21599757671356, + "p99": 180.4480031132698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 65.50399959087372, + "p90": 74.91199672222137, + "p95": 79.45600152015686, + "p99": 92.03200042247772 + }, + "combine": { + "p50": 61.055999249219894, + "p90": 64.7360011935234, + "p95": 68.80000233650208, + "p99": 76.35200023651123 + }, + "roundtrip": { + "p50": 114.3679991364479, + 
"p90": 123.19999933242798, + "p95": 126.68800354003906, + "p99": 151.32799744606018 + }, + "isolatedSum": { + "p50": 126.55999884009361, + "p90": 139.64799791574478, + "p95": 148.25600385665894, + "p99": 168.38400065898895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 69.85600292682648, + "p90": 80.1599994301796, + "p95": 87.71199733018875, + "p99": 105.6319996714592 + }, + "combine": { + "p50": 62.17600032687187, + "p90": 67.29599833488464, + "p95": 72.76800274848938, + "p99": 82.87999778985977 + }, + "roundtrip": { + "p50": 116.19199812412262, + "p90": 127.13600695133209, + "p95": 135.68000495433807, + "p99": 151.8400013446808 + }, + "isolatedSum": { + "p50": 132.03200325369835, + "p90": 147.45599776506424, + "p95": 160.48000007867813, + "p99": 188.51199746131897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.81600278615952, + "p90": 80.19199967384338, + "p95": 83.5840031504631, + "p99": 132.64000415802002 + }, + "combine": { + "p50": 62.68800050020218, + "p90": 67.07199662923813, + "p95": 70.27199864387512, + "p99": 77.15199887752533 + }, + "roundtrip": { + "p50": 117.60000139474869, + "p90": 127.07200646400452, + "p95": 133.08799266815186, + "p99": 153.47200632095337 + }, + "isolatedSum": { + "p50": 133.5040032863617, + "p90": 147.2639963030815, + "p95": 153.85600179433823, + "p99": 209.79200303554535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.42400062084198, + "p90": 83.42400193214417, + "p95": 90.27200192213058, + "p99": 104.99200224876404 + }, + "combine": { + "p50": 67.9360032081604, + "p90": 71.58400118350983, + "p95": 76.99199765920639, + "p99": 84.16000008583069 + }, + "roundtrip": { + "p50": 119.35999989509583, + "p90": 132.60799646377563, + "p95": 140.54399728775024, + "p99": 180.67200481891632 + }, + "isolatedSum": { + "p50": 143.36000382900238, + "p90": 155.008003115654, + "p95": 167.26399958133698, + "p99": 189.15200233459473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.44000107049942, + "p90": 86.36800199747086, + "p95": 91.51999652385712, + "p99": 102.4319976568222 + }, + "combine": { + "p50": 71.9359964132309, + "p90": 76.86399668455124, + "p95": 79.74400371313095, + "p99": 94.7519987821579 + }, + "roundtrip": { + "p50": 130.62399625778198, + "p90": 139.64800536632538, + "p95": 145.75999975204468, + "p99": 162.04799711704254 + }, + "isolatedSum": { + "p50": 149.37599748373032, + "p90": 163.2319986820221, + "p95": 171.26400023698807, + "p99": 197.1839964389801 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.71999853849411, + "p90": 104.35199737548828, + "p95": 108.44799876213074, + "p99": 116.95999652147293 + }, + "combine": { + "p50": 80.79999685287476, + "p90": 87.99999952316284, + "p95": 
92.83199906349182, + "p99": 117.34399944543839 + }, + "roundtrip": { + "p50": 155.45600652694702, + "p90": 162.7199947834015, + "p95": 169.66399550437927, + "p99": 184.67199802398682 + }, + "isolatedSum": { + "p50": 175.51999539136887, + "p90": 192.35199689865112, + "p95": 201.27999782562256, + "p99": 234.30399596691132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.9119964838028, + "p90": 126.88000500202179, + "p95": 134.43200290203094, + "p99": 137.82399892807007 + }, + "combine": { + "p50": 109.6000000834465, + "p90": 116.95999652147293, + "p95": 122.94399738311768, + "p99": 132.47999548912048 + }, + "roundtrip": { + "p50": 207.58399367332458, + "p90": 216.8000042438507, + "p95": 222.97599911689758, + "p99": 315.0720000267029 + }, + "isolatedSum": { + "p50": 228.5119965672493, + "p90": 243.84000152349472, + "p95": 257.3760002851486, + "p99": 270.30399441719055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2ac8c75a", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h200_07d2a5ec", + "comparisonKey": "43271fa4a8104894", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:46.264578+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.23999840021133, + "p90": 93.66399794816971, + "p95": 102.1760031580925, + "p99": 116.99199676513672 + }, + "combine": { + "p50": 68.38399916887283, + "p90": 73.56800138950348, + "p95": 80.76799660921097, + "p99": 84.1279998421669 + }, + "roundtrip": { + "p50": 121.34400010108948, + "p90": 145.4399973154068, + "p95": 152.38399803638458, + "p99": 166.87999665737152 + }, + "isolatedSum": { + 
"p50": 138.62399756908417, + "p90": 167.2319993376732, + "p95": 182.94399976730347, + "p99": 201.11999660730362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.56800138950348, + "p90": 93.44000369310379, + "p95": 101.02400183677673, + "p99": 111.16799712181091 + }, + "combine": { + "p50": 68.54400038719177, + "p90": 74.40000027418137, + "p95": 82.62400329113007, + "p99": 89.91999924182892 + }, + "roundtrip": { + "p50": 122.46400117874146, + "p90": 145.53600549697876, + "p95": 153.85599434375763, + "p99": 163.7759953737259 + }, + "isolatedSum": { + "p50": 142.11200177669525, + "p90": 167.84000396728516, + "p95": 183.6480051279068, + "p99": 201.08799636363983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 74.23999905586243, + "p90": 88.70399743318558, + "p95": 100.16000270843506, + "p99": 110.07999628782272 + }, + "combine": { + "p50": 69.31199878454208, + "p90": 74.01599735021591, + "p95": 82.84799754619598, + "p99": 91.13600105047226 + }, + "roundtrip": { + "p50": 123.71200323104858, + "p90": 145.31199634075165, + "p95": 155.2640050649643, + "p99": 172.54400253295898 + }, + "isolatedSum": { + "p50": 143.5519978404045, + "p90": 162.7199947834015, + "p95": 183.00800025463104, + "p99": 201.21599733829498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 75.26399940252304, + "p90": 95.61599791049957, + "p95": 106.72000050544739, + "p99": 128.89599800109863 + }, + "combine": { + "p50": 70.43199986219406, + "p90": 78.97599786520004, + "p95": 84.06399935483932, + "p99": 91.67999774217606 + }, + "roundtrip": { + "p50": 124.64000284671783, + "p90": 147.93600142002106, + "p95": 156.0640037059784, + "p99": 165.3120070695877 + }, + "isolatedSum": { + "p50": 145.6959992647171, + "p90": 174.59199577569962, + "p95": 190.7839998602867, + "p99": 220.5759957432747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.01599735021591, + "p90": 98.9760011434555, + "p95": 107.93600231409073, + "p99": 119.10399794578552 + }, + "combine": { + "p50": 72.83200323581696, + "p90": 86.7839977145195, + "p95": 92.70399808883667, + "p99": 98.1760025024414 + }, + "roundtrip": { + "p50": 129.98400628566742, + "p90": 156.19200468063354, + "p95": 164.86400365829468, + "p99": 178.3359944820404 + }, + "isolatedSum": { + "p50": 146.84800058603287, + "p90": 185.759998857975, + "p95": 200.6400004029274, + "p99": 217.28000044822693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.85599744319916, + "p90": 100.16000270843506, + "p95": 108.89600217342377, + "p99": 116.41599982976913 + }, + "combine": { + "p50": 77.56800204515457, + "p90": 85.63199639320374, + "p95": 93.47199648618698, + "p99": 102.01600193977356 + }, + "roundtrip": { + "p50": 136.03200018405914, + "p90": 156.3200056552887, + 
"p95": 163.42400014400482, + "p99": 174.5920032262802 + }, + "isolatedSum": { + "p50": 159.42399948835373, + "p90": 185.7919991016388, + "p95": 202.36799865961075, + "p99": 218.4320017695427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.94400024414062, + "p90": 107.13600367307663, + "p95": 119.9679970741272, + "p99": 175.52000284194946 + }, + "combine": { + "p50": 87.55200356245041, + "p90": 96.47999703884125, + "p95": 105.82400113344193, + "p99": 114.81600254774094 + }, + "roundtrip": { + "p50": 158.59200060367584, + "p90": 175.29599368572235, + "p95": 186.52799725532532, + "p99": 197.66399264335632 + }, + "isolatedSum": { + "p50": 182.49600380659103, + "p90": 203.61600071191788, + "p95": 225.79199820756912, + "p99": 290.3360053896904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.0559972524643, + "p90": 125.59999525547028, + "p95": 135.04000008106232, + "p99": 146.5280055999756 + }, + "combine": { + "p50": 105.31199723482132, + "p90": 114.46399986743927, + "p95": 123.77600371837616, + "p99": 130.87999820709229 + }, + "roundtrip": { + "p50": 198.71999323368073, + "p90": 212.38400042057037, + "p95": 221.21599316596985, + "p99": 236.4799976348877 + }, + "isolatedSum": { + "p50": 222.3679944872856, + "p90": 240.06399512290955, + "p95": 258.8160037994385, + "p99": 277.40800380706787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + 
"recvTokensMax": 704, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7a20a925", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h200_5b852b95", + "comparisonKey": "59bc10da7303946d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:50.255974+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 74.33599978685379, + "p90": 101.95200145244598, + "p95": 109.53599959611893, + "p99": 126.62400305271149 + }, + "combine": { + "p50": 69.92000341415405, + "p90": 82.62400329113007, + "p95": 89.6959975361824, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 125.95200538635254, + "p90": 152.0639955997467, + "p95": 161.9199961423874, + "p99": 174.5920032262802 + }, + "isolatedSum": { + "p50": 144.25600320100784, + "p90": 184.57600474357605, + "p95": 199.23199713230133, + "p99": 228.80000621080399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.27999919652939, + "p90": 99.13600236177444, + "p95": 104.22399640083313, + "p99": 114.23999816179276 + }, + "combine": { + "p50": 69.34399902820587, + "p90": 82.30400085449219, + "p95": 85.53600311279297, + "p99": 89.82399851083755 + }, + "roundtrip": { + "p50": 121.69600278139114, + "p90": 149.59999918937683, + "p95": 153.76000106334686, + "p99": 163.87200355529785 + }, + "isolatedSum": { + "p50": 142.62399822473526, + "p90": 181.44000321626663, + "p95": 189.7599995136261, + "p99": 204.0639966726303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.18399846553802, + "p90": 96.73599898815155, + "p95": 104.22399640083313, + "p99": 
115.13599753379822 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 83.61600339412689, + "p95": 89.79199826717377, + "p99": 99.2640033364296 + }, + "roundtrip": { + "p50": 123.6800029873848, + "p90": 151.45599842071533, + "p95": 157.05600380897522, + "p99": 170.04799842834473 + }, + "isolatedSum": { + "p50": 143.19999516010284, + "p90": 180.35200238227844, + "p95": 194.0159946680069, + "p99": 214.4000008702278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.97599720954895, + "p90": 101.27999633550644, + "p95": 108.19199681282043, + "p99": 122.3360002040863 + }, + "combine": { + "p50": 71.03999704122543, + "p90": 83.0719992518425, + "p95": 87.8399983048439, + "p99": 92.99200028181076 + }, + "roundtrip": { + "p50": 127.3919939994812, + "p90": 163.10399770736694, + "p95": 173.92000555992126, + "p99": 283.6480140686035 + }, + "isolatedSum": { + "p50": 146.01599425077438, + "p90": 184.35199558734894, + "p95": 196.03199511766434, + "p99": 215.32800048589706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.64800298213959, + "p90": 107.744000852108, + "p95": 112.2559979557991, + "p99": 124.89599734544754 + }, + "combine": { + "p50": 73.02399724721909, + "p90": 84.83199775218964, + "p95": 90.62399715185165, + "p99": 98.4639972448349 + }, + "roundtrip": { + "p50": 130.52800297737122, + "p90": 153.47200632095337, + "p95": 161.82400286197662, + "p99": 181.15200102329254 + }, + "isolatedSum": { + "p50": 152.67200022935867, + "p90": 192.57599860429764, + 
"p95": 202.87999510765076, + "p99": 223.35999459028244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 84.6719965338707, + "p90": 105.02400249242783, + "p95": 109.82400178909302, + "p99": 121.85599654912949 + }, + "combine": { + "p50": 80.57600259780884, + "p90": 96.03200107812881, + "p95": 99.5199978351593, + "p99": 105.98400235176086 + }, + "roundtrip": { + "p50": 141.37600362300873, + "p90": 164.92800414562225, + "p95": 170.81600427627563, + "p99": 183.26400220394135 + }, + "isolatedSum": { + "p50": 165.24799913167953, + "p90": 201.05600357055664, + "p95": 209.34399962425232, + "p99": 227.83999890089035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 98.75199943780899, + "p90": 117.47200042009354, + "p95": 122.52800166606903, + "p99": 128.22400033473969 + }, + "combine": { + "p50": 90.14400094747543, + "p90": 105.34399747848511, + "p95": 108.44799876213074, + "p99": 119.29599940776825 + }, + "roundtrip": { + "p50": 165.02399742603302, + "p90": 184.09599363803864, + "p95": 189.15200233459473, + "p99": 198.4959989786148 + }, + "isolatedSum": { + "p50": 188.89600038528442, + "p90": 222.81599789857864, + "p95": 230.97600042819977, + "p99": 247.51999974250793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 
1024, + "dispatch": { + "p50": 123.64800274372101, + "p90": 135.13599336147308, + "p95": 140.22399485111237, + "p99": 153.79199385643005 + }, + "combine": { + "p50": 117.21599847078323, + "p90": 130.72000443935394, + "p95": 135.5839967727661, + "p99": 142.2400027513504 + }, + "roundtrip": { + "p50": 221.53599560260773, + "p90": 242.65600740909576, + "p95": 247.80799448490143, + "p99": 256.3199996948242 + }, + "isolatedSum": { + "p50": 240.86400121450424, + "p90": 265.855997800827, + "p95": 275.8079916238785, + "p99": 296.03199660778046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63c5c5bf", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h200_d63aaf09", + "comparisonKey": "b67a40b407de1a8d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:15.680881+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.71200215816498, + "p90": 86.5280032157898, + "p95": 96.67199850082397, + "p99": 119.1679984331131 + }, + "combine": { + "p50": 68.41599941253662, + "p90": 77.56800204515457, + "p95": 82.5280025601387, + "p99": 89.9839997291565 + }, + "roundtrip": { + "p50": 120.54400146007538, + "p90": 136.54400408267975, + "p95": 147.0080018043518, + "p99": 159.07199680805206 + }, + "isolatedSum": { + "p50": 140.1280015707016, + "p90": 164.09600526094437, + "p95": 179.20000106096268, + "p99": 209.1519981622696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.34399968385696, + "p90": 93.08800101280212, + "p95": 99.45599734783173, + "p99": 108.22399705648422 + }, + "combine": { + "p50": 68.92800331115723, + "p90": 78.07999849319458, + "p95": 
84.83199775218964, + "p99": 92.92799979448318 + }, + "roundtrip": { + "p50": 124.54400211572647, + "p90": 148.73600006103516, + "p95": 155.58399260044098, + "p99": 168.5120016336441 + }, + "isolatedSum": { + "p50": 142.2720029950142, + "p90": 171.1679995059967, + "p95": 184.28799510002136, + "p99": 201.1519968509674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.31199944019318, + "p90": 80.25600016117096, + "p95": 87.20000088214874, + "p99": 105.02400249242783 + }, + "combine": { + "p50": 69.15199756622314, + "p90": 73.2479989528656, + "p95": 78.3040001988411, + "p99": 90.55999666452408 + }, + "roundtrip": { + "p50": 123.19999933242798, + "p90": 131.74399733543396, + "p95": 141.184002161026, + "p99": 160.288006067276 + }, + "isolatedSum": { + "p50": 142.46399700641632, + "p90": 153.50399911403656, + "p95": 165.50400108098984, + "p99": 195.5839991569519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.07199794054031, + "p90": 93.08800101280212, + "p95": 106.46399855613708, + "p99": 117.79200285673141 + }, + "combine": { + "p50": 70.91200351715088, + "p90": 81.02399855852127, + "p95": 88.128000497818, + "p99": 94.43199634552002 + }, + "roundtrip": { + "p50": 123.36000055074692, + "p90": 140.44800400733948, + "p95": 148.3840048313141, + "p99": 163.32800686359406 + }, + "isolatedSum": { + "p50": 145.9840014576912, + "p90": 174.1119995713234, + "p95": 194.59199905395508, + "p99": 212.22399920225143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.07999783754349, + "p90": 83.45600217580795, + "p95": 89.66399729251862, + "p99": 102.75200009346008 + }, + "combine": { + "p50": 72.12799787521362, + "p90": 78.015998005867, + "p95": 84.57600325345993, + "p99": 94.62399780750275 + }, + "roundtrip": { + "p50": 128.35200130939484, + "p90": 140.09599387645721, + "p95": 144.6399986743927, + "p99": 163.39200735092163 + }, + "isolatedSum": { + "p50": 146.2079957127571, + "p90": 161.47200018167496, + "p95": 174.24000054597855, + "p99": 197.37599790096283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.8159973025322, + "p90": 91.64799749851227, + "p95": 101.50399804115295, + "p99": 113.72800171375275 + }, + "combine": { + "p50": 78.015998005867, + "p90": 82.43200182914734, + "p95": 87.64799684286118, + "p99": 98.01600128412247 + }, + "roundtrip": { + "p50": 136.76799833774567, + "p90": 147.8399932384491, + "p95": 154.30399775505066, + "p99": 169.0240055322647 + }, + "isolatedSum": { + "p50": 160.8319953083992, + "p90": 174.0799993276596, + "p95": 189.15199488401413, + "p99": 211.7440029978752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 96.70399874448776, + "p90": 104.22399640083313, + "p95": 110.91200262308121, + "p99": 120.7680031657219 + }, + 
"combine": { + "p50": 86.87999844551086, + "p90": 92.83199906349182, + "p95": 96.38399630784988, + "p99": 108.03200304508209 + }, + "roundtrip": { + "p50": 158.4320068359375, + "p90": 165.72800278663635, + "p95": 171.32799327373505, + "p99": 183.9040070772171 + }, + "isolatedSum": { + "p50": 183.58399718999863, + "p90": 197.05599546432495, + "p95": 207.2959989309311, + "p99": 228.80000621080399 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.66400188207626, + "p90": 125.02400577068329, + "p95": 128.86400520801544, + "p99": 145.05599439144135 + }, + "combine": { + "p50": 104.73600029945374, + "p90": 110.27199774980545, + "p95": 115.87200313806534, + "p99": 126.36800110340118 + }, + "roundtrip": { + "p50": 197.1520036458969, + "p90": 206.52799308300018, + "p95": 216.48000180721283, + "p99": 233.91999304294586 + }, + "isolatedSum": { + "p50": 222.40000218153, + "p90": 235.29600352048874, + "p95": 244.73600834608078, + "p99": 271.42399549484253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d92c18f5", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_c09dca7a", + "comparisonKey": "39d3f6a5c3e1dcde", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:02:47.042676+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", 
+ "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.8480030298233, + "p90": 93.08800101280212, + "p95": 103.61599922180176, + "p99": 115.32799899578094 + }, + "combine": { + "p50": 67.19999760389328, + "p90": 74.5920017361641, + "p95": 82.78399705886841, + "p99": 91.45600348711014 + }, + "roundtrip": { + "p50": 120.51200121641159, + "p90": 145.11999487876892, + "p95": 150.9760022163391, + "p99": 
177.59999632835388 + }, + "isolatedSum": { + "p50": 138.04800063371658, + "p90": 167.68000274896622, + "p95": 186.39999628067017, + "p99": 206.78400248289108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.11199742555618, + "p90": 90.87999910116196, + "p95": 97.98400104045868, + "p99": 118.01599711179733 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 75.83999633789062, + "p95": 82.62400329113007, + "p99": 91.58399701118469 + }, + "roundtrip": { + "p50": 121.72800302505493, + "p90": 144.73600685596466, + "p95": 153.53600680828094, + "p99": 162.27200627326965 + }, + "isolatedSum": { + "p50": 138.2399946451187, + "p90": 166.71999543905258, + "p95": 180.60800433158875, + "p99": 209.59999412298203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.82400333881378, + "p90": 97.4079966545105, + "p95": 105.59999942779541, + "p99": 117.0559972524643 + }, + "combine": { + "p50": 68.80000233650208, + "p90": 78.68800312280655, + "p95": 83.3280012011528, + "p99": 90.14400094747543 + }, + "roundtrip": { + "p50": 123.29600006341934, + "p90": 149.59999918937683, + "p95": 156.0640037059784, + "p99": 167.29600727558136 + }, + "isolatedSum": { + "p50": 142.62400567531586, + "p90": 176.09599977731705, + "p95": 188.9280006289482, + "p99": 207.19999819993973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.91999661922455, + "p90": 91.36000275611877, + "p95": 99.32799637317657, + "p99": 113.76000195741653 + }, + "combine": { + "p50": 69.05599683523178, + "p90": 78.04799824953079, + "p95": 84.28800106048584, + "p99": 91.13600105047226 + }, + "roundtrip": { + "p50": 123.19999933242798, + "p90": 149.9519944190979, + "p95": 156.51200711727142, + "p99": 188.6720061302185 + }, + "isolatedSum": { + "p50": 142.97599345445633, + "p90": 169.40800100564957, + "p95": 183.61599743366241, + "p99": 204.8960030078888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.15999811887741, + "p90": 91.87199920415878, + "p95": 100.12800246477127, + "p99": 112.35199868679047 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 80.22399991750717, + "p95": 89.02399986982346, + "p99": 93.75999867916107 + }, + "roundtrip": { + "p50": 124.64000284671783, + "p90": 145.9520012140274, + "p95": 153.6960005760193, + "p99": 161.82400286197662 + }, + "isolatedSum": { + "p50": 142.4959972500801, + "p90": 172.09599912166595, + "p95": 189.15200233459473, + "p99": 206.11199736595154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 82.43200182914734, + "p90": 100.54399818181992, + "p95": 106.11200332641602, + "p99": 118.14399808645248 + }, + "combine": { + "p50": 77.2479996085167, + "p90": 86.36800199747086, + "p95": 95.42399644851685, + "p99": 105.0880029797554 + }, + "roundtrip": { + "p50": 
137.2160017490387, + "p90": 158.87999534606934, + "p95": 168.12799870967865, + "p99": 181.2479943037033 + }, + "isolatedSum": { + "p50": 159.68000143766403, + "p90": 186.91200017929077, + "p95": 201.53599977493286, + "p99": 223.23200106620789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 102.01600193977356, + "p90": 117.85600334405899, + "p95": 123.19999933242798, + "p99": 142.43200421333313 + }, + "combine": { + "p50": 87.13600039482117, + "p90": 99.71199929714203, + "p95": 105.69600015878677, + "p99": 115.4559999704361 + }, + "roundtrip": { + "p50": 163.16799819469452, + "p90": 180.28800189495087, + "p95": 188.1600022315979, + "p99": 199.35999810695648 + }, + "isolatedSum": { + "p50": 189.15200233459473, + "p90": 217.56800264120102, + "p95": 228.89599949121475, + "p99": 257.8880041837692 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.54400211572647, + "p90": 135.3600025177002, + "p95": 139.55199718475342, + "p99": 152.0320028066635 + }, + "combine": { + "p50": 112.28799819946289, + "p90": 123.77600371837616, + "p95": 131.32800161838531, + "p99": 140.6719982624054 + }, + "roundtrip": { + "p50": 213.6320024728775, + "p90": 231.1680018901825, + "p95": 236.32000386714935, + "p99": 245.27999758720398 + }, + "isolatedSum": { + "p50": 236.83200031518936, + "p90": 259.13600623607635, + "p95": 270.87999880313873, + "p99": 292.7040010690689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 
49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1b26be53", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_7b142e34", + "comparisonKey": "896c6c91db0fa9fa", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:03:14.159746+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.9919970035553, + "p90": 94.59199756383896, + "p95": 105.6319996714592, + "p99": 131.6159963607788 + }, + "combine": { + "p50": 68.12799721956253, + "p90": 77.05599814653397, + "p95": 83.13599973917007, + "p99": 90.20800143480301 + }, + "roundtrip": { + "p50": 123.32800030708313, + "p90": 147.2640037536621, + "p95": 155.4879993200302, + "p99": 170.9119975566864 + }, + "isolatedSum": { + "p50": 141.11999422311783, + "p90": 171.64799571037292, + "p95": 188.76799941062927, + "p99": 221.82399779558182 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.69600236415863, + "p90": 93.6959981918335, + "p95": 101.95200145244598, + "p99": 117.79200285673141 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 74.75200295448303, + "p95": 84.06399935483932, + "p99": 90.68799763917923 + }, + "roundtrip": { + "p50": 123.45600128173828, + "p90": 145.88800072669983, + "p95": 153.05599570274353, + "p99": 162.01600432395935 + }, + "isolatedSum": { + "p50": 142.65600591897964, + "p90": 168.44800114631653, + "p95": 186.0160008072853, + "p99": 208.48000049591064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 73.98399710655212, + "p90": 94.71999853849411, + "p95": 104.60799932479858, + "p99": 150.11200308799744 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 73.88799637556076, + "p95": 82.78399705886841, + "p99": 90.01599997282028 + }, + "roundtrip": { + "p50": 124.35200065374374, + "p90": 140.70400595664978, + "p95": 150.94399452209473, + "p99": 158.1439971923828 + }, + "isolatedSum": { + "p50": 143.0719941854477, + "p90": 168.60799491405487, + "p95": 187.391996383667, + "p99": 240.12800306081772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 75.77600330114365, + "p90": 96.70399874448776, + "p95": 102.7199998497963, + "p99": 117.21599847078323 + }, + "combine": { + "p50": 70.43199986219406, + "p90": 79.58400249481201, + "p95": 88.67199718952179, + "p99": 93.18400174379349 + }, + "roundtrip": { + "p50": 126.0479986667633, + "p90": 149.82399344444275, + "p95": 157.27999806404114, + "p99": 168.5439944267273 + }, + "isolatedSum": { + "p50": 146.2080031633377, + "p90": 176.28800123929977, + "p95": 191.39199703931808, + "p99": 210.40000021457672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.16799867153168, + "p90": 91.13600105047226, + "p95": 100.00000149011612, + "p99": 115.29599875211716 + }, + "combine": { + "p50": 71.87200337648392, + "p90": 80.19199967384338, + "p95": 86.94399893283844, + "p99": 99.16800260543823 + }, + "roundtrip": { + "p50": 126.88000500202179, + "p90": 150.01599490642548, + 
"p95": 156.70399367809296, + "p99": 178.0800074338913 + }, + "isolatedSum": { + "p50": 147.0400020480156, + "p90": 171.32800072431564, + "p95": 186.94400042295456, + "p99": 214.4640013575554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.82399719953537, + "p90": 95.39200365543365, + "p95": 107.2319969534874, + "p99": 114.56000059843063 + }, + "combine": { + "p50": 77.34400033950806, + "p90": 84.927998483181, + "p95": 91.5519967675209, + "p99": 97.88800030946732 + }, + "roundtrip": { + "p50": 136.9599997997284, + "p90": 157.79200196266174, + "p95": 169.0559983253479, + "p99": 209.85600352287292 + }, + "isolatedSum": { + "p50": 159.16799753904343, + "p90": 180.32000213861465, + "p95": 198.7839937210083, + "p99": 212.44800090789795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.40800344944, + "p90": 107.744000852108, + "p95": 116.5120005607605, + "p99": 128.9920061826706 + }, + "combine": { + "p50": 88.16000074148178, + "p90": 96.12800180912018, + "p95": 103.2319962978363, + "p99": 110.91200262308121 + }, + "roundtrip": { + "p50": 158.84800255298615, + "p90": 168.89600455760956, + "p95": 180.12799322605133, + "p99": 188.09600174427032 + }, + "isolatedSum": { + "p50": 181.56800419092178, + "p90": 203.87200266122818, + "p95": 219.7439968585968, + "p99": 239.9040088057518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.08799749612808, + "p90": 126.8479973077774, + "p95": 138.84800672531128, + "p99": 146.464005112648 + }, + "combine": { + "p50": 105.8880016207695, + "p90": 114.88000303506851, + "p95": 124.25599992275238, + "p99": 132.9600065946579 + }, + "roundtrip": { + "p50": 200.00000298023224, + "p90": 213.05599808692932, + "p95": 220.12799978256226, + "p99": 233.2800030708313 + }, + "isolatedSum": { + "p50": 222.97599911689758, + "p90": 241.72800034284592, + "p95": 263.10400664806366, + "p99": 279.4240117073059 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-639dfcb1", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_3b36fa26", + "comparisonKey": "9e6bd622f4b9b929", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:51.869049+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.95199686288834, + "p90": 93.6959981918335, + "p95": 100.60799866914749, + "p99": 116.95999652147293 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 77.08799839019775, + "p95": 84.41600203514099, + "p99": 91.0400003194809 + }, + "roundtrip": { + "p50": 124.1919994354248, + "p90": 146.464005112648, + "p95": 154.04799580574036, + "p99": 165.53600132465363 + }, + "isolatedSum": { + "p50": 142.91200041770935, + "p90": 170.78399658203125, + "p95": 185.02400070428848, + "p99": 207.99999684095383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.29599964618683, + "p90": 95.74399888515472, + "p95": 101.24800354242325, + "p99": 
110.49599945545197 + }, + "combine": { + "p50": 70.04799693822861, + "p90": 82.17599987983704, + "p95": 89.08800035715103, + "p99": 97.24800288677216 + }, + "roundtrip": { + "p50": 125.5359947681427, + "p90": 148.47999811172485, + "p95": 154.40000593662262, + "p99": 162.4000072479248 + }, + "isolatedSum": { + "p50": 145.34399658441544, + "p90": 177.91999876499176, + "p95": 190.33600389957428, + "p99": 207.74400234222412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 75.58400183916092, + "p90": 93.21600198745728, + "p95": 100.51199793815613, + "p99": 112.57600039243698 + }, + "combine": { + "p50": 69.98399645090103, + "p90": 76.4160007238388, + "p95": 84.03199911117554, + "p99": 89.91999924182892 + }, + "roundtrip": { + "p50": 124.67200309038162, + "p90": 148.47999811172485, + "p95": 154.78399395942688, + "p99": 163.42400014400482 + }, + "isolatedSum": { + "p50": 145.56799829006195, + "p90": 169.63200271129608, + "p95": 184.54399704933167, + "p99": 202.4959996342659 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 76.19199901819229, + "p90": 97.21600264310837, + "p95": 105.12000322341919, + "p99": 120.28799951076508 + }, + "combine": { + "p50": 71.3919997215271, + "p90": 79.83999699354172, + "p95": 84.99199897050858, + "p99": 93.18400174379349 + }, + "roundtrip": { + "p50": 128.89599800109863, + "p90": 148.3519971370697, + "p95": 158.75199437141418, + "p99": 173.37599396705627 + }, + "isolatedSum": { + "p50": 147.5839987397194, + "p90": 177.05599963665009, + 
"p95": 190.11200219392776, + "p99": 213.47200125455856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 76.25599950551987, + "p90": 96.73599898815155, + "p95": 105.6319996714592, + "p99": 115.29599875211716 + }, + "combine": { + "p50": 73.08799773454666, + "p90": 82.46400207281113, + "p95": 91.36000275611877, + "p99": 98.78399968147278 + }, + "roundtrip": { + "p50": 130.17599284648895, + "p90": 153.53600680828094, + "p95": 160.16000509262085, + "p99": 177.7919977903366 + }, + "isolatedSum": { + "p50": 149.34399724006653, + "p90": 179.20000106096268, + "p95": 196.99200242757797, + "p99": 214.07999843358994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 86.65599673986435, + "p90": 112.19199746847153, + "p95": 119.39200013875961, + "p99": 136.31999492645264 + }, + "combine": { + "p50": 79.45600152015686, + "p90": 90.81599861383438, + "p95": 97.02400118112564, + "p99": 102.55999863147736 + }, + "roundtrip": { + "p50": 138.14400136470795, + "p90": 160.76800227165222, + "p95": 169.76000368595123, + "p99": 185.44000387191772 + }, + "isolatedSum": { + "p50": 166.1119982600212, + "p90": 203.0079960823059, + "p95": 216.41600131988525, + "p99": 238.87999355793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": 
{ + "p50": 96.63999825716019, + "p90": 109.69600081443787, + "p95": 118.20799857378006, + "p99": 131.6480040550232 + }, + "combine": { + "p50": 88.8959988951683, + "p90": 100.35199671983719, + "p95": 107.96800255775452, + "p99": 114.59200084209442 + }, + "roundtrip": { + "p50": 161.6320013999939, + "p90": 177.15199291706085, + "p95": 184.7359985113144, + "p99": 194.4960057735443 + }, + "isolatedSum": { + "p50": 185.5359971523285, + "p90": 210.04799753427505, + "p95": 226.17600113153458, + "p99": 246.24000489711761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 120.64000219106674, + "p90": 135.5839967727661, + "p95": 143.36000382900238, + "p99": 157.3439985513687 + }, + "combine": { + "p50": 110.07999628782272, + "p90": 118.43200027942657, + "p95": 122.75200337171555, + "p99": 129.7599971294403 + }, + "roundtrip": { + "p50": 200.8959949016571, + "p90": 215.42400121688843, + "p95": 222.84799814224243, + "p99": 233.7920069694519 + }, + "isolatedSum": { + "p50": 230.71999847888947, + "p90": 254.0159970521927, + "p95": 266.1120072007179, + "p99": 287.103995680809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bd3c8117", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_0d2bf145", + "comparisonKey": "395c9d9959f566bb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:35.854756+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 63.231997191905975, + "p90": 81.216000020504, + "p95": 87.74399757385254, + "p99": 97.85600006580353 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 81.18399977684021, + "p95": 90.04800021648407, + "p99": 98.04800152778625 + }, + 
"roundtrip": { + "p50": 112.86400258541107, + "p90": 133.63200426101685, + "p95": 144.25599575042725, + "p99": 163.83999586105347 + }, + "isolatedSum": { + "p50": 133.2479938864708, + "p90": 162.3999997973442, + "p95": 177.7919977903366, + "p99": 195.90400159358978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 63.19999694824219, + "p90": 84.44800227880478, + "p95": 91.20000153779984, + "p99": 102.65599936246872 + }, + "combine": { + "p50": 70.0799971818924, + "p90": 78.49600166082382, + "p95": 83.83999764919281, + "p99": 90.33600240945816 + }, + "roundtrip": { + "p50": 110.91200262308121, + "p90": 136.60800457000732, + "p95": 144.48000490665436, + "p99": 178.1120002269745 + }, + "isolatedSum": { + "p50": 133.27999413013458, + "p90": 162.9440039396286, + "p95": 175.03999918699265, + "p99": 192.99200177192688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 63.26399743556976, + "p90": 84.1279998421669, + "p95": 91.87199920415878, + "p99": 102.46399790048599 + }, + "combine": { + "p50": 70.46400010585785, + "p90": 81.11999928951263, + "p95": 85.9839990735054, + "p99": 93.31200271844864 + }, + "roundtrip": { + "p50": 112.86400258541107, + "p90": 133.7919980287552, + "p95": 141.59999787807465, + "p99": 151.5199989080429 + }, + "isolatedSum": { + "p50": 133.7279975414276, + "p90": 165.24799913167953, + "p95": 177.85599827766418, + "p99": 195.77600061893463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + 
"fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 64.2239972949028, + "p90": 82.94399827718735, + "p95": 90.30400216579437, + "p99": 97.9200005531311 + }, + "combine": { + "p50": 70.3359991312027, + "p90": 81.08799904584885, + "p95": 89.08800035715103, + "p99": 97.31200337409973 + }, + "roundtrip": { + "p50": 115.00799655914307, + "p90": 139.67999815940857, + "p95": 151.16800367832184, + "p99": 172.4800020456314 + }, + "isolatedSum": { + "p50": 134.5599964261055, + "p90": 164.0319973230362, + "p95": 179.3920025229454, + "p99": 195.23200392723083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 60.03199890255928, + "p90": 76.7040029168129, + "p95": 86.7839977145195, + "p99": 99.5199978351593 + }, + "combine": { + "p50": 71.99999690055847, + "p90": 81.08799904584885, + "p95": 90.71999788284302, + "p99": 98.39999675750732 + }, + "roundtrip": { + "p50": 115.55200070142746, + "p90": 138.5280042886734, + "p95": 145.47200500965118, + "p99": 159.04000401496887 + }, + "isolatedSum": { + "p50": 132.03199580311775, + "p90": 157.79200196266174, + "p95": 177.50399559736252, + "p99": 197.91999459266663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 70.91200351715088, + "p90": 87.36000210046768, + "p95": 93.88799965381622, + "p99": 105.47199845314026 + }, + "combine": { + "p50": 78.52800190448761, + "p90": 
91.48799628019333, + "p95": 97.120001912117, + "p99": 102.24000364542007 + }, + "roundtrip": { + "p50": 125.82400441169739, + "p90": 143.61600577831268, + "p95": 150.04800260066986, + "p99": 157.21599757671356 + }, + "isolatedSum": { + "p50": 149.4400054216385, + "p90": 178.847998380661, + "p95": 191.00800156593323, + "p99": 207.71200209856033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.16800063848495, + "p90": 95.8079993724823, + "p95": 100.67199915647507, + "p99": 109.92000252008438 + }, + "combine": { + "p50": 87.77599781751633, + "p90": 98.62399846315384, + "p95": 106.27199709415436, + "p99": 115.80800265073776 + }, + "roundtrip": { + "p50": 148.25600385665894, + "p90": 162.30399906635284, + "p95": 166.52800142765045, + "p99": 173.40800166130066 + }, + "isolatedSum": { + "p50": 174.94399845600128, + "p90": 194.43199783563614, + "p95": 206.94399625062943, + "p99": 225.72800517082214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.79200088977814, + "p90": 122.43200093507767, + "p95": 127.74400413036346, + "p99": 133.37600231170654 + }, + "combine": { + "p50": 104.89600151777267, + "p90": 119.23199892044067, + "p95": 123.71200323104858, + "p99": 132.83200562000275 + }, + "roundtrip": { + "p50": 186.8479996919632, + "p90": 198.68800044059753, + "p95": 204.57600057125092, + "p99": 247.29600548744202 + }, + "isolatedSum": { + "p50": 210.6880024075508, + "p90": 241.66399985551834, + "p95": 251.45600736141205, + "p99": 266.2080079317093 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-78b81d98", + "identity": "h200|deepep|v2|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_7aec116c", + "comparisonKey": "0391d8b55a70fe9c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:34.661772+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 36.768000572919846, + "p90": 38.88000175356865, + "p95": 44.319998472929, + "p99": 53.69599908590317 + }, + "combine": { + "p50": 33.31200033426285, + "p90": 35.0399985909462, + "p95": 37.696000188589096, + "p99": 47.040000557899475 + }, + "roundtrip": { + "p50": 50.52800104022026, + "p90": 55.84000051021576, + "p95": 60.256000608205795, + "p99": 72.41600006818771 + }, + "isolatedSum": { + "p50": 70.0800009071827, + "p90": 73.92000034451485, + "p95": 82.0159986615181, + "p99": 100.73599964380264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.18400001525879, + "p90": 39.872001856565475, + "p95": 43.776001781225204, + "p99": 54.655998945236206 + }, + "combine": { + "p50": 33.504001796245575, + "p90": 35.232000052928925, + "p95": 39.0079990029335, + "p99": 46.751998364925385 + }, + "roundtrip": { + "p50": 51.392000168561935, + "p90": 55.87200075387955, + "p95": 57.08799883723259, + "p99": 62.39999830722809 + }, + "isolatedSum": { + "p50": 70.68800181150436, + "p90": 75.1040019094944, + "p95": 82.7840007841587, + "p99": 101.40799731016159 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 38.047999143600464, + "p90": 40.92799872159958, + "p95": 46.1760014295578, + "p99": 58.17599967122078 + }, + "combine": { + "p50": 33.695999532938, + "p90": 36.06399893760681, + "p95": 39.45599868893623, + "p99": 47.200001776218414 + }, + "roundtrip": { + "p50": 52.191998809576035, + "p90": 56.86400085687637, + "p95": 62.65600025653839, + "p99": 76.25599950551987 + }, + "isolatedSum": { + "p50": 71.74399867653847, + "p90": 76.99199765920639, + "p95": 85.63200011849403, + "p99": 105.3760014474392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 37.47199848294258, + "p90": 39.84000161290169, + "p95": 46.62400111556053, + "p99": 56.76800012588501 + }, + "combine": { + "p50": 34.78400036692619, + "p90": 36.38400137424469, + "p95": 38.30400109291077, + "p99": 47.93599992990494 + }, + "roundtrip": { + "p50": 55.456001311540604, + "p90": 58.46399813890457, + "p95": 63.77600133419037, + "p99": 79.1039988398552 + }, + "isolatedSum": { + "p50": 72.25599884986877, + "p90": 76.22400298714638, + "p95": 84.9280022084713, + "p99": 104.70400005578995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 38.91199827194214, + "p90": 44.67200115323067, + "p95": 50.52800104022026, + "p99": 64.89600241184235 + }, + "combine": { + "p50": 35.840000957250595, + "p90": 41.600000113248825, + "p95": 45.85599899291992, + "p99": 53.31199988722801 + }, + "roundtrip": { + "p50": 60.28800085186958, + "p90": 65.08799642324448, + "p95": 
68.9919963479042, + "p99": 81.66400343179703 + }, + "isolatedSum": { + "p50": 74.75199922919273, + "p90": 86.27200126647949, + "p95": 96.38400003314018, + "p99": 118.20800229907036 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.75999826192856, + "p90": 48.448000103235245, + "p95": 55.00800162553787, + "p99": 61.72800064086914 + }, + "combine": { + "p50": 43.90399903059006, + "p90": 46.14400118589401, + "p95": 51.231998950242996, + "p99": 55.52000179886818 + }, + "roundtrip": { + "p50": 72.41600006818771, + "p90": 75.6160020828247, + "p95": 79.71200346946716, + "p99": 89.15200084447861 + }, + "isolatedSum": { + "p50": 89.66399729251862, + "p90": 94.59200128912926, + "p95": 106.24000057578087, + "p99": 117.24800243973732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 57.56799876689911, + "p90": 81.31200075149536, + "p95": 84.89599823951721, + "p99": 96.38399630784988 + }, + "combine": { + "p50": 58.848001062870026, + "p90": 61.88800185918808, + "p95": 69.66400146484375, + "p99": 73.37599992752075 + }, + "roundtrip": { + "p50": 97.37599641084671, + "p90": 104.80000078678131, + "p95": 111.7440015077591, + "p99": 243.77599358558655 + }, + "isolatedSum": { + "p50": 116.41599982976913, + "p90": 143.20000261068344, + "p95": 154.55999970436096, + "p99": 169.75999623537064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 79.96799796819687, + "p90": 84.06399935483932, + "p95": 92.32000261545181, + "p99": 103.58399897813797 + }, + "combine": { + "p50": 87.20000088214874, + "p90": 93.63199770450592, + "p95": 97.88800030946732, + "p99": 105.43999820947647 + }, + "roundtrip": { + "p50": 148.60799908638, + "p90": 154.52800691127777, + "p95": 161.6320013999939, + "p99": 171.07200622558594 + }, + "isolatedSum": { + "p50": 167.1679988503456, + "p90": 177.69599705934525, + "p95": 190.20800292491913, + "p99": 209.02399718761444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-10f5ca0c", + "identity": "h200|deepep|v2|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_c94b913c", + "comparisonKey": "abdaf28f3ff3290b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:05.266355+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 36.86400130391121, + "p90": 45.1200008392334, + "p95": 49.984000623226166, + "p99": 56.992001831531525 + }, + "combine": { + "p50": 32.80000016093254, + "p90": 38.7520007789135, + "p95": 43.935999274253845, + "p99": 48.25599864125252 + }, + "roundtrip": { + "p50": 49.8879998922348, + "p90": 59.87200140953064, + "p95": 66.6240006685257, + "p99": 72.7040022611618 + }, + "isolatedSum": { + "p50": 69.66400146484375, + "p90": 83.8720016181469, + "p95": 93.91999989748001, + "p99": 105.24800047278404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 37.59999945759773, + "p90": 46.52800038456917, + "p95": 49.536000937223434, + "p99": 64.67200070619583 + }, + "combine": { + "p50": 32.54399821162224, + "p90": 40.28800129890442, + 
"p95": 44.064000248909, + "p99": 47.58400097489357 + }, + "roundtrip": { + "p50": 53.05600166320801, + "p90": 64.99200314283371, + "p95": 68.9919963479042, + "p99": 83.77599716186523 + }, + "isolatedSum": { + "p50": 70.14399766921997, + "p90": 86.81600168347359, + "p95": 93.60000118613243, + "p99": 112.2560016810894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 37.50399872660637, + "p90": 46.65600135922432, + "p95": 53.21599915623665, + "p99": 60.83200126886368 + }, + "combine": { + "p50": 33.02399814128876, + "p90": 40.063999593257904, + "p95": 44.73600164055824, + "p99": 48.22399839758873 + }, + "roundtrip": { + "p50": 52.89600044488907, + "p90": 66.75200164318085, + "p95": 73.95199686288834, + "p99": 206.1759978532791 + }, + "isolatedSum": { + "p50": 70.52799686789513, + "p90": 86.72000095248222, + "p95": 97.95200079679489, + "p99": 109.05599966645241 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 37.408001720905304, + "p90": 46.52800038456917, + "p95": 49.95200037956238, + "p99": 59.61599946022034 + }, + "combine": { + "p50": 34.52799841761589, + "p90": 42.30400174856186, + "p95": 44.319998472929, + "p99": 46.911999583244324 + }, + "roundtrip": { + "p50": 55.03999814391136, + "p90": 63.32799792289734, + "p95": 67.87200272083282, + "p99": 79.8719972372055 + }, + "isolatedSum": { + "p50": 71.9360001385212, + "p90": 88.83200213313103, + "p95": 94.27199885249138, + "p99": 106.52799904346466 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 39.32800143957138, + "p90": 49.47200044989586, + "p95": 54.11199852824211, + "p99": 58.52799862623215 + }, + "combine": { + "p50": 36.06399893760681, + "p90": 44.44799944758415, + "p95": 45.951999723911285, + "p99": 50.11200159788132 + }, + "roundtrip": { + "p50": 60.5119988322258, + "p90": 72.64000177383423, + "p95": 76.03199779987335, + "p99": 84.28800106048584 + }, + "isolatedSum": { + "p50": 75.39200037717819, + "p90": 93.91999989748001, + "p95": 100.0639982521534, + "p99": 108.64000022411346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.99199986457825, + "p90": 50.81599950790405, + "p95": 56.44800141453743, + "p99": 60.99199876189232 + }, + "combine": { + "p50": 43.64800080657005, + "p90": 51.67999863624573, + "p95": 53.44000086188316, + "p99": 56.19199946522713 + }, + "roundtrip": { + "p50": 72.51200079917908, + "p90": 79.32800054550171, + "p95": 84.22400057315826, + "p99": 88.73599767684937 + }, + "isolatedSum": { + "p50": 88.6400006711483, + "p90": 102.49599814414978, + "p95": 109.8880022764206, + "p99": 117.18399822711945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 55.26399984955788, + "p90": 63.77600133419037, + "p95": 67.07199662923813, + "p99": 74.30399954319 + }, + "combine": { + 
"p50": 58.400001376867294, + "p90": 65.21599739789963, + "p95": 69.69600170850754, + "p99": 85.28000116348267 + }, + "roundtrip": { + "p50": 96.54399752616882, + "p90": 103.55199873447418, + "p95": 110.07999628782272, + "p99": 116.28799885511398 + }, + "isolatedSum": { + "p50": 113.66400122642517, + "p90": 128.99199873209, + "p95": 136.76799833774567, + "p99": 159.58400070667267 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 79.48800176382065, + "p90": 86.91199868917465, + "p95": 90.71999788284302, + "p99": 96.89600020647049 + }, + "combine": { + "p50": 86.14400029182434, + "p90": 92.06400066614151, + "p95": 94.43199634552002, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 148.60799908638, + "p90": 156.70399367809296, + "p95": 161.47199273109436, + "p99": 170.9440052509308 + }, + "isolatedSum": { + "p50": 165.632002055645, + "p90": 178.97599935531616, + "p95": 185.15199422836304, + "p99": 199.072003364563 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-be10b0f3", + "identity": "h200|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "d25161dd12f786d6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:20.553129+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 194.87999379634857, + "p90": 243.26400458812714, + "p95": 256.3839852809906, + "p99": 349.92000460624695 + }, + "combine": { + "p50": 48.51200059056282, + "p90": 57.37600103020668, + "p95": 67.23199784755707, + "p99": 72.60800153017044 + }, + "roundtrip": { + "p50": 219.58400309085846, + "p90": 264.47999477386475, + "p95": 272.7679908275604, + "p99": 288.7679934501648 + }, + "isolatedSum": { + "p50": 243.3919943869114, + "p90": 
300.6400056183338, + "p95": 323.61598312854767, + "p99": 422.5280061364174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 187.16800212860107, + "p90": 244.1920042037964, + "p95": 287.51999139785767, + "p99": 322.52800464630127 + }, + "combine": { + "p50": 48.48000034689903, + "p90": 56.60799890756607, + "p95": 65.18399715423584, + "p99": 68.60800087451935 + }, + "roundtrip": { + "p50": 223.4559953212738, + "p90": 263.5200023651123, + "p95": 273.824006319046, + "p99": 358.240008354187 + }, + "isolatedSum": { + "p50": 235.6480024755001, + "p90": 300.80000311136246, + "p95": 352.7039885520935, + "p99": 391.1360055208206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 199.8720020055771, + "p90": 244.9599951505661, + "p95": 253.4080147743225, + "p99": 279.4879972934723 + }, + "combine": { + "p50": 49.60000142455101, + "p90": 57.440001517534256, + "p95": 67.23199784755707, + "p99": 73.95199686288834 + }, + "roundtrip": { + "p50": 222.52799570560455, + "p90": 271.7120051383972, + "p95": 280.3199887275696, + "p99": 314.2400085926056 + }, + "isolatedSum": { + "p50": 249.4720034301281, + "p90": 302.39999666810036, + "p95": 320.6400126218796, + "p99": 353.4399941563606 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
190.0479942560196, + "p90": 229.91999983787537, + "p95": 236.28799617290497, + "p99": 251.39200687408447 + }, + "combine": { + "p50": 49.92000013589859, + "p90": 59.74400043487549, + "p95": 67.23199784755707, + "p99": 72.48000055551529 + }, + "roundtrip": { + "p50": 234.14400219917297, + "p90": 278.656005859375, + "p95": 291.7439937591553, + "p99": 324.7680068016052 + }, + "isolatedSum": { + "p50": 239.96799439191818, + "p90": 289.66400027275085, + "p95": 303.51999402046204, + "p99": 323.87200742959976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 183.52000415325165, + "p90": 230.14399409294128, + "p95": 238.3359968662262, + "p99": 265.28000831604004 + }, + "combine": { + "p50": 51.10400170087814, + "p90": 61.08799949288368, + "p95": 69.50400024652481, + "p99": 78.33600044250488 + }, + "roundtrip": { + "p50": 221.24800086021423, + "p90": 265.76000452041626, + "p95": 271.0399925708771, + "p99": 282.8480005264282 + }, + "isolatedSum": { + "p50": 234.6240058541298, + "p90": 291.23199358582497, + "p95": 307.839997112751, + "p99": 343.6160087585449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 196.51199877262115, + "p90": 238.304004073143, + "p95": 256.8959891796112, + "p99": 344.4159924983978 + }, + "combine": { + "p50": 54.9440011382103, + "p90": 66.94400310516357, + "p95": 71.96799665689468, + "p99": 79.74400371313095 + }, + "roundtrip": { + "p50": 236.38400435447693, + "p90": 274.30400252342224, + "p95": 281.8880081176758, + "p99": 
313.6959969997406 + }, + "isolatedSum": { + "p50": 251.45599991083145, + "p90": 305.2480071783066, + "p95": 328.8639858365059, + "p99": 424.1599962115288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 196.48000597953796, + "p90": 248.416006565094, + "p95": 262.87999749183655, + "p99": 275.9999930858612 + }, + "combine": { + "p50": 61.824001371860504, + "p90": 73.88799637556076, + "p95": 79.83999699354172, + "p99": 86.75199747085571 + }, + "roundtrip": { + "p50": 246.20799720287323, + "p90": 296.03201150894165, + "p95": 311.6160035133362, + "p99": 353.92001271247864 + }, + "isolatedSum": { + "p50": 258.30400735139847, + "p90": 322.30400294065475, + "p95": 342.71999448537827, + "p99": 362.7519905567169 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 206.04799687862396, + "p90": 256.54399394989014, + "p95": 269.3760097026825, + "p99": 313.50401043891907 + }, + "combine": { + "p50": 73.05599749088287, + "p90": 82.5280025601387, + "p95": 92.06400066614151, + "p99": 96.25600278377533 + }, + "roundtrip": { + "p50": 253.4080147743225, + "p90": 295.26400566101074, + "p95": 300.9920120239258, + "p99": 318.11198592185974 + }, + "isolatedSum": { + "p50": 279.10399436950684, + "p90": 339.07199651002884, + "p95": 361.440010368824, + "p99": 409.7600132226944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9c1935a4", + "identity": "h200|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "4700d8a927aafe42", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:16.251890+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + 
"createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 202.65600085258484, + "p90": 246.43200635910034, + "p95": 287.87198662757874, + "p99": 396.7039883136749 + }, + "combine": { + "p50": 52.480001002550125, + "p90": 65.95200300216675, + "p95": 71.45600020885468, + "p99": 81.40800148248672 + }, + "roundtrip": { + "p50": 253.6959946155548, + "p90": 318.9760148525238, + "p95": 355.103999376297, + "p99": 450.3040015697479 + }, + "isolatedSum": { + "p50": 255.13600185513496, + "p90": 312.3840093612671, + "p95": 359.3279868364334, + "p99": 478.11198979616165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 200.99200308322906, + "p90": 241.2160038948059, + "p95": 252.57599353790283, + "p99": 301.4400005340576 + }, + "combine": { + "p50": 53.02400141954422, + "p90": 58.78400057554245, + "p95": 63.968002796173096, + "p99": 73.79200309515 + }, + "roundtrip": { + "p50": 250.65600872039795, + "p90": 306.2399923801422, + "p95": 369.1200017929077, + "p99": 569.7919726371765 + }, + "isolatedSum": { + "p50": 254.01600450277328, + "p90": 300.00000447034836, + "p95": 316.5439963340759, + "p99": 375.2320036292076 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 195.5839991569519, + "p90": 233.72800648212433, + "p95": 244.7039932012558, + "p99": 299.8400032520294 + }, + "combine": { + "p50": 54.496001452207565, + "p90": 59.39200147986412, + "p95": 
63.93600255250931, + "p99": 74.20799881219864 + }, + "roundtrip": { + "p50": 235.4239970445633, + "p90": 260.73598861694336, + "p95": 277.3120105266571, + "p99": 301.0239899158478 + }, + "isolatedSum": { + "p50": 250.08000060915947, + "p90": 293.12000796198845, + "p95": 308.6399957537651, + "p99": 374.04800206422806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 202.72000133991241, + "p90": 251.55198574066162, + "p95": 266.55998826026917, + "p99": 354.20799255371094 + }, + "combine": { + "p50": 54.71999943256378, + "p90": 65.76000154018402, + "p95": 71.03999704122543, + "p99": 77.98399776220322 + }, + "roundtrip": { + "p50": 239.48800563812256, + "p90": 283.488005399704, + "p95": 293.08798909187317, + "p99": 404.38398718833923 + }, + "isolatedSum": { + "p50": 257.4400007724762, + "p90": 317.31198728084564, + "p95": 337.5999853014946, + "p99": 432.19199031591415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 195.0719952583313, + "p90": 247.67999351024628, + "p95": 309.79201197624207, + "p99": 389.50398564338684 + }, + "combine": { + "p50": 55.58399856090546, + "p90": 73.08799773454666, + "p95": 83.52000266313553, + "p99": 95.36000341176987 + }, + "roundtrip": { + "p50": 242.78399348258972, + "p90": 297.88801074028015, + "p95": 317.6319897174835, + "p99": 420.9280014038086 + }, + "isolatedSum": { + "p50": 250.65599381923676, + "p90": 320.76799124479294, + "p95": 393.3120146393776, + "p99": 484.8639890551567 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 198.43199849128723, + "p90": 246.33599817752838, + "p95": 256.8640112876892, + "p99": 443.77601146698 + }, + "combine": { + "p50": 60.736000537872314, + "p90": 66.84800237417221, + "p95": 71.6480016708374, + "p99": 82.49600231647491 + }, + "roundtrip": { + "p50": 247.99999594688416, + "p90": 286.52799129486084, + "p95": 295.4240143299103, + "p99": 412.25600242614746 + }, + "isolatedSum": { + "p50": 259.16799902915955, + "p90": 313.1840005517006, + "p95": 328.5120129585266, + "p99": 526.2720137834549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 196.3520050048828, + "p90": 231.90400004386902, + "p95": 245.08799612522125, + "p99": 433.4079921245575 + }, + "combine": { + "p50": 67.84000247716904, + "p90": 73.53600114583969, + "p95": 78.72000336647034, + "p99": 87.48800307512283 + }, + "roundtrip": { + "p50": 251.77600979804993, + "p90": 297.21599817276, + "p95": 419.71200704574585, + "p99": 468.095988035202 + }, + "isolatedSum": { + "p50": 264.19200748205185, + "p90": 305.4400011897087, + "p95": 323.8079994916916, + "p99": 520.8959951996803 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 214.62400257587433, + "p90": 246.3040053844452, + "p95": 257.6960027217865, + 
"p99": 404.9600064754486 + }, + "combine": { + "p50": 82.78399705886841, + "p90": 95.77599912881851, + "p95": 100.25600343942642, + "p99": 107.10400342941284 + }, + "roundtrip": { + "p50": 272.8959918022156, + "p90": 308.6400032043457, + "p95": 318.9760148525238, + "p99": 424.83198642730713 + }, + "isolatedSum": { + "p50": 297.40799963474274, + "p90": 342.0800045132637, + "p95": 357.9520061612129, + "p99": 512.0640099048615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-248ba5cf", + "identity": "h200|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "de773fd98f36b42f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:13.036354+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 200.3519982099533, + "p90": 237.37600445747375, + "p95": 245.66400051116943, + "p99": 271.87201380729675 + }, + "combine": { + "p50": 55.87200075387955, + "p90": 63.90400230884552, + "p95": 71.23199850320816, + "p99": 76.99199765920639 + }, + "roundtrip": { + "p50": 240.31999707221985, + "p90": 277.6640057563782, + "p95": 284.0000092983246, + "p99": 401.15201473236084 + }, + "isolatedSum": { + "p50": 256.22399896383286, + "p90": 301.2800067663193, + "p95": 316.8959990143776, + "p99": 348.86401146650314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 198.81600141525269, + "p90": 240.447998046875, + "p95": 246.72000110149384, + "p99": 262.4320089817047 + }, + "combine": { + "p50": 57.18399956822395, + "p90": 66.75200164318085, + "p95": 74.94399696588516, + "p99": 80.60800284147263 + }, + "roundtrip": { + "p50": 240.83200097084045, + "p90": 277.8559923171997, + "p95": 284.5120131969452, + "p99": 330.01598715782166 + }, + 
"isolatedSum": { + "p50": 256.00000098347664, + "p90": 307.19999969005585, + "p95": 321.663998067379, + "p99": 343.04001182317734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 203.0400037765503, + "p90": 244.7039932012558, + "p95": 261.4719867706299, + "p99": 339.26400542259216 + }, + "combine": { + "p50": 56.60799890756607, + "p90": 70.0799971818924, + "p95": 76.4160007238388, + "p99": 81.31200075149536 + }, + "roundtrip": { + "p50": 244.22399699687958, + "p90": 286.75198554992676, + "p95": 299.9359965324402, + "p99": 694.7199702262878 + }, + "isolatedSum": { + "p50": 259.64800268411636, + "p90": 314.7839903831482, + "p95": 337.8879874944687, + "p99": 420.5760061740875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 210.4640007019043, + "p90": 253.1520128250122, + "p95": 264.41600918769836, + "p99": 296.06398940086365 + }, + "combine": { + "p50": 59.13599953055382, + "p90": 69.47200000286102, + "p95": 76.60800218582153, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 254.59200143814087, + "p90": 294.5599853992462, + "p95": 308.83198976516724, + "p99": 344.89598870277405 + }, + "isolatedSum": { + "p50": 269.6000002324581, + "p90": 322.62401282787323, + "p95": 341.0240113735199, + "p99": 379.9999877810478 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 197.7279931306839, + "p90": 235.58400571346283, + "p95": 241.95200204849243, + "p99": 271.4560031890869 + }, + "combine": { + "p50": 59.4559982419014, + "p90": 71.84000313282013, + "p95": 77.69600301980972, + "p99": 82.71999657154083 + }, + "roundtrip": { + "p50": 255.295991897583, + "p90": 303.99999022483826, + "p95": 320.67200541496277, + "p99": 513.6319994926453 + }, + "isolatedSum": { + "p50": 257.1839913725853, + "p90": 307.42400884628296, + "p95": 319.64800506830215, + "p99": 354.17599976062775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 207.0080041885376, + "p90": 247.23200500011444, + "p95": 262.81601190567017, + "p99": 297.9840040206909 + }, + "combine": { + "p50": 64.15999680757523, + "p90": 73.98399710655212, + "p95": 82.71999657154083, + "p99": 87.0399996638298 + }, + "roundtrip": { + "p50": 255.840003490448, + "p90": 298.4960079193115, + "p95": 308.03200602531433, + "p99": 375.90399384498596 + }, + "isolatedSum": { + "p50": 271.1680009961128, + "p90": 321.21600210666656, + "p95": 345.536008477211, + "p99": 385.0240036845207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.8080015182495, + "p90": 239.68000710010529, + "p95": 250.0160038471222, + "p99": 318.7839984893799 + }, + "combine": { + "p50": 72.95999675989151, + "p90": 84.95999872684479, + "p95": 91.74399822950363, + "p99": 96.76799923181534 + }, + "roundtrip": { + "p50": 259.00799036026, + "p90": 
297.5040078163147, + "p95": 304.7359883785248, + "p99": 322.7199912071228 + }, + "isolatedSum": { + "p50": 272.767998278141, + "p90": 324.6400058269501, + "p95": 341.7600020766258, + "p99": 415.5519977211952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.8959962129593, + "p90": 247.39199876785278, + "p95": 254.33599948883057, + "p99": 279.7119915485382 + }, + "combine": { + "p50": 89.75999802350998, + "p90": 100.41599720716476, + "p95": 108.73600095510483, + "p99": 116.19199812412262 + }, + "roundtrip": { + "p50": 295.00800371170044, + "p90": 341.37600660324097, + "p95": 369.7600066661835, + "p99": 729.3760180473328 + }, + "isolatedSum": { + "p50": 298.65599423646927, + "p90": 347.80799597501755, + "p95": 363.0720004439354, + "p99": 395.9039896726608 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7792a876", + "identity": "h200|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_a06b3704", + "comparisonKey": "2fd88f31a4c655f3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:31.923523+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 63.551999628543854, + "p90": 88.48000317811966, + "p95": 96.16000205278397, + "p99": 108.99200290441513 + }, + "combine": { + "p50": 59.776000678539276, + "p90": 67.29599833488464, + "p95": 76.31999999284744, + "p99": 79.8719972372055 + }, + "roundtrip": { + "p50": 152.0639955997467, + "p90": 187.48800456523895, + "p95": 193.53599846363068, + "p99": 206.88000321388245 + }, + "isolatedSum": { + "p50": 123.32800030708313, + "p90": 155.7760015130043, + "p95": 172.4800020456314, + "p99": 188.86400014162064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, 
+ "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 64.35199826955795, + "p90": 92.6079973578453, + "p95": 100.44799745082855, + "p99": 108.99200290441513 + }, + "combine": { + "p50": 61.11999973654747, + "p90": 73.02399724721909, + "p95": 80.12799918651581, + "p99": 86.91199868917465 + }, + "roundtrip": { + "p50": 156.0640037059784, + "p90": 194.5279985666275, + "p95": 204.67199385166168, + "p99": 222.08000719547272 + }, + "isolatedSum": { + "p50": 125.47199800610542, + "p90": 165.6319946050644, + "p95": 180.57599663734436, + "p99": 195.90400159358978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 65.11999666690826, + "p90": 88.73599767684937, + "p95": 96.3520035147667, + "p99": 105.05600273609161 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 70.52800059318542, + "p95": 80.70400357246399, + "p99": 84.86399799585342 + }, + "roundtrip": { + "p50": 159.87199544906616, + "p90": 196.73599302768707, + "p95": 206.84799551963806, + "p99": 225.47200322151184 + }, + "isolatedSum": { + "p50": 125.791996717453, + "p90": 159.2639982700348, + "p95": 177.05600708723068, + "p99": 189.92000073194504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 65.60000032186508, + "p90": 91.20000153779984, + "p95": 100.96000134944916, + "p99": 109.95200276374817 + }, + "combine": { + "p50": 
61.59999966621399, + "p90": 74.65600222349167, + "p95": 83.10399949550629, + "p99": 124.25599992275238 + }, + "roundtrip": { + "p50": 155.13600409030914, + "p90": 194.97600197792053, + "p95": 202.33599841594696, + "p99": 218.78400444984436 + }, + "isolatedSum": { + "p50": 127.19999998807907, + "p90": 165.8560037612915, + "p95": 184.06400084495544, + "p99": 234.20800268650055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 65.72800129652023, + "p90": 90.36800265312195, + "p95": 100.54399818181992, + "p99": 110.81600189208984 + }, + "combine": { + "p50": 64.41599875688553, + "p90": 78.33600044250488, + "p95": 84.86399799585342, + "p99": 97.02400118112564 + }, + "roundtrip": { + "p50": 164.32000696659088, + "p90": 197.50399887561798, + "p95": 216.8319970369339, + "p99": 231.90400004386902 + }, + "isolatedSum": { + "p50": 130.14400005340576, + "p90": 168.70400309562683, + "p95": 185.40799617767334, + "p99": 207.84000307321548 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 71.84000313282013, + "p90": 99.23200309276581, + "p95": 109.27999764680862, + "p99": 117.88800358772278 + }, + "combine": { + "p50": 70.68800181150436, + "p90": 81.79199695587158, + "p95": 90.91199934482574, + "p99": 97.98400104045868 + }, + "roundtrip": { + "p50": 166.17600619792938, + "p90": 207.48800039291382, + "p95": 216.86400473117828, + "p99": 231.9359928369522 + }, + "isolatedSum": { + "p50": 142.5280049443245, + "p90": 181.0240000486374, + "p95": 200.19199699163437, + "p99": 
215.87200462818146 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 79.74400371313095, + "p90": 102.88000106811523, + "p95": 110.04800349473953, + "p99": 176.4799952507019 + }, + "combine": { + "p50": 79.23199981451035, + "p90": 89.56799656152725, + "p95": 98.43199700117111, + "p99": 103.10400277376175 + }, + "roundtrip": { + "p50": 175.48799514770508, + "p90": 213.76000344753265, + "p95": 223.29600155353546, + "p99": 240.57599902153015 + }, + "isolatedSum": { + "p50": 158.9760035276413, + "p90": 192.4479976296425, + "p95": 208.48000049591064, + "p99": 279.58399802446365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.77599781751633, + "p90": 103.42399775981903, + "p95": 112.31999844312668, + "p99": 122.94399738311768 + }, + "combine": { + "p50": 97.88800030946732, + "p90": 110.97600311040878, + "p95": 118.23999881744385, + "p99": 123.77600371837616 + }, + "roundtrip": { + "p50": 212.51200139522552, + "p90": 243.13600361347198, + "p95": 255.42399287223816, + "p99": 280.8000147342682 + }, + "isolatedSum": { + "p50": 185.66399812698364, + "p90": 214.4000008702278, + "p95": 230.55999726057053, + "p99": 246.72000110149384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4480ae8e", + "identity": 
"h200|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "6b1c3fe16673dd33", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:30.513349+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + 
"rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 207.13600516319275, + "p90": 270.55999636650085, + "p95": 287.23201155662537, + "p99": 526.1759757995605 + }, + "combine": { + "p50": 60.67200005054474, + "p90": 69.11999732255936, + "p95": 76.92799717187881, + "p99": 85.1840004324913 + }, + "roundtrip": { + "p50": 249.4720071554184, + "p90": 297.12000489234924, + "p95": 304.4799864292145, + "p99": 356.76801204681396 + }, + "isolatedSum": { + "p50": 267.8080052137375, + "p90": 339.6799936890602, + "p95": 364.1600087285042, + "p99": 611.3599762320518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 201.34399831295013, + "p90": 251.74400210380554, + "p95": 267.10399985313416, + "p99": 288.7359857559204 + }, + "combine": { + "p50": 61.11999973654747, + "p90": 66.39999896287918, + "p95": 74.17599856853485, + "p99": 83.61600339412689 + }, + "roundtrip": { + "p50": 251.39200687408447, + "p90": 289.95200991630554, + "p95": 303.16799879074097, + "p99": 355.19999265670776 + }, + "isolatedSum": { + "p50": 262.4639980494976, + "p90": 318.1440010666847, + "p95": 341.279998421669, + "p99": 372.3519891500473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 197.7279931306839, + "p90": 221.15199267864227, + "p95": 232.63999819755554, + "p99": 253.34399938583374 + }, + "combine": { + "p50": 60.80000102519989, + "p90": 65.18399715423584, + "p95": 70.27199864387512, + "p99": 94.33600306510925 + }, + "roundtrip": { + "p50": 246.0480034351349, + "p90": 
264.1279995441437, + "p95": 276.95998549461365, + "p99": 413.37600350379944 + }, + "isolatedSum": { + "p50": 258.5279941558838, + "p90": 286.3359898328781, + "p95": 302.91199684143066, + "p99": 347.680002450943 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 202.5279998779297, + "p90": 251.00800395011902, + "p95": 264.6079957485199, + "p99": 425.02400279045105 + }, + "combine": { + "p50": 61.11999973654747, + "p90": 66.30399823188782, + "p95": 70.39999961853027, + "p99": 82.20800012350082 + }, + "roundtrip": { + "p50": 248.73599410057068, + "p90": 280.5440127849579, + "p95": 298.3039915561676, + "p99": 314.0160143375397 + }, + "isolatedSum": { + "p50": 263.64799961447716, + "p90": 317.31200218200684, + "p95": 335.00799536705017, + "p99": 507.2320029139519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 201.60000026226044, + "p90": 238.17600309848785, + "p95": 248.51199984550476, + "p99": 277.21598744392395 + }, + "combine": { + "p50": 63.64800035953522, + "p90": 69.31199878454208, + "p95": 75.52000135183334, + "p99": 86.59200370311737 + }, + "roundtrip": { + "p50": 250.91201066970825, + "p90": 294.71999406814575, + "p95": 300.9920120239258, + "p99": 324.70399141311646 + }, + "isolatedSum": { + "p50": 265.24800062179565, + "p90": 307.48800188302994, + "p95": 324.0320011973381, + "p99": 363.8079911470413 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 205.4399996995926, + "p90": 236.4799976348877, + "p95": 243.55199933052063, + "p99": 259.00799036026 + }, + "combine": { + "p50": 69.66400146484375, + "p90": 87.2960016131401, + "p95": 91.61599725484848, + "p99": 155.5200070142746 + }, + "roundtrip": { + "p50": 260.8320116996765, + "p90": 306.0480058193207, + "p95": 331.712007522583, + "p99": 385.3119909763336 + }, + "isolatedSum": { + "p50": 275.10400116443634, + "p90": 323.7759992480278, + "p95": 335.1679965853691, + "p99": 414.5279973745346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 199.93600249290466, + "p90": 216.5440022945404, + "p95": 221.15199267864227, + "p99": 262.2720003128052 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 92.96000003814697, + "p95": 95.48799693584442, + "p99": 102.46399790048599 + }, + "roundtrip": { + "p50": 272.15999364852905, + "p90": 294.2720055580139, + "p95": 303.9039969444275, + "p99": 350.271999835968 + }, + "isolatedSum": { + "p50": 279.2320027947426, + "p90": 309.5040023326874, + "p95": 316.6399896144867, + "p99": 364.73599821329117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 217.98400580883026, + "p90": 236.89599335193634, + "p95": 245.05600333213806, + "p99": 290.71998596191406 + }, + "combine": { + "p50": 97.37599641084671, + "p90": 102.14400291442871, + "p95": 106.4319983124733, + 
"p99": 114.27199840545654 + }, + "roundtrip": { + "p50": 306.7840039730072, + "p90": 323.0719864368439, + "p95": 333.0880105495453, + "p99": 439.9360120296478 + }, + "isolatedSum": { + "p50": 315.36000221967697, + "p90": 339.03999626636505, + "p95": 351.48800164461136, + "p99": 404.9919843673706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-564f9683", + "identity": "h200|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "25e224034f9c969b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:18.757006+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + 
}, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 211.13599836826324, + "p90": 250.8159875869751, + "p95": 258.432000875473, + "p99": 278.6880135536194 + }, + "combine": { + "p50": 59.99999865889549, + "p90": 70.0799971818924, + "p95": 78.78399640321732, + "p99": 82.78399705886841 + }, + "roundtrip": { + "p50": 258.4959864616394, + "p90": 301.15199089050293, + "p95": 308.51200222969055, + "p99": 326.33599638938904 + }, + "isolatedSum": { + "p50": 271.13599702715874, + "p90": 320.8959847688675, + "p95": 337.21599727869034, + "p99": 361.4720106124878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 213.28000724315643, + "p90": 255.16799092292786, + "p95": 262.9759907722473, + "p99": 282.52801299095154 + }, + "combine": { + "p50": 59.23200026154518, + "p90": 68.28799843788147, + "p95": 78.04799824953079, + "p99": 83.3280012011528 + }, + "roundtrip": { + "p50": 253.1520128250122, + "p90": 295.3599989414215, + "p95": 301.7919957637787, + "p99": 312.48000264167786 + }, + "isolatedSum": { + "p50": 272.5120075047016, + "p90": 323.4559893608093, + "p95": 341.0239890217781, + "p99": 365.85601419210434 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 218.87999773025513, + "p90": 267.4559950828552, + "p95": 277.8240144252777, + "p99": 298.7520098686218 + }, + "combine": { + "p50": 61.535999178886414, + "p90": 71.23199850320816, + "p95": 80.35200089216232, + "p99": 87.13600039482117 + }, + "roundtrip": { + "p50": 254.7839879989624, + "p90": 299.6160089969635, + "p95": 308.351993560791, + "p99": 326.84800028800964 + }, + "isolatedSum": { + "p50": 280.41599690914154, + "p90": 338.6879935860634, + "p95": 358.17601531744003, + "p99": 385.888010263443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 214.23999965190887, + "p90": 253.9519965648651, + "p95": 263.90400528907776, + "p99": 283.80799293518066 + }, + "combine": { + "p50": 61.88800185918808, + "p90": 70.97599655389786, + "p95": 81.7599967122078, + "p99": 87.55200356245041 + }, + "roundtrip": { + "p50": 257.53599405288696, + "p90": 302.17599868774414, + "p95": 309.9200129508972, + "p99": 319.7759985923767 + }, + "isolatedSum": { + "p50": 276.12800151109695, + "p90": 324.92799311876297, + "p95": 345.66400200128555, + "p99": 371.3599964976311 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 210.52800118923187, + "p90": 252.03201174736023, + "p95": 
258.36798548698425, + "p99": 270.6240117549896 + }, + "combine": { + "p50": 63.840001821517944, + "p90": 72.4480003118515, + "p95": 83.77599716186523, + "p99": 95.0080007314682 + }, + "roundtrip": { + "p50": 266.975998878479, + "p90": 316.32000207901, + "p95": 330.4319977760315, + "p99": 388.7679874897003 + }, + "isolatedSum": { + "p50": 274.3680030107498, + "p90": 324.48001205921173, + "p95": 342.1439826488495, + "p99": 365.6320124864578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 212.51200139522552, + "p90": 255.61600923538208, + "p95": 261.4719867706299, + "p99": 283.2320034503937 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 83.16799998283386, + "p95": 90.71999788284302, + "p99": 95.64799815416336 + }, + "roundtrip": { + "p50": 268.2560086250305, + "p90": 308.51200222969055, + "p95": 321.1840093135834, + "p99": 353.5679876804352 + }, + "isolatedSum": { + "p50": 282.52799808979034, + "p90": 338.78400921821594, + "p95": 352.1919846534729, + "p99": 378.88000160455704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 227.6799976825714, + "p90": 276.8639922142029, + "p95": 286.78399324417114, + "p99": 306.2720000743866 + }, + "combine": { + "p50": 80.12799918651581, + "p90": 96.3200032711029, + "p95": 101.18400305509567, + "p99": 106.175996363163 + }, + "roundtrip": { + "p50": 280.8000147342682, + "p90": 323.8080143928528, + "p95": 329.4079899787903, + "p99": 368.831992149353 + }, + "isolatedSum": { + "p50": 307.8079968690872, + "p90": 
373.1839954853058, + "p95": 387.9679962992668, + "p99": 412.4479964375496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 237.47199773788452, + "p90": 287.3600125312805, + "p95": 297.66398668289185, + "p99": 317.0880079269409 + }, + "combine": { + "p50": 99.16800260543823, + "p90": 115.10399729013443, + "p95": 120.60800194740295, + "p99": 124.7360035777092 + }, + "roundtrip": { + "p50": 320.0959861278534, + "p90": 360.3839874267578, + "p95": 367.96799302101135, + "p99": 385.98400354385376 + }, + "isolatedSum": { + "p50": 336.64000034332275, + "p90": 402.46400982141495, + "p95": 418.2719886302948, + "p99": 441.8240115046501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cbf6a26a", + "identity": "h200|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_6e8d2608", + "comparisonKey": "5e3ceaaa5d5ed7a5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:30.308397+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 55.776000022888184, + "p90": 66.52799993753433, + "p95": 71.68000191450119, + "p99": 91.42400324344635 + }, + "combine": { + "p50": 61.08799949288368, + "p90": 67.07199662923813, + "p95": 71.77600264549255, + "p99": 105.21599650382996 + }, + "roundtrip": { + "p50": 142.94399321079254, + "p90": 155.68000078201294, + "p95": 162.6559942960739, + "p99": 173.37599396705627 + }, + "isolatedSum": { + "p50": 116.86399951577187, + "p90": 133.59999656677246, + "p95": 143.45600455999374, + "p99": 196.6399997472763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 53.66399884223938, + "p90": 65.47199934720993, + "p95": 78.49600166082382, + "p99": 109.0880036354065 + }, + "combine": { + "p50": 61.344001442193985, + "p90": 67.00800359249115, + "p95": 73.02399724721909, + "p99": 87.16800063848495 + }, + "roundtrip": { + "p50": 141.12000167369843, + "p90": 156.44800662994385, + "p95": 160.7999950647354, + "p99": 256.25601410865784 + }, + "isolatedSum": { + "p50": 115.00800028443336, + "p90": 132.48000293970108, + "p95": 151.5199989080429, + "p99": 196.25600427389145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 53.0879981815815, + "p90": 63.45599889755249, + "p95": 71.71200215816498, + "p99": 87.74399757385254 + }, + "combine": { + "p50": 60.95999851822853, + "p90": 65.05600363016129, + "p95": 68.15999746322632, + "p99": 73.82400333881378 + }, + "roundtrip": { + "p50": 141.24800264835358, + "p90": 157.18400478363037, + "p95": 173.69599640369415, + "p99": 210.33599972724915 + }, + "isolatedSum": { + "p50": 114.04799669981003, + "p90": 128.51200252771378, + "p95": 139.8719996213913, + "p99": 161.56800091266632 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 52.480001002550125, + "p90": 62.30400130152702, + "p95": 67.29599833488464, + "p99": 105.3759977221489 + }, + "combine": { + "p50": 61.88800185918808, + "p90": 67.16799736022949, + "p95": 73.85600358247757, + "p99": 139.26400244235992 + }, + "roundtrip": { 
+ "p50": 142.2400027513504, + "p90": 155.71199357509613, + "p95": 159.29600596427917, + "p99": 167.35999286174774 + }, + "isolatedSum": { + "p50": 114.3680028617382, + "p90": 129.47199866175652, + "p95": 141.1520019173622, + "p99": 244.64000016450882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 54.496001452207565, + "p90": 64.38399851322174, + "p95": 69.34399902820587, + "p99": 82.62400329113007 + }, + "combine": { + "p50": 63.519999384880066, + "p90": 67.9360032081604, + "p95": 71.16799801588058, + "p99": 77.56800204515457 + }, + "roundtrip": { + "p50": 143.45599710941315, + "p90": 157.4079990386963, + "p95": 162.4639928340912, + "p99": 176.1920005083084 + }, + "isolatedSum": { + "p50": 118.01600083708763, + "p90": 132.32000172138214, + "p95": 140.51199704408646, + "p99": 160.19200533628464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 57.24800005555153, + "p90": 63.58399987220764, + "p95": 66.65600091218948, + "p99": 72.83200323581696 + }, + "combine": { + "p50": 68.96000355482101, + "p90": 73.40800017118454, + "p95": 76.57600194215775, + "p99": 89.1840010881424 + }, + "roundtrip": { + "p50": 146.7839926481247, + "p90": 160.76800227165222, + "p95": 165.40800034999847, + "p99": 181.43999576568604 + }, + "isolatedSum": { + "p50": 126.20800361037254, + "p90": 136.99200004339218, + "p95": 143.23200285434723, + "p99": 162.01600432395935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + 
"fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 67.03999638557434, + "p90": 72.28799909353256, + "p95": 76.25599950551987, + "p99": 84.57600325345993 + }, + "combine": { + "p50": 80.1599994301796, + "p90": 88.83199840784073, + "p95": 98.62399846315384, + "p99": 279.29601073265076 + }, + "roundtrip": { + "p50": 165.98400473594666, + "p90": 179.74400520324707, + "p95": 185.59999763965607, + "p99": 200.3840059041977 + }, + "isolatedSum": { + "p50": 147.19999581575394, + "p90": 161.1199975013733, + "p95": 174.8799979686737, + "p99": 363.8720139861107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 74.62400197982788, + "p90": 79.83999699354172, + "p95": 83.55200290679932, + "p99": 92.99200028181076 + }, + "combine": { + "p50": 97.6639986038208, + "p90": 104.35199737548828, + "p95": 108.60799998044968, + "p99": 121.0239976644516 + }, + "roundtrip": { + "p50": 197.85599410533905, + "p90": 203.2960057258606, + "p95": 206.7199945449829, + "p99": 215.29600024223328 + }, + "isolatedSum": { + "p50": 172.28800058364868, + "p90": 184.19199436903, + "p95": 192.160002887249, + "p99": 214.01599794626236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-11cb8acd", + "identity": "h200|deepep|v2|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_1999008d", + "comparisonKey": 
"735ee298c1b84308", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:57.411247+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 28.575999662280083, + "p90": 57.24800005555153, + "p95": 63.77600133419037, + "p99": 
71.74400240182877 + }, + "combine": { + "p50": 51.93600058555603, + "p90": 62.752000987529755, + "p95": 73.11999797821045, + "p99": 208.22399854660034 + }, + "roundtrip": { + "p50": 1858.8160276412964, + "p90": 1875.391960144043, + "p95": 1883.5840225219727, + "p99": 1916.0959720611572 + }, + "isolatedSum": { + "p50": 80.51200024783611, + "p90": 120.00000104308128, + "p95": 136.89599931240082, + "p99": 279.9680009484291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 27.83999964594841, + "p90": 38.975998759269714, + "p95": 50.84799975156784, + "p99": 60.19200012087822 + }, + "combine": { + "p50": 49.75999891757965, + "p90": 60.92799827456474, + "p95": 64.83200192451477, + "p99": 112.12799698114395 + }, + "roundtrip": { + "p50": 1858.2079410552979, + "p90": 1871.6800212860107, + "p95": 1877.6639699935913, + "p99": 1905.791997909546 + }, + "isolatedSum": { + "p50": 77.59999856352806, + "p90": 99.90399703383446, + "p95": 115.68000167608261, + "p99": 172.31999710202217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 28.736000880599022, + "p90": 48.31999912858009, + "p95": 56.19199946522713, + "p99": 62.68800050020218 + }, + "combine": { + "p50": 51.35999992489815, + "p90": 64.89600241184235, + "p95": 81.63200318813324, + "p99": 270.01601457595825 + }, + "roundtrip": { + "p50": 1861.9519472122192, + "p90": 1879.6160221099854, + "p95": 1886.080026626587, + "p99": 2024.6078968048096 + }, + "isolatedSum": { + "p50": 80.09600080549717, + "p90": 113.21600154042244, + "p95": 
137.82400265336037, + "p99": 332.70401507616043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 28.672000393271446, + "p90": 39.84000161290169, + "p95": 45.9199994802475, + "p99": 52.38400027155876 + }, + "combine": { + "p50": 50.592001527547836, + "p90": 59.61599946022034, + "p95": 63.87200206518173, + "p99": 80.70400357246399 + }, + "roundtrip": { + "p50": 1860.3520393371582, + "p90": 1875.040054321289, + "p95": 1882.0159435272217, + "p99": 1946.0159540176392 + }, + "isolatedSum": { + "p50": 79.26400192081928, + "p90": 99.45600107312202, + "p95": 109.79200154542923, + "p99": 133.08800384402275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.039999797940254, + "p90": 46.04800045490265, + "p95": 54.016001522541046, + "p99": 64.15999680757523 + }, + "combine": { + "p50": 52.928000688552856, + "p90": 63.64800035953522, + "p95": 68.67200136184692, + "p99": 149.02399480342865 + }, + "roundtrip": { + "p50": 1863.968014717102, + "p90": 1881.376028060913, + "p95": 1891.2960290908813, + "p99": 1954.9440145492554 + }, + "isolatedSum": { + "p50": 83.96800048649311, + "p90": 109.69600081443787, + "p95": 122.68800288438797, + "p99": 213.18399161100388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 
33.92000123858452, + "p90": 46.78399860858917, + "p95": 52.799999713897705, + "p99": 62.78400123119354 + }, + "combine": { + "p50": 58.36800113320351, + "p90": 69.34399902820587, + "p95": 73.37599992752075, + "p99": 133.760005235672 + }, + "roundtrip": { + "p50": 1873.4400272369385, + "p90": 1888.2240056991577, + "p95": 1892.8320407867432, + "p99": 1922.7839708328247 + }, + "isolatedSum": { + "p50": 92.28800237178802, + "p90": 116.12799763679504, + "p95": 126.17599964141846, + "p99": 196.54400646686554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 39.48799893260002, + "p90": 50.4320003092289, + "p95": 57.792000472545624, + "p99": 66.43199920654297 + }, + "combine": { + "p50": 68.67200136184692, + "p90": 76.9599974155426, + "p95": 81.79199695587158, + "p99": 193.66399943828583 + }, + "roundtrip": { + "p50": 1888.8319730758667, + "p90": 1900.7999897003174, + "p95": 1905.7600498199463, + "p99": 2005.1839351654053 + }, + "isolatedSum": { + "p50": 108.16000029444695, + "p90": 127.3919977247715, + "p95": 139.5839974284172, + "p99": 260.0959986448288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 51.83999985456467, + "p90": 63.93600255250931, + "p95": 68.1919977068901, + "p99": 74.36800003051758 + }, + "combine": { + "p50": 96.79999947547913, + "p90": 105.53599894046783, + "p95": 109.15199667215347, + "p99": 171.32799327373505 + }, + "roundtrip": { + "p50": 1934.0159893035889, + "p90": 1953.760027885437, + "p95": 1961.2799882888794, + "p99": 
2125.2479553222656 + }, + "isolatedSum": { + "p50": 148.6399993300438, + "p90": 169.47200149297714, + "p95": 177.34399437904358, + "p99": 245.69599330425262 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-43d3de4d", + "identity": "h200|deepep|v2|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_0de71e05", + "comparisonKey": "928e298d5aae8c9f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:48.857672+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 29.600000008940697, + "p90": 34.4959981739521, + "p95": 38.336001336574554, + "p99": 41.98399931192398 + }, + "combine": { + "p50": 37.50399872660637, + "p90": 46.78399860858917, + "p95": 60.127999633550644, + "p99": 92.92799979448318 + }, + "roundtrip": { + "p50": 1848.479986190796, + "p90": 1859.4239950180054, + "p95": 1884.6720457077026, + "p99": 1953.1840085983276 + }, + "isolatedSum": { + "p50": 67.10399873554707, + "p90": 81.27999678254128, + "p95": 98.4640009701252, + "p99": 134.91199910640717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 29.440000653266907, + "p90": 34.20799970626831, + "p95": 37.05599904060364, + "p99": 44.44799944758415 + }, + "combine": { + "p50": 37.53599897027016, + "p90": 44.95999962091446, + "p95": 51.263999193906784, + "p99": 111.10399663448334 + }, + "roundtrip": { + "p50": 1848.2240438461304, + "p90": 1857.151985168457, + "p95": 1868.127942085266, + "p99": 1972.480058670044 + }, + "isolatedSum": { + "p50": 66.97599962353706, + "p90": 79.16799932718277, + "p95": 88.31999823451042, + "p99": 155.5519960820675 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 29.279999434947968, + "p90": 35.392001271247864, + "p95": 39.64800015091896, + "p99": 65.60000032186508 + }, + "combine": { + "p50": 39.39199820160866, + "p90": 59.84000116586685, + "p95": 65.34399837255478, + "p99": 91.5519967675209 + }, + "roundtrip": { + "p50": 1851.904034614563, + "p90": 1871.5840578079224, + "p95": 1883.2639455795288, + "p99": 1916.767954826355 + }, + "isolatedSum": { + "p50": 68.67199763655663, + "p90": 95.23200243711472, + "p95": 104.99199852347374, + "p99": 157.151997089386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 29.85600009560585, + "p90": 32.86400064826012, + "p95": 36.12799942493439, + "p99": 44.64000090956688 + }, + "combine": { + "p50": 38.495998829603195, + "p90": 45.184001326560974, + "p95": 49.82399940490723, + "p99": 85.69599688053131 + }, + "roundtrip": { + "p50": 1848.863959312439, + "p90": 1856.6399812698364, + "p95": 1860.7679605484009, + "p99": 1907.9359769821167 + }, + "isolatedSum": { + "p50": 68.35199892520905, + "p90": 78.04800197482109, + "p95": 85.95199882984161, + "p99": 130.3359977900982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 31.679999083280563, + "p90": 34.46400165557861, + "p95": 38.336001336574554, + "p99": 44.60800066590309 + }, + "combine": { + "p50": 40.832001715898514, + "p90": 46.52800038456917, + "p95": 52.51200124621391, + 
"p99": 93.24800223112106 + }, + "roundtrip": { + "p50": 1854.0799617767334, + "p90": 1861.8240356445312, + "p95": 1868.064045906067, + "p99": 1923.1679439544678 + }, + "isolatedSum": { + "p50": 72.51200079917908, + "p90": 80.99200204014778, + "p95": 90.84800258278847, + "p99": 137.85600289702415 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 34.432001411914825, + "p90": 37.18400001525879, + "p95": 40.76800122857094, + "p99": 51.90400034189224 + }, + "combine": { + "p50": 45.9199994802475, + "p90": 50.65599828958511, + "p95": 53.0879981815815, + "p99": 69.82400268316269 + }, + "roundtrip": { + "p50": 1862.5279664993286, + "p90": 1867.9360151290894, + "p95": 1871.8080520629883, + "p99": 1928.704023361206 + }, + "isolatedSum": { + "p50": 80.35200089216232, + "p90": 87.8399983048439, + "p95": 93.85599941015244, + "p99": 121.72800302505493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 40.57599976658821, + "p90": 45.504000037908554, + "p95": 57.792000472545624, + "p99": 63.551999628543854 + }, + "combine": { + "p50": 61.95199862122536, + "p90": 77.66400277614594, + "p95": 86.2400010228157, + "p99": 125.11999905109406 + }, + "roundtrip": { + "p50": 1882.7199935913086, + "p90": 1896.224021911621, + "p95": 1903.040051460266, + "p99": 1947.6799964904785 + }, + "isolatedSum": { + "p50": 102.52799838781357, + "p90": 123.16800281405449, + "p95": 144.03200149536133, + "p99": 188.6719986796379 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, 
+ "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 52.73599922657013, + "p90": 56.89600110054016, + "p95": 63.968002796173096, + "p99": 74.91199672222137 + }, + "combine": { + "p50": 88.19200098514557, + "p90": 105.76000064611435, + "p95": 112.12799698114395, + "p99": 185.56800484657288 + }, + "roundtrip": { + "p50": 1924.1280555725098, + "p90": 1940.7039880752563, + "p95": 1949.3759870529175, + "p99": 2004.0318965911865 + }, + "isolatedSum": { + "p50": 140.9280002117157, + "p90": 162.6560017466545, + "p95": 176.09599977731705, + "p99": 260.48000156879425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4eec41c8", + "identity": "h200|deepep|v1|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_25d672be", + "comparisonKey": "2ce1bfcf58cf91fe", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:38.355548+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.67199718952179, + "p90": 105.92000186443329, + "p95": 112.96000331640244, + "p99": 125.85599720478058 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 89.24800157546997, + "p95": 97.15200215578079, + "p99": 104.8320010304451 + }, + "roundtrip": { + "p50": 149.31200444698334, + "p90": 164.70399498939514, + "p95": 170.6240028142929, + "p99": 182.5920045375824 + }, + "isolatedSum": { + "p50": 170.27200013399124, + "p90": 195.16800343990326, + "p95": 210.11200547218323, + "p99": 230.68799823522568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 109.98400300741196, + "p90": 
128.89599800109863, + "p95": 135.04000008106232, + "p99": 148.51200580596924 + }, + "combine": { + "p50": 104.70400005578995, + "p90": 119.90399658679962, + "p95": 122.56000190973282, + "p99": 130.68799674510956 + }, + "roundtrip": { + "p50": 192.3840045928955, + "p90": 207.13600516319275, + "p95": 214.23999965190887, + "p99": 231.1680018901825 + }, + "isolatedSum": { + "p50": 214.6880030632019, + "p90": 248.79999458789825, + "p95": 257.60000199079514, + "p99": 279.2000025510788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 146.04799449443817, + "p90": 162.4000072479248, + "p95": 165.69599509239197, + "p99": 172.67200350761414 + }, + "combine": { + "p50": 154.7520011663437, + "p90": 164.15999829769135, + "p95": 168.12799870967865, + "p99": 179.967999458313 + }, + "roundtrip": { + "p50": 273.53599667549133, + "p90": 291.8719947338104, + "p95": 297.5040078163147, + "p99": 311.5200102329254 + }, + "isolatedSum": { + "p50": 300.79999566078186, + "p90": 326.56000554561615, + "p95": 333.8239938020706, + "p99": 352.6400029659271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 213.82400393486023, + "p90": 233.34400355815887, + "p95": 239.96800184249878, + "p99": 252.9279887676239 + }, + "combine": { + "p50": 251.93598866462708, + "p90": 260.47998666763306, + "p95": 265.6320035457611, + "p99": 276.3519883155823 + }, + "roundtrip": { + "p50": 440.064013004303, + "p90": 452.7040123939514, + "p95": 459.3279957771301, + "p99": 
484.44798588752747 + }, + "isolatedSum": { + "p50": 465.7599925994873, + "p90": 493.82399022579193, + "p95": 505.6000053882599, + "p99": 529.2799770832062 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 347.9999899864197, + "p90": 358.7839901447296, + "p95": 365.4400110244751, + "p99": 382.30401277542114 + }, + "combine": { + "p50": 424.4160056114197, + "p90": 433.4399998188019, + "p95": 436.70400977134705, + "p99": 444.7360038757324 + }, + "roundtrip": { + "p50": 746.8159794807434, + "p90": 757.4719786643982, + "p95": 763.6799812316895, + "p99": 834.7839713096619 + }, + "isolatedSum": { + "p50": 772.4159955978394, + "p90": 792.2239899635315, + "p95": 802.1440207958221, + "p99": 827.0400166511536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 622.4960088729858, + "p90": 631.55198097229, + "p95": 634.880006313324, + "p99": 643.8080072402954 + }, + "combine": { + "p50": 770.6239819526672, + "p90": 781.4080119132996, + "p95": 785.2159738540649, + "p99": 792.9279804229736 + }, + "roundtrip": { + "p50": 1369.696021080017, + "p90": 1381.9199800491333, + "p95": 1387.7760171890259, + "p99": 1408.6400270462036 + }, + "isolatedSum": { + "p50": 1393.119990825653, + "p90": 1412.9599928855896, + "p95": 1420.095980167389, + "p99": 1436.735987663269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e63d0f6", + "identity": "h200|deepep|v1|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_25d672be", + "comparisonKey": "cdc365e7fc1464cf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:04.893427+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 97.85600006580353, + "p90": 107.35999792814255, + "p95": 112.35199868679047, + "p99": 123.07199835777283 + }, + "combine": { + "p50": 89.37600255012512, + "p90": 94.7519987821579, + "p95": 96.57599776983261, + "p99": 106.81600123643875 + }, + "roundtrip": { + "p50": 165.53600132465363, + "p90": 171.00800573825836, + "p95": 175.20000040531158, + "p99": 188.38399648666382 + }, + "isolatedSum": { + "p50": 187.23200261592865, + "p90": 202.11199671030045, + "p95": 208.92799645662308, + "p99": 229.88799959421158 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 123.58400225639343, + "p90": 129.18399274349213, + "p95": 133.2480013370514, + "p99": 141.12000167369843 + }, + "combine": { + "p50": 119.10399794578552, + "p90": 124.06399846076965, + "p95": 127.20000743865967, + "p99": 135.6160044670105 + }, + "roundtrip": { + "p50": 214.4639939069748, + "p90": 221.02400660514832, + "p95": 224.16000068187714, + "p99": 248.99199604988098 + }, + "isolatedSum": { + "p50": 242.68800020217896, + "p90": 253.24799120426178, + "p95": 260.44800877571106, + "p99": 276.7360061407089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 162.23999857902527, + "p90": 168.73599588871002, + "p95": 
172.09599912166595, + "p99": 186.81600689888 + }, + "combine": { + "p50": 178.39999496936798, + "p90": 183.07200074195862, + "p95": 185.7600063085556, + "p99": 193.4719979763031 + }, + "roundtrip": { + "p50": 310.36800146102905, + "p90": 318.5279965400696, + "p95": 321.4719891548157, + "p99": 331.2639892101288 + }, + "isolatedSum": { + "p50": 340.63999354839325, + "p90": 351.80799663066864, + "p95": 357.85600543022156, + "p99": 380.2880048751831 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 241.28000438213348, + "p90": 248.9600032567978, + "p95": 252.60800123214722, + "p99": 266.84799790382385 + }, + "combine": { + "p50": 281.3119888305664, + "p90": 287.6479923725128, + "p95": 290.336012840271, + "p99": 302.97601222991943 + }, + "roundtrip": { + "p50": 497.98399209976196, + "p90": 505.5360198020935, + "p95": 508.86398553848267, + "p99": 526.3680219650269 + }, + "isolatedSum": { + "p50": 522.5919932126999, + "p90": 536.6079956293106, + "p95": 542.9440140724182, + "p99": 569.8240101337433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 401.69599652290344, + "p90": 410.0160002708435, + "p95": 412.80001401901245, + "p99": 444.35200095176697 + }, + "combine": { + "p50": 487.13600635528564, + "p90": 495.32800912857056, + "p95": 497.5680112838745, + "p99": 508.1599950790405 + }, + "roundtrip": { + "p50": 863.103985786438, + "p90": 872.3199963569641, + "p95": 875.5519986152649, + "p99": 910.3360176086426 + }, + 
"isolatedSum": { + "p50": 888.8320028781891, + "p90": 905.3440093994141, + "p95": 910.368025302887, + "p99": 952.5119960308075 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 748.7999796867371, + "p90": 764.7680044174194, + "p95": 770.0480222702026, + "p99": 784.9599719047546 + }, + "combine": { + "p50": 880.8959722518921, + "p90": 889.631986618042, + "p95": 893.8559889793396, + "p99": 948.0640292167664 + }, + "roundtrip": { + "p50": 1594.815969467163, + "p90": 1609.536051750183, + "p95": 1616.960048675537, + "p99": 1661.5040302276611 + }, + "isolatedSum": { + "p50": 1629.6959519386292, + "p90": 1654.3999910354614, + "p95": 1663.9040112495422, + "p99": 1733.024001121521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-94287be8", + "identity": "h200|deepep|v1|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_25d672be", + "comparisonKey": "d7e46888cbd6e110", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:34.697042+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + 
"model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.43999820947647, + "p90": 124.35200065374374, + "p95": 131.55199587345123, + "p99": 143.93599331378937 + }, + "combine": { + "p50": 95.90400010347366, + "p90": 121.37600034475327, + "p95": 187.19999492168427, + "p99": 239.68000710010529 + }, + "roundtrip": { + "p50": 180.95999956130981, + "p90": 211.39200031757355, + "p95": 234.40000414848328, + "p99": 401.98400616645813 + }, + "isolatedSum": { + "p50": 201.34399831295013, + "p90": 245.728000998497, + "p95": 318.7519907951355, + "p99": 383.61600041389465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + 
"recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.86399841308594, + "p90": 160.288006067276, + "p95": 167.80799627304077, + "p99": 178.6240041255951 + }, + "combine": { + "p50": 127.71199643611908, + "p90": 146.17599546909332, + "p95": 151.10400319099426, + "p99": 159.32799875736237 + }, + "roundtrip": { + "p50": 233.50399732589722, + "p90": 257.0880055427551, + "p95": 268.6080038547516, + "p99": 282.24000334739685 + }, + "isolatedSum": { + "p50": 260.575994849205, + "p90": 306.4640015363693, + "p95": 318.91199946403503, + "p99": 337.95200288295746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 183.71200561523438, + "p90": 221.6320037841797, + "p95": 230.20799458026886, + "p99": 244.22399699687958 + }, + "combine": { + "p50": 199.0080028772354, + "p90": 212.351992726326, + "p95": 217.8560048341751, + "p99": 232.09600150585175 + }, + "roundtrip": { + "p50": 353.15200686454773, + "p90": 379.07201051712036, + "p95": 385.15201210975647, + "p99": 401.5040099620819 + }, + "isolatedSum": { + "p50": 382.7200084924698, + "p90": 433.9839965105057, + "p95": 448.06399941444397, + "p99": 476.3199985027313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 275.90399980545044, + "p90": 302.2400140762329, + "p95": 313.85600566864014, + "p99": 326.52801275253296 + }, + "combine": { + "p50": 
319.10398602485657, + "p90": 332.0640027523041, + "p95": 336.38399839401245, + "p99": 357.2799861431122 + }, + "roundtrip": { + "p50": 572.2240209579468, + "p90": 595.2320098876953, + "p95": 608.3199977874756, + "p99": 633.7599754333496 + }, + "isolatedSum": { + "p50": 595.007985830307, + "p90": 634.304016828537, + "p95": 650.2400040626526, + "p99": 683.8079988956451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 462.97600865364075, + "p90": 476.83200240135193, + "p95": 484.22399163246155, + "p99": 504.1919946670532 + }, + "combine": { + "p50": 546.2719798088074, + "p90": 560.4159832000732, + "p95": 573.3439922332764, + "p99": 596.2560176849365 + }, + "roundtrip": { + "p50": 981.2160134315491, + "p90": 996.5119957923889, + "p95": 1048.8959550857544, + "p99": 1214.7200107574463 + }, + "isolatedSum": { + "p50": 1009.2479884624481, + "p90": 1037.2479856014252, + "p95": 1057.567983865738, + "p99": 1100.4480123519897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 869.2799806594849, + "p90": 889.5999789237976, + "p95": 897.9520201683044, + "p99": 934.9759817123413 + }, + "combine": { + "p50": 1000.0959634780884, + "p90": 1009.0559720993042, + "p95": 1013.152003288269, + "p99": 1025.6320238113403 + }, + "roundtrip": { + "p50": 1835.5519771575928, + "p90": 1854.7519445419312, + "p95": 1860.0000143051147, + "p99": 1892.0320272445679 + }, + "isolatedSum": { + "p50": 1869.3759441375732, + "p90": 1898.6559510231018, + "p95": 
1911.1040234565735, + "p99": 1960.6080055236816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c98b4e15", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_608ab302", + "comparisonKey": "ba9cbdb3e10dce05", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:38.778968+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", 
+ "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.76000261306763, + "p90": 125.44000148773193, + "p95": 130.5920034646988, + "p99": 141.76000654697418 + }, + "combine": { + "p50": 106.52799904346466, + "p90": 112.5440001487732, + "p95": 115.9679964184761, + "p99": 123.87199699878693 + }, + "roundtrip": { + "p50": 199.77599382400513, + "p90": 209.1200053691864, + "p95": 214.81600403785706, + "p99": 253.1839907169342 + }, + "isolatedSum": { + "p50": 224.2880016565323, + "p90": 237.98400163650513, + "p95": 246.5599998831749, + "p99": 265.6320035457611 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.37599682807922, + "p90": 151.87199413776398, + "p95": 156.8640023469925, + "p99": 175.29599368572235 + }, + "combine": { + "p50": 144.1279947757721, + "p90": 148.99200201034546, + "p95": 151.67999267578125, + "p99": 158.6879938840866 + }, + "roundtrip": { + "p50": 264.2880082130432, + "p90": 272.44800329208374, + "p95": 277.9200077056885, + "p99": 302.97601222991943 + }, + "isolatedSum": { + "p50": 289.5039916038513, + "p90": 300.86399614810944, + "p95": 308.54399502277374, + "p99": 333.98398756980896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.0079960823059, + "p90": 212.76800334453583, + "p95": 218.4000015258789, + "p99": 237.92000114917755 + }, + "combine": { + "p50": 222.56000339984894, + "p90": 229.44000363349915, + "p95": 234.78400707244873, + "p99": 278.49599719047546 + }, + "roundtrip": { + "p50": 399.58399534225464, + "p90": 408.735990524292, + "p95": 415.583997964859, + "p99": 434.9760115146637 + }, + "isolatedSum": { + "p50": 425.56799948215485, + "p90": 442.208006978035, + "p95": 453.18400859832764, + "p99": 516.415998339653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.84800577163696, + "p90": 322.07998633384705, + "p95": 326.01600885391235, + "p99": 335.04000306129456 + }, + "combine": { + "p50": 355.6160032749176, + "p90": 363.23198676109314, + "p95": 366.65600538253784, + "p99": 378.33601236343384 + }, + "roundtrip": { + "p50": 644.6400284767151, + "p90": 653.8879871368408, + "p95": 658.847987651825, + "p99": 687.1359944343567 + }, + "isolatedSum": { + "p50": 670.4640090465546, + "p90": 685.3119730949402, + "p95": 692.6720142364502, + "p99": 713.3760154247284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 527.3600220680237, + "p90": 536.0959768295288, + "p95": 543.9040064811707, + "p99": 590.8799767494202 + }, + "combine": { + "p50": 619.8400259017944, + "p90": 629.6319961547852, + "p95": 634.2399716377258, + "p99": 647.167980670929 + }, 
+ "roundtrip": { + "p50": 1120.1280355453491, + "p90": 1133.247971534729, + "p95": 1139.0399932861328, + "p99": 1225.5040407180786 + }, + "isolatedSum": { + "p50": 1147.2000479698181, + "p90": 1165.727972984314, + "p95": 1178.1439781188965, + "p99": 1238.0479574203491 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1009.503960609436, + "p90": 1033.3759784698486, + "p95": 1040.38405418396, + "p99": 1094.4960117340088 + }, + "combine": { + "p50": 1121.6959953308105, + "p90": 1133.7599754333496, + "p95": 1139.6160125732422, + "p99": 1223.8719463348389 + }, + "roundtrip": { + "p50": 2094.4321155548096, + "p90": 2113.215923309326, + "p95": 2120.896100997925, + "p99": 2187.1039867401123 + }, + "isolatedSum": { + "p50": 2131.1999559402466, + "p90": 2167.1359539031982, + "p95": 2180.000066757202, + "p99": 2318.3679580688477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fbfe8169", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_25d672be", + "comparisonKey": "7807702d7480d62d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:06.164929+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.37599968910217, + "p90": 129.60000336170197, + "p95": 133.85599851608276, + "p99": 144.6080058813095 + }, + "combine": { + "p50": 105.79200088977814, + "p90": 110.91200262308121, + "p95": 114.14399743080139, + "p99": 125.47199428081512 + }, + "roundtrip": { + "p50": 199.10399615764618, + "p90": 205.9839963912964, + "p95": 212.0320051908493, + "p99": 223.1999933719635 + }, + "isolatedSum": { + "p50": 223.1680005788803, + "p90": 240.51200598478317, + "p95": 247.99999594688416, + 
"p99": 270.08000016212463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.04799449443817, + "p90": 153.56799960136414, + "p95": 157.56799280643463, + "p99": 167.61599481105804 + }, + "combine": { + "p50": 144.896000623703, + "p90": 150.4639983177185, + "p95": 153.18399667739868, + "p99": 160.16000509262085 + }, + "roundtrip": { + "p50": 265.1839852333069, + "p90": 271.84000611305237, + "p95": 275.9360074996948, + "p99": 288.1920039653778 + }, + "isolatedSum": { + "p50": 290.9439951181412, + "p90": 304.03199791908264, + "p95": 310.7519894838333, + "p99": 327.7759999036789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.87999510765076, + "p90": 209.98400449752808, + "p95": 214.65599536895752, + "p99": 223.07200729846954 + }, + "combine": { + "p50": 223.26399385929108, + "p90": 229.79199886322021, + "p95": 233.2800030708313, + "p99": 239.45599794387817 + }, + "roundtrip": { + "p50": 399.00800585746765, + "p90": 406.1119854450226, + "p95": 408.3839952945709, + "p99": 419.8080003261566 + }, + "isolatedSum": { + "p50": 426.14398896694183, + "p90": 439.7760033607483, + "p95": 447.9359984397888, + "p99": 462.5280052423477 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": 
{ + "p50": 315.16799330711365, + "p90": 323.2319951057434, + "p95": 326.4960050582886, + "p99": 356.28798604011536 + }, + "combine": { + "p50": 355.5839955806732, + "p90": 362.39999532699585, + "p95": 365.2479946613312, + "p99": 392.2879993915558 + }, + "roundtrip": { + "p50": 644.6719765663147, + "p90": 654.7520160675049, + "p95": 661.8559956550598, + "p99": 890.496015548706 + }, + "isolatedSum": { + "p50": 670.7519888877869, + "p90": 685.6319904327393, + "p95": 691.7439997196198, + "p99": 748.5759854316711 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 527.7439951896667, + "p90": 535.6159806251526, + "p95": 540.287971496582, + "p99": 553.7279844284058 + }, + "combine": { + "p50": 617.0560121536255, + "p90": 625.3119707107544, + "p95": 628.5439729690552, + "p99": 643.455982208252 + }, + "roundtrip": { + "p50": 1119.488000869751, + "p90": 1130.3679943084717, + "p95": 1133.7599754333496, + "p99": 1173.408031463623 + }, + "isolatedSum": { + "p50": 1144.8000073432922, + "p90": 1160.927951335907, + "p95": 1168.8319444656372, + "p99": 1197.1839666366577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1009.5360279083252, + "p90": 1034.1119766235352, + "p95": 1042.4319505691528, + "p99": 1100.1280546188354 + }, + "combine": { + "p50": 1121.8559741973877, + "p90": 1134.2719793319702, + "p95": 1139.583945274353, + "p99": 1198.1439590454102 + }, + "roundtrip": { + "p50": 2093.440055847168, + "p90": 2116.640090942383, + "p95": 
2125.4398822784424, + "p99": 2259.2639923095703 + }, + "isolatedSum": { + "p50": 2131.392002105713, + "p90": 2168.3839559555054, + "p95": 2182.015895843506, + "p99": 2298.2720136642456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e8e69042", + "identity": "h200|deepep|v1|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_25d672be", + "comparisonKey": "2238ea91c3addbb8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:12.216723+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.30399930477142, + "p90": 131.58400356769562, + "p95": 139.52000439167023, + "p99": 156.12800419330597 + }, + "combine": { + "p50": 105.47199845314026, + "p90": 120.51200121641159, + "p95": 126.62400305271149, + "p99": 151.13599598407745 + }, + "roundtrip": { + "p50": 196.03200256824493, + "p90": 210.68799495697021, + "p95": 219.29599344730377, + "p99": 233.21600258350372 + }, + "isolatedSum": { + "p50": 223.77599775791168, + "p90": 252.0960047841072, + "p95": 266.1440074443817, + "p99": 307.2640001773834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.6959992647171, + "p90": 164.8319959640503, + "p95": 169.98399794101715, + "p99": 188.38399648666382 + }, + "combine": { + "p50": 143.2960033416748, + "p90": 155.35999834537506, + "p95": 161.15200519561768, + "p99": 172.35200107097626 + }, + "roundtrip": { + "p50": 265.3439939022064, + "p90": 279.90400791168213, + "p95": 286.3039970397949, + "p99": 293.7600016593933 + }, + "isolatedSum": { + "p50": 288.9920026063919, + "p90": 320.19199430942535, + "p95": 331.1360031366348, + "p99": 360.7359975576401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + 
"combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.68799364566803, + "p90": 218.1439995765686, + "p95": 226.33600234985352, + "p99": 240.4160052537918 + }, + "combine": { + "p50": 223.29600155353546, + "p90": 233.21600258350372, + "p95": 236.64000630378723, + "p99": 243.48799884319305 + }, + "roundtrip": { + "p50": 398.9120125770569, + "p90": 412.76800632476807, + "p95": 418.0479943752289, + "p99": 429.85600233078003 + }, + "isolatedSum": { + "p50": 425.9839951992035, + "p90": 451.3600021600723, + "p95": 462.97600865364075, + "p99": 483.90400409698486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 309.1520071029663, + "p90": 325.50400495529175, + "p95": 331.64799213409424, + "p99": 344.7360098361969 + }, + "combine": { + "p50": 356.3840091228485, + "p90": 367.42401123046875, + "p95": 373.9840090274811, + "p99": 392.41600036621094 + }, + "roundtrip": { + "p50": 643.3600187301636, + "p90": 661.3759994506836, + "p95": 666.7839884757996, + "p99": 760.8640193939209 + }, + "isolatedSum": { + "p50": 665.5360162258148, + "p90": 692.9280161857605, + "p95": 705.6320011615753, + "p99": 737.1520102024078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 525.8240103721619, + "p90": 538.2720232009888, + "p95": 543.4240102767944, + 
"p99": 737.0880246162415 + }, + "combine": { + "p50": 617.9199814796448, + "p90": 644.2880034446716, + "p95": 657.8879952430725, + "p99": 773.9520072937012 + }, + "roundtrip": { + "p50": 1118.175983428955, + "p90": 1134.335994720459, + "p95": 1150.015950202942, + "p99": 1199.0079879760742 + }, + "isolatedSum": { + "p50": 1143.7439918518066, + "p90": 1182.5600266456604, + "p95": 1201.312005519867, + "p99": 1511.0400319099426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.8639965057373, + "p90": 1012.9280090332031, + "p95": 1022.9439735412598, + "p99": 1137.5999450683594 + }, + "combine": { + "p50": 1114.4640445709229, + "p90": 1126.911997795105, + "p95": 1132.1920156478882, + "p99": 1260.7040405273438 + }, + "roundtrip": { + "p50": 2057.5358867645264, + "p90": 2088.3519649505615, + "p95": 2098.0160236358643, + "p99": 2254.175901412964 + }, + "isolatedSum": { + "p50": 2099.32804107666, + "p90": 2139.840006828308, + "p95": 2155.135989189148, + "p99": 2398.303985595703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e560027c", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h200_09ca428a", + "comparisonKey": "3a157c28e8e12369", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:20.426918+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": 
"prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.66399705410004, + "p90": 159.87199544906616, + "p95": 166.46400094032288, + "p99": 174.49599504470825 + }, + "combine": { + "p50": 124.57600235939026, + "p90": 133.08799266815186, + "p95": 137.60000467300415, + "p99": 149.08799529075623 + }, + "roundtrip": { + "p50": 230.5919975042343, + "p90": 254.27201390266418, + 
"p95": 263.5200023651123, + "p99": 291.3280129432678 + }, + "isolatedSum": { + "p50": 258.2399994134903, + "p90": 292.959988117218, + "p95": 304.064005613327, + "p99": 323.5839903354645 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 177.15199291706085, + "p90": 198.94400238990784, + "p95": 208.8959962129593, + "p99": 219.200000166893 + }, + "combine": { + "p50": 177.5359958410263, + "p90": 192.73599982261658, + "p95": 199.52000677585602, + "p99": 275.7120132446289 + }, + "roundtrip": { + "p50": 326.04798674583435, + "p90": 343.48800778388977, + "p95": 351.6480028629303, + "p99": 376.6399919986725 + }, + "isolatedSum": { + "p50": 354.68798875808716, + "p90": 391.6800022125244, + "p95": 408.4160029888153, + "p99": 494.9120134115219 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 275.10398626327515, + "p90": 311.0080063343048, + "p95": 316.0960078239441, + "p99": 336.8000090122223 + }, + "combine": { + "p50": 269.98400688171387, + "p90": 282.24000334739685, + "p95": 290.1119887828827, + "p99": 305.759996175766 + }, + "roundtrip": { + "p50": 517.7599787712097, + "p90": 530.2079916000366, + "p95": 537.4079942703247, + "p99": 563.2960200309753 + }, + "isolatedSum": { + "p50": 545.087993144989, + "p90": 593.2480096817017, + "p95": 606.2079966068268, + "p99": 642.5600051879883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 457.12000131607056, + "p90": 475.48800706863403, + "p95": 483.8080108165741, + "p99": 519.3600058555603 + }, + "combine": { + "p50": 456.38400316238403, + "p90": 467.5520062446594, + "p95": 472.8640019893646, + "p99": 486.6560101509094 + }, + "roundtrip": { + "p50": 889.6639943122864, + "p90": 918.9440011978149, + "p95": 935.3280067443848, + "p99": 977.4079918861389 + }, + "isolatedSum": { + "p50": 913.5040044784546, + "p90": 943.0400133132935, + "p95": 956.6720128059387, + "p99": 1006.0160160064697 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 824.4799971580505, + "p90": 857.088029384613, + "p95": 863.5519742965698, + "p99": 899.071991443634 + }, + "combine": { + "p50": 826.3360261917114, + "p90": 837.6960158348083, + "p95": 845.1840281486511, + "p99": 882.7199935913086 + }, + "roundtrip": { + "p50": 1619.5520162582397, + "p90": 1653.5680294036865, + "p95": 1666.5279865264893, + "p99": 1866.528034210205 + }, + "isolatedSum": { + "p50": 1650.816023349762, + "p90": 1694.7840452194214, + "p95": 1708.736002445221, + "p99": 1781.7919850349426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1568.127989768982, + "p90": 1583.7759971618652, + "p95": 1591.5199518203735, + "p99": 1628.4799575805664 + }, + "combine": { + "p50": 1542.0479774475098, + "p90": 1552.6080131530762, + "p95": 1557.1199655532837, + "p99": 
1580.672025680542 + }, + "roundtrip": { + "p50": 3079.9360275268555, + "p90": 3099.2960929870605, + "p95": 3112.8320693969727, + "p99": 3188.512086868286 + }, + "isolatedSum": { + "p50": 3110.1759672164917, + "p90": 3136.3840103149414, + "p95": 3148.639917373657, + "p99": 3209.1519832611084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-356eaaca", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h200_1ec4b445", + "comparisonKey": "201949f9f95778b3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:56.397290+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 84.41600203514099, + "p90": 106.36799782514572, + "p95": 113.95200341939926, + "p99": 137.2160017490387 + }, + "combine": { + "p50": 70.14399766921997, + "p90": 81.31200075149536, + "p95": 90.7839983701706, + "p99": 100.0640019774437 + }, + "roundtrip": { + "p50": 128.28800082206726, + "p90": 149.3760049343109, + "p95": 157.3760062456131, + "p99": 181.2800019979477 + }, + "isolatedSum": { + "p50": 154.55999970436096, + "p90": 187.67999857664108, + "p95": 204.73600178956985, + "p99": 237.2800037264824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 107.84000158309937, + "p90": 118.04799735546112, + "p95": 126.65599584579468, + "p99": 143.71199905872345 + }, + "combine": { + "p50": 116.92799627780914, + "p90": 123.6800029873848, + "p95": 131.1040073633194, + "p99": 157.1200042963028 + }, + "roundtrip": { + "p50": 201.79200172424316, + "p90": 219.16800737380981, + "p95": 224.35200214385986, + "p99": 242.5280064344406 + }, + "isolatedSum": { + "p50": 224.7679978609085, + "p90": 241.72800034284592, + 
"p95": 257.7600032091141, + "p99": 300.83200335502625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 189.69599902629852, + "p90": 203.16800475120544, + "p95": 211.71200275421143, + "p99": 225.98400712013245 + }, + "combine": { + "p50": 286.75198554992676, + "p90": 297.85600304603577, + "p95": 303.48798632621765, + "p99": 315.90399146080017 + }, + "roundtrip": { + "p50": 446.6240108013153, + "p90": 460.31999588012695, + "p95": 467.4240052700043, + "p99": 482.7199876308441 + }, + "isolatedSum": { + "p50": 476.4479845762253, + "p90": 501.0240077972412, + "p95": 515.1999890804291, + "p99": 541.8879985809326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0ace27d7", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h200_ecb98184", + "comparisonKey": "831d9c3972994858", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:35.824913+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + 
"topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.58399766683578, + "p90": 112.86400258541107, + "p95": 124.28800016641617, + "p99": 138.33600282669067 + }, + "combine": { + "p50": 83.83999764919281, + "p90": 97.120001912117, + "p95": 104.16000336408615, + "p99": 119.55200135707855 + }, + "roundtrip": { + "p50": 154.33600544929504, + "p90": 168.7999963760376, + "p95": 179.00800704956055, + "p99": 206.496000289917 + }, + "isolatedSum": { + "p50": 179.4239953160286, + "p90": 209.98400449752808, + "p95": 228.44800353050232, + "p99": 257.8880041837692 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 115.4559999704361, + "p90": 129.02399897575378, + "p95": 135.74400544166565, + "p99": 154.7199934720993 + }, + "combine": { + "p50": 103.71199995279312, + "p90": 113.21599781513214, + "p95": 118.81600320339203, + "p99": 126.52799487113953 + }, + "roundtrip": { + "p50": 200.25600492954254, + "p90": 215.87200462818146, + "p95": 225.8879989385605, + "p99": 295.3599989414215 + }, + "isolatedSum": { + "p50": 219.16799992322922, + "p90": 242.23999679088593, + "p95": 254.56000864505768, + "p99": 281.24798834323883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 147.64800667762756, + "p90": 165.12000560760498, + "p95": 173.24799299240112, + "p99": 182.17599391937256 + }, + "combine": { + "p50": 141.4400041103363, + "p90": 148.51200580596924, + "p95": 154.84799444675446, + "p99": 174.0799993276596 + }, + "roundtrip": { + "p50": 266.33599400520325, + "p90": 280.7359993457794, + "p95": 290.43200612068176, + "p99": 312.00000643730164 + }, + "isolatedSum": { + "p50": 289.08801078796387, + "p90": 313.6320114135742, + "p95": 328.0959874391556, + "p99": 356.25599324703217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 208.0959975719452, + "p90": 227.29599475860596, + "p95": 235.32800376415253, + "p99": 291.0720109939575 + }, + "combine": { + "p50": 220.15999257564545, + "p90": 232.67200589179993, + "p95": 237.56800591945648, + "p99": 
247.0719963312149 + }, + "roundtrip": { + "p50": 405.7919979095459, + "p90": 419.3919897079468, + "p95": 426.7520010471344, + "p99": 501.50400400161743 + }, + "isolatedSum": { + "p50": 428.25599014759064, + "p90": 459.9680006504059, + "p95": 472.896009683609, + "p99": 538.1440073251724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 333.3759903907776, + "p90": 347.4879860877991, + "p95": 359.26398634910583, + "p99": 538.5919809341431 + }, + "combine": { + "p50": 365.7599985599518, + "p90": 376.0319948196411, + "p95": 378.495991230011, + "p99": 391.4879858493805 + }, + "roundtrip": { + "p50": 669.6320176124573, + "p90": 680.6399822235107, + "p95": 686.9440078735352, + "p99": 714.0160202980042 + }, + "isolatedSum": { + "p50": 699.1359889507294, + "p90": 723.5199809074402, + "p95": 737.7599775791168, + "p99": 930.0799667835236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 586.5600109100342, + "p90": 598.0479717254639, + "p95": 604.1280031204224, + "p99": 915.008008480072 + }, + "combine": { + "p50": 635.7439756393433, + "p90": 647.711992263794, + "p95": 652.0959734916687, + "p99": 671.392023563385 + }, + "roundtrip": { + "p50": 1192.8000450134277, + "p90": 1206.8159580230713, + "p95": 1214.2399549484253, + "p99": 1430.2719831466675 + }, + "isolatedSum": { + "p50": 1222.3039865493774, + "p90": 1245.7599639892578, + "p95": 1256.223976612091, + "p99": 1586.400032043457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + 
"combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f21fead8", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h200_bbacb788", + "comparisonKey": "3830c8f14e1823a5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:11.801082+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 125.15200674533844, + "p90": 133.91999900341034, + "p95": 139.8719996213913, + "p99": 152.0639955997467 + }, + "combine": { + "p50": 119.03999745845795, + "p90": 124.03199821710587, + "p95": 128.28800082206726, + "p99": 136.7039978504181 + }, + "roundtrip": { + "p50": 221.343994140625, + "p90": 233.2800030708313, + "p95": 240.31999707221985, + "p99": 253.85600328445435 + }, + "isolatedSum": { + "p50": 244.1920042037964, + "p90": 257.9519972205162, + "p95": 268.16000044345856, + "p99": 288.7679934501648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.2879992723465, + "p90": 172.03199863433838, + "p95": 176.06399953365326, + "p99": 188.4479969739914 + }, + "combine": { + "p50": 165.69599509239197, + "p90": 173.34400117397308, + "p95": 178.24000120162964, + "p99": 188.76799941062927 + }, + "roundtrip": { + "p50": 302.7519881725311, + "p90": 315.64798951148987, + "p95": 325.6959915161133, + "p99": 339.55198526382446 + }, + "isolatedSum": { + "p50": 329.98399436473846, + "p90": 345.37599980831146, + "p95": 354.3040007352829, + "p99": 377.21599638462067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + 
"p50": 243.20000410079956, + "p90": 252.16001272201538, + "p95": 255.0399899482727, + "p99": 264.8319900035858 + }, + "combine": { + "p50": 266.6879892349243, + "p90": 279.1360020637512, + "p95": 285.18399596214294, + "p99": 303.9360046386719 + }, + "roundtrip": { + "p50": 485.9200119972229, + "p90": 495.61598896980286, + "p95": 499.35999512672424, + "p99": 509.5679759979248 + }, + "isolatedSum": { + "p50": 509.8879933357239, + "p90": 531.2960147857666, + "p95": 540.2239859104156, + "p99": 568.7679946422577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 397.024005651474, + "p90": 405.88799118995667, + "p95": 408.28800201416016, + "p99": 415.71199893951416 + }, + "combine": { + "p50": 448.5119879245758, + "p90": 458.20799469947815, + "p95": 463.20000290870667, + "p99": 473.66398572921753 + }, + "roundtrip": { + "p50": 824.5760202407837, + "p90": 838.3039832115173, + "p95": 843.1680202484131, + "p99": 929.6960234642029 + }, + "isolatedSum": { + "p50": 845.5359935760498, + "p90": 864.0959858894348, + "p95": 871.4880049228668, + "p99": 889.3759846687317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 723.2959866523743, + "p90": 732.5119972229004, + "p95": 736.8639707565308, + "p99": 761.6639733314514 + }, + "combine": { + "p50": 811.3279938697815, + "p90": 821.5680122375488, + "p95": 825.6319761276245, + "p99": 889.1839981079102 + }, + "roundtrip": { + "p50": 1507.7120065689087, + "p90": 1522.0799446105957, + "p95": 
1531.7440032958984, + "p99": 1727.679967880249 + }, + "isolatedSum": { + "p50": 1534.6239805221558, + "p90": 1554.0800094604492, + "p95": 1562.4959468841553, + "p99": 1650.8479714393616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1395.0719833374023, + "p90": 1407.0080518722534, + "p95": 1410.5600118637085, + "p99": 1426.3360500335693 + }, + "combine": { + "p50": 1507.2640180587769, + "p90": 1519.4239616394043, + "p95": 1524.351954460144, + "p99": 1535.2959632873535 + }, + "roundtrip": { + "p50": 2877.887964248657, + "p90": 2895.7440853118896, + "p95": 2902.911901473999, + "p99": 2939.743995666504 + }, + "isolatedSum": { + "p50": 2902.336001396179, + "p90": 2926.4320135116577, + "p95": 2934.9119663238525, + "p99": 2961.632013320923 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-19d8d4df", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h200_c32b0e66", + "comparisonKey": "4fe27ea61ee86e15", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:44.704479+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.67200177907944, + "p90": 128.7360042333603, + "p95": 135.8720064163208, + "p99": 150.81599354743958 + }, + "combine": { + "p50": 107.10400342941284, + "p90": 115.77600240707397, + "p95": 123.10399860143661, + "p99": 131.9040060043335 + }, + "roundtrip": { + "p50": 196.86399400234222, + "p90": 213.31200003623962, + "p95": 222.30400145053864, + "p99": 243.03999543190002 + }, + "isolatedSum": { + "p50": 223.77600520849228, + "p90": 244.51200664043427, + "p95": 
258.9760050177574, + "p99": 282.71999955177307 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.51199769973755, + "p90": 164.86400365829468, + "p95": 174.0799993276596, + "p99": 193.31200420856476 + }, + "combine": { + "p50": 145.28000354766846, + "p90": 161.02400422096252, + "p95": 165.6000018119812, + "p99": 173.2800006866455 + }, + "roundtrip": { + "p50": 264.19198513031006, + "p90": 281.823992729187, + "p95": 291.29600524902344, + "p99": 300.7679879665375 + }, + "isolatedSum": { + "p50": 289.792001247406, + "p90": 325.8880078792572, + "p95": 339.6800011396408, + "p99": 366.59200489521027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.13599705696106, + "p90": 214.62400257587433, + "p95": 223.13599288463593, + "p99": 238.24000358581543 + }, + "combine": { + "p50": 222.88000583648682, + "p90": 232.92799293994904, + "p95": 237.56800591945648, + "p99": 249.85599517822266 + }, + "roundtrip": { + "p50": 398.6879885196686, + "p90": 411.3920032978058, + "p95": 419.23201084136963, + "p99": 431.13601207733154 + }, + "isolatedSum": { + "p50": 426.0160028934479, + "p90": 447.55199551582336, + "p95": 460.7039988040924, + "p99": 488.0959987640381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + 
"globalTokens": 8192, + "dispatch": { + "p50": 313.4079873561859, + "p90": 333.47201347351074, + "p95": 337.119996547699, + "p99": 348.63999485969543 + }, + "combine": { + "p50": 360.4480028152466, + "p90": 370.7840144634247, + "p95": 373.9199936389923, + "p99": 388.67199420928955 + }, + "roundtrip": { + "p50": 650.4319906234741, + "p90": 661.3119840621948, + "p95": 665.0559902191162, + "p99": 689.1840100288391 + }, + "isolatedSum": { + "p50": 673.8559901714325, + "p90": 704.2560279369354, + "p95": 711.0399901866913, + "p99": 737.311989068985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 537.3439788818359, + "p90": 548.8319993019104, + "p95": 552.3840188980103, + "p99": 565.9520030021667 + }, + "combine": { + "p50": 624.1919994354248, + "p90": 638.3039951324463, + "p95": 645.6000208854675, + "p99": 751.2959837913513 + }, + "roundtrip": { + "p50": 1132.7999830245972, + "p90": 1149.888038635254, + "p95": 1155.4880142211914, + "p99": 1190.2400255203247 + }, + "isolatedSum": { + "p50": 1161.5359783172607, + "p90": 1187.1359944343567, + "p95": 1197.9840397834778, + "p99": 1317.247986793518 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.2240214347839, + "p90": 1000.7359981536865, + "p95": 1008.0959796905518, + "p99": 1036.9600057601929 + }, + "combine": { + "p50": 1111.5520000457764, + "p90": 1126.431941986084, + "p95": 1132.7680349349976, + "p99": 1162.8479957580566 + }, + "roundtrip": { + "p50": 
2068.160057067871, + "p90": 2090.9440517425537, + "p95": 2104.736089706421, + "p99": 2339.359998703003 + }, + "isolatedSum": { + "p50": 2095.7760214805603, + "p90": 2127.1679401397705, + "p95": 2140.8640146255493, + "p99": 2199.8080015182495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e0c999a8", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h200_79a4cecc", + "comparisonKey": "e8e424b4e2e76459", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:55.591893+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.21599847078323, + "p90": 125.47199428081512, + "p95": 133.82400572299957, + "p99": 145.88800072669983 + }, + "combine": { + "p50": 103.07200253009796, + "p90": 110.3999987244606, + "p95": 116.38399958610535, + "p99": 126.11199915409088 + }, + "roundtrip": { + "p50": 196.60800695419312, + "p90": 205.31199872493744, + "p95": 213.69600296020508, + "p99": 230.6240051984787 + }, + "isolatedSum": { + "p50": 220.2880010008812, + "p90": 235.87199300527573, + "p95": 250.20800530910492, + "p99": 271.9999998807907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 143.64799857139587, + "p90": 152.8320014476776, + "p95": 157.1200042963028, + "p99": 168.7680035829544 + }, + "combine": { + "p50": 145.91999351978302, + "p90": 152.19199657440186, + "p95": 157.60000050067902, + "p99": 171.39199376106262 + }, + "roundtrip": { + "p50": 263.808012008667, + "p90": 273.72801303863525, + "p95": 279.90400791168213, + "p99": 294.1119968891144 + }, + "isolatedSum": { + "p50": 289.5679920911789, + "p90": 305.02399802207947, + 
"p95": 314.7200047969818, + "p99": 340.15999734401703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.59200036525726, + "p90": 216.0000056028366, + "p95": 225.8560061454773, + "p99": 235.32800376415253 + }, + "combine": { + "p50": 222.81600534915924, + "p90": 229.63200509548187, + "p95": 235.55199801921844, + "p99": 246.39999866485596 + }, + "roundtrip": { + "p50": 398.1119990348816, + "p90": 406.39999508857727, + "p95": 412.06398606300354, + "p99": 435.90399622917175 + }, + "isolatedSum": { + "p50": 425.4080057144165, + "p90": 445.6320106983185, + "p95": 461.40800416469574, + "p99": 481.7280024290085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 307.99999833106995, + "p90": 322.1760094165802, + "p95": 330.4640054702759, + "p99": 341.47199988365173 + }, + "combine": { + "p50": 358.7839901447296, + "p90": 371.7760145664215, + "p95": 377.53599882125854, + "p99": 392.60798692703247 + }, + "roundtrip": { + "p50": 644.2559957504272, + "p90": 662.5919938087463, + "p95": 670.2079772949219, + "p99": 686.0799789428711 + }, + "isolatedSum": { + "p50": 666.7839884757996, + "p90": 693.9520239830017, + "p95": 708.0000042915344, + "p99": 734.0799868106842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + 
"globalTokens": 16384, + "dispatch": { + "p50": 528.4479856491089, + "p90": 538.1439924240112, + "p95": 542.7839756011963, + "p99": 573.7599730491638 + }, + "combine": { + "p50": 612.4799847602844, + "p90": 623.8719820976257, + "p95": 629.0879845619202, + "p99": 735.040009021759 + }, + "roundtrip": { + "p50": 1113.8240098953247, + "p90": 1128.767967224121, + "p95": 1139.1359567642212, + "p99": 1232.0959568023682 + }, + "isolatedSum": { + "p50": 1140.9279704093933, + "p90": 1162.015974521637, + "p95": 1171.8719601631165, + "p99": 1308.7999820709229 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1011.3279819488525, + "p90": 1034.559965133667, + "p95": 1039.4560098648071, + "p99": 1059.6799850463867 + }, + "combine": { + "p50": 1108.9919805526733, + "p90": 1122.27201461792, + "p95": 1125.7280111312866, + "p99": 1164.86394405365 + }, + "roundtrip": { + "p50": 2080.415964126587, + "p90": 2102.7839183807373, + "p95": 2111.776113510132, + "p99": 2256.416082382202 + }, + "isolatedSum": { + "p50": 2120.319962501526, + "p90": 2156.831979751587, + "p95": 2165.1840209960938, + "p99": 2224.5439291000366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ec47e057", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_740192e9", + "comparisonKey": "fe7602926e0644a7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:41.209389+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.8800036907196, + "p90": 127.23200023174286, + "p95": 132.7040046453476, + "p99": 141.92000031471252 + }, + "combine": { + "p50": 111.61600053310394, + "p90": 117.34399944543839, + "p95": 
122.36800044775009, + "p99": 134.07999277114868 + }, + "roundtrip": { + "p50": 211.5200012922287, + "p90": 220.768004655838, + "p95": 227.52000391483307, + "p99": 240.447998046875 + }, + "isolatedSum": { + "p50": 230.49600422382355, + "p90": 244.57599967718124, + "p95": 255.0720050930977, + "p99": 275.9999930858612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 158.1439971923828, + "p90": 165.92000424861908, + "p95": 170.49600183963776, + "p99": 193.4400051832199 + }, + "combine": { + "p50": 156.95999562740326, + "p90": 162.432000041008, + "p95": 165.6000018119812, + "p99": 176.9919991493225 + }, + "roundtrip": { + "p50": 292.57598519325256, + "p90": 302.5279939174652, + "p95": 307.0400059223175, + "p99": 319.8080062866211 + }, + "isolatedSum": { + "p50": 315.1039928197861, + "p90": 328.3520042896271, + "p95": 336.09600365161896, + "p99": 370.4320043325424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 235.58400571346283, + "p90": 246.91200256347656, + "p95": 251.39200687408447, + "p99": 340.7360017299652 + }, + "combine": { + "p50": 262.08001375198364, + "p90": 274.30400252342224, + "p95": 280.64000606536865, + "p99": 302.4640083312988 + }, + "roundtrip": { + "p50": 472.9599952697754, + "p90": 491.9680058956146, + "p95": 500.19198656082153, + "p99": 537.1519923210144 + }, + "isolatedSum": { + "p50": 497.6640194654465, + "p90": 521.2160050868988, + "p95": 532.0320129394531, + "p99": 643.200010061264 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 376.9280016422272, + "p90": 386.1759901046753, + "p95": 390.3680145740509, + "p99": 402.8800129890442 + }, + "combine": { + "p50": 439.8399889469147, + "p90": 449.0239918231964, + "p95": 454.8479914665222, + "p99": 472.4479913711548 + }, + "roundtrip": { + "p50": 792.7680015563965, + "p90": 802.3999929428101, + "p95": 810.0799918174744, + "p99": 846.3039994239807 + }, + "isolatedSum": { + "p50": 816.7679905891418, + "p90": 835.1999819278717, + "p95": 845.2160060405731, + "p99": 875.328004360199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 669.7919964790344, + "p90": 678.2079935073853, + "p95": 681.7920207977295, + "p99": 701.2479901313782 + }, + "combine": { + "p50": 793.1200265884399, + "p90": 803.3919930458069, + "p95": 806.8479895591736, + "p99": 829.5999765396118 + }, + "roundtrip": { + "p50": 1436.5439414978027, + "p90": 1452.5120258331299, + "p95": 1466.6880369186401, + "p99": 1548.6079454421997 + }, + "isolatedSum": { + "p50": 1462.9120230674744, + "p90": 1481.5999865531921, + "p95": 1488.640010356903, + "p99": 1530.84796667099 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1267.680048942566, + "p90": 
1281.0239791870117, + "p95": 1286.7519855499268, + "p99": 1429.6640157699585 + }, + "combine": { + "p50": 1488.4480237960815, + "p90": 1502.079963684082, + "p95": 1507.1680545806885, + "p99": 1521.7920541763306 + }, + "roundtrip": { + "p50": 2731.872081756592, + "p90": 2746.880054473877, + "p95": 2756.2239170074463, + "p99": 2836.384057998657 + }, + "isolatedSum": { + "p50": 2756.1280727386475, + "p90": 2783.1039428710938, + "p95": 2793.9200401306152, + "p99": 2951.456069946289 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9824a789", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h200_af66e0b3", + "comparisonKey": "b7fad76b4bc468cd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:26.203669+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + 
"deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.43200027942657, + "p90": 127.07200646400452, + "p95": 134.39999520778656, + "p99": 149.4079977273941 + }, + "combine": { + "p50": 108.51199924945831, + "p90": 121.15199863910675, + "p95": 126.88000500202179, + "p99": 133.37600231170654 + }, + "roundtrip": { + "p50": 205.08800446987152, + "p90": 217.6000028848648, + "p95": 225.3119945526123, + "p99": 237.47199773788452 + }, + "isolatedSum": { + "p50": 226.9439995288849, + "p90": 248.22400510311127, + "p95": 261.28000020980835, + "p99": 282.78400003910065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.75999426841736, + "p90": 164.76799547672272, + "p95": 169.63200271129608, + "p99": 205.1199972629547 + }, + "combine": { + "p50": 149.9519944190979, + "p90": 163.87200355529785, + "p95": 169.5680022239685, + "p99": 177.37600207328796 + }, + "roundtrip": { + 
"p50": 284.64001417160034, + "p90": 296.9920039176941, + "p95": 303.48798632621765, + "p99": 314.8159980773926 + }, + "isolatedSum": { + "p50": 307.71198868751526, + "p90": 328.63999903202057, + "p95": 339.2000049352646, + "p99": 382.4959993362427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.10400140285492, + "p90": 238.46399784088135, + "p95": 243.6159998178482, + "p99": 254.68799471855164 + }, + "combine": { + "p50": 248.1279969215393, + "p90": 258.976012468338, + "p95": 263.13599944114685, + "p99": 272.19200134277344 + }, + "roundtrip": { + "p50": 456.928014755249, + "p90": 471.52000665664673, + "p95": 478.11201214790344, + "p99": 487.71199584007263 + }, + "isolatedSum": { + "p50": 479.2319983243942, + "p90": 497.44001030921936, + "p95": 506.75199925899506, + "p99": 526.8799960613251 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 373.63201379776, + "p90": 382.1119964122772, + "p95": 385.8560025691986, + "p99": 392.5119936466217 + }, + "combine": { + "p50": 425.4719913005829, + "p90": 435.39199233055115, + "p95": 440.8319890499115, + "p99": 449.47201013565063 + }, + "roundtrip": { + "p50": 776.639997959137, + "p90": 790.8480167388916, + "p95": 800.9600043296814, + "p99": 911.4559888839722 + }, + "isolatedSum": { + "p50": 799.1040050983429, + "p90": 817.5039887428284, + "p95": 826.6879916191101, + "p99": 841.9840037822723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 
183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 662.0799899101257, + "p90": 675.3600239753723, + "p95": 683.7760210037231, + "p99": 741.6960000991821 + }, + "combine": { + "p50": 781.0879945755005, + "p90": 793.0240035057068, + "p95": 798.2720136642456, + "p99": 814.848005771637 + }, + "roundtrip": { + "p50": 1416.416049003601, + "p90": 1434.7840547561646, + "p95": 1444.6079730987549, + "p99": 1488.3840084075928 + }, + "isolatedSum": { + "p50": 1443.1679844856262, + "p90": 1468.384027481079, + "p95": 1482.0480346679688, + "p99": 1556.544005870819 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1247.4559545516968, + "p90": 1258.5279941558838, + "p95": 1262.495994567871, + "p99": 1275.007963180542 + }, + "combine": { + "p50": 1452.8319835662842, + "p90": 1468.5759544372559, + "p95": 1476.5119552612305, + "p99": 1575.711965560913 + }, + "roundtrip": { + "p50": 2674.9119758605957, + "p90": 2694.0479278564453, + "p95": 2702.656030654907, + "p99": 2891.3280963897705 + }, + "isolatedSum": { + "p50": 2700.287938117981, + "p90": 2727.1039485931396, + "p95": 2739.0079498291016, + "p99": 2850.719928741455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fcd9882c", + "identity": 
"h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h200_6bff286b", + "comparisonKey": "fac3c8e2b1e86f75", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:00.675623+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 114.75200206041336, + "p90": 122.04799801111221, + "p95": 129.7920048236847, + "p99": 140.76800644397736 + }, + "combine": { + "p50": 103.7760004401207, + "p90": 109.92000252008438, + "p95": 115.26399850845337, + "p99": 123.26399981975555 + }, + "roundtrip": { + "p50": 196.19199633598328, + "p90": 205.88800311088562, + "p95": 211.74399554729462, + "p99": 221.5999960899353 + }, + "isolatedSum": { + "p50": 218.52800250053406, + "p90": 231.9680005311966, + "p95": 245.05600333213806, + "p99": 264.0320062637329 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.1200029850006, + "p90": 157.6640009880066, + "p95": 165.18400609493256, + "p99": 185.5359971523285 + }, + "combine": { + "p50": 147.74399995803833, + "p90": 152.38399803638458, + "p95": 156.99200332164764, + "p99": 165.50399363040924 + }, + "roundtrip": { + "p50": 269.1839933395386, + "p90": 278.4639894962311, + "p95": 284.64001417160034, + "p99": 302.5279939174652 + }, + "isolatedSum": { + "p50": 296.86400294303894, + "p90": 310.0479990243912, + "p95": 322.1760094165802, + "p99": 351.03999078273773 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.85599541664124, + "p90": 214.78399634361267, + "p95": 221.11999988555908, + "p99": 234.65600609779358 + }, + "combine": { + "p50": 221.76000475883484, + "p90": 
227.87199914455414, + "p95": 232.96000063419342, + "p99": 240.51199853420258 + }, + "roundtrip": { + "p50": 402.52798795700073, + "p90": 411.45598888397217, + "p95": 415.77601432800293, + "p99": 426.5280067920685 + }, + "isolatedSum": { + "p50": 427.6160001754761, + "p90": 442.6559954881668, + "p95": 454.0800005197525, + "p99": 475.16800463199615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 318.87999176979065, + "p90": 329.3760120868683, + "p95": 335.2000117301941, + "p99": 364.6720051765442 + }, + "combine": { + "p50": 362.3040020465851, + "p90": 372.8959858417511, + "p95": 377.9839873313904, + "p99": 387.5519931316376 + }, + "roundtrip": { + "p50": 654.6559929847717, + "p90": 664.2240285873413, + "p95": 668.3520078659058, + "p99": 685.1840019226074 + }, + "isolatedSum": { + "p50": 681.1839938163757, + "p90": 702.2719979286194, + "p95": 713.1839990615845, + "p99": 752.2239983081818 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 543.2959794998169, + "p90": 553.6959767341614, + "p95": 559.6479773521423, + "p99": 574.8479962348938 + }, + "combine": { + "p50": 624.4159936904907, + "p90": 634.6880197525024, + "p95": 637.7599835395813, + "p99": 647.4239826202393 + }, + "roundtrip": { + "p50": 1143.7760591506958, + "p90": 1154.8479795455933, + "p95": 1159.6800088882446, + "p99": 1186.079978942871 + }, + "isolatedSum": { + "p50": 1167.7119731903076, + "p90": 1188.3839964866638, + "p95": 1197.4079608917236, + "p99": 
1222.271978855133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1011.9680166244507, + "p90": 1032.5440168380737, + "p95": 1039.199948310852, + "p99": 1052.9279708862305 + }, + "combine": { + "p50": 1144.8639631271362, + "p90": 1156.9279432296753, + "p95": 1163.2959842681885, + "p99": 1261.728048324585 + }, + "roundtrip": { + "p50": 2125.6000995635986, + "p90": 2142.848014831543, + "p95": 2149.5680809020996, + "p99": 2222.8479385375977 + }, + "isolatedSum": { + "p50": 2156.831979751587, + "p90": 2189.471960067749, + "p95": 2202.4959325790405, + "p99": 2314.6560192108154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8016235b", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h200_eaca5b26", + "comparisonKey": "991f8d3fd54c2cc2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:23.536677+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.01599776744843, + "p90": 131.71200454235077, + "p95": 140.9599930047989, + "p99": 201.37600600719452 + }, + "combine": { + "p50": 115.10399729013443, + "p90": 120.99199742078781, + "p95": 128.28800082206726, + "p99": 138.20800185203552 + }, + "roundtrip": { + "p50": 216.19200706481934, + "p90": 224.63999688625336, + "p95": 235.20000278949738, + "p99": 256.8640112876892 + }, + "isolatedSum": { + "p50": 237.11999505758286, + "p90": 252.70400196313858, + "p95": 269.24799382686615, + "p99": 339.58400785923004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 162.59199380874634, + "p90": 169.8240041732788, + "p95": 174.01599884033203, + "p99": 181.7920058965683 + }, + "combine": { + "p50": 159.9999964237213, + "p90": 170.68800330162048, + "p95": 174.72000420093536, + "p99": 186.3040030002594 + }, + "roundtrip": { + "p50": 298.17599058151245, + "p90": 312.5759959220886, + "p95": 317.6000118255615, + "p99": 329.47200536727905 + }, + "isolatedSum": { + "p50": 322.59199023246765, + "p90": 340.5120074748993, + "p95": 348.7360030412674, + "p99": 368.0960088968277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 234.72000658512115, + "p90": 243.03999543190002, + "p95": 247.39199876785278, + "p99": 255.48800826072693 + }, + "combine": { + "p50": 259.2960000038147, + "p90": 270.9439992904663, + "p95": 279.00800108909607, + "p99": 301.63198709487915 + }, + "roundtrip": { + "p50": 472.0959961414337, + "p90": 493.9520061016083, + "p95": 500.2239942550659, + "p99": 515.7120227813721 + }, + "isolatedSum": { + "p50": 494.01600658893585, + "p90": 513.9839947223663, + "p95": 526.3999998569489, + "p99": 557.1199953556061 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 376.8320083618164, + "p90": 386.2079977989197, + "p95": 389.6639943122864, + "p99": 401.66398882865906 + }, + "combine": { + "p50": 439.5520091056824, + "p90": 449.5680034160614, + "p95": 456.86399936676025, + "p99": 
474.2720127105713 + }, + "roundtrip": { + "p50": 793.0240035057068, + "p90": 804.6720027923584, + "p95": 810.7839822769165, + "p99": 831.3279747962952 + }, + "isolatedSum": { + "p50": 816.3840174674988, + "p90": 835.7760012149811, + "p95": 846.5279936790466, + "p99": 875.9360015392303 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 680.2560091018677, + "p90": 691.6159987449646, + "p95": 695.967972278595, + "p99": 777.1199941635132 + }, + "combine": { + "p50": 777.504026889801, + "p90": 786.4959836006165, + "p95": 790.4000282287598, + "p99": 803.6159873008728 + }, + "roundtrip": { + "p50": 1429.6319484710693, + "p90": 1443.8400268554688, + "p95": 1450.5599737167358, + "p99": 1569.983959197998 + }, + "isolatedSum": { + "p50": 1457.7600359916687, + "p90": 1478.111982345581, + "p95": 1486.3680005073547, + "p99": 1580.735981464386 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1281.2800407409668, + "p90": 1294.0800189971924, + "p95": 1299.615979194641, + "p99": 1393.5999870300293 + }, + "combine": { + "p50": 1461.8879556655884, + "p90": 1475.9360551834106, + "p95": 1485.1839542388916, + "p99": 1672.1919775009155 + }, + "roundtrip": { + "p50": 2718.4319496154785, + "p90": 2734.976053237915, + "p95": 2742.5599098205566, + "p99": 2918.816089630127 + }, + "isolatedSum": { + "p50": 2743.167996406555, + "p90": 2770.016074180603, + "p95": 2784.7999334335327, + "p99": 3065.791964530945 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cbc447b9", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h200_e3f779e8", + "comparisonKey": "6573ce5d5df67b8f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:48.852038+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "1.2.1", + 
"imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.47200042009354, + "p90": 128.86400520801544, + "p95": 134.88000631332397, + "p99": 151.58399939537048 + }, + "combine": { + "p50": 103.80800068378448, + "p90": 112.92800307273865, + "p95": 119.93599683046341, + "p99": 129.50399518013 + }, + "roundtrip": { + "p50": 195.39199769496918, + "p90": 204.3199986219406, + "p95": 213.50400149822235, + "p99": 225.24799406528473 + }, + "isolatedSum": { + "p50": 221.28000110387802, + "p90": 241.7920082807541, + "p95": 254.81600314378738, + "p99": 281.0879945755005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.79200744628906, + "p90": 161.85599565505981, + "p95": 165.69599509239197, + "p99": 176.60799622535706 + }, + "combine": { + "p50": 142.81600713729858, + "p90": 156.95999562740326, + "p95": 162.4639928340912, + "p99": 168.83200407028198 + }, + "roundtrip": { + "p50": 263.4879946708679, + "p90": 279.80801463127136, + "p95": 286.17599606513977, + "p99": 298.14401268959045 + }, + "isolatedSum": { + "p50": 288.60801458358765, + "p90": 318.8159912824631, + "p95": 328.15998792648315, + "p99": 345.44000029563904 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.4960070848465, + "p90": 218.6879962682724, + "p95": 227.61599719524384, + "p99": 246.3040053844452 + }, + "combine": { + "p50": 227.29599475860596, + "p90": 242.46400594711304, + "p95": 246.87999486923218, + "p99": 272.5760042667389 + }, + "roundtrip": { + "p50": 402.0799994468689, + "p90": 420.03199458122253, + "p95": 426.07998847961426, + "p99": 534.1119766235352 + }, + "isolatedSum": { + "p50": 429.79200184345245, + "p90": 461.15200221538544, + "p95": 474.495992064476, + "p99": 518.8800096511841 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 311.5200102329254, + "p90": 332.28799700737, + "p95": 337.8559947013855, + "p99": 353.7920117378235 + }, + "combine": { + "p50": 355.45599460601807, + "p90": 368.5759902000427, + "p95": 373.05599451065063, + "p99": 386.9439959526062 + }, + "roundtrip": { + "p50": 643.8720226287842, + "p90": 665.5679941177368, + "p95": 675.9039759635925, + "p99": 713.4079933166504 + }, + "isolatedSum": { + "p50": 666.9760048389435, + "p90": 700.8639872074127, + "p95": 710.9119892120361, + "p99": 740.7360076904297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 530.0800204277039, + "p90": 544.5119738578796, + "p95": 551.5199899673462, + "p99": 584.991991519928 + }, + "combine": { + "p50": 613.2479906082153, + "p90": 623.0400204658508, + "p95": 628.928005695343, + "p99": 
664.6400094032288 + }, + "roundtrip": { + "p50": 1114.0480041503906, + "p90": 1127.7439594268799, + "p95": 1138.0480527877808, + "p99": 1216.7680263519287 + }, + "isolatedSum": { + "p50": 1143.3280110359192, + "p90": 1167.5519943237305, + "p95": 1180.4479956626892, + "p99": 1249.6320009231567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 985.9200119972229, + "p90": 1005.6960582733154, + "p95": 1014.4319534301758, + "p99": 1236.7360591888428 + }, + "combine": { + "p50": 1123.5840320587158, + "p90": 1135.4880332946777, + "p95": 1139.5519971847534, + "p99": 1330.3359746932983 + }, + "roundtrip": { + "p50": 2079.1680812835693, + "p90": 2103.1041145324707, + "p95": 2117.9521083831787, + "p99": 2298.7520694732666 + }, + "isolatedSum": { + "p50": 2109.5040440559387, + "p90": 2141.184091567993, + "p95": 2153.983950614929, + "p99": 2567.072033882141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5612d65d", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_cc176d35", + "comparisonKey": "7b7235353c12a0be", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:51.914907+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 124.22399967908859, + "p90": 134.07999277114868, + "p95": 139.0720009803772, + "p99": 158.78400206565857 + }, + "combine": { + "p50": 112.19199746847153, + "p90": 132.9919993877411, + "p95": 137.472003698349, + "p99": 153.56799960136414 + }, + "roundtrip": { + "p50": 212.76800334453583, + "p90": 228.28799486160278, + "p95": 235.83999276161194, + "p99": 245.7599937915802 + }, + "isolatedSum": { + "p50": 236.41599714756012, + "p90": 
267.07199215888977, + "p95": 276.5440046787262, + "p99": 312.3520016670227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.6639941930771, + "p90": 172.7360039949417, + "p95": 180.67200481891632, + "p99": 199.26400482654572 + }, + "combine": { + "p50": 157.151997089386, + "p90": 164.2560064792633, + "p95": 171.4559942483902, + "p99": 182.97599256038666 + }, + "roundtrip": { + "p50": 295.52000761032104, + "p90": 310.4639947414398, + "p95": 317.56800413131714, + "p99": 330.55999875068665 + }, + "isolatedSum": { + "p50": 318.8159912824631, + "p90": 336.992010474205, + "p95": 352.1279990673065, + "p99": 382.2399973869324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 236.38400435447693, + "p90": 247.00799584388733, + "p95": 253.37600708007812, + "p99": 279.58399057388306 + }, + "combine": { + "p50": 262.4639868736267, + "p90": 275.55200457572937, + "p95": 282.24000334739685, + "p99": 312.00000643730164 + }, + "roundtrip": { + "p50": 475.74400901794434, + "p90": 501.50400400161743, + "p95": 509.3119740486145, + "p99": 755.1360130310059 + }, + "isolatedSum": { + "p50": 498.84799122810364, + "p90": 522.5600004196167, + "p95": 535.616010427475, + "p99": 591.5839970111847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.35999035835266, + "p90": 391.61598682403564, + "p95": 396.86399698257446, + "p99": 413.59999775886536 + }, + "combine": { + "p50": 440.5120015144348, + "p90": 452.12799310684204, + "p95": 457.4719965457916, + "p99": 470.11199593544006 + }, + "roundtrip": { + "p50": 796.9599962234497, + "p90": 821.7920064926147, + "p95": 834.4320058822632, + "p99": 848.6080169677734 + }, + "isolatedSum": { + "p50": 819.8719918727875, + "p90": 843.7439799308777, + "p95": 854.3359935283661, + "p99": 883.7119936943054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 670.3680157661438, + "p90": 680.6079745292664, + "p95": 687.4560117721558, + "p99": 760.0640058517456 + }, + "combine": { + "p50": 794.5600152015686, + "p90": 808.6720108985901, + "p95": 818.015992641449, + "p99": 851.6160249710083 + }, + "roundtrip": { + "p50": 1436.5119934082031, + "p90": 1454.5279741287231, + "p95": 1468.6399698257446, + "p99": 1530.2079916000366 + }, + "isolatedSum": { + "p50": 1464.9280309677124, + "p90": 1489.2799854278564, + "p95": 1505.4720044136047, + "p99": 1611.680030822754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1267.2959566116333, + "p90": 1280.6400060653687, + "p95": 1288.4479761123657, + "p99": 1407.1680307388306 + }, + "combine": { + "p50": 1492.8319454193115, + "p90": 1511.9680166244507, + "p95": 1523.0400562286377, + "p99": 1728.2240390777588 + }, + 
"roundtrip": { + "p50": 2734.783887863159, + "p90": 2756.8960189819336, + "p95": 2779.360055923462, + "p99": 2991.9679164886475 + }, + "isolatedSum": { + "p50": 2760.127902030945, + "p90": 2792.6080226898193, + "p95": 2811.4880323410034, + "p99": 3135.3920698165894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6b5dc6ad", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_6cbbd029", + "comparisonKey": "b2db5a87a4a52091", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:16.692533+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.63200163841248, + "p90": 126.01600587368011, + "p95": 132.06399977207184, + "p99": 147.87200093269348 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 111.16799712181091, + "p95": 116.03199690580368, + "p99": 127.13600695133209 + }, + "roundtrip": { + "p50": 198.5280066728592, + "p90": 211.58400177955627, + "p95": 222.91199862957, + "p99": 248.6719936132431 + }, + "isolatedSum": { + "p50": 221.98399901390076, + "p90": 237.18400299549103, + "p95": 248.09599667787552, + "p99": 275.0080078840256 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 145.24799585342407, + "p90": 156.0640037059784, + "p95": 163.7440025806427, + "p99": 173.0560064315796 + }, + "combine": { + "p50": 144.6080058813095, + "p90": 152.44799852371216, + "p95": 158.27199816703796, + "p99": 166.20799899101257 + }, + "roundtrip": { + "p50": 265.1199996471405, + "p90": 286.24001145362854, + "p95": 295.00800371170044, + "p99": 319.7439908981323 + }, + "isolatedSum": { + "p50": 
289.8560017347336, + "p90": 308.51200222969055, + "p95": 322.01600074768066, + "p99": 339.26400542259216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.42400670051575, + "p90": 221.98399901390076, + "p95": 228.99200022220612, + "p99": 243.93600225448608 + }, + "combine": { + "p50": 222.46399521827698, + "p90": 228.64000499248505, + "p95": 232.12799429893494, + "p99": 239.96800184249878 + }, + "roundtrip": { + "p50": 398.3039855957031, + "p90": 409.66400504112244, + "p95": 416.6400134563446, + "p99": 552.9279708862305 + }, + "isolatedSum": { + "p50": 425.8880019187927, + "p90": 450.6240040063858, + "p95": 461.11999452114105, + "p99": 483.90400409698486 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.032014131546, + "p90": 331.9999873638153, + "p95": 336.9919955730438, + "p99": 353.15200686454773 + }, + "combine": { + "p50": 355.19999265670776, + "p90": 366.2720024585724, + "p95": 372.5759983062744, + "p99": 728.223979473114 + }, + "roundtrip": { + "p50": 644.2559957504272, + "p90": 660.5759859085083, + "p95": 665.120005607605, + "p99": 685.9520077705383 + }, + "isolatedSum": { + "p50": 667.2320067882538, + "p90": 698.2719898223877, + "p95": 709.5679938793182, + "p99": 1081.3759863376617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 525.8560180664062, + "p90": 538.9119982719421, + "p95": 545.3439950942993, + "p99": 578.6240100860596 + }, + "combine": { + "p50": 617.8240180015564, + "p90": 627.5200247764587, + "p95": 631.3920021057129, + "p99": 652.5120139122009 + }, + "roundtrip": { + "p50": 1118.175983428955, + "p90": 1131.8399906158447, + "p95": 1138.0800008773804, + "p99": 1189.3119812011719 + }, + "isolatedSum": { + "p50": 1143.6800360679626, + "p90": 1166.4320230484009, + "p95": 1176.7359972000122, + "p99": 1231.1360239982605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 999.6160268783569, + "p90": 1026.0800123214722, + "p95": 1031.6799879074097, + "p99": 1106.592059135437 + }, + "combine": { + "p50": 1119.871973991394, + "p90": 1131.0399770736694, + "p95": 1135.3280544281006, + "p99": 1236.1279726028442 + }, + "roundtrip": { + "p50": 2082.0798873901367, + "p90": 2110.912084579468, + "p95": 2122.1439838409424, + "p99": 2315.999984741211 + }, + "isolatedSum": { + "p50": 2119.488000869751, + "p90": 2157.1199893951416, + "p95": 2167.0080423355103, + "p99": 2342.7200317382812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-eb438b89", + "identity": "h200|deepep|v1|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_0bcc3225", + "comparisonKey": "02007f9dd43422d0", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T10:53:19.789906+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.13599818944931, + "p90": 133.85599851608276, + "p95": 144.86399292945862, + "p99": 
158.1760048866272 + }, + "combine": { + "p50": 104.35199737548828, + "p90": 117.0559972524643, + "p95": 123.99999797344208, + "p99": 130.5920034646988 + }, + "roundtrip": { + "p50": 196.19199633598328, + "p90": 213.02400529384613, + "p95": 223.51999580860138, + "p99": 242.0479953289032 + }, + "isolatedSum": { + "p50": 223.4879955649376, + "p90": 250.91199576854706, + "p95": 268.8639909029007, + "p99": 288.768008351326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 142.84799993038177, + "p90": 163.13600540161133, + "p95": 168.99199783802032, + "p99": 179.32799458503723 + }, + "combine": { + "p50": 144.16000247001648, + "p90": 160.25599837303162, + "p95": 163.58399391174316, + "p99": 172.57599532604218 + }, + "roundtrip": { + "p50": 263.71198892593384, + "p90": 283.1679880619049, + "p95": 289.40799832344055, + "p99": 303.26399207115173 + }, + "isolatedSum": { + "p50": 287.00800240039825, + "p90": 323.39200377464294, + "p95": 332.5759917497635, + "p99": 351.9039899110794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.16800475120544, + "p90": 222.49600291252136, + "p95": 229.95199263095856, + "p99": 240.31999707221985 + }, + "combine": { + "p50": 225.0239998102188, + "p90": 240.6720072031021, + "p95": 244.9920028448105, + "p99": 259.7759962081909 + }, + "roundtrip": { + "p50": 402.52798795700073, + "p90": 419.0720021724701, + "p95": 425.4080057144165, + "p99": 441.0560131072998 + }, + "isolatedSum": { + "p50": 428.19200456142426, 
+ "p90": 463.1680101156235, + "p95": 474.94399547576904, + "p99": 500.09599328041077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 309.6640110015869, + "p90": 325.9519934654236, + "p95": 331.9999873638153, + "p99": 353.2800078392029 + }, + "combine": { + "p50": 354.3680012226105, + "p90": 366.36799573898315, + "p95": 372.8320002555847, + "p99": 433.21600556373596 + }, + "roundtrip": { + "p50": 641.152024269104, + "p90": 655.3919911384583, + "p95": 661.9200110435486, + "p99": 676.6080260276794 + }, + "isolatedSum": { + "p50": 664.0320122241974, + "p90": 692.3199892044067, + "p95": 704.8319876194, + "p99": 786.4960134029388 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 524.832010269165, + "p90": 535.9359979629517, + "p95": 542.4000024795532, + "p99": 567.7760243415833 + }, + "combine": { + "p50": 619.3600296974182, + "p90": 630.5919885635376, + "p95": 633.9200139045715, + "p99": 658.4320068359375 + }, + "roundtrip": { + "p50": 1122.5919723510742, + "p90": 1152.735948562622, + "p95": 1158.3679914474487, + "p99": 1306.1439990997314 + }, + "isolatedSum": { + "p50": 1144.1920399665833, + "p90": 1166.5279865264893, + "p95": 1176.3200163841248, + "p99": 1226.2080311775208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { 
+ "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1004.8320293426514, + "p90": 1032.8320264816284, + "p95": 1042.3359870910645, + "p99": 1137.3759508132935 + }, + "combine": { + "p50": 1119.9359893798828, + "p90": 1132.0960521697998, + "p95": 1138.1759643554688, + "p99": 1202.4320363998413 + }, + "roundtrip": { + "p50": 2079.1358947753906, + "p90": 2109.4720363616943, + "p95": 2124.608039855957, + "p99": 2288.3520126342773 + }, + "isolatedSum": { + "p50": 2124.768018722534, + "p90": 2164.928078651428, + "p95": 2180.511951446533, + "p99": 2339.8079872131348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-34fb9b55", + "identity": "h200|deepep|v1|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_d1bd20fa", + "comparisonKey": "368cbb6883063cc8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:21.740321+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": 
{ + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 103.07200253009796, + "p90": 108.83200168609619, + "p95": 113.95200341939926, + "p99": 192.35199689865112 + }, + "combine": { + "p50": 104.19200360774994, + "p90": 108.96000266075134, + "p95": 111.35999858379364, + "p99": 116.22399836778641 + }, + "roundtrip": { + "p50": 184.25600230693817, + "p90": 189.43999707698822, + "p95": 193.88799369335175, + "p99": 200.95999538898468 + }, + "isolatedSum": { + "p50": 207.2640061378479, + "p90": 217.79200434684753, + "p95": 225.3120020031929, + "p99": 308.57599526643753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 131.45600259304047, + "p90": 137.66400516033173, + "p95": 142.0159935951233, + "p99": 155.93600273132324 + }, + "combine": { + "p50": 143.61600577831268, + "p90": 
149.79200065135956, + "p95": 153.60000729560852, + "p99": 163.71199488639832 + }, + "roundtrip": { + "p50": 250.20799040794373, + "p90": 255.67999482154846, + "p95": 259.42400097846985, + "p99": 271.7759907245636 + }, + "isolatedSum": { + "p50": 275.07200837135315, + "p90": 287.4560058116913, + "p95": 295.6160008907318, + "p99": 319.64799761772156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 191.80800020694733, + "p90": 197.50399887561798, + "p95": 200.57600736618042, + "p99": 209.53600108623505 + }, + "combine": { + "p50": 223.23200106620789, + "p90": 229.18400168418884, + "p95": 231.455996632576, + "p99": 239.19999599456787 + }, + "roundtrip": { + "p50": 387.07199692726135, + "p90": 393.887996673584, + "p95": 396.92801237106323, + "p99": 405.7919979095459 + }, + "isolatedSum": { + "p50": 415.0400012731552, + "p90": 426.6880005598068, + "p95": 432.0320039987564, + "p99": 448.7359970808029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 301.63198709487915, + "p90": 308.1279993057251, + "p95": 311.0400140285492, + "p99": 329.5679986476898 + }, + "combine": { + "p50": 354.94399070739746, + "p90": 361.91999912261963, + "p95": 365.9519851207733, + "p99": 378.30400466918945 + }, + "roundtrip": { + "p50": 630.3039789199829, + "p90": 637.6960277557373, + "p95": 641.759991645813, + "p99": 673.5039949417114 + }, + "isolatedSum": { + "p50": 656.5759778022766, + "p90": 670.0479984283447, + "p95": 676.9919991493225, + "p99": 
707.8720033168793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 515.2959823608398, + "p90": 523.1360197067261, + "p95": 526.6879796981812, + "p99": 550.4639744758606 + }, + "combine": { + "p50": 622.6879954338074, + "p90": 632.6720118522644, + "p95": 636.0960006713867, + "p99": 653.3439755439758 + }, + "roundtrip": { + "p50": 1110.5600595474243, + "p90": 1120.736002922058, + "p95": 1123.9360570907593, + "p99": 1132.8959465026855 + }, + "isolatedSum": { + "p50": 1137.9839777946472, + "p90": 1155.8080315589905, + "p95": 1162.7839803695679, + "p99": 1203.8079500198364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 972.8959798812866, + "p90": 996.8960285186768, + "p95": 1005.3119659423828, + "p99": 1022.5600004196167 + }, + "combine": { + "p50": 1120.9280490875244, + "p90": 1132.6719522476196, + "p95": 1141.0239934921265, + "p99": 1204.4479846954346 + }, + "roundtrip": { + "p50": 2060.703992843628, + "p90": 2083.9359760284424, + "p95": 2095.4558849334717, + "p99": 2269.9201107025146 + }, + "isolatedSum": { + "p50": 2093.824028968811, + "p90": 2129.5679807662964, + "p95": 2146.3359594345093, + "p99": 2227.0079851150513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4c94b0db", + "identity": 
"h200|deepep|v1|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_76b40c99", + "comparisonKey": "64fb812424481671", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:56.928817+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 202.4960070848465, + "p90": 242.3039972782135, + "p95": 253.02401185035706, + "p99": 272.8320062160492 + }, + "combine": { + "p50": 72.4480003118515, + "p90": 79.77599650621414, + "p95": 90.97599983215332, + "p99": 97.02400118112564 + }, + "roundtrip": { + "p50": 263.7439966201782, + "p90": 303.3919930458069, + "p95": 314.65598940849304, + "p99": 368.73599886894226 + }, + "isolatedSum": { + "p50": 274.944007396698, + "p90": 322.07999378442764, + "p95": 344.0000116825104, + "p99": 369.85600739717484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 230.71999847888947, + "p90": 260.99199056625366, + "p95": 270.112007856369, + "p99": 294.7840094566345 + }, + "combine": { + "p50": 99.10400211811066, + "p90": 112.8000020980835, + "p95": 118.43200027942657, + "p99": 127.74400413036346 + }, + "roundtrip": { + "p50": 315.39198756217957, + "p90": 338.6879861354828, + "p95": 346.9119966030121, + "p99": 366.9759929180145 + }, + "isolatedSum": { + "p50": 329.8240005970001, + "p90": 373.79199266433716, + "p95": 388.5440081357956, + "p99": 422.528013586998 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 306.8160116672516, + "p90": 335.00799536705017, + "p95": 343.9359962940216, + "p99": 382.04801082611084 + }, + "combine": { + "p50": 149.6960073709488, + "p90": 158.24000537395477, + "p95": 163.55200111865997, + "p99": 168.7680035829544 + }, + "roundtrip": { + "p50": 
442.1440064907074, + "p90": 475.2959907054901, + "p95": 487.16801404953003, + "p99": 508.2240104675293 + }, + "isolatedSum": { + "p50": 456.5120190382004, + "p90": 493.24800074100494, + "p95": 507.4879974126816, + "p99": 550.8160144090652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 451.55200362205505, + "p90": 489.24800753593445, + "p95": 498.1440007686615, + "p99": 523.7439870834351 + }, + "combine": { + "p50": 243.6159998178482, + "p90": 252.9599964618683, + "p95": 257.0880055427551, + "p99": 263.2000148296356 + }, + "roundtrip": { + "p50": 675.9039759635925, + "p90": 701.2479901313782, + "p95": 712.4800086021423, + "p99": 785.0239872932434 + }, + "isolatedSum": { + "p50": 695.1680034399033, + "p90": 742.2080039978027, + "p95": 755.2320063114166, + "p99": 786.9440019130707 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 737.0880246162415, + "p90": 748.1600046157837, + "p95": 753.1200051307678, + "p99": 948.5759735107422 + }, + "combine": { + "p50": 416.03198647499084, + "p90": 422.7199852466583, + "p95": 427.3279905319214, + "p99": 457.7600061893463 + }, + "roundtrip": { + "p50": 1142.5280570983887, + "p90": 1155.9040546417236, + "p95": 1167.8400039672852, + "p99": 1199.455976486206 + }, + "isolatedSum": { + "p50": 1153.1200110912323, + "p90": 1170.879989862442, + "p95": 1180.4479956626892, + "p99": 1406.3359797000885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 
716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1386.0479593276978, + "p90": 1396.6399431228638, + "p95": 1399.9359607696533, + "p99": 1413.2800102233887 + }, + "combine": { + "p50": 763.5200023651123, + "p90": 771.6159820556641, + "p95": 774.8159766197205, + "p99": 784.8320007324219 + }, + "roundtrip": { + "p50": 2153.183937072754, + "p90": 2169.663906097412, + "p95": 2175.9679317474365, + "p99": 2377.7599334716797 + }, + "isolatedSum": { + "p50": 2149.56796169281, + "p90": 2168.255925178528, + "p95": 2174.751937389374, + "p99": 2198.1120109558105 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9e9af0e3", + "identity": "h200|deepep|v1|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_76b40c99", + "comparisonKey": "44b55d691d04fcd6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:23.842432+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + 
"dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 208.03199708461761, + "p90": 254.46400046348572, + "p95": 269.9519991874695, + "p99": 372.48000502586365 + }, + "combine": { + "p50": 79.9039974808693, + "p90": 94.36800330877304, + "p95": 97.6639986038208, + "p99": 106.27199709415436 + }, + "roundtrip": { + "p50": 272.70400524139404, + "p90": 314.0160143375397, + "p95": 323.87199997901917, + "p99": 418.91199350357056 + }, + "isolatedSum": { + "p50": 287.9359945654869, + "p90": 348.83200377225876, + "p95": 367.6159977912903, + "p99": 478.752002120018 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 243.83999407291412, + "p90": 
283.7440073490143, + "p95": 293.5360074043274, + "p99": 324.7680068016052 + }, + "combine": { + "p50": 112.57600039243698, + "p90": 124.1919994354248, + "p95": 128.09599936008453, + "p99": 133.53599607944489 + }, + "roundtrip": { + "p50": 340.4479920864105, + "p90": 374.65599179267883, + "p95": 382.207989692688, + "p99": 402.8800129890442 + }, + "isolatedSum": { + "p50": 356.4159944653511, + "p90": 407.9360067844391, + "p95": 421.6320067644119, + "p99": 458.3040028810501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 332.4800133705139, + "p90": 363.1359934806824, + "p95": 372.79999256134033, + "p99": 389.6639943122864 + }, + "combine": { + "p50": 175.10400712490082, + "p90": 183.07200074195862, + "p95": 186.36800348758698, + "p99": 196.03200256824493 + }, + "roundtrip": { + "p50": 488.5120093822479, + "p90": 511.26402616500854, + "p95": 519.6160078048706, + "p99": 539.135992527008 + }, + "isolatedSum": { + "p50": 507.58402049541473, + "p90": 546.207994222641, + "p95": 559.1679960489273, + "p99": 585.6959968805313 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 495.4240024089813, + "p90": 519.711971282959, + "p95": 531.3599705696106, + "p99": 573.7919807434082 + }, + "combine": { + "p50": 276.06400847435, + "p90": 286.3999903202057, + "p95": 289.66400027275085, + "p99": 296.06398940086365 + }, + "roundtrip": { + "p50": 764.3200159072876, + "p90": 789.3440127372742, + "p95": 801.2480139732361, + "p99": 885.1839900016785 + }, 
+ "isolatedSum": { + "p50": 771.4880108833313, + "p90": 806.1119616031647, + "p95": 821.0239708423615, + "p99": 869.8559701442719 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 862.496018409729, + "p90": 870.2399730682373, + "p95": 873.3440041542053, + "p99": 1052.575945854187 + }, + "combine": { + "p50": 477.3760139942169, + "p90": 484.2880070209503, + "p95": 487.3279929161072, + "p99": 492.99201369285583 + }, + "roundtrip": { + "p50": 1325.0880241394043, + "p90": 1333.4720134735107, + "p95": 1337.0239734649658, + "p99": 1603.1359434127808 + }, + "isolatedSum": { + "p50": 1339.872032403946, + "p90": 1354.5279800891876, + "p95": 1360.6719970703125, + "p99": 1545.5679595470428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1631.4560174942017, + "p90": 1643.3279514312744, + "p95": 1649.0559577941895, + "p99": 1749.5360374450684 + }, + "combine": { + "p50": 880.511999130249, + "p90": 889.5360231399536, + "p95": 893.5040235519409, + "p99": 901.6320109367371 + }, + "roundtrip": { + "p50": 2484.031915664673, + "p90": 2535.9039306640625, + "p95": 2554.3038845062256, + "p99": 2740.000009536743 + }, + "isolatedSum": { + "p50": 2511.9680166244507, + "p90": 2532.863974571228, + "p95": 2542.5599813461304, + "p99": 2651.1680483818054 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 5, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-01055abb", + "identity": "h200|deepep|v1|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_76b40c99", + "comparisonKey": "74c46f6b3649aa7c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:51.973401+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + 
"createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 216.3199931383133, + "p90": 260.47998666763306, + "p95": 265.9839987754822, + "p99": 290.8479869365692 + }, + "combine": { + "p50": 86.7839977145195, + "p90": 95.2640026807785, + "p95": 105.3759977221489, + "p99": 116.44800007343292 + }, + "roundtrip": { + "p50": 290.0159955024719, + "p90": 334.9120020866394, + "p95": 343.1679904460907, + "p99": 357.1839928627014 + }, + "isolatedSum": { + "p50": 303.1039908528328, + "p90": 355.74398934841156, + "p95": 371.3599964976311, + "p99": 407.29598701000214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 264.5759880542755, + "p90": 303.3919930458069, + "p95": 315.3280019760132, + "p99": 334.04800295829773 + }, + "combine": { + "p50": 119.71200257539749, + "p90": 130.62399625778198, + "p95": 138.75199854373932, + "p99": 149.3760049343109 + }, + "roundtrip": { + "p50": 377.6319921016693, + "p90": 418.3039963245392, + "p95": 428.47999930381775, + "p99": 465.1840031147003 + }, + "isolatedSum": { + "p50": 384.287990629673, + "p90": 434.01598930358887, + "p95": 454.0800005197525, + "p99": 483.42400789260864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 366.5919899940491, + "p90": 403.1359851360321, + "p95": 416.159987449646, + "p99": 513.5359764099121 + }, + "combine": { + "p50": 192.89599359035492, + "p90": 
203.5519927740097, + "p95": 208.8640034198761, + "p99": 221.82400524616241 + }, + "roundtrip": { + "p50": 545.5999970436096, + "p90": 579.584002494812, + "p95": 587.9679918289185, + "p99": 739.6479845046997 + }, + "isolatedSum": { + "p50": 559.487983584404, + "p90": 606.6879779100418, + "p95": 625.0239908695221, + "p99": 735.3599816560745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 556.4799904823303, + "p90": 590.5920267105103, + "p95": 605.791985988617, + "p99": 686.1760020256042 + }, + "combine": { + "p50": 309.1840147972107, + "p90": 318.4640109539032, + "p95": 324.0000009536743, + "p99": 358.17599296569824 + }, + "roundtrip": { + "p50": 860.5759739875793, + "p90": 893.5999870300293, + "p95": 909.3760251998901, + "p99": 973.1519818305969 + }, + "isolatedSum": { + "p50": 865.664005279541, + "p90": 909.0560376644135, + "p95": 929.7919869422913, + "p99": 1044.3519949913025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 992.8960204124451, + "p90": 1002.079963684082, + "p95": 1008.3839893341064, + "p99": 1117.7279949188232 + }, + "combine": { + "p50": 544.48002576828, + "p90": 553.8240075111389, + "p95": 558.1759810447693, + "p99": 569.4079995155334 + }, + "roundtrip": { + "p50": 1527.999997138977, + "p90": 1542.9760217666626, + "p95": 1551.1360168457031, + "p99": 1850.2720594406128 + }, + "isolatedSum": { + "p50": 1537.376046180725, + "p90": 1555.903971195221, + "p95": 1566.5599703788757, + "p99": 1687.1359944343567 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1875.7760524749756, + "p90": 1889.0559673309326, + "p95": 1899.135947227478, + "p99": 2038.3999347686768 + }, + "combine": { + "p50": 996.7039823532104, + "p90": 1006.432056427002, + "p95": 1011.072039604187, + "p99": 1026.6239643096924 + }, + "roundtrip": { + "p50": 2936.448097229004, + "p90": 2963.871955871582, + "p95": 2977.8881072998047, + "p99": 3042.880058288574 + }, + "isolatedSum": { + "p50": 2872.480034828186, + "p90": 2895.4880237579346, + "p95": 2910.207986831665, + "p99": 3065.023899078369 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f58a6788", + "identity": "h200|deepep|v1|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_aa333d39", + "comparisonKey": "375db1e8f9eff09c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:50.166068+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + 
"routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 87.99999952316284, + "p90": 94.7519987821579, + "p95": 98.68799895048141, + "p99": 107.68000036478043 + }, + "combine": { + "p50": 97.05600142478943, + "p90": 102.14400291442871, + "p95": 107.51999914646149, + "p99": 113.8560026884079 + }, + "roundtrip": { + "p50": 211.04000508785248, + "p90": 222.88000583648682, + "p95": 228.35199534893036, + "p99": 248.31999838352203 + }, + "isolatedSum": { + "p50": 185.05600094795227, + "p90": 196.8960016965866, + "p95": 206.2079980969429, + "p99": 221.53600305318832 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 104.92800176143646, + "p90": 113.27999830245972, + "p95": 117.85600334405899, + "p99": 132.6719969511032 + }, + "combine": { + "p50": 137.11999356746674, + "p90": 141.27999544143677, + "p95": 144.6399986743927, + "p99": 149.4079977273941 + }, + "roundtrip": { + "p50": 310.2079927921295, + "p90": 317.82400608062744, + "p95": 322.11199402809143, + "p99": 338.8800024986267 + }, + "isolatedSum": { + "p50": 242.0479953289032, + "p90": 254.55999374389648, + "p95": 262.4960020184517, + "p99": 282.0799946784973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 141.56800508499146, + "p90": 148.8640010356903, + "p95": 154.6880006790161, + "p99": 192.54399836063385 + }, + "combine": { + "p50": 216.06400609016418, + "p90": 221.69600427150726, + "p95": 223.61600399017334, + "p99": 230.3999960422516 + }, + "roundtrip": { + "p50": 497.24799394607544, + "p90": 504.92799282073975, + "p95": 509.40799713134766, + "p99": 520.4799771308899 + }, + "isolatedSum": { + "p50": 357.63201117515564, + "p90": 370.56000530719757, + "p95": 378.30400466918945, + "p99": 422.94399440288544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 205.85599541664124, + "p90": 215.45599400997162, + "p95": 222.3680019378662, + "p99": 239.42400515079498 + }, + "combine": { + "p50": 349.7920036315918, + "p90": 356.06399178504944, + "p95": 360.76799035072327, + "p99": 378.87999415397644 + }, + "roundtrip": 
{ + "p50": 835.8079791069031, + "p90": 846.1120128631592, + "p95": 853.4079790115356, + "p99": 909.600019454956 + }, + "isolatedSum": { + "p50": 555.647999048233, + "p90": 571.5199857950211, + "p95": 583.1359922885895, + "p99": 618.3039993047714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 337.8880023956299, + "p90": 346.015989780426, + "p95": 350.5919873714447, + "p99": 450.4320025444031 + }, + "combine": { + "p50": 614.4319772720337, + "p90": 623.6799955368042, + "p95": 626.8799901008606, + "p99": 644.0960168838501 + }, + "roundtrip": { + "p50": 1514.3680572509766, + "p90": 1526.2399911880493, + "p95": 1531.1360359191895, + "p99": 1610.3999614715576 + }, + "isolatedSum": { + "p50": 952.3199796676636, + "p90": 969.6959853172302, + "p95": 977.4719774723053, + "p99": 1094.5280194282532 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 611.6480231285095, + "p90": 620.3200221061707, + "p95": 624.4480013847351, + "p99": 688.7999773025513 + }, + "combine": { + "p50": 1118.1440353393555, + "p90": 1131.0080289840698, + "p95": 1137.4080181121826, + "p99": 1237.2159957885742 + }, + "roundtrip": { + "p50": 2864.8641109466553, + "p90": 2882.8160762786865, + "p95": 2902.208089828491, + "p99": 3092.736005783081 + }, + "isolatedSum": { + "p50": 1729.792058467865, + "p90": 1751.3280510902405, + "p95": 1761.8560194969177, + "p99": 1926.0159730911255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + 
"combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3ebbb64c", + "identity": "h200|deepep|v1|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_76b40c99", + "comparisonKey": "84c85742adeaaaa9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:23.699936+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 224.31999444961548, + "p90": 263.8719975948334, + "p95": 270.2080011367798, + "p99": 284.2560112476349 + }, + "combine": { + "p50": 96.76799923181534, + "p90": 108.03200304508209, + "p95": 113.27999830245972, + "p99": 123.19999933242798 + }, + "roundtrip": { + "p50": 305.9200048446655, + "p90": 339.6799862384796, + "p95": 346.78399562835693, + "p99": 522.271990776062 + }, + "isolatedSum": { + "p50": 321.0879936814308, + "p90": 371.90400063991547, + "p95": 383.4879994392395, + "p99": 407.45601058006287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 285.72800755500793, + "p90": 325.9199857711792, + "p95": 333.47201347351074, + "p99": 351.74399614334106 + }, + "combine": { + "p50": 136.4160031080246, + "p90": 150.94399452209473, + "p95": 155.90399503707886, + "p99": 164.92800414562225 + }, + "roundtrip": { + "p50": 416.128009557724, + "p90": 454.52800393104553, + "p95": 465.6960070133209, + "p99": 508.9920163154602 + }, + "isolatedSum": { + "p50": 422.14401066303253, + "p90": 476.8639802932739, + "p95": 489.3760085105896, + "p99": 516.6720002889633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
401.95199847221375, + "p90": 431.36000633239746, + "p95": 441.9519901275635, + "p99": 479.0079891681671 + }, + "combine": { + "p50": 217.056006193161, + "p90": 229.08799350261688, + "p95": 232.96000063419342, + "p99": 242.3039972782135 + }, + "roundtrip": { + "p50": 606.112003326416, + "p90": 631.1039924621582, + "p95": 636.9600296020508, + "p99": 657.535970211029 + }, + "isolatedSum": { + "p50": 619.0080046653748, + "p90": 660.4479998350143, + "p95": 674.9119907617569, + "p99": 721.3119864463806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 611.7439866065979, + "p90": 638.047993183136, + "p95": 648.4159827232361, + "p99": 666.4959788322449 + }, + "combine": { + "p50": 350.43200850486755, + "p90": 362.4640107154846, + "p95": 367.23199486732483, + "p99": 376.9280016422272 + }, + "roundtrip": { + "p50": 969.3440198898315, + "p90": 987.0719909667969, + "p95": 999.7760057449341, + "p99": 1144.8320150375366 + }, + "isolatedSum": { + "p50": 962.1759951114655, + "p90": 1000.5120038986206, + "p95": 1015.6479775905609, + "p99": 1043.423980474472 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1114.143967628479, + "p90": 1124.7999668121338, + "p95": 1131.1999559402466, + "p99": 1173.1200218200684 + }, + "combine": { + "p50": 614.4000291824341, + "p90": 624.7040033340454, + "p95": 628.9600133895874, + "p99": 646.5280055999756 + }, + "roundtrip": { + "p50": 1706.9439888000488, + "p90": 1719.1040515899658, + "p95": 1726.7839908599854, 
+ "p99": 1878.8800239562988 + }, + "isolatedSum": { + "p50": 1728.543996810913, + "p90": 1749.5039701461792, + "p95": 1760.159969329834, + "p99": 1819.648027420044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2112.9279136657715, + "p90": 2123.552083969116, + "p95": 2128.959894180298, + "p99": 2335.7760906219482 + }, + "combine": { + "p50": 1119.7760105133057, + "p90": 1133.1839561462402, + "p95": 1138.1759643554688, + "p99": 1160.256028175354 + }, + "roundtrip": { + "p50": 3234.976053237915, + "p90": 3256.1280727386475, + "p95": 3265.471935272217, + "p99": 3467.9040908813477 + }, + "isolatedSum": { + "p50": 3232.703924179077, + "p90": 3256.7360401153564, + "p95": 3267.1358585357666, + "p99": 3496.0321187973022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c6acfe83", + "identity": "h200|deepep|v1|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_76b40c99", + "comparisonKey": "b0219eed9083cb9e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:26.887750+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + 
"label": "H200 EP8 · deepep · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 219.7120040655136, + "p90": 242.23999679088593, + "p95": 255.93599677085876, + "p99": 330.9760093688965 + }, + "combine": { + "p50": 96.25600278377533, + "p90": 102.30399668216705, + "p95": 105.05600273609161, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 301.85601115226746, + "p90": 321.1199939250946, + "p95": 331.90399408340454, + "p99": 374.208003282547 + }, + "isolatedSum": { + "p50": 315.96800684928894, + "p90": 344.543993473053, + "p95": 360.9919995069504, + "p99": 444.2240074276924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + 
"fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 273.9199995994568, + "p90": 295.55198550224304, + "p95": 308.51200222969055, + "p99": 337.8880023956299 + }, + "combine": { + "p50": 139.55199718475342, + "p90": 143.13599467277527, + "p95": 145.31199634075165, + "p99": 154.23999726772308 + }, + "roundtrip": { + "p50": 404.2240083217621, + "p90": 427.3279905319214, + "p95": 448.2879936695099, + "p99": 487.5519871711731 + }, + "isolatedSum": { + "p50": 413.4719967842102, + "p90": 438.6879801750183, + "p95": 453.8239985704422, + "p99": 492.12799966335297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 384.5759928226471, + "p90": 401.7919898033142, + "p95": 405.9840142726898, + "p99": 424.0959882736206 + }, + "combine": { + "p50": 215.03999829292297, + "p90": 219.39200162887573, + "p95": 221.8559980392456, + "p99": 234.52800512313843 + }, + "roundtrip": { + "p50": 587.1359705924988, + "p90": 606.0799956321716, + "p95": 624.4159936904907, + "p99": 669.920027256012 + }, + "isolatedSum": { + "p50": 599.6159911155701, + "p90": 621.1839914321899, + "p95": 627.8400123119354, + "p99": 658.623993396759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 604.095995426178, + "p90": 627.1039843559265, + "p95": 647.7760076522827, + "p99": 858.5600256919861 + }, + "combine": { + "p50": 
350.97599029541016, + "p90": 357.9840064048767, + "p95": 362.11198568344116, + "p99": 373.63201379776 + }, + "roundtrip": { + "p50": 946.1119771003723, + "p90": 983.7440252304077, + "p95": 991.9360280036926, + "p99": 1035.423994064331 + }, + "isolatedSum": { + "p50": 955.0719857215881, + "p90": 985.0879907608032, + "p95": 1009.8879933357239, + "p99": 1232.192039489746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1106.2719821929932, + "p90": 1112.768054008484, + "p95": 1115.7439947128296, + "p99": 1153.663992881775 + }, + "combine": { + "p50": 604.6079993247986, + "p90": 613.152027130127, + "p95": 616.9919967651367, + "p99": 637.503981590271 + }, + "roundtrip": { + "p50": 1695.2320337295532, + "p90": 1706.0480117797852, + "p95": 1711.5520238876343, + "p99": 1757.9519748687744 + }, + "isolatedSum": { + "p50": 1710.8799815177917, + "p90": 1725.9200811386108, + "p95": 1732.7359914779663, + "p99": 1791.167974472046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2105.695962905884, + "p90": 2114.0799522399902, + "p95": 2119.4241046905518, + "p99": 2219.42400932312 + }, + "combine": { + "p50": 1103.1359434127808, + "p90": 1115.488052368164, + "p95": 1119.0400123596191, + "p99": 1189.344048500061 + }, + "roundtrip": { + "p50": 3214.4320011138916, + "p90": 3239.743947982788, + "p95": 3252.5761127471924, + "p99": 3397.023916244507 + }, + "isolatedSum": { + "p50": 3208.8319063186646, + "p90": 3229.5680046081543, + "p95": 
3238.464117050171, + "p99": 3408.768057823181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a59fca70", + "identity": "h200|deepep|v1|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_8701b74d", + "comparisonKey": "bae93b4aa7e4890b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:34.862901+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_13", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v1", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
"1.2.1", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.40800017118454, + "p90": 78.87999713420868, + "p95": 83.64800363779068, + "p99": 99.7759997844696 + }, + "combine": { + "p50": 97.21600264310837, + "p90": 101.53599828481674, + "p95": 105.0880029797554, + "p99": 115.167997777462 + }, + "roundtrip": { + "p50": 198.04799556732178, + "p90": 202.43200659751892, + "p95": 207.45599269866943, + "p99": 217.0879989862442 + }, + "isolatedSum": { + "p50": 170.6240028142929, + "p90": 180.41599541902542, + "p95": 188.73600661754608, + "p99": 214.9439975619316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 92.03200042247772, + "p90": 100.38399696350098, + "p95": 109.6000000834465, + "p99": 124.83199685811996 + }, + "combine": { + "p50": 137.40800321102142, + "p90": 144.44799721240997, + "p95": 153.08800339698792, + "p99": 157.98400342464447 + }, + "roundtrip": { + "p50": 299.74400997161865, + "p90": 311.3600015640259, + "p95": 318.08000802993774, + "p99": 333.3120048046112 + }, + "isolatedSum": { + "p50": 229.44000363349915, + "p90": 244.83199417591095, + "p95": 262.6880034804344, + "p99": 282.81600028276443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 127.42400169372559, + "p90": 132.79999792575836, + "p95": 137.1839940547943, + "p99": 152.0960032939911 + }, + "combine": { + "p50": 216.63999557495117, + "p90": 222.6240038871765, + "p95": 226.9439995288849, + "p99": 245.53599953651428 + }, + "roundtrip": { + "p50": 483.99999737739563, + "p90": 489.6000027656555, + "p95": 492.0960068702698, + "p99": 503.39198112487793 + }, + "isolatedSum": { + "p50": 344.06399726867676, + "p90": 355.4240018129349, + "p95": 364.1279935836792, + "p99": 397.63200283050537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 194.04800236225128, + "p90": 204.70400154590607, + "p95": 216.99200570583344, + "p99": 236.89599335193634 + }, + "combine": { + "p50": 349.0239977836609, + "p90": 356.9279909133911, + "p95": 360.48001050949097, + "p99": 372.9279935359955 + }, + "roundtrip": { + "p50": 825.6959915161133, + "p90": 833.6960077285767, + "p95": 839.1039967536926, + "p99": 854.9119830131531 + }, + "isolatedSum": { + "p50": 543.0720001459122, + "p90": 561.6319924592972, + "p95": 577.4720162153244, + "p99": 609.8239868879318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 324.22399520874023, + "p90": 330.6240141391754, + "p95": 334.1439962387085, + "p99": 343.6799943447113 + }, + "combine": { + "p50": 616.096019744873, + "p90": 624.7680187225342, + "p95": 629.1840076446533, + "p99": 
676.3200163841248 + }, + "roundtrip": { + "p50": 1503.4879446029663, + "p90": 1515.328049659729, + "p95": 1520.3520059585571, + "p99": 1645.408034324646 + }, + "isolatedSum": { + "p50": 940.3200149536133, + "p90": 955.3920328617096, + "p95": 963.3280038833618, + "p99": 1020.0000107288361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 582.9439759254456, + "p90": 593.7920212745667, + "p95": 602.1119952201843, + "p99": 648.7360000610352 + }, + "combine": { + "p50": 1118.5280084609985, + "p90": 1130.6240558624268, + "p95": 1134.8479986190796, + "p99": 1180.4800033569336 + }, + "roundtrip": { + "p50": 2846.496105194092, + "p90": 2866.624116897583, + "p95": 2877.824068069458, + "p99": 3034.5280170440674 + }, + "isolatedSum": { + "p50": 1701.471984386444, + "p90": 1724.4160771369934, + "p95": 1736.959993839264, + "p99": 1829.2160034179688 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-be5c70a9", + "identity": "h200|deepep|v2|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_b8782b41", + "comparisonKey": "b5b8b04ba767a39a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:49.133890+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.74399822950363, + "p90": 100.8640006184578, + "p95": 103.71199995279312, + "p99": 116.31999909877777 + }, + "combine": { + "p50": 83.99999886751175, + "p90": 89.24800157546997, + "p95": 94.08000111579895, + "p99": 108.2879975438118 + }, + "roundtrip": { + "p50": 151.10400319099426, + "p90": 157.4079990386963, + "p95": 162.08000481128693, + "p99": 171.07200622558594 + }, + "isolatedSum": { + "p50": 175.74399709701538, + "p90": 190.11200219392776, + "p95": 197.79200106859207, + 
"p99": 224.60799664258957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 113.43999952077866, + "p90": 120.86399644613266, + "p95": 125.05599856376648, + "p99": 135.16800105571747 + }, + "combine": { + "p50": 107.19999670982361, + "p90": 113.50400000810623, + "p95": 116.70400202274323, + "p99": 126.43200159072876 + }, + "roundtrip": { + "p50": 193.7599927186966, + "p90": 200.70399343967438, + "p95": 203.96800339221954, + "p99": 223.55200350284576 + }, + "isolatedSum": { + "p50": 220.63999623060226, + "p90": 234.3679964542389, + "p95": 241.7600005865097, + "p99": 261.6000026464462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 147.45600521564484, + "p90": 154.94400262832642, + "p95": 160.288006067276, + "p99": 169.0240055322647 + }, + "combine": { + "p50": 154.33600544929504, + "p90": 161.53599321842194, + "p95": 164.86400365829468, + "p99": 173.08799922466278 + }, + "roundtrip": { + "p50": 271.5199887752533, + "p90": 280.67201375961304, + "p95": 285.37601232528687, + "p99": 303.51999402046204 + }, + "isolatedSum": { + "p50": 301.7920106649399, + "p90": 316.47999584674835, + "p95": 325.1520097255707, + "p99": 342.1120047569275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { 
+ "p50": 212.41599321365356, + "p90": 220.19200026988983, + "p95": 224.7679978609085, + "p99": 234.20800268650055 + }, + "combine": { + "p50": 254.36800718307495, + "p90": 261.21601462364197, + "p95": 264.0640139579773, + "p99": 293.5360074043274 + }, + "roundtrip": { + "p50": 439.520001411438, + "p90": 447.7759897708893, + "p95": 452.9600143432617, + "p99": 471.23199701309204 + }, + "isolatedSum": { + "p50": 466.7840003967285, + "p90": 481.4080148935318, + "p95": 488.8320118188858, + "p99": 527.7440100908279 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 346.20800614356995, + "p90": 356.4159870147705, + "p95": 364.4160032272339, + "p99": 392.7040100097656 + }, + "combine": { + "p50": 427.264004945755, + "p90": 435.13599038124084, + "p95": 439.13599848747253, + "p99": 447.80799746513367 + }, + "roundtrip": { + "p50": 746.4960217475891, + "p90": 756.1600208282471, + "p95": 761.4399790763855, + "p99": 829.3120265007019 + }, + "isolatedSum": { + "p50": 773.472011089325, + "p90": 791.5519773960114, + "p95": 803.5520017147064, + "p99": 840.5120074748993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 619.5840239524841, + "p90": 627.6159882545471, + "p95": 630.8799982070923, + "p99": 654.0480256080627 + }, + "combine": { + "p50": 772.6399898529053, + "p90": 780.896008014679, + "p95": 785.6960296630859, + "p99": 840.0319814682007 + }, + "roundtrip": { + "p50": 1368.5120344161987, + "p90": 1379.1359663009644, + "p95": 
1383.1360340118408, + "p99": 1402.2719860076904 + }, + "isolatedSum": { + "p50": 1392.2240138053894, + "p90": 1408.511996269226, + "p95": 1416.5760278701782, + "p99": 1494.0800070762634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c125c8e7", + "identity": "h200|deepep|v2|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_b8782b41", + "comparisonKey": "22d44df98c05c343", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:45.910531+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 97.08800166845322, + "p90": 119.32799965143204, + "p95": 124.89599734544754, + "p99": 141.50400459766388 + }, + "combine": { + "p50": 92.83199906349182, + "p90": 103.20000350475311, + "p95": 110.33599823713303, + "p99": 119.64800208806992 + }, + "roundtrip": { + "p50": 165.18400609493256, + "p90": 181.37599527835846, + "p95": 187.9040002822876, + "p99": 197.56799936294556 + }, + "isolatedSum": { + "p50": 189.92000073194504, + "p90": 222.52800315618515, + "p95": 235.23199558258057, + "p99": 261.1520066857338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 124.15999919176102, + "p90": 133.88800621032715, + "p95": 140.22399485111237, + "p99": 145.79200744628906 + }, + "combine": { + "p50": 119.39200013875961, + "p90": 130.91200590133667, + "p95": 136.1279934644699, + "p99": 150.176003575325 + }, + "roundtrip": { + "p50": 216.0319983959198, + "p90": 228.12800109386444, + "p95": 234.01600122451782, + "p99": 242.68800020217896 + }, + "isolatedSum": { + "p50": 243.55199933052063, + "p90": 264.8000121116638, + "p95": 276.3519883155823, + "p99": 295.9680110216141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 162.36799955368042, + "p90": 175.20000040531158, + "p95": 180.4800033569336, + "p99": 188.03200125694275 + }, + "combine": { + "p50": 180.7679980993271, + "p90": 191.0720020532608, + "p95": 193.82399320602417, + "p99": 200.41599869728088 + }, + "roundtrip": { + "p50": 310.8159899711609, + "p90": 329.6639919281006, + "p95": 332.5439989566803, + "p99": 340.38400650024414 + }, + "isolatedSum": { + "p50": 343.1359976530075, + "p90": 366.2720024585724, + "p95": 374.30399656295776, + "p99": 388.44799995422363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 242.46400594711304, + "p90": 259.5840096473694, + "p95": 268.0320143699646, + "p99": 290.912002325058 + }, + "combine": { + "p50": 283.52001309394836, + "p90": 293.8559949398041, + "p95": 300.03198981285095, + "p99": 325.0240087509155 + }, + "roundtrip": { + "p50": 500.44798851013184, + "p90": 512.0959877967834, + "p95": 518.559992313385, + "p99": 534.7840189933777 + }, + "isolatedSum": { + "p50": 525.9840190410614, + "p90": 553.4400045871735, + "p95": 568.0640041828156, + "p99": 615.9360110759735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 401.311993598938, + "p90": 416.9600009918213, + "p95": 
423.96798729896545, + "p99": 461.5040123462677 + }, + "combine": { + "p50": 491.93599820137024, + "p90": 503.29601764678955, + "p95": 506.9440007209778, + "p99": 513.375997543335 + }, + "roundtrip": { + "p50": 866.9120073318481, + "p90": 879.2319893836975, + "p95": 885.6319785118103, + "p99": 1253.2479763031006 + }, + "isolatedSum": { + "p50": 893.2479918003082, + "p90": 920.2560186386108, + "p95": 930.9119880199432, + "p99": 974.8800098896027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 747.4560141563416, + "p90": 764.9919986724854, + "p95": 769.9519991874695, + "p99": 788.2559895515442 + }, + "combine": { + "p50": 888.2880210876465, + "p90": 898.2080221176147, + "p95": 902.9120206832886, + "p99": 918.9119935035706 + }, + "roundtrip": { + "p50": 1597.5040197372437, + "p90": 1613.1839752197266, + "p95": 1621.216058731079, + "p99": 1670.6559658050537 + }, + "isolatedSum": { + "p50": 1635.744035243988, + "p90": 1663.2000207901, + "p95": 1672.864019870758, + "p99": 1707.1679830551147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f5efc8b9", + "identity": "h200|deepep|v2|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_b8782b41", + "comparisonKey": "6d1bc9ab9e5659bd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:45.701603+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + 
"phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.34399747848511, + "p90": 115.35999923944473, + "p95": 118.75200271606445, + "p99": 138.46400380134583 + }, + "combine": { + "p50": 96.47999703884125, + "p90": 101.85600072145462, + "p95": 104.89600151777267, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 179.9360066652298, + "p90": 186.0159933567047, + 
"p95": 190.14400243759155, + "p99": 203.74399423599243 + }, + "isolatedSum": { + "p50": 201.82399451732635, + "p90": 217.21599996089935, + "p95": 223.64800423383713, + "p99": 248.51200729608536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 132.9600065946579, + "p90": 138.84800672531128, + "p95": 141.95199310779572, + "p99": 148.00000190734863 + }, + "combine": { + "p50": 127.74400413036346, + "p90": 133.05599987506866, + "p95": 135.74400544166565, + "p99": 141.59999787807465 + }, + "roundtrip": { + "p50": 236.00000143051147, + "p90": 242.2720044851303, + "p95": 245.2159970998764, + "p99": 255.3279995918274 + }, + "isolatedSum": { + "p50": 260.70401072502136, + "p90": 271.90400660037994, + "p95": 277.69599854946136, + "p99": 289.5999997854233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 181.5679967403412, + "p90": 187.8719925880432, + "p95": 191.80800020694733, + "p99": 196.44799828529358 + }, + "combine": { + "p50": 200.3840059041977, + "p90": 206.08000457286835, + "p95": 208.3519995212555, + "p99": 217.6000028848648 + }, + "roundtrip": { + "p50": 352.9599905014038, + "p90": 360.00001430511475, + "p95": 362.7200126647949, + "p99": 371.8720078468323 + }, + "isolatedSum": { + "p50": 381.9520026445389, + "p90": 393.95199716091156, + "p95": 400.1599997282028, + "p99": 414.0480011701584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + 
"recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 275.04000067710876, + "p90": 281.3119888305664, + "p95": 284.92799401283264, + "p99": 292.7039861679077 + }, + "combine": { + "p50": 318.2080090045929, + "p90": 326.33599638938904, + "p95": 328.8959860801697, + "p99": 337.5999927520752 + }, + "roundtrip": { + "p50": 571.071982383728, + "p90": 579.4240236282349, + "p95": 581.7919969558716, + "p99": 619.5840239524841 + }, + "isolatedSum": { + "p50": 593.2480096817017, + "p90": 607.6479852199554, + "p95": 613.8239800930023, + "p99": 630.3039789199829 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 462.0479941368103, + "p90": 469.4719910621643, + "p95": 472.4479913711548, + "p99": 483.0720126628876 + }, + "combine": { + "p50": 547.7439761161804, + "p90": 555.0079941749573, + "p95": 559.1679811477661, + "p99": 606.0159802436829 + }, + "roundtrip": { + "p50": 982.9440116882324, + "p90": 992.5439953804016, + "p95": 995.3600168228149, + "p99": 1035.0719690322876 + }, + "isolatedSum": { + "p50": 1009.7919702529907, + "p90": 1024.4799852371216, + "p95": 1031.615972518921, + "p99": 1089.0879929065704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 852.1919846534729, + "p90": 867.4240112304688, + "p95": 874.3680119514465, + "p99": 906.5920114517212 + }, + "combine": { + "p50": 1005.5999755859375, 
+ "p90": 1014.3680572509766, + "p95": 1017.024040222168, + "p99": 1044.2880392074585 + }, + "roundtrip": { + "p50": 1827.936053276062, + "p90": 1843.8400030136108, + "p95": 1850.2399921417236, + "p99": 1965.6000137329102 + }, + "isolatedSum": { + "p50": 1857.7919602394104, + "p90": 1881.7920684814453, + "p95": 1891.3920521736145, + "p99": 1950.8800506591797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0c647ef4", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_d7895571", + "comparisonKey": "6d1b97a966875452", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:10.078684+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.66400188207626, + "p90": 129.15199995040894, + "p95": 138.43199610710144, + "p99": 146.68799936771393 + }, + "combine": { + "p50": 107.13600367307663, + "p90": 115.55200070142746, + "p95": 125.02400577068329, + "p99": 131.20000064373016 + }, + "roundtrip": { + "p50": 202.5279998779297, + "p90": 217.056006193161, + "p95": 229.34399545192719, + "p99": 280.7680070400238 + }, + "isolatedSum": { + "p50": 224.8000055551529, + "p90": 244.7040006518364, + "p95": 263.45600187778473, + "p99": 277.8880000114441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.20000326633453, + "p90": 164.60800170898438, + "p95": 168.7680035829544, + "p99": 178.8800060749054 + }, + "combine": { + "p50": 146.7200070619583, + "p90": 157.79200196266174, + "p95": 162.59199380874634, + "p99": 169.91999745368958 + }, + "roundtrip": { + "p50": 265.79201221466064, + "p90": 282.8480005264282, + "p95": 286.97600960731506, + "p99": 295.23199796676636 + }, + "isolatedSum": { + "p50": 
293.92001032829285, + "p90": 322.4000036716461, + "p95": 331.35999739170074, + "p99": 348.80000352859497 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.32800662517548, + "p90": 228.2239943742752, + "p95": 234.6239984035492, + "p99": 266.36800169944763 + }, + "combine": { + "p50": 226.01599991321564, + "p90": 241.60000681877136, + "p95": 246.36800587177277, + "p99": 256.76798820495605 + }, + "roundtrip": { + "p50": 404.7999978065491, + "p90": 419.0399944782257, + "p95": 425.28000473976135, + "p99": 441.9200122356415 + }, + "isolatedSum": { + "p50": 433.3440065383911, + "p90": 469.82400119304657, + "p95": 480.99200427532196, + "p99": 523.1359899044037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 318.65599751472473, + "p90": 331.2639892101288, + "p95": 335.7439935207367, + "p99": 353.63200306892395 + }, + "combine": { + "p50": 358.271986246109, + "p90": 368.1600093841553, + "p95": 373.9840090274811, + "p99": 389.44000005722046 + }, + "roundtrip": { + "p50": 652.7360081672668, + "p90": 663.3920073509216, + "p95": 669.6000099182129, + "p99": 701.8880248069763 + }, + "isolatedSum": { + "p50": 676.9279837608337, + "p90": 699.4239985942841, + "p95": 709.7280025482178, + "p99": 743.0720031261444 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 531.8400263786316, + "p90": 541.5999889373779, + "p95": 548.3520030975342, + "p99": 618.3680295944214 + }, + "combine": { + "p50": 622.8479743003845, + "p90": 632.3840022087097, + "p95": 638.7519836425781, + "p99": 660.4480147361755 + }, + "roundtrip": { + "p50": 1126.528024673462, + "p90": 1138.1440162658691, + "p95": 1144.1919803619385, + "p99": 1197.8559494018555 + }, + "isolatedSum": { + "p50": 1154.688000679016, + "p90": 1173.9839911460876, + "p95": 1187.1039867401123, + "p99": 1278.816044330597 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 998.3680248260498, + "p90": 1017.632007598877, + "p95": 1026.2720584869385, + "p99": 1050.4319667816162 + }, + "combine": { + "p50": 1125.3119707107544, + "p90": 1136.1279487609863, + "p95": 1139.9999856948853, + "p99": 1197.5680589675903 + }, + "roundtrip": { + "p50": 2086.1120223999023, + "p90": 2105.5359840393066, + "p95": 2113.759994506836, + "p99": 2252.1278858184814 + }, + "isolatedSum": { + "p50": 2123.679995536804, + "p90": 2153.7599563598633, + "p95": 2166.2720441818237, + "p99": 2248.0000257492065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e8f2630", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_b8782b41", + "comparisonKey": "d374c82c32ed5cd4", + "schemaVersion": 3, + "generatedAt": 
"2026-07-02T10:39:05.955871+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.40000003576279, + "p90": 141.79199934005737, + "p95": 146.55999839305878, + "p99": 169.98399794101715 + }, + "combine": { + "p50": 
106.4319983124733, + "p90": 119.87199634313583, + "p95": 125.31200051307678, + "p99": 133.44000279903412 + }, + "roundtrip": { + "p50": 198.2399970293045, + "p90": 215.61600267887115, + "p95": 223.4240025281906, + "p99": 232.70399868488312 + }, + "isolatedSum": { + "p50": 224.83199834823608, + "p90": 261.6639956831932, + "p95": 271.87199890613556, + "p99": 303.42400074005127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 152.73599326610565, + "p90": 184.86399948596954, + "p95": 190.7840073108673, + "p99": 203.77600193023682 + }, + "combine": { + "p50": 150.84800124168396, + "p90": 171.29600048065186, + "p95": 174.43199455738068, + "p99": 184.35199558734894 + }, + "roundtrip": { + "p50": 271.0080146789551, + "p90": 300.79999566078186, + "p95": 304.57600951194763, + "p99": 311.2320005893707 + }, + "isolatedSum": { + "p50": 303.5839945077896, + "p90": 356.1599999666214, + "p95": 365.216001868248, + "p99": 388.12799751758575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 204.8639953136444, + "p90": 236.32000386714935, + "p95": 240.38399755954742, + "p99": 254.33599948883057 + }, + "combine": { + "p50": 224.2559939622879, + "p90": 232.96000063419342, + "p95": 238.14399540424347, + "p99": 250.75200200080872 + }, + "roundtrip": { + "p50": 402.75201201438904, + "p90": 414.5280122756958, + "p95": 421.31200432777405, + "p99": 433.9199960231781 + }, + "isolatedSum": { + "p50": 429.1199892759323, + "p90": 469.2800045013428, + "p95": 
478.5279929637909, + "p99": 505.0880014896393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 316.76799058914185, + "p90": 332.35201239585876, + "p95": 335.6800079345703, + "p99": 344.57600116729736 + }, + "combine": { + "p50": 357.02401399612427, + "p90": 365.9839928150177, + "p95": 369.79201436042786, + "p99": 377.920001745224 + }, + "roundtrip": { + "p50": 649.7600078582764, + "p90": 662.7519726753235, + "p95": 669.983983039856, + "p99": 694.815993309021 + }, + "isolatedSum": { + "p50": 673.7920045852661, + "p90": 698.3360052108765, + "p95": 705.4720222949982, + "p99": 722.4960029125214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 532.480001449585, + "p90": 548.1280088424683, + "p95": 554.6879768371582, + "p99": 617.9199814796448 + }, + "combine": { + "p50": 622.5919723510742, + "p90": 633.247971534729, + "p95": 639.4240260124207, + "p99": 650.1759886741638 + }, + "roundtrip": { + "p50": 1125.1519918441772, + "p90": 1138.0159854888916, + "p95": 1143.3919668197632, + "p99": 1173.0560064315796 + }, + "isolatedSum": { + "p50": 1155.0719738006592, + "p90": 1181.3759803771973, + "p95": 1194.1120028495789, + "p99": 1268.0959701538086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + 
"globalTokens": 32768, + "dispatch": { + "p50": 1003.8080215454102, + "p90": 1023.7760543823242, + "p95": 1029.7919511795044, + "p99": 1161.344051361084 + }, + "combine": { + "p50": 1126.5920400619507, + "p90": 1137.4080181121826, + "p95": 1142.240047454834, + "p99": 1236.6399765014648 + }, + "roundtrip": { + "p50": 2088.6080265045166, + "p90": 2108.448028564453, + "p95": 2118.1440353393555, + "p99": 3230.560064315796 + }, + "isolatedSum": { + "p50": 2130.400061607361, + "p90": 2161.184072494507, + "p95": 2172.0319986343384, + "p99": 2397.984027862549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f2f7af93", + "identity": "h200|deepep|v2|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_b8782b41", + "comparisonKey": "96181e7c25e21c7c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:53.400962+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 
0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 121.95199728012085, + "p90": 141.9840008020401, + "p95": 147.23199605941772, + "p99": 162.23999857902527 + }, + "combine": { + "p50": 106.27199709415436, + "p90": 117.98399686813354, + "p95": 124.32000041007996, + "p99": 131.67999684810638 + }, + "roundtrip": { + "p50": 199.072003364563, + "p90": 217.056006193161, + "p95": 224.73600506782532, + "p99": 235.4239970445633 + }, + "isolatedSum": { + "p50": 228.2239943742752, + "p90": 259.96799767017365, + "p95": 271.5519964694977, + "p99": 293.91999542713165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 147.64800667762756, + "p90": 165.21599888801575, + "p95": 170.84799706935883, + "p99": 180.57599663734436 + }, + "combine": { + "p50": 145.24799585342407, + "p90": 156.8640023469925, + "p95": 163.2319986820221, + "p99": 
172.54400253295898 + }, + "roundtrip": { + "p50": 267.0400142669678, + "p90": 285.37601232528687, + "p95": 291.6159927845001, + "p99": 302.43200063705444 + }, + "isolatedSum": { + "p50": 292.89600253105164, + "p90": 322.08000123500824, + "p95": 334.0799957513809, + "p99": 353.11999917030334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.53599298000336, + "p90": 221.91999852657318, + "p95": 229.37600314617157, + "p99": 238.46399784088135 + }, + "combine": { + "p50": 225.92000663280487, + "p90": 239.23200368881226, + "p95": 243.77599358558655, + "p99": 259.8080039024353 + }, + "roundtrip": { + "p50": 403.2000005245209, + "p90": 421.08801007270813, + "p95": 428.3199906349182, + "p99": 527.8400182723999 + }, + "isolatedSum": { + "p50": 431.4559996128082, + "p90": 461.15200221538544, + "p95": 473.1519967317581, + "p99": 498.27200174331665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.94399905204773, + "p90": 332.5439989566803, + "p95": 334.4320058822632, + "p99": 353.43998670578003 + }, + "combine": { + "p50": 356.06399178504944, + "p90": 365.56801199913025, + "p95": 368.9599931240082, + "p99": 379.96798753738403 + }, + "roundtrip": { + "p50": 644.2880034446716, + "p90": 658.5279703140259, + "p95": 664.4160151481628, + "p99": 723.4560251235962 + }, + "isolatedSum": { + "p50": 671.0079908370972, + "p90": 698.1120109558105, + "p95": 703.3919990062714, + "p99": 733.4079742431641 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 529.2800068855286, + "p90": 543.071985244751, + "p95": 549.6960282325745, + "p99": 575.3600001335144 + }, + "combine": { + "p50": 614.5920157432556, + "p90": 624.4159936904907, + "p95": 628.000020980835, + "p99": 635.9360218048096 + }, + "roundtrip": { + "p50": 1118.5920238494873, + "p90": 1145.8560228347778, + "p95": 1191.9679641723633, + "p99": 1319.2960023880005 + }, + "isolatedSum": { + "p50": 1143.8720226287842, + "p90": 1167.4879789352417, + "p95": 1177.6960492134094, + "p99": 1211.296021938324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 983.1039905548096, + "p90": 1002.7519464492798, + "p95": 1009.8559856414795, + "p99": 1025.9840488433838 + }, + "combine": { + "p50": 1116.3519620895386, + "p90": 1126.0160207748413, + "p95": 1129.8880577087402, + "p99": 1141.3120031356812 + }, + "roundtrip": { + "p50": 2059.904098510742, + "p90": 2081.8560123443604, + "p95": 2088.8640880584717, + "p99": 2176.3839721679688 + }, + "isolatedSum": { + "p50": 2099.455952644348, + "p90": 2128.767967224121, + "p95": 2139.7440433502197, + "p99": 2167.296051979065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e65fc6ef", + "identity": 
"h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h200_fa12a3e3", + "comparisonKey": "7cef788ea445c515", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:46.637191+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.7279975414276, + "p90": 149.1519957780838, + "p95": 157.0879966020584, + "p99": 168.44800114631653 + }, + "combine": { + "p50": 126.52799487113953, + "p90": 139.42399621009827, + "p95": 146.27200365066528, + "p99": 153.24799716472626 + }, + "roundtrip": { + "p50": 229.18400168418884, + "p90": 247.96800315380096, + "p95": 251.52000784873962, + "p99": 258.2719922065735 + }, + "isolatedSum": { + "p50": 260.25599241256714, + "p90": 288.57599198818207, + "p95": 303.3600002527237, + "p99": 321.6959983110428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 178.9119988679886, + "p90": 198.65599274635315, + "p95": 203.39199900627136, + "p99": 211.2639993429184 + }, + "combine": { + "p50": 177.88800597190857, + "p90": 191.77600741386414, + "p95": 195.26399672031403, + "p99": 208.51199328899384 + }, + "roundtrip": { + "p50": 324.864000082016, + "p90": 339.52000737190247, + "p95": 345.18399834632874, + "p99": 354.6240031719208 + }, + "isolatedSum": { + "p50": 356.80000483989716, + "p90": 390.4320001602173, + "p95": 398.6559957265854, + "p99": 419.77599263191223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 271.232008934021, + "p90": 285.3440046310425, + "p95": 291.29600524902344, + "p99": 301.15199089050293 + }, + "combine": { + "p50": 270.7839906215668, + "p90": 281.3760042190552, + "p95": 284.7679853439331, + "p99": 
291.80800914764404 + }, + "roundtrip": { + "p50": 515.1039958000183, + "p90": 528.6080241203308, + "p95": 532.5760245323181, + "p99": 548.416018486023 + }, + "isolatedSum": { + "p50": 542.0159995555878, + "p90": 566.7200088500977, + "p95": 576.0639905929565, + "p99": 592.960000038147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 450.6239891052246, + "p90": 467.5840139389038, + "p95": 473.2480049133301, + "p99": 497.6319968700409 + }, + "combine": { + "p50": 458.68799090385437, + "p90": 470.5280065536499, + "p95": 474.8159945011139, + "p99": 495.07200717926025 + }, + "roundtrip": { + "p50": 881.3760280609131, + "p90": 891.8399810791016, + "p95": 899.4879722595215, + "p99": 918.6239838600159 + }, + "isolatedSum": { + "p50": 909.311980009079, + "p90": 938.1120204925537, + "p95": 948.063999414444, + "p99": 992.7040040493011 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 807.4880242347717, + "p90": 820.2239871025085, + "p95": 825.984001159668, + "p99": 850.4959940910339 + }, + "combine": { + "p50": 829.0560245513916, + "p90": 839.2000198364258, + "p95": 842.9759740829468, + "p99": 869.8559999465942 + }, + "roundtrip": { + "p50": 1606.495976448059, + "p90": 1625.8560419082642, + "p95": 1635.3280544281006, + "p99": 1850.111961364746 + }, + "isolatedSum": { + "p50": 1636.5440487861633, + "p90": 1659.4240069389343, + "p95": 1668.9599752426147, + "p99": 1720.3519940376282 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + 
"combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1537.0240211486816, + "p90": 1549.3760108947754, + "p95": 1558.4959983825684, + "p99": 1598.5280275344849 + }, + "combine": { + "p50": 1547.1999645233154, + "p90": 1558.7199926376343, + "p95": 1564.1599893569946, + "p99": 1666.3999557495117 + }, + "roundtrip": { + "p50": 3053.8558959960938, + "p90": 3072.000026702881, + "p95": 3093.4720039367676, + "p99": 3234.71999168396 + }, + "isolatedSum": { + "p50": 3084.223985671997, + "p90": 3108.0960035324097, + "p95": 3122.655987739563, + "p99": 3264.9279832839966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-cf97ea4b", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h200_de57b2b2", + "comparisonKey": "879d08443f8e1d58", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:35.877999+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 80.48000186681747, + "p90": 89.37600255012512, + "p95": 95.93600034713745, + "p99": 109.66400057077408 + }, + "combine": { + "p50": 71.32799923419952, + "p90": 75.80800354480743, + "p95": 79.13599908351898, + "p99": 87.71199733018875 + }, + "roundtrip": { + "p50": 129.82399761676788, + "p90": 134.2719942331314, + "p95": 139.23199474811554, + "p99": 147.13600277900696 + }, + "isolatedSum": { + "p50": 151.808001101017, + "p90": 165.18400609493256, + "p95": 175.07199943065643, + "p99": 197.37599790096283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 
4096, + "dispatch": { + "p50": 107.90400207042694, + "p90": 116.12799763679504, + "p95": 120.80000340938568, + "p99": 140.79999923706055 + }, + "combine": { + "p50": 119.4240003824234, + "p90": 123.16799908876419, + "p95": 126.01600587368011, + "p99": 134.5919966697693 + }, + "roundtrip": { + "p50": 202.2079974412918, + "p90": 209.53600108623505, + "p95": 212.67199516296387, + "p99": 240.54400622844696 + }, + "isolatedSum": { + "p50": 227.32800245285034, + "p90": 239.29599672555923, + "p95": 246.8160092830658, + "p99": 275.39199590682983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 187.6160055398941, + "p90": 192.32000410556793, + "p95": 195.93599438667297, + "p99": 206.9759964942932 + }, + "combine": { + "p50": 289.69600796699524, + "p90": 295.2960133552551, + "p95": 298.8480031490326, + "p99": 311.16798520088196 + }, + "roundtrip": { + "p50": 445.3119933605194, + "p90": 451.87199115753174, + "p95": 453.8559913635254, + "p99": 482.62399435043335 + }, + "isolatedSum": { + "p50": 477.31201350688934, + "p90": 487.61601746082306, + "p95": 494.78399753570557, + "p99": 518.1439816951752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-335e01ce", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h200_3515cf1b", + "comparisonKey": "00d274deeca156c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:26.492721+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 94.59199756383896, + "p90": 104.92800176143646, + "p95": 109.40799862146378, + "p99": 112.5119999051094 + }, + "combine": { + "p50": 84.73599702119827, + "p90": 94.52799707651138, + "p95": 99.90400075912476, + 
"p99": 109.63200032711029 + }, + "roundtrip": { + "p50": 155.90399503707886, + "p90": 168.16000640392303, + "p95": 175.61599612236023, + "p99": 182.81599879264832 + }, + "isolatedSum": { + "p50": 179.32799458503723, + "p90": 199.45599883794785, + "p95": 209.31199938058853, + "p99": 222.1440002322197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 116.7680025100708, + "p90": 126.97599828243256, + "p95": 130.5920034646988, + "p99": 139.45600390434265 + }, + "combine": { + "p50": 104.44799810647964, + "p90": 116.12799763679504, + "p95": 122.84799665212631, + "p99": 130.5920034646988 + }, + "roundtrip": { + "p50": 199.16799664497375, + "p90": 214.52799439430237, + "p95": 220.2879935503006, + "p99": 232.09600150585175 + }, + "isolatedSum": { + "p50": 221.21600061655045, + "p90": 243.1039959192276, + "p95": 253.4400001168251, + "p99": 270.04800736904144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 145.1839953660965, + "p90": 154.04799580574036, + "p95": 157.98400342464447, + "p99": 164.95999693870544 + }, + "combine": { + "p50": 142.68800616264343, + "p90": 151.2320041656494, + "p95": 155.35999834537506, + "p99": 164.32000696659088 + }, + "roundtrip": { + "p50": 266.11199975013733, + "p90": 283.58399868011475, + "p95": 291.1680042743683, + "p99": 300.7040023803711 + }, + "isolatedSum": { + "p50": 287.87200152873993, + "p90": 305.27999997138977, + "p95": 313.34400177001953, + "p99": 329.2800039052963 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 206.43199980258942, + "p90": 216.2880003452301, + "p95": 220.57600319385529, + "p99": 231.26399517059326 + }, + "combine": { + "p50": 219.29599344730377, + "p90": 230.04800081253052, + "p95": 233.88800024986267, + "p99": 239.1040027141571 + }, + "roundtrip": { + "p50": 401.5040099620819, + "p90": 415.48800468444824, + "p95": 420.80000042915344, + "p99": 435.64799427986145 + }, + "isolatedSum": { + "p50": 425.7279932498932, + "p90": 446.3360011577606, + "p95": 454.46400344371796, + "p99": 470.36799788475037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 327.58399844169617, + "p90": 337.72799372673035, + "p95": 341.2800133228302, + "p99": 361.82400584220886 + }, + "combine": { + "p50": 364.9919927120209, + "p90": 374.91199374198914, + "p95": 379.71198558807373, + "p99": 392.5760090351105 + }, + "roundtrip": { + "p50": 664.2559766769409, + "p90": 673.4079718589783, + "p95": 678.8480281829834, + "p99": 694.5919990539551 + }, + "isolatedSum": { + "p50": 692.575991153717, + "p90": 712.6399874687195, + "p95": 720.9919989109039, + "p99": 754.4000148773193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 572.3519921302795, + "p90": 581.9519758224487, + "p95": 584.2880010604858, + 
"p99": 601.5040278434753 + }, + "combine": { + "p50": 635.1360082626343, + "p90": 644.1919803619385, + "p95": 647.487998008728, + "p99": 662.1440052986145 + }, + "roundtrip": { + "p50": 1184.8000288009644, + "p90": 1199.9679803848267, + "p95": 1215.775966644287, + "p99": 1259.1359615325928 + }, + "isolatedSum": { + "p50": 1207.4880003929138, + "p90": 1226.1439561843872, + "p95": 1231.7759990692139, + "p99": 1263.6480331420898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7d22bbbf", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h200_f933a4f5", + "comparisonKey": "94383079091608a2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:32.761521+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 122.40000069141388, + "p90": 129.08799946308136, + "p95": 131.74399733543396, + "p99": 140.51200449466705 + }, + "combine": { + "p50": 117.95199662446976, + "p90": 122.01599776744843, + "p95": 125.44000148773193, + "p99": 135.903999209404 + }, + "roundtrip": { + "p50": 219.90400552749634, + "p90": 225.8560061454773, + "p95": 230.24000227451324, + "p99": 239.84000086784363 + }, + "isolatedSum": { + "p50": 240.35199731588364, + "p90": 251.10399723052979, + "p95": 257.1839988231659, + "p99": 276.41600370407104 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 164.06400501728058, + "p90": 171.39199376106262, + "p95": 174.6239960193634, + "p99": 180.7039976119995 + }, + "combine": { + "p50": 166.46400094032288, + "p90": 171.77599668502808, + "p95": 174.01599884033203, + "p99": 185.18400192260742 + }, + "roundtrip": { + "p50": 302.3679852485657, + "p90": 
308.83198976516724, + "p95": 311.19999289512634, + "p99": 321.1840093135834 + }, + "isolatedSum": { + "p50": 330.52800595760345, + "p90": 343.1679904460907, + "p95": 348.63999485969543, + "p99": 365.88799953460693 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 242.17599630355835, + "p90": 250.4960000514984, + "p95": 252.54398584365845, + "p99": 265.9519910812378 + }, + "combine": { + "p50": 265.5999958515167, + "p90": 271.58400416374207, + "p95": 274.6880054473877, + "p99": 282.6560139656067 + }, + "roundtrip": { + "p50": 485.9839975833893, + "p90": 494.30400133132935, + "p95": 498.27200174331665, + "p99": 563.3599758148193 + }, + "isolatedSum": { + "p50": 507.7759921550751, + "p90": 522.0800042152405, + "p95": 527.2319912910461, + "p99": 548.6080050468445 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 399.616003036499, + "p90": 408.54400396347046, + "p95": 413.02400827407837, + "p99": 426.0160028934479 + }, + "combine": { + "p50": 449.50398802757263, + "p90": 456.64000511169434, + "p95": 459.52001214027405, + "p99": 471.6480076313019 + }, + "roundtrip": { + "p50": 823.1040239334106, + "p90": 831.167995929718, + "p95": 835.2640271186829, + "p99": 881.056010723114 + }, + "isolatedSum": { + "p50": 849.1199910640717, + "p90": 865.1840090751648, + "p95": 872.5440204143524, + "p99": 897.6640105247498 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 
5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 724.2559790611267, + "p90": 736.2239956855774, + "p95": 749.5359778404236, + "p99": 835.4880213737488 + }, + "combine": { + "p50": 808.5759878158569, + "p90": 817.3760175704956, + "p95": 821.1519718170166, + "p99": 844.3840146064758 + }, + "roundtrip": { + "p50": 1501.695990562439, + "p90": 1512.2560262680054, + "p95": 1515.9679651260376, + "p99": 1668.8640117645264 + }, + "isolatedSum": { + "p50": 1532.8319668769836, + "p90": 1553.600013256073, + "p95": 1570.6879496574402, + "p99": 1679.8720359802246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1387.935996055603, + "p90": 1399.392008781433, + "p95": 1404.0319919586182, + "p99": 1424.5439767837524 + }, + "combine": { + "p50": 1505.7599544525146, + "p90": 1517.2799825668335, + "p95": 1524.9600410461426, + "p99": 1612.768054008484 + }, + "roundtrip": { + "p50": 2866.1439418792725, + "p90": 2879.6799182891846, + "p95": 2888.0960941314697, + "p99": 2964.7040367126465 + }, + "isolatedSum": { + "p50": 2893.6959505081177, + "p90": 2916.6719913482666, + "p95": 2928.9920330047607, + "p99": 3037.3120307922363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-12ea18be", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + 
"colorKey": "h200_72b3c469", + "comparisonKey": "2ec640d2a24f2676", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:17.545580+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, 
+ "globalTokens": 1024, + "dispatch": { + "p50": 119.9679970741272, + "p90": 138.17599415779114, + "p95": 144.6080058813095, + "p99": 154.4959992170334 + }, + "combine": { + "p50": 109.15199667215347, + "p90": 121.34400010108948, + "p95": 126.20800733566284, + "p99": 138.59200477600098 + }, + "roundtrip": { + "p50": 200.03199577331543, + "p90": 215.32799303531647, + "p95": 224.0000069141388, + "p99": 237.21599578857422 + }, + "isolatedSum": { + "p50": 229.11999374628067, + "p90": 259.5199942588806, + "p95": 270.81601321697235, + "p99": 293.08800399303436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.2399959564209, + "p90": 162.56000101566315, + "p95": 170.59199512004852, + "p99": 181.88799917697906 + }, + "combine": { + "p50": 146.62399888038635, + "p90": 161.98399662971497, + "p95": 165.95199704170227, + "p99": 174.04800653457642 + }, + "roundtrip": { + "p50": 267.4559950828552, + "p90": 284.4800055027008, + "p95": 291.9040024280548, + "p99": 302.0800054073334 + }, + "isolatedSum": { + "p50": 292.86399483680725, + "p90": 324.5439976453781, + "p95": 336.5439921617508, + "p99": 355.9360057115555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.34400641918182, + "p90": 217.21599996089935, + "p95": 226.49599611759186, + "p99": 239.9040013551712 + }, + "combine": { + "p50": 224.95999932289124, + "p90": 235.9039932489395, + "p95": 243.77599358558655, + "p99": 253.08799743652344 + }, + "roundtrip": { + "p50": 
402.3360013961792, + "p90": 418.11200976371765, + "p95": 426.2720048427582, + "p99": 439.39200043678284 + }, + "isolatedSum": { + "p50": 430.30400574207306, + "p90": 453.11999320983887, + "p95": 470.2719897031784, + "p99": 492.99199879169464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 315.16799330711365, + "p90": 328.15998792648315, + "p95": 332.35201239585876, + "p99": 374.65599179267883 + }, + "combine": { + "p50": 363.072007894516, + "p90": 373.56799840927124, + "p95": 378.1439960002899, + "p99": 389.9199962615967 + }, + "roundtrip": { + "p50": 656.1279892921448, + "p90": 668.2239770889282, + "p95": 673.3760237693787, + "p99": 691.4880275726318 + }, + "isolatedSum": { + "p50": 678.2400012016296, + "p90": 701.7279863357544, + "p95": 710.4960083961487, + "p99": 764.5759880542755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 532.0000052452087, + "p90": 547.7759838104248, + "p95": 554.4639825820923, + "p99": 582.912027835846 + }, + "combine": { + "p50": 626.0799765586853, + "p90": 637.6000046730042, + "p95": 643.5840129852295, + "p99": 682.1439862251282 + }, + "roundtrip": { + "p50": 1129.8240423202515, + "p90": 1148.095965385437, + "p95": 1162.592053413391, + "p99": 1213.7600183486938 + }, + "isolatedSum": { + "p50": 1158.079981803894, + "p90": 1185.375988483429, + "p95": 1198.0479955673218, + "p99": 1265.0560140609741 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + 
"combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 984.0959906578064, + "p90": 999.5520114898682, + "p95": 1004.7680139541626, + "p99": 1043.4880256652832 + }, + "combine": { + "p50": 1114.9120330810547, + "p90": 1126.8160343170166, + "p95": 1134.4959735870361, + "p99": 1167.5200462341309 + }, + "roundtrip": { + "p50": 2065.5999183654785, + "p90": 2082.8158855438232, + "p95": 2091.7439460754395, + "p99": 2846.6238975524902 + }, + "isolatedSum": { + "p50": 2099.008023738861, + "p90": 2126.3680458068848, + "p95": 2139.2639875411987, + "p99": 2211.008071899414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c51cc57f", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h200_3586ca3d", + "comparisonKey": "7e866bb02b8bc46c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:32.406643+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.45600062608719, + "p90": 129.60000336170197, + "p95": 137.5039964914322, + "p99": 146.68799936771393 + }, + "combine": { + "p50": 106.88000172376633, + "p90": 119.80800330638885, + "p95": 126.30400061607361, + "p99": 141.6960060596466 + }, + "roundtrip": { + "p50": 201.08799636363983, + "p90": 215.71199595928192, + "p95": 224.2880016565323, + "p99": 234.8800003528595 + }, + "isolatedSum": { + "p50": 226.33600234985352, + "p90": 249.40800666809082, + "p95": 263.8079971075058, + "p99": 288.38400542736053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.7200070619583, + "p90": 153.6960005760193, + "p95": 158.04800391197205, + "p99": 181.43999576568604 + }, + "combine": { + "p50": 146.464005112648, + "p90": 154.7199934720993, + "p95": 161.28000617027283, + "p99": 180.12799322605133 + }, + "roundtrip": { + "p50": 267.1999931335449, + "p90": 278.6239981651306, + "p95": 289.66400027275085, + "p99": 314.91199135780334 + }, + "isolatedSum": { + "p50": 293.1840121746063, + "p90": 308.4159940481186, + "p95": 319.3280100822449, + "p99": 361.56798899173737 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 205.47200739383698, + "p90": 215.29600024223328, + "p95": 226.01599991321564, + "p99": 236.80000007152557 + }, + "combine": { + "p50": 226.84800624847412, + "p90": 234.52800512313843, + "p95": 241.7600005865097, + "p99": 256.99201226234436 + }, + "roundtrip": { + "p50": 404.03199195861816, + "p90": 413.08799386024475, + "p95": 419.5840060710907, + "p99": 479.0399968624115 + }, + "isolatedSum": { + "p50": 432.3200136423111, + "p90": 449.8240053653717, + "p95": 467.77600049972534, + "p99": 493.79201233386993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 312.4479949474335, + "p90": 331.87198638916016, + "p95": 335.83998680114746, + "p99": 344.7679877281189 + }, + "combine": { + "p50": 359.42399501800537, + "p90": 367.23199486732483, + "p95": 369.8880076408386, + "p99": 380.5760145187378 + }, + "roundtrip": { + 
"p50": 644.864022731781, + "p90": 658.1439971923828, + "p95": 662.1760129928589, + "p99": 682.3040246963501 + }, + "isolatedSum": { + "p50": 671.8719899654388, + "p90": 699.103981256485, + "p95": 705.7279944419861, + "p99": 725.3440022468567 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 532.7680110931396, + "p90": 545.0239777565002, + "p95": 551.8400073051453, + "p99": 622.6239800453186 + }, + "combine": { + "p50": 612.9279732704163, + "p90": 624.288022518158, + "p95": 628.383994102478, + "p99": 664.2559766769409 + }, + "roundtrip": { + "p50": 1117.0560121536255, + "p90": 1132.0639848709106, + "p95": 1142.4640417099, + "p99": 1367.6480054855347 + }, + "isolatedSum": { + "p50": 1145.695984363556, + "p90": 1169.3120002746582, + "p95": 1180.2240014076233, + "p99": 1286.8799567222595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1011.1360549926758, + "p90": 1029.5679569244385, + "p95": 1036.3199710845947, + "p99": 1086.8799686431885 + }, + "combine": { + "p50": 1116.063952445984, + "p90": 1140.928030014038, + "p95": 1148.5439538955688, + "p99": 1270.5600261688232 + }, + "roundtrip": { + "p50": 2088.7041091918945, + "p90": 2108.959913253784, + "p95": 2118.0479526519775, + "p99": 2240.384101867676 + }, + "isolatedSum": { + "p50": 2127.2000074386597, + "p90": 2170.4959869384766, + "p95": 2184.8639249801636, + "p99": 2357.4399948120117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + 
"combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-53a38c50", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_37ab84c8", + "comparisonKey": "479f3f0397bfb048", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:34.843168+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.39200013875961, + "p90": 128.7039965391159, + "p95": 132.38400220870972, + "p99": 139.90400731563568 + }, + "combine": { + "p50": 113.08799684047699, + "p90": 124.83199685811996, + "p95": 132.28799402713776, + "p99": 142.56000518798828 + }, + "roundtrip": { + "p50": 214.36800062656403, + "p90": 229.37600314617157, + "p95": 237.21599578857422, + "p99": 247.6159930229187 + }, + "isolatedSum": { + "p50": 232.4799969792366, + "p90": 253.53599339723587, + "p95": 264.6719962358475, + "p99": 282.46401250362396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 161.72799468040466, + "p90": 172.89599776268005, + "p95": 180.4479956626892, + "p99": 199.39200580120087 + }, + "combine": { + "p50": 159.87199544906616, + "p90": 179.74400520324707, + "p95": 185.95199286937714, + "p99": 204.51200008392334 + }, + "roundtrip": { + "p50": 296.3840067386627, + "p90": 308.25600028038025, + "p95": 315.2639865875244, + "p99": 323.10399413108826 + }, + "isolatedSum": { + "p50": 321.5999901294708, + "p90": 352.6400029659271, + "p95": 366.39998853206635, + "p99": 403.9040058851242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 
235.1360023021698, + "p90": 243.13600361347198, + "p95": 246.24000489711761, + "p99": 251.64800882339478 + }, + "combine": { + "p50": 263.0079984664917, + "p90": 273.4079957008362, + "p95": 276.92800760269165, + "p99": 286.9119942188263 + }, + "roundtrip": { + "p50": 477.05599665641785, + "p90": 496.288001537323, + "p95": 500.12797117233276, + "p99": 511.3599896430969 + }, + "isolatedSum": { + "p50": 498.1440007686615, + "p90": 516.5439993143082, + "p95": 523.1680124998093, + "p99": 538.5600030422211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 378.9120018482208, + "p90": 387.4880075454712, + "p95": 391.2320137023926, + "p99": 403.328001499176 + }, + "combine": { + "p50": 442.2079920768738, + "p90": 452.1920084953308, + "p95": 456.4799964427948, + "p99": 473.7600088119507 + }, + "roundtrip": { + "p50": 799.3599772453308, + "p90": 814.3360018730164, + "p95": 819.2960023880005, + "p99": 843.6800241470337 + }, + "isolatedSum": { + "p50": 821.1199939250946, + "p90": 839.680016040802, + "p95": 847.7120101451874, + "p99": 877.0880103111267 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 668.8960194587708, + "p90": 679.6479821205139, + "p95": 683.2320094108582, + "p99": 703.935980796814 + }, + "combine": { + "p50": 794.7199940681458, + "p90": 804.8959970474243, + "p95": 809.503972530365, + "p99": 835.7759714126587 + }, + "roundtrip": { + "p50": 1436.6400241851807, + "p90": 1450.7839679718018, + "p95": 1459.0719938278198, + 
"p99": 1647.0719575881958 + }, + "isolatedSum": { + "p50": 1463.6160135269165, + "p90": 1484.5439791679382, + "p95": 1492.7359819412231, + "p99": 1539.7119522094727 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1262.4319791793823, + "p90": 1274.3359804153442, + "p95": 1277.9200077056885, + "p99": 1337.5359773635864 + }, + "combine": { + "p50": 1492.5119876861572, + "p90": 1505.3119659423828, + "p95": 1510.6240510940552, + "p99": 1556.447982788086 + }, + "roundtrip": { + "p50": 2730.6559085845947, + "p90": 2749.3441104888916, + "p95": 2763.200044631958, + "p99": 2899.0399837493896 + }, + "isolatedSum": { + "p50": 2754.9439668655396, + "p90": 2779.647946357727, + "p95": 2788.5440587997437, + "p99": 2893.9839601516724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f7ba110a", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h200_2b0beea2", + "comparisonKey": "db8f53ab5dd44645", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:32.822979+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + 
"epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.53600090742111, + "p90": 128.54400277137756, + "p95": 132.7359974384308, + "p99": 135.68000495433807 + }, + "combine": { + "p50": 109.72800105810165, + "p90": 119.4240003824234, + "p95": 126.11199915409088, + "p99": 132.1599930524826 + }, + "roundtrip": { + "p50": 206.4639925956726, + "p90": 219.07199919223785, + "p95": 227.29599475860596, + "p99": 238.17600309848785 + }, + "isolatedSum": { + "p50": 227.26400196552277, + "p90": 247.96800315380096, + "p95": 258.84799659252167, + "p99": 267.8399980068207 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 157.05600380897522, + "p90": 163.29599916934967, + "p95": 166.24000668525696, + "p99": 175.6799966096878 + }, + "combine": { + "p50": 150.91200172901154, + "p90": 159.0079963207245, + "p95": 164.70399498939514, + "p99": 173.15199971199036 + }, + "roundtrip": { + "p50": 285.98400950431824, + "p90": 297.760009765625, + "p95": 304.639995098114, + "p99": 316.8320059776306 + }, + "isolatedSum": { + "p50": 307.96800553798676, + "p90": 322.30399549007416, + "p95": 330.9440016746521, + "p99": 348.83199632167816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 231.48800432682037, + "p90": 239.48800563812256, + "p95": 243.93600225448608, + "p99": 256.7040026187897 + }, + "combine": { + "p50": 249.31199848651886, + "p90": 261.6960108280182, + "p95": 268.095999956131, + "p99": 277.47198939323425 + }, + "roundtrip": { + "p50": 459.1040015220642, + "p90": 475.74400901794434, + "p95": 482.14399814605713, + "p99": 511.135995388031 + }, + "isolatedSum": { + "p50": 480.80000281333923, + "p90": 501.18401646614075, + "p95": 512.0320022106171, + "p99": 534.1759920120239 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 374.2400109767914, + "p90": 384.0959966182709, + "p95": 
387.4239921569824, + "p99": 401.7280042171478 + }, + "combine": { + "p50": 428.47999930381775, + "p90": 442.8800046443939, + "p95": 448.38398694992065, + "p99": 456.60799741744995 + }, + "roundtrip": { + "p50": 778.8479924201965, + "p90": 796.3839769363403, + "p95": 802.3999929428101, + "p99": 838.7839794158936 + }, + "isolatedSum": { + "p50": 802.7200102806091, + "p90": 826.9760012626648, + "p95": 835.8079791069031, + "p99": 858.3360016345978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 660.5759859085083, + "p90": 670.9120273590088, + "p95": 675.9679913520813, + "p99": 795.4879999160767 + }, + "combine": { + "p50": 780.9600234031677, + "p90": 793.0560111999512, + "p95": 796.9599962234497, + "p99": 829.5999765396118 + }, + "roundtrip": { + "p50": 1414.5280122756958, + "p90": 1433.568000793457, + "p95": 1445.0880289077759, + "p99": 1633.72802734375 + }, + "isolatedSum": { + "p50": 1441.536009311676, + "p90": 1463.96803855896, + "p95": 1472.927987575531, + "p99": 1625.0879764556885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1244.0320253372192, + "p90": 1254.2719841003418, + "p95": 1259.0399980545044, + "p99": 1290.2400493621826 + }, + "combine": { + "p50": 1453.7919759750366, + "p90": 1469.0239429473877, + "p95": 1474.33602809906, + "p99": 1668.6400175094604 + }, + "roundtrip": { + "p50": 2672.4159717559814, + "p90": 2700.256109237671, + "p95": 2718.3680534362793, + "p99": 2987.168073654175 + }, + 
"isolatedSum": { + "p50": 2697.824001312256, + "p90": 2723.2959270477295, + "p95": 2733.3760261535645, + "p99": 2958.880066871643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8e111545", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h200_07d2a5ec", + "comparisonKey": "0e3af18220b6b828", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:18.278904+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": 
null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 119.29599940776825, + "p90": 124.54400211572647, + "p95": 127.42400169372559, + "p99": 133.18400084972382 + }, + "combine": { + "p50": 107.61599987745285, + "p90": 112.0000034570694, + "p95": 114.78400230407715, + "p99": 121.60000205039978 + }, + "roundtrip": { + "p50": 199.71199333667755, + "p90": 205.9199959039688, + "p95": 209.6640020608902, + "p99": 220.89600563049316 + }, + "isolatedSum": { + "p50": 226.9119992852211, + "p90": 236.54400557279587, + "p95": 242.20800399780273, + "p99": 254.7840029001236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 151.39199793338776, + "p90": 156.80000185966492, + "p95": 159.61599349975586, + "p99": 164.86400365829468 + }, + "combine": { + "p50": 148.76799285411835, + "p90": 154.14400398731232, + "p95": 156.73600137233734, + "p99": 164.51199352741241 + }, + "roundtrip": { + "p50": 272.352010011673, + "p90": 277.75999903678894, + "p95": 279.6480059623718, + "p99": 287.32800483703613 + }, + "isolatedSum": { + "p50": 300.1599907875061, + "p90": 310.94400584697723, + "p95": 316.3519948720932, + "p99": 329.3759971857071 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 208.22399854660034, + "p90": 214.81600403785706, + "p95": 217.72800385951996, + "p99": 226.4000028371811 + }, + "combine": { + "p50": 225.055992603302, + "p90": 230.9119999408722, + "p95": 233.18399488925934, + "p99": 247.0400035381317 + }, + "roundtrip": { + "p50": 407.4240028858185, + "p90": 415.8720076084137, + "p95": 419.840008020401, + "p99": 444.95999813079834 + }, + "isolatedSum": { + "p50": 433.27999114990234, + "p90": 445.72800397872925, + "p95": 450.9119987487793, + "p99": 473.4400063753128 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 323.0719864368439, + "p90": 329.50401306152344, + "p95": 334.30400490760803, + "p99": 346.015989780426 + }, + "combine": { + "p50": 363.5520040988922, + "p90": 371.0399866104126, + "p95": 372.99200892448425, + "p99": 379.90400195121765 + }, + "roundtrip": { + "p50": 660.5439782142639, + "p90": 669.2479848861694, + "p95": 671.1680293083191, + "p99": 676.639974117279 + }, + "isolatedSum": { + "p50": 686.6239905357361, + "p90": 700.543999671936, + "p95": 707.2960138320923, + "p99": 725.9199917316437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 547.4560260772705, + "p90": 555.3280115127563, + 
"p95": 557.9839944839478, + "p99": 565.9840106964111 + }, + "combine": { + "p50": 627.2000074386597, + "p90": 635.8720064163208, + "p95": 638.4320259094238, + "p99": 645.2800035476685 + }, + "roundtrip": { + "p50": 1149.8240232467651, + "p90": 1160.9920263290405, + "p95": 1166.7519807815552, + "p99": 1264.7360563278198 + }, + "isolatedSum": { + "p50": 1174.6560335159302, + "p90": 1191.2000179290771, + "p95": 1196.4160203933716, + "p99": 1211.2640142440796 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1016.1600112915039, + "p90": 1032.863974571228, + "p95": 1038.6879444122314, + "p99": 1133.8560581207275 + }, + "combine": { + "p50": 1146.9440460205078, + "p90": 1158.079981803894, + "p95": 1162.8799438476562, + "p99": 1229.9840450286865 + }, + "roundtrip": { + "p50": 2131.9680213928223, + "p90": 2146.4641094207764, + "p95": 2153.2158851623535, + "p99": 2238.0480766296387 + }, + "isolatedSum": { + "p50": 2163.1040573120117, + "p90": 2190.943956375122, + "p95": 2201.5678882598877, + "p99": 2363.840103149414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5debf4b6", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h200_5b852b95", + "comparisonKey": "ec5f4cfba74ab135", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:49.349735+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", 
+ "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.14400684833527, + "p90": 133.40799510478973, + "p95": 137.92000710964203, + "p99": 149.9519944190979 + }, + "combine": { + "p50": 113.63200098276138, + "p90": 120.03199756145477, + "p95": 123.48800152540207, + "p99": 133.53599607944489 + }, + "roundtrip": { + "p50": 
215.61600267887115, + "p90": 223.13599288463593, + "p95": 225.53600370883942, + "p99": 232.60800540447235 + }, + "isolatedSum": { + "p50": 239.77600783109665, + "p90": 253.4399926662445, + "p95": 261.4080086350441, + "p99": 283.4879904985428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.71199488639832, + "p90": 171.4559942483902, + "p95": 174.0799993276596, + "p99": 179.71199750900269 + }, + "combine": { + "p50": 161.40800714492798, + "p90": 168.99199783802032, + "p95": 172.57599532604218, + "p99": 180.86400628089905 + }, + "roundtrip": { + "p50": 298.0160117149353, + "p90": 305.63199520111084, + "p95": 311.5839958190918, + "p99": 324.41601157188416 + }, + "isolatedSum": { + "p50": 325.1200020313263, + "p90": 340.4479920864105, + "p95": 346.6559946537018, + "p99": 360.57600378990173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 236.76800727844238, + "p90": 242.97599494457245, + "p95": 245.82399427890778, + "p99": 251.3279914855957 + }, + "combine": { + "p50": 260.9280049800873, + "p90": 270.84800601005554, + "p95": 274.78399872779846, + "p99": 290.3999984264374 + }, + "roundtrip": { + "p50": 476.03198885917664, + "p90": 498.33598732948303, + "p95": 503.39198112487793, + "p99": 527.8080105781555 + }, + "isolatedSum": { + "p50": 497.69601225852966, + "p90": 513.824000954628, + "p95": 520.6079930067062, + "p99": 541.7279899120331 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 
282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 379.68000769615173, + "p90": 388.41599225997925, + "p95": 392.41600036621094, + "p99": 404.38398718833923 + }, + "combine": { + "p50": 440.5120015144348, + "p90": 448.5119879245758, + "p95": 452.06400752067566, + "p99": 460.1599872112274 + }, + "roundtrip": { + "p50": 792.2559976577759, + "p90": 802.3679852485657, + "p95": 806.4320087432861, + "p99": 820.2559947967529 + }, + "isolatedSum": { + "p50": 820.1920092105865, + "p90": 836.927980184555, + "p95": 844.4800078868866, + "p99": 864.5439743995667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 678.3360242843628, + "p90": 690.015971660614, + "p95": 694.208025932312, + "p99": 707.360029220581 + }, + "combine": { + "p50": 778.7839770317078, + "p90": 789.792001247406, + "p95": 795.6479787826538, + "p99": 826.1759877204895 + }, + "roundtrip": { + "p50": 1429.9520254135132, + "p90": 1441.1840438842773, + "p95": 1447.808027267456, + "p99": 1473.2799530029297 + }, + "isolatedSum": { + "p50": 1457.1200013160706, + "p90": 1479.80797290802, + "p95": 1489.8560047149658, + "p99": 1533.5360169410706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1274.623990058899, + "p90": 1287.0399951934814, + "p95": 1291.2960052490234, + "p99": 1339.3919467926025 
+ }, + "combine": { + "p50": 1466.528058052063, + "p90": 1479.80797290802, + "p95": 1484.9920272827148, + "p99": 1500.991940498352 + }, + "roundtrip": { + "p50": 2711.6799354553223, + "p90": 2728.9280891418457, + "p95": 2734.9441051483154, + "p99": 2967.008113861084 + }, + "isolatedSum": { + "p50": 2741.152048110962, + "p90": 2766.8479681015015, + "p95": 2776.2880325317383, + "p99": 2840.3838872909546 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0d950dee", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h200_d63aaf09", + "comparisonKey": "3ae8cddaffd88495", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:02:21.278611+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.50400066375732, + "p90": 130.94399869441986, + "p95": 137.40800321102142, + "p99": 145.05599439144135 + }, + "combine": { + "p50": 105.27999699115753, + "p90": 117.95199662446976, + "p95": 123.77600371837616, + "p99": 127.13600695133209 + }, + "roundtrip": { + "p50": 198.62399995326996, + "p90": 214.08000588417053, + "p95": 220.64000368118286, + "p99": 238.8480007648468 + }, + "isolatedSum": { + "p50": 222.78399765491486, + "p90": 248.89599531888962, + "p95": 261.1840069293976, + "p99": 272.19200134277344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.1200029850006, + "p90": 165.6000018119812, + "p95": 170.49600183963776, + "p99": 176.1920005083084 + }, + "combine": { + "p50": 144.51199769973755, + "p90": 155.03999590873718, + "p95": 161.43999993801117, + "p99": 171.36000096797943 + }, + "roundtrip": { + "p50": 
268.22400093078613, + "p90": 285.95200181007385, + "p95": 291.00799560546875, + "p99": 300.9600043296814 + }, + "isolatedSum": { + "p50": 293.63200068473816, + "p90": 320.6399977207184, + "p95": 331.9360017776489, + "p99": 347.55200147628784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.36799931526184, + "p90": 229.24800217151642, + "p95": 237.98400163650513, + "p99": 329.120010137558 + }, + "combine": { + "p50": 228.06400060653687, + "p90": 246.68799340724945, + "p95": 254.07999753952026, + "p99": 267.2320008277893 + }, + "roundtrip": { + "p50": 405.91999888420105, + "p90": 423.42400550842285, + "p95": 429.05598878860474, + "p99": 437.47198581695557 + }, + "isolatedSum": { + "p50": 434.4319999217987, + "p90": 475.93599557876587, + "p95": 492.0639991760254, + "p99": 596.3520109653473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 314.65598940849304, + "p90": 333.0560028553009, + "p95": 336.09598875045776, + "p99": 342.1440124511719 + }, + "combine": { + "p50": 357.60000348091125, + "p90": 367.5200045108795, + "p95": 371.71199917793274, + "p99": 388.8640105724335 + }, + "roundtrip": { + "p50": 645.2800035476685, + "p90": 657.6640009880066, + "p95": 662.0799899101257, + "p99": 671.8720197677612 + }, + "isolatedSum": { + "p50": 672.2559928894043, + "p90": 700.5760073661804, + "p95": 707.8079879283905, + "p99": 731.0080230236053 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 
622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 532.863974571228, + "p90": 544.704020023346, + "p95": 549.2159724235535, + "p99": 564.3519759178162 + }, + "combine": { + "p50": 616.5440082550049, + "p90": 629.1840076446533, + "p95": 634.8479986190796, + "p99": 657.2480201721191 + }, + "roundtrip": { + "p50": 1119.4239854812622, + "p90": 1134.592056274414, + "p95": 1140.9599781036377, + "p99": 1170.0479984283447 + }, + "isolatedSum": { + "p50": 1149.407982826233, + "p90": 1173.8880276679993, + "p95": 1184.063971042633, + "p99": 1221.5999960899353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 987.0079755783081, + "p90": 1002.4640560150146, + "p95": 1006.4959526062012, + "p99": 1016.1919593811035 + }, + "combine": { + "p50": 1128.0959844589233, + "p90": 1139.7119760513306, + "p95": 1143.7760591506958, + "p99": 1155.5839776992798 + }, + "roundtrip": { + "p50": 2080.22403717041, + "p90": 2096.832036972046, + "p95": 2103.1041145324707, + "p99": 2172.800064086914 + }, + "isolatedSum": { + "p50": 2115.1039600372314, + "p90": 2142.176032066345, + "p95": 2150.272011756897, + "p99": 2171.7759370803833 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f4424f6c", + "identity": 
"h200|deepep|v2|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_c09dca7a", + "comparisonKey": "184b50932b8a2088", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:03:47.989417+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 126.11199915409088, + "p90": 132.9919993877411, + "p95": 136.86400651931763, + "p99": 140.57600498199463 + }, + "combine": { + "p50": 112.99200356006622, + "p90": 125.59999525547028, + "p95": 130.49599528312683, + "p99": 138.62399756908417 + }, + "roundtrip": { + "p50": 214.75200355052948, + "p90": 229.8240065574646, + "p95": 236.86400055885315, + "p99": 245.08799612522125 + }, + "isolatedSum": { + "p50": 239.1040027141571, + "p90": 258.59199464321136, + "p95": 267.36000180244446, + "p99": 279.2000025510788 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 163.42400014400482, + "p90": 170.3999936580658, + "p95": 173.50399494171143, + "p99": 179.00800704956055 + }, + "combine": { + "p50": 159.67999398708344, + "p90": 171.00800573825836, + "p95": 174.72000420093536, + "p99": 182.46400356292725 + }, + "roundtrip": { + "p50": 298.0160117149353, + "p90": 314.2080008983612, + "p95": 322.2399950027466, + "p99": 337.95198798179626 + }, + "isolatedSum": { + "p50": 323.10399413108826, + "p90": 341.40799939632416, + "p95": 348.2239991426468, + "p99": 361.4720106124878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.78400027751923, + "p90": 244.80000138282776, + "p95": 247.0719963312149, + "p99": 254.30399179458618 + }, + "combine": { + "p50": 262.87999749183655, + "p90": 273.53599667549133, + "p95": 
277.21598744392395, + "p99": 295.3599989414215 + }, + "roundtrip": { + "p50": 477.60000824928284, + "p90": 492.15999245643616, + "p95": 496.99199199676514, + "p99": 531.2319993972778 + }, + "isolatedSum": { + "p50": 501.6639977693558, + "p90": 518.3359980583191, + "p95": 524.2879837751389, + "p99": 549.6639907360077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 383.7760090827942, + "p90": 392.767995595932, + "p95": 397.5360095500946, + "p99": 426.2079894542694 + }, + "combine": { + "p50": 442.33599305152893, + "p90": 455.9679925441742, + "p95": 461.0559940338135, + "p99": 476.48000717163086 + }, + "roundtrip": { + "p50": 798.4640002250671, + "p90": 812.9280209541321, + "p95": 821.3440179824829, + "p99": 852.4799942970276 + }, + "isolatedSum": { + "p50": 826.1120021343231, + "p90": 848.7359881401062, + "p95": 858.5920035839081, + "p99": 902.6879966259003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 670.7839965820312, + "p90": 680.512011051178, + "p95": 683.5520267486572, + "p99": 698.8160014152527 + }, + "combine": { + "p50": 798.4319925308228, + "p90": 809.2479705810547, + "p95": 814.8159980773926, + "p99": 830.9760093688965 + }, + "roundtrip": { + "p50": 1440.991997718811, + "p90": 1458.9120149612427, + "p95": 1465.6959772109985, + "p99": 1554.8160076141357 + }, + "isolatedSum": { + "p50": 1469.215989112854, + "p90": 1489.7599816322327, + "p95": 1498.3680248260498, + "p99": 1529.7920107841492 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1263.808012008667, + "p90": 1275.007963180542, + "p95": 1278.0159711837769, + "p99": 1290.6559705734253 + }, + "combine": { + "p50": 1495.2319860458374, + "p90": 1510.5600357055664, + "p95": 1515.8079862594604, + "p99": 1609.7919940948486 + }, + "roundtrip": { + "p50": 2733.4399223327637, + "p90": 2751.1041164398193, + "p95": 2758.5599422454834, + "p99": 3079.7119140625 + }, + "isolatedSum": { + "p50": 2759.0399980545044, + "p90": 2785.5679988861084, + "p95": 2793.8239574432373, + "p99": 2900.447964668274 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d537e0e", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_7b142e34", + "comparisonKey": "c00c5da72cfe851d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:19.849157+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + 
"experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.20799857378006, + "p90": 124.25599992275238, + "p95": 127.77599692344666, + "p99": 133.59999656677246 + }, + "combine": { + "p50": 104.16000336408615, + "p90": 110.04800349473953, + "p95": 113.37599903345108, + "p99": 119.07199770212173 + }, + "roundtrip": { + "p50": 197.2160041332245, + "p90": 204.19199764728546, + "p95": 208.80000293254852, + "p99": 220.64000368118286 + }, + "isolatedSum": { + "p50": 222.3680019378662, + "p90": 234.3040034174919, + "p95": 241.15199595689774, + "p99": 252.6719942688942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 146.5280055999756, + "p90": 158.6879938840866, + "p95": 167.90400445461273, + "p99": 187.391996383667 + }, + "combine": { + "p50": 146.4959979057312, + "p90": 154.2080044746399, + "p95": 158.24000537395477, + "p99": 170.04799842834473 + }, + "roundtrip": { + "p50": 265.4719948768616, + "p90": 272.19200134277344, + "p95": 276.6079902648926, + "p99": 307.8080117702484 + }, + "isolatedSum": { + "p50": 293.0240035057068, + "p90": 312.8959983587265, + "p95": 326.1440098285675, + "p99": 357.4399948120117 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.96800339221954, + "p90": 213.0880057811737, + "p95": 226.27200186252594, + "p99": 238.78400027751923 + }, + "combine": { + "p50": 224.35200214385986, + "p90": 233.60000550746918, + "p95": 240.31999707221985, + "p99": 249.40800666809082 + }, + "roundtrip": { + "p50": 401.12000703811646, + "p90": 416.6080057621002, + "p95": 431.4559996128082, + "p99": 461.5040123462677 + }, + "isolatedSum": { + "p50": 428.3200055360794, + "p90": 446.6880112886429, + "p95": 466.5919989347458, + "p99": 488.19200694561005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 311.7760121822357, + "p90": 318.9440071582794, + "p95": 322.4320113658905, + "p99": 332.67199993133545 + }, + "combine": { + "p50": 354.559987783432, + "p90": 361.6960048675537, + 
"p95": 365.2479946613312, + "p99": 381.632000207901 + }, + "roundtrip": { + "p50": 643.4879899024963, + "p90": 653.6639928817749, + "p95": 658.3999991416931, + "p99": 736.3839745521545 + }, + "isolatedSum": { + "p50": 666.3359999656677, + "p90": 680.6400120258331, + "p95": 687.6800060272217, + "p99": 714.3040001392365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 526.8800258636475, + "p90": 538.3679866790771, + "p95": 550.1440167427063, + "p99": 584.7039818763733 + }, + "combine": { + "p50": 620.3200221061707, + "p90": 629.0559768676758, + "p95": 632.5759887695312, + "p99": 647.167980670929 + }, + "roundtrip": { + "p50": 1120.3199625015259, + "p90": 1132.7999830245972, + "p95": 1146.0479497909546, + "p99": 1220.128059387207 + }, + "isolatedSum": { + "p50": 1147.2000479698181, + "p90": 1167.423963546753, + "p95": 1182.7200055122375, + "p99": 1231.8719625473022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.4959878921509, + "p90": 1017.1840190887451, + "p95": 1026.3999700546265, + "p99": 1396.8960046768188 + }, + "combine": { + "p50": 1124.351978302002, + "p90": 1136.0960006713867, + "p95": 1142.5280570983887, + "p99": 1177.6319742202759 + }, + "roundtrip": { + "p50": 2080.7039737701416, + "p90": 2099.0400314331055, + "p95": 2104.736089706421, + "p99": 2212.3520374298096 + }, + "isolatedSum": { + "p50": 2118.847966194153, + "p90": 2153.280019760132, + "p95": 2168.928027153015, + "p99": 2574.5279788970947 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a5ce4d90", + "identity": "h200|deepep|v2|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_3b36fa26", + "comparisonKey": "ca47baa02b242617", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:24.218435+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": 
"2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.20799857378006, + "p90": 123.16799908876419, + "p95": 126.43200159072876, + "p99": 131.42399489879608 + }, + "combine": { + "p50": 105.12000322341919, + "p90": 109.8560020327568, + "p95": 112.89600282907486, + "p99": 119.03999745845795 + }, + "roundtrip": { + "p50": 197.34400510787964, + "p90": 203.99999618530273, + "p95": 206.68800175189972, + "p99": 214.4320011138916 + }, + "isolatedSum": { + "p50": 223.32800179719925, + "p90": 233.024001121521, + "p95": 239.32800441980362, + "p99": 250.46399235725403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 144.67200636863708, + "p90": 150.751993060112, + "p95": 153.9199948310852, + "p99": 162.75200247764587 + }, + "combine": { + "p50": 146.17599546909332, + "p90": 152.79999375343323, + "p95": 158.49600732326508, + "p99": 190.8479928970337 + }, + "roundtrip": { + "p50": 267.10399985313416, + "p90": 273.8560140132904, + "p95": 277.3439884185791, + "p99": 326.911985874176 + }, + "isolatedSum": { + "p50": 290.8480018377304, + "p90": 303.5519868135452, + "p95": 312.4160021543503, + "p99": 353.59999537467957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 203.93599569797516, + "p90": 210.59200167655945, + "p95": 213.56800198554993, + "p99": 220.8320051431656 + }, + "combine": { + "p50": 224.12799298763275, + "p90": 229.95199263095856, + "p95": 232.57599771022797, + "p99": 251.583993434906 + }, + "roundtrip": { + "p50": 401.08799934387207, + "p90": 409.56801176071167, + "p95": 414.46399688720703, + "p99": 440.8639967441559 + }, + "isolatedSum": { + "p50": 428.0639886856079, + "p90": 440.543994307518, + "p95": 446.1439996957779, + "p99": 472.4159985780716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 313.1200075149536, + "p90": 320.8320140838623, + "p95": 325.24800300598145, + "p99": 343.392014503479 + }, + "combine": { + "p50": 355.0400137901306, + "p90": 362.2719943523407, + "p95": 365.31201004981995, + "p99": 402.49601006507874 + }, + "roundtrip": { + "p50": 643.5520052909851, + "p90": 670.7199811935425, + "p95": 701.2799978256226, + "p99": 764.352023601532 + }, + "isolatedSum": { + "p50": 668.1600213050842, + "p90": 683.104008436203, + "p95": 690.5600130558014, + "p99": 745.8880245685577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 526.7199873924255, + "p90": 534.9439978599548, + "p95": 540.4800176620483, + "p99": 590.6239748001099 + }, + "combine": { + "p50": 621.1839914321899, + "p90": 630.8799982070923, + "p95": 635.7439756393433, + "p99": 
663.8079881668091 + }, + "roundtrip": { + "p50": 1120.7040548324585, + "p90": 1131.5200328826904, + "p95": 1135.2959871292114, + "p99": 1160.4160070419312 + }, + "isolatedSum": { + "p50": 1147.9039788246155, + "p90": 1165.8239960670471, + "p95": 1176.2239933013916, + "p99": 1254.431962966919 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 994.2079782485962, + "p90": 1015.2000188827515, + "p95": 1022.752046585083, + "p99": 1050.2400398254395 + }, + "combine": { + "p50": 1124.608039855957, + "p90": 1134.559988975525, + "p95": 1138.4639739990234, + "p99": 1180.191993713379 + }, + "roundtrip": { + "p50": 2082.592010498047, + "p90": 2101.792097091675, + "p95": 2110.0480556488037, + "p99": 2419.2960262298584 + }, + "isolatedSum": { + "p50": 2118.816018104553, + "p90": 2149.7600078582764, + "p95": 2161.2160205841064, + "p99": 2230.4320335388184 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0eb0b914", + "identity": "h200|deepep|v2|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_0d2bf145", + "comparisonKey": "eab78e3f28177381", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:07.980776+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.0880029797554, + "p90": 119.10399794578552, + "p95": 124.09599870443344, + "p99": 140.25600254535675 + }, + "combine": { + "p50": 106.62399977445602, + "p90": 120.25599926710129, + "p95": 124.22399967908859, + "p99": 131.9040060043335 + }, + "roundtrip": { + "p50": 187.45599687099457, + "p90": 198.2399970293045, + "p95": 203.93599569797516, + "p99": 214.4320011138916 + }, + "isolatedSum": { + "p50": 211.71200275421143, + "p90": 
239.3599972128868, + "p95": 248.31999838352203, + "p99": 272.16000854969025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 137.2479945421219, + "p90": 150.94399452209473, + "p95": 155.87200224399567, + "p99": 163.10399770736694 + }, + "combine": { + "p50": 146.40000462532043, + "p90": 158.27199816703796, + "p95": 163.5199934244156, + "p99": 174.3679940700531 + }, + "roundtrip": { + "p50": 255.77598810195923, + "p90": 269.9199914932251, + "p95": 274.6559977531433, + "p99": 282.9119861125946 + }, + "isolatedSum": { + "p50": 283.6479991674423, + "p90": 309.2159926891327, + "p95": 319.39199566841125, + "p99": 337.47199177742004 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 194.815993309021, + "p90": 208.95999670028687, + "p95": 213.72799575328827, + "p99": 226.46400332450867 + }, + "combine": { + "p50": 226.52800381183624, + "p90": 236.9920015335083, + "p95": 241.34400486946106, + "p99": 256.22400641441345 + }, + "roundtrip": { + "p50": 392.15999841690063, + "p90": 400.7680118083954, + "p95": 405.7280123233795, + "p99": 413.05598616600037 + }, + "isolatedSum": { + "p50": 421.34399712085724, + "p90": 445.95199823379517, + "p95": 455.07200062274933, + "p99": 482.6880097389221 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 307.6159954071045, + "p90": 324.8319923877716, + "p95": 329.8879861831665, + "p99": 343.77598762512207 + }, + "combine": { + "p50": 359.391987323761, + "p90": 371.96800112724304, + "p95": 376.1279881000519, + "p99": 384.5439851284027 + }, + "roundtrip": { + "p50": 640.7679915428162, + "p90": 655.6800007820129, + "p95": 663.8399958610535, + "p99": 712.0320200920105 + }, + "isolatedSum": { + "p50": 667.0079827308655, + "p90": 696.7999935150146, + "p95": 706.0159742832184, + "p99": 728.3199727535248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 523.4559774398804, + "p90": 534.6559882164001, + "p95": 538.0480289459229, + "p99": 553.8880228996277 + }, + "combine": { + "p50": 623.6479878425598, + "p90": 640.3520107269287, + "p95": 643.6160206794739, + "p99": 659.712016582489 + }, + "roundtrip": { + "p50": 1114.4959926605225, + "p90": 1133.0879926681519, + "p95": 1142.016053199768, + "p99": 1282.1120023727417 + }, + "isolatedSum": { + "p50": 1147.1039652824402, + "p90": 1175.0079989433289, + "p95": 1181.6640496253967, + "p99": 1213.6000394821167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 972.7360010147095, + "p90": 994.5600032806396, + "p95": 1003.7120580673218, + "p99": 1046.2080240249634 + }, + "combine": { + "p50": 1128.9279460906982, + "p90": 1141.3120031356812, + "p95": 1147.1359729766846, + "p99": 1306.9440126419067 + }, + "roundtrip": { + 
"p50": 2060.192108154297, + "p90": 2081.88796043396, + "p95": 2093.280076980591, + "p99": 2233.9839935302734 + }, + "isolatedSum": { + "p50": 2101.6639471054077, + "p90": 2135.872006416321, + "p95": 2150.8480310440063, + "p99": 2353.15203666687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d4cd7da8", + "identity": "h200|deepep|v2|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "ff263f23e07173dc", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:52.757957+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 211.71200275421143, + "p90": 249.34400618076324, + "p95": 265.6320035457611, + "p99": 292.2559976577759 + }, + "combine": { + "p50": 74.75200295448303, + "p90": 85.31200140714645, + "p95": 93.50399672985077, + "p99": 100.92800110578537 + }, + "roundtrip": { + "p50": 269.9199914932251, + "p90": 305.6640028953552, + "p95": 312.608003616333, + "p99": 334.9759876728058 + }, + "isolatedSum": { + "p50": 286.46400570869446, + "p90": 334.6560075879097, + "p95": 359.1360002756119, + "p99": 393.18399876356125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 237.8239929676056, + "p90": 265.0560140609741, + "p95": 272.7360129356384, + "p99": 290.46401381492615 + }, + "combine": { + "p50": 98.7199991941452, + "p90": 109.95200276374817, + "p95": 117.27999895811081, + "p99": 124.86399710178375 + }, + "roundtrip": { + "p50": 319.487988948822, + "p90": 349.66400265693665, + "p95": 364.3200099468231, + "p99": 416.51201248168945 + }, + "isolatedSum": { + "p50": 336.5439921617508, + "p90": 375.0080168247223, + "p95": 390.01601189374924, + "p99": 415.3280109167099 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 306.2080144882202, + "p90": 339.9040102958679, + "p95": 344.67199444770813, + "p99": 368.3519959449768 + }, + "combine": { + "p50": 149.50400590896606, + "p90": 161.72799468040466, + "p95": 168.73599588871002, + "p99": 176.60799622535706 + }, + "roundtrip": { + "p50": 441.18401408195496, + "p90": 470.68798542022705, + "p95": 476.1919975280762, + "p99": 507.55202770233154 + }, + "isolatedSum": { + "p50": 455.7120203971863, + "p90": 501.6320049762726, + "p95": 513.4079903364182, + "p99": 544.9599921703339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 447.7120041847229, + "p90": 478.08000445365906, + "p95": 484.99199748039246, + "p99": 530.239999294281 + }, + "combine": { + "p50": 249.15200471878052, + "p90": 258.5600018501282, + "p95": 262.84798979759216, + "p99": 279.1680097579956 + }, + "roundtrip": { + "p50": 672.4799871444702, + "p90": 695.4560279846191, + "p95": 706.1759829521179, + "p99": 756.1920285224915 + }, + "isolatedSum": { + "p50": 696.8640089035034, + "p90": 736.6400063037872, + "p95": 747.8399872779846, + "p99": 809.4080090522766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 732.8959703445435, + "p90": 746.1439967155457, + 
"p95": 756.767988204956, + "p99": 798.6239790916443 + }, + "combine": { + "p50": 421.6639995574951, + "p90": 433.6639940738678, + "p95": 437.9200041294098, + "p99": 459.1040015220642 + }, + "roundtrip": { + "p50": 1143.3919668197632, + "p90": 1169.4400310516357, + "p95": 1183.39204788208, + "p99": 1411.6159677505493 + }, + "isolatedSum": { + "p50": 1154.5599699020386, + "p90": 1179.8079907894135, + "p95": 1194.6879923343658, + "p99": 1257.7279806137085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1371.0399866104126, + "p90": 1382.591962814331, + "p95": 1388.8959884643555, + "p99": 1451.1359930038452 + }, + "combine": { + "p50": 771.3599801063538, + "p90": 779.7759771347046, + "p95": 781.7280292510986, + "p99": 823.743999004364 + }, + "roundtrip": { + "p50": 2141.632080078125, + "p90": 2158.1759452819824, + "p95": 2163.424015045166, + "p99": 2274.944067001343 + }, + "isolatedSum": { + "p50": 2142.3999667167664, + "p90": 2162.3679399490356, + "p95": 2170.624017715454, + "p99": 2274.8799920082092 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e7209fdc", + "identity": "h200|deepep|v2|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "fdd280c7ec80641f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:49.660731+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": 
"deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 210.4959934949875, + "p90": 228.83200645446777, + "p95": 236.06400191783905, + "p99": 252.06398963928223 + }, + "combine": { + "p50": 83.03999900817871, + "p90": 86.46400272846222, + "p95": 88.79999816417694, + "p99": 95.16800194978714 + }, + "roundtrip": { + "p50": 282.01600909233093, + "p90": 
302.91199684143066, + "p95": 310.3039860725403, + "p99": 329.79199290275574 + }, + "isolatedSum": { + "p50": 293.5359925031662, + "p90": 315.29600918293, + "p95": 324.864000082016, + "p99": 347.23199158906937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 251.48800015449524, + "p90": 283.9680016040802, + "p95": 305.2160143852234, + "p99": 404.38398718833923 + }, + "combine": { + "p50": 113.6000007390976, + "p90": 125.59999525547028, + "p95": 129.18399274349213, + "p99": 139.0720009803772 + }, + "roundtrip": { + "p50": 347.2000062465668, + "p90": 370.88000774383545, + "p95": 387.64798641204834, + "p99": 415.6799912452698 + }, + "isolatedSum": { + "p50": 365.08800089359283, + "p90": 409.5679968595505, + "p95": 434.4000071287155, + "p99": 543.4559881687164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 339.29601311683655, + "p90": 360.79999804496765, + "p95": 377.02399492263794, + "p99": 399.07199144363403 + }, + "combine": { + "p50": 174.49599504470825, + "p90": 180.25599420070648, + "p95": 184.57600474357605, + "p99": 195.99999487400055 + }, + "roundtrip": { + "p50": 492.48000979423523, + "p90": 515.7759785652161, + "p95": 529.9199819564819, + "p99": 629.6319961547852 + }, + "isolatedSum": { + "p50": 513.7920081615448, + "p90": 541.0559922456741, + "p95": 561.599999666214, + "p99": 595.0719863176346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 
5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 501.0560154914856, + "p90": 533.6639881134033, + "p95": 549.2159724235535, + "p99": 655.456006526947 + }, + "combine": { + "p50": 278.0480086803436, + "p90": 289.8879945278168, + "p95": 295.9359884262085, + "p99": 304.28799986839294 + }, + "roundtrip": { + "p50": 762.4959945678711, + "p90": 792.5119996070862, + "p95": 807.5519800186157, + "p99": 864.0000224113464 + }, + "isolatedSum": { + "p50": 779.1040241718292, + "p90": 823.5519826412201, + "p95": 845.151960849762, + "p99": 959.74400639534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 863.4560108184814, + "p90": 872.4160194396973, + "p95": 877.1839737892151, + "p99": 1044.8640584945679 + }, + "combine": { + "p50": 485.9200119972229, + "p90": 495.2000081539154, + "p95": 500.5120038986206, + "p99": 545.8239912986755 + }, + "roundtrip": { + "p50": 1328.544020652771, + "p90": 1341.920018196106, + "p95": 1350.0800132751465, + "p99": 1499.1040229797363 + }, + "isolatedSum": { + "p50": 1349.3760228157043, + "p90": 1367.6160275936127, + "p95": 1377.6959776878357, + "p99": 1590.6880497932434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1622.3679780960083, + "p90": 1635.8400583267212, + "p95": 1644.8320150375366, + "p99": 2059.1039657592773 + }, + "combine": { + 
"p50": 881.9839954376221, + "p90": 892.6399946212769, + "p95": 896.6720104217529, + "p99": 930.9120178222656 + }, + "roundtrip": { + "p50": 2487.391948699951, + "p90": 2502.17604637146, + "p95": 2509.471893310547, + "p99": 2745.2480792999268 + }, + "isolatedSum": { + "p50": 2504.3519735336304, + "p90": 2528.480052947998, + "p95": 2541.5040254592896, + "p99": 2990.015983581543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c539a3fc", + "identity": "h200|deepep|v2|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "2ac03938d04d73e9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:47.813993+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + 
}, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 212.92799711227417, + "p90": 255.67999482154846, + "p95": 262.11199164390564, + "p99": 283.1040024757385 + }, + "combine": { + "p50": 88.8959988951683, + "p90": 98.9760011434555, + "p95": 107.58399963378906, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 294.40000653266907, + "p90": 337.2479975223541, + "p95": 346.17599844932556, + "p99": 362.7519905567169 + }, + "isolatedSum": { + "p50": 301.8239960074425, + "p90": 354.65599596500397, + "p95": 369.6959912776947, + "p99": 396.06400579214096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 264.5440101623535, + "p90": 312.9599988460541, + "p95": 319.5520043373108, + "p99": 334.01599526405334 + }, + "combine": { + "p50": 122.78400361537933, + "p90": 138.17599415779114, + "p95": 142.20799505710602, + "p99": 149.75999295711517 + }, + "roundtrip": { + "p50": 379.4560134410858, + "p90": 416.4159893989563, + "p95": 422.33601212501526, + "p99": 510.24001836776733 + }, + "isolatedSum": { + "p50": 
387.32801377773285, + "p90": 451.1359930038452, + "p95": 461.7599993944168, + "p99": 483.7759882211685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 381.76000118255615, + "p90": 430.30399084091187, + "p95": 447.519987821579, + "p99": 492.031991481781 + }, + "combine": { + "p50": 193.85600090026855, + "p90": 203.10400426387787, + "p95": 208.92800390720367, + "p99": 216.19200706481934 + }, + "roundtrip": { + "p50": 546.0799932479858, + "p90": 570.6239938735962, + "p95": 580.128014087677, + "p99": 597.599983215332 + }, + "isolatedSum": { + "p50": 575.6160020828247, + "p90": 633.4079951047897, + "p95": 656.4479917287827, + "p99": 708.2239985466003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 572.8639960289001, + "p90": 608.1600189208984, + "p95": 620.8959817886353, + "p99": 655.8719873428345 + }, + "combine": { + "p50": 312.51201033592224, + "p90": 326.81599259376526, + "p95": 329.53599095344543, + "p99": 337.3759984970093 + }, + "roundtrip": { + "p50": 867.8079843521118, + "p90": 900.8319973945618, + "p95": 919.2320108413696, + "p99": 1772.447943687439 + }, + "isolatedSum": { + "p50": 885.3760063648224, + "p90": 934.9760115146637, + "p95": 950.4319727420807, + "p99": 993.2479858398438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + 
}, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 989.7599816322327, + "p90": 998.5920190811157, + "p95": 1003.9999485015869, + "p99": 1039.5840406417847 + }, + "combine": { + "p50": 544.9280142784119, + "p90": 552.8320074081421, + "p95": 555.9999942779541, + "p99": 567.3279762268066 + }, + "roundtrip": { + "p50": 1519.10400390625, + "p90": 1533.3119630813599, + "p95": 1536.8640422821045, + "p99": 1549.6959686279297 + }, + "isolatedSum": { + "p50": 1534.6879959106445, + "p90": 1551.4240264892578, + "p95": 1559.999942779541, + "p99": 1606.9120168685913 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1869.8240518569946, + "p90": 1882.8799724578857, + "p95": 1888.0959749221802, + "p99": 2121.5999126434326 + }, + "combine": { + "p50": 1002.784013748169, + "p90": 1013.4079456329346, + "p95": 1017.408013343811, + "p99": 1034.3999862670898 + }, + "roundtrip": { + "p50": 2938.87996673584, + "p90": 2962.6879692077637, + "p95": 2972.575902938843, + "p99": 3174.2401123046875 + }, + "isolatedSum": { + "p50": 2872.6080656051636, + "p90": 2896.2879180908203, + "p95": 2905.503988265991, + "p99": 3155.9998989105225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-edb55a5b", + "identity": "h200|deepep|v2|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_a06b3704", + "comparisonKey": "d59fde0cac4ad6cb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:04.796977+00:00", 
+ "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 88.22400122880936, + "p90": 96.03200107812881, + "p95": 100.16000270843506, + "p99": 119.90399658679962 + }, + "combine": { + "p50": 98.27200323343277, + "p90": 
101.69599950313568, + "p95": 104.2879968881607, + "p99": 112.09599673748016 + }, + "roundtrip": { + "p50": 212.89600431919098, + "p90": 220.768004655838, + "p95": 229.79199886322021, + "p99": 264.44798707962036 + }, + "isolatedSum": { + "p50": 186.49600446224213, + "p90": 197.7280005812645, + "p95": 204.44799959659576, + "p99": 231.99999332427979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 110.46399921178818, + "p90": 161.1199975013733, + "p95": 164.12800550460815, + "p99": 173.15199971199036 + }, + "combine": { + "p50": 141.4400041103363, + "p90": 172.992005944252, + "p95": 175.48799514770508, + "p99": 183.20000171661377 + }, + "roundtrip": { + "p50": 316.8320059776306, + "p90": 370.4639971256256, + "p95": 374.6879994869232, + "p99": 382.207989692688 + }, + "isolatedSum": { + "p50": 251.90400332212448, + "p90": 334.1120034456253, + "p95": 339.61600065231323, + "p99": 356.3520014286041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 138.87999951839447, + "p90": 147.64800667762756, + "p95": 152.73599326610565, + "p99": 165.0879979133606 + }, + "combine": { + "p50": 218.59200298786163, + "p90": 224.2559939622879, + "p95": 226.33600234985352, + "p99": 234.3679964542389 + }, + "roundtrip": { + "p50": 500.09602308273315, + "p90": 550.432026386261, + "p95": 557.3760271072388, + "p99": 565.4079914093018 + }, + "isolatedSum": { + "p50": 357.4720025062561, + "p90": 371.90400063991547, + "p95": 379.07199561595917, + "p99": 
399.4559943675995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 204.79999482631683, + "p90": 211.29600703716278, + "p95": 217.02399849891663, + "p99": 224.83199834823608 + }, + "combine": { + "p50": 352.03200578689575, + "p90": 359.391987323761, + "p95": 363.99999260902405, + "p99": 370.59199810028076 + }, + "roundtrip": { + "p50": 837.9200100898743, + "p90": 850.1440286636353, + "p95": 859.3599796295166, + "p99": 899.7120261192322 + }, + "isolatedSum": { + "p50": 556.8320006132126, + "p90": 570.6879943609238, + "p95": 581.0239911079407, + "p99": 595.4239964485168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 332.73598551750183, + "p90": 342.9439961910248, + "p95": 351.1039912700653, + "p99": 375.10401010513306 + }, + "combine": { + "p50": 615.3920292854309, + "p90": 625.5040168762207, + "p95": 628.6720037460327, + "p99": 639.680027961731 + }, + "roundtrip": { + "p50": 1511.199951171875, + "p90": 1523.8720178604126, + "p95": 1528.2560586929321, + "p99": 1556.928038597107 + }, + "isolatedSum": { + "p50": 948.1280148029327, + "p90": 968.4480130672455, + "p95": 979.775995016098, + "p99": 1014.784038066864 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + 
"p50": 600.7040143013, + "p90": 608.2559823989868, + "p95": 610.6879711151123, + "p99": 617.2800064086914 + }, + "combine": { + "p50": 1122.5279569625854, + "p90": 1133.8239908218384, + "p95": 1137.0559930801392, + "p99": 1151.3919830322266 + }, + "roundtrip": { + "p50": 2856.6720485687256, + "p90": 2872.607946395874, + "p95": 2877.6960372924805, + "p99": 3041.5360927581787 + }, + "isolatedSum": { + "p50": 1723.2319712638855, + "p90": 1742.0799732208252, + "p95": 1747.7439641952515, + "p99": 1768.671989440918 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a738ec03", + "identity": "h200|deepep|v2|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "1faf65aacffc990e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:06.735599+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + 
"deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 221.76000475883484, + "p90": 241.56799912452698, + "p95": 253.63200902938843, + "p99": 446.55999541282654 + }, + "combine": { + "p50": 97.47199714183807, + "p90": 101.82400047779083, + "p95": 104.35199737548828, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 306.0480058193207, + "p90": 324.12800192832947, + "p95": 331.87198638916016, + "p99": 379.64800000190735 + }, + "isolatedSum": { + "p50": 319.2320019006729, + "p90": 343.3919996023178, + "p95": 357.9840064048767, + "p99": 556.319996714592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 279.1999876499176, + "p90": 295.00800371170044, + "p95": 300.79999566078186, + "p99": 320.19200921058655 + }, + "combine": { + "p50": 137.95199990272522, + "p90": 142.62400567531586, + "p95": 145.02400159835815, + "p99": 151.13599598407745 + }, + 
"roundtrip": { + "p50": 407.3919951915741, + "p90": 424.47999119758606, + "p95": 432.48000741004944, + "p99": 472.6400077342987 + }, + "isolatedSum": { + "p50": 417.1519875526428, + "p90": 437.6320093870163, + "p95": 445.82399725914, + "p99": 471.328005194664 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 388.63998651504517, + "p90": 407.0720076560974, + "p95": 415.6799912452698, + "p99": 475.48800706863403 + }, + "combine": { + "p50": 217.47200191020966, + "p90": 221.8559980392456, + "p95": 224.89599883556366, + "p99": 230.97600042819977 + }, + "roundtrip": { + "p50": 594.5600271224976, + "p90": 611.2959980964661, + "p95": 617.5040006637573, + "p99": 668.9919829368591 + }, + "isolatedSum": { + "p50": 606.1119884252548, + "p90": 628.928005695343, + "p95": 640.5759900808334, + "p99": 706.4640074968338 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 604.4480204582214, + "p90": 619.5200085639954, + "p95": 626.7520189285278, + "p99": 659.0399742126465 + }, + "combine": { + "p50": 349.5999872684479, + "p90": 356.54398798942566, + "p95": 359.6160113811493, + "p99": 372.0319867134094 + }, + "roundtrip": { + "p50": 943.0080056190491, + "p90": 970.8799719810486, + "p95": 1007.9360008239746, + "p99": 1117.4399852752686 + }, + "isolatedSum": { + "p50": 954.0480077266693, + "p90": 976.063996553421, + "p95": 986.3680303096771, + "p99": 1031.071960926056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + 
"combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1110.368013381958, + "p90": 1124.1919994354248, + "p95": 1133.9199542999268, + "p99": 1574.112057685852 + }, + "combine": { + "p50": 616.5440082550049, + "p90": 625.8559823036194, + "p95": 629.4080018997192, + "p99": 646.399974822998 + }, + "roundtrip": { + "p50": 1698.7839937210083, + "p90": 1711.0079526901245, + "p95": 1718.6880111694336, + "p99": 1847.7439880371094 + }, + "isolatedSum": { + "p50": 1726.912021636963, + "p90": 1750.0479817390442, + "p95": 1763.327956199646, + "p99": 2220.51203250885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2103.0399799346924, + "p90": 2112.5760078430176, + "p95": 2121.72794342041, + "p99": 2324.608087539673 + }, + "combine": { + "p50": 1121.3120222091675, + "p90": 1133.0879926681519, + "p95": 1136.639952659607, + "p99": 1151.2000560760498 + }, + "roundtrip": { + "p50": 3221.407890319824, + "p90": 3241.0240173339844, + "p95": 3251.807928085327, + "p99": 3333.184003829956 + }, + "isolatedSum": { + "p50": 3224.35200214386, + "p90": 3245.6640005111694, + "p95": 3258.367896080017, + "p99": 3475.8081436157227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4cd038b0", + "identity": 
"h200|deepep|v2|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_ecc1d7a4", + "comparisonKey": "734e64cb34d78e97", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:55.219818+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + 
"rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 223.10400009155273, + "p90": 262.688010931015, + "p95": 270.9119915962219, + "p99": 315.2959942817688 + }, + "combine": { + "p50": 97.120001912117, + "p90": 103.42399775981903, + "p95": 111.58400028944016, + "p99": 117.40799993276596 + }, + "roundtrip": { + "p50": 312.99200654029846, + "p90": 347.4879860877991, + "p95": 357.31199383735657, + "p99": 398.75200390815735 + }, + "isolatedSum": { + "p50": 320.22400200366974, + "p90": 366.11200869083405, + "p95": 382.4959918856621, + "p99": 432.70399421453476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 284.7679853439331, + "p90": 309.5040023326874, + "p95": 333.6319923400879, + "p99": 357.56799578666687 + }, + "combine": { + "p50": 140.83200693130493, + "p90": 150.91200172901154, + "p95": 157.56799280643463, + "p99": 163.39200735092163 + }, + "roundtrip": { + "p50": 412.1600091457367, + "p90": 444.19199228286743, + "p95": 455.1680088043213, + "p99": 473.2159972190857 + }, + "isolatedSum": { + "p50": 425.59999227523804, + "p90": 460.4160040616989, + "p95": 491.1999851465225, + "p99": 520.9600031375885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 400.67198872566223, + "p90": 439.7760033607483, + "p95": 452.83201336860657, + "p99": 491.93599820137024 + }, + "combine": { + "p50": 218.4000015258789, + "p90": 229.50400412082672, + "p95": 235.87200045585632, + "p99": 243.96799504756927 + }, + 
"roundtrip": { + "p50": 609.6320152282715, + "p90": 648.8639712333679, + "p95": 665.8880114555359, + "p99": 684.6399903297424 + }, + "isolatedSum": { + "p50": 619.0719902515411, + "p90": 669.280007481575, + "p95": 688.7040138244629, + "p99": 735.9039932489395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 616.2559986114502, + "p90": 646.2720036506653, + "p95": 657.0559740066528, + "p99": 673.1200218200684 + }, + "combine": { + "p50": 351.99999809265137, + "p90": 359.5519959926605, + "p95": 363.3280098438263, + "p99": 371.16798758506775 + }, + "roundtrip": { + "p50": 949.3759870529175, + "p90": 975.8080244064331, + "p95": 986.2719774246216, + "p99": 1060.863971710205 + }, + "isolatedSum": { + "p50": 968.2559967041016, + "p90": 1005.8239996433258, + "p95": 1020.3839838504791, + "p99": 1044.288009405136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1110.975980758667, + "p90": 1121.5039491653442, + "p95": 1124.4159936904907, + "p99": 1135.6159448623657 + }, + "combine": { + "p50": 607.8400015830994, + "p90": 618.4639930725098, + "p95": 621.9840049743652, + "p99": 632.7360272407532 + }, + "roundtrip": { + "p50": 1694.3360567092896, + "p90": 1706.6880464553833, + "p95": 1712.3199701309204, + "p99": 1787.8719568252563 + }, + "isolatedSum": { + "p50": 1718.8159823417664, + "p90": 1739.967942237854, + "p95": 1746.399998664856, + "p99": 1768.351972103119 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + 
"combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2100.224018096924, + "p90": 2110.624074935913, + "p95": 2116.5120601654053, + "p99": 2185.0879192352295 + }, + "combine": { + "p50": 1110.1759672164917, + "p90": 1120.7040548324585, + "p95": 1124.6399879455566, + "p99": 1138.1759643554688 + }, + "roundtrip": { + "p50": 3205.984115600586, + "p90": 3234.879970550537, + "p95": 3241.856098175049, + "p99": 3389.5039558410645 + }, + "isolatedSum": { + "p50": 3210.3999853134155, + "p90": 3231.3281297683716, + "p95": 3241.152048110962, + "p99": 3323.2638835906982 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0d7dfa0f", + "identity": "h200|deepep|v2|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_6e8d2608", + "comparisonKey": "f83a93541a28db1b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:03.332238+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_6", + "sku": "h200", + "backend": "deepep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep v2 · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + 
"unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "v2", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "2.0.0+af9a040", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 74.5600014925003, + "p90": 82.78399705886841, + "p95": 89.02399986982346, + "p99": 100.54399818181992 + }, + "combine": { + "p50": 97.69599884748459, + "p90": 104.80000078678131, + "p95": 110.46399921178818, + "p99": 118.94399672746658 + }, + "roundtrip": { + "p50": 198.84799420833588, + "p90": 208.8959962129593, + "p95": 227.26400196552277, + "p99": 276.7679989337921 + }, + "isolatedSum": { + "p50": 172.2560003399849, + "p90": 187.58399784564972, + "p95": 199.48799908161163, + "p99": 219.4879949092865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + 
"dispatch": { + "p50": 92.06400066614151, + "p90": 98.01600128412247, + "p95": 102.46399790048599, + "p99": 108.83200168609619 + }, + "combine": { + "p50": 140.00000059604645, + "p90": 147.93600142002106, + "p95": 153.3759981393814, + "p99": 162.9759967327118 + }, + "roundtrip": { + "p50": 300.1919984817505, + "p90": 306.97599053382874, + "p95": 310.62400341033936, + "p99": 319.9999928474426 + }, + "isolatedSum": { + "p50": 232.06400126218796, + "p90": 245.95200270414352, + "p95": 255.8399960398674, + "p99": 271.807998418808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 127.3919939994812, + "p90": 134.24000144004822, + "p95": 140.99200069904327, + "p99": 163.16799819469452 + }, + "combine": { + "p50": 219.80799734592438, + "p90": 228.35199534893036, + "p95": 233.2800030708313, + "p99": 239.99999463558197 + }, + "roundtrip": { + "p50": 483.90400409698486, + "p90": 490.9439980983734, + "p95": 496.5440034866333, + "p99": 507.6479911804199 + }, + "isolatedSum": { + "p50": 347.1999913454056, + "p90": 362.5919967889786, + "p95": 374.2720037698746, + "p99": 403.1679928302765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 193.7279999256134, + "p90": 200.47999918460846, + "p95": 204.0639966726303, + "p99": 211.35999262332916 + }, + "combine": { + "p50": 351.00799798965454, + "p90": 358.0799996852875, + "p95": 361.2479865550995, + "p99": 373.21600317955017 + }, + "roundtrip": { + "p50": 824.0320086479187, + "p90": 
832.0959806442261, + "p95": 836.7679715156555, + "p99": 860.256016254425 + }, + "isolatedSum": { + "p50": 544.735997915268, + "p90": 558.5599988698959, + "p95": 565.3119832277298, + "p99": 584.5759958028793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 319.7759985923767, + "p90": 327.93599367141724, + "p95": 334.75199341773987, + "p99": 346.6239869594574 + }, + "combine": { + "p50": 617.1519756317139, + "p90": 627.1679997444153, + "p95": 633.0239772796631, + "p99": 647.6479768753052 + }, + "roundtrip": { + "p50": 1496.3200092315674, + "p90": 1508.8319778442383, + "p95": 1516.0959959030151, + "p99": 1641.3439512252808 + }, + "isolatedSum": { + "p50": 936.9279742240906, + "p90": 955.1039934158325, + "p95": 967.775970697403, + "p99": 994.2719638347626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 572.3199844360352, + "p90": 582.9439759254456, + "p95": 588.0320072174072, + "p99": 609.6320152282715 + }, + "combine": { + "p50": 1121.6000318527222, + "p90": 1132.416009902954, + "p95": 1135.424017906189, + "p99": 1357.632040977478 + }, + "roundtrip": { + "p50": 2839.6480083465576, + "p90": 2872.8959560394287, + "p95": 2891.808032989502, + "p99": 3070.336103439331 + }, + "isolatedSum": { + "p50": 1693.9200162887573, + "p90": 1715.3599858283997, + "p95": 1723.4560251235962, + "p99": 1967.2640562057495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 
5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16ab1b80", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_74ad67fa", + "comparisonKey": "3987c132396f8ced", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:50.241734+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { 
+ "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 182.78400599956512, + "p90": 231.10400140285492, + "p95": 243.45600605010986, + "p99": 309.1520071029663 + }, + "combine": { + "p50": 38.07999938726425, + "p90": 51.52000114321709, + "p95": 60.447998344898224, + "p99": 69.98399645090103 + }, + "roundtrip": { + "p50": 214.4320011138916, + "p90": 257.85601139068604, + "p95": 268.12800765037537, + "p99": 288.92800211906433 + }, + "isolatedSum": { + "p50": 220.86400538682938, + "p90": 282.624002546072, + "p95": 303.9040043950081, + "p99": 379.13600355386734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 180.09600043296814, + "p90": 227.52000391483307, + "p95": 241.34400486946106, + "p99": 266.62400364875793 + }, + "combine": { + "p50": 39.07199949026108, + "p90": 60.28800085186958, + "p95": 68.2239979505539, + "p99": 87.07199990749359 + }, + "roundtrip": { + "p50": 220.57600319385529, + "p90": 267.8399980068207, + "p95": 281.69599175453186, + "p99": 304.9919903278351 + }, + "isolatedSum": { + "p50": 219.16799992322922, + "p90": 287.80800476670265, + "p95": 309.56800282001495, + "p99": 353.6960035562515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 173.08799922466278, + "p90": 213.98399770259857, + "p95": 225.40800273418427, + "p99": 
243.58400702476501 + }, + "combine": { + "p50": 40.41599854826927, + "p90": 50.23999884724617, + "p95": 58.97599831223488, + "p99": 67.58400052785873 + }, + "roundtrip": { + "p50": 204.54399287700653, + "p90": 245.728000998497, + "p95": 254.4960081577301, + "p99": 267.5839960575104 + }, + "isolatedSum": { + "p50": 213.50399777293205, + "p90": 264.22399654984474, + "p95": 284.38400104641914, + "p99": 311.16800755262375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 174.81599748134613, + "p90": 221.11999988555908, + "p95": 229.79199886322021, + "p99": 275.61599016189575 + }, + "combine": { + "p50": 41.24800115823746, + "p90": 53.82400006055832, + "p95": 60.35200133919716, + "p99": 67.84000247716904 + }, + "roundtrip": { + "p50": 212.09600567817688, + "p90": 256.1599910259247, + "p95": 270.4960107803345, + "p99": 321.5680122375488 + }, + "isolatedSum": { + "p50": 216.0639986395836, + "p90": 274.9439999461174, + "p95": 290.1440002024174, + "p99": 343.4559926390648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 183.48799645900726, + "p90": 221.91999852657318, + "p95": 231.51999711990356, + "p99": 258.432000875473 + }, + "combine": { + "p50": 44.12800073623657, + "p90": 57.66399949789047, + "p95": 66.59200042486191, + "p99": 77.15199887752533 + }, + "roundtrip": { + "p50": 218.87999773025513, + "p90": 270.30399441719055, + "p95": 278.59199047088623, + "p99": 321.02400064468384 + }, + "isolatedSum": { + "p50": 227.61599719524384, + "p90": 
279.58399802446365, + "p95": 298.1119975447655, + "p99": 335.58399975299835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 208.28799903392792, + "p90": 241.7600005865097, + "p95": 253.4080147743225, + "p99": 276.095986366272 + }, + "combine": { + "p50": 45.75999826192856, + "p90": 53.408000618219376, + "p95": 60.67200005054474, + "p99": 71.35999947786331 + }, + "roundtrip": { + "p50": 245.82399427890778, + "p90": 279.83999252319336, + "p95": 291.51999950408936, + "p99": 307.6159954071045 + }, + "isolatedSum": { + "p50": 254.04799729585648, + "p90": 295.1680012047291, + "p95": 314.08001482486725, + "p99": 347.4559858441353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 265.21599292755127, + "p90": 294.46399211883545, + "p95": 306.65600299835205, + "p99": 419.99998688697815 + }, + "combine": { + "p50": 59.487998485565186, + "p90": 63.77600133419037, + "p95": 69.18399780988693, + "p99": 75.71200281381607 + }, + "roundtrip": { + "p50": 308.80001187324524, + "p90": 330.49601316452026, + "p95": 337.98399567604065, + "p99": 442.9439902305603 + }, + "isolatedSum": { + "p50": 324.70399141311646, + "p90": 358.2399934530258, + "p95": 375.840000808239, + "p99": 495.7119897007942 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 
1024, + "dispatch": { + "p50": 264.67201113700867, + "p90": 297.7280020713806, + "p95": 307.0720136165619, + "p99": 324.67201352119446 + }, + "combine": { + "p50": 73.5040009021759, + "p90": 81.05599880218506, + "p95": 86.75199747085571, + "p99": 92.96000003814697 + }, + "roundtrip": { + "p50": 328.96000146865845, + "p90": 359.16799306869507, + "p95": 373.8879859447479, + "p99": 527.4879932403564 + }, + "isolatedSum": { + "p50": 338.17601203918457, + "p90": 378.7840008735657, + "p95": 393.8240110874176, + "p99": 417.63201355934143 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f6b6fe36", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h200_8938bbea", + "comparisonKey": "2d07fce95a2bb73a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:35.997543+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { 
+ "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 171.55200242996216, + "p90": 186.5919977426529, + "p95": 192.47999787330627, + "p99": 204.16000485420227 + }, + "combine": { + "p50": 40.672000497579575, + "p90": 47.00800031423569, + "p95": 50.04800111055374, + "p99": 58.9120015501976 + }, + "roundtrip": { + "p50": 210.4640007019043, + "p90": 234.49599742889404, + "p95": 243.16799640655518, + "p99": 255.8079957962036 + }, + "isolatedSum": { + "p50": 212.22400292754173, + "p90": 233.59999805688858, + "p95": 242.52799898386002, + "p99": 263.0720064043999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 196.06399536132812, + "p90": 211.07199788093567, + "p95": 217.15199947357178, + "p99": 227.52000391483307 + }, + "combine": { + "p50": 49.536000937223434, + "p90": 52.25599929690361, + 
"p95": 54.71999943256378, + "p99": 59.967998415231705 + }, + "roundtrip": { + "p50": 226.623997092247, + "p90": 243.29599738121033, + "p95": 248.1600046157837, + "p99": 281.0240089893341 + }, + "isolatedSum": { + "p50": 245.59999629855156, + "p90": 263.3279971778393, + "p95": 271.87199890613556, + "p99": 287.4880023300648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 235.71200668811798, + "p90": 249.53599274158478, + "p95": 253.76001000404358, + "p99": 267.16798543930054 + }, + "combine": { + "p50": 75.9039968252182, + "p90": 80.54400235414505, + "p95": 83.80799740552902, + "p99": 91.45600348711014 + }, + "roundtrip": { + "p50": 300.25601387023926, + "p90": 317.1840012073517, + "p95": 324.8319923877716, + "p99": 363.20000886917114 + }, + "isolatedSum": { + "p50": 311.6160035133362, + "p90": 330.0799950957298, + "p95": 337.5680074095726, + "p99": 358.6239889264107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a52b66f", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h200_6f56b6d2", + "comparisonKey": "adefede8d85ce9c1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:59.708426+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": 
"standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 168.7680035829544, + "p90": 200.9280025959015, + "p95": 206.94400370121002, + "p99": 281.3119888305664 + }, + "combine": { + "p50": 39.32800143957138, + "p90": 47.10400104522705, + "p95": 51.13599821925163, + "p99": 56.352000683546066 + }, + "roundtrip": { + "p50": 202.39999890327454, + "p90": 228.5120040178299, + "p95": 235.32800376415253, + "p99": 263.71198892593384 + }, + 
"isolatedSum": { + "p50": 208.0960050225258, + "p90": 248.03200364112854, + "p95": 258.08000192046165, + "p99": 337.6639895141125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 192.25600361824036, + "p90": 221.95200622081757, + "p95": 229.79199886322021, + "p99": 266.78401231765747 + }, + "combine": { + "p50": 47.87199944257736, + "p90": 53.53600159287453, + "p95": 56.384000927209854, + "p99": 62.17600032687187 + }, + "roundtrip": { + "p50": 230.81600666046143, + "p90": 259.10401344299316, + "p95": 276.16000175476074, + "p99": 458.1120014190674 + }, + "isolatedSum": { + "p50": 240.12800306081772, + "p90": 275.4880078136921, + "p95": 286.17599979043007, + "p99": 328.96001264452934 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 236.80000007152557, + "p90": 260.70401072502136, + "p95": 269.1839933395386, + "p99": 301.56800150871277 + }, + "combine": { + "p50": 74.23999905586243, + "p90": 80.73599636554718, + "p95": 83.10399949550629, + "p99": 91.39200299978256 + }, + "roundtrip": { + "p50": 304.1279911994934, + "p90": 335.9679877758026, + "p95": 341.37600660324097, + "p99": 422.33601212501526 + }, + "isolatedSum": { + "p50": 311.039999127388, + "p90": 341.44000709056854, + "p95": 352.28799283504486, + "p99": 392.96000450849533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 
600, + "trials": 3 + } + ] + }, + { + "id": "cx-dbe167a5", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h200_6e56b53f", + "comparisonKey": "f6b294b9087a8410", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:22.565671+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 176.4799952507019, + "p90": 222.1439927816391, + "p95": 228.7999987602234, + "p99": 443.10399889945984 + }, + "combine": { + "p50": 39.51999917626381, + "p90": 52.319999784231186, + "p95": 62.49599903821945, + "p99": 73.47200065851212 + }, + "roundtrip": { + "p50": 219.9999988079071, + "p90": 260.8959972858429, + "p95": 272.67199754714966, + "p99": 296.79998755455017 + }, + "isolatedSum": { + "p50": 215.9999944269657, + "p90": 274.4639925658703, + "p95": 291.29599779844284, + "p99": 516.575999557972 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 200.25600492954254, + "p90": 247.80799448490143, + "p95": 257.31199979782104, + "p99": 272.0000147819519 + }, + "combine": { + "p50": 48.35199937224388, + "p90": 60.38400158286095, + "p95": 64.38399851322174, + "p99": 71.48800045251846 + }, + "roundtrip": { + "p50": 234.20800268650055, + "p90": 275.84001421928406, + "p95": 283.03998708724976, + "p99": 297.0559895038605 + }, + "isolatedSum": { + "p50": 248.60800430178642, + "p90": 308.1919960677624, + "p95": 321.6959983110428, + "p99": 343.48801523447037 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 242.71999299526215, + "p90": 282.8800082206726, + "p95": 291.9679880142212, + "p99": 323.0719864368439 + 
}, + "combine": { + "p50": 74.17599856853485, + "p90": 86.56000345945358, + "p95": 92.73599833250046, + "p99": 96.41599655151367 + }, + "roundtrip": { + "p50": 302.43200063705444, + "p90": 344.9600040912628, + "p95": 349.95201230049133, + "p99": 366.87999963760376 + }, + "isolatedSum": { + "p50": 316.895991563797, + "p90": 369.4400116801262, + "p95": 384.70398634672165, + "p99": 419.48798298835754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bfd16d07", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h200_6d56b3ac", + "comparisonKey": "1295d03bde2972f3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:46.532711+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": 
"fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 178.27199399471283, + "p90": 198.30399751663208, + "p95": 206.2399983406067, + "p99": 300.00001192092896 + }, + "combine": { + "p50": 41.4079986512661, + "p90": 45.56800052523613, + "p95": 48.70399832725525, + "p99": 55.55199831724167 + }, + "roundtrip": { + "p50": 209.4399929046631, + "p90": 224.57599639892578, + "p95": 230.1120012998581, + "p99": 263.90400528907776 + }, + "isolatedSum": { + "p50": 219.67999264597893, + "p90": 243.8719980418682, + "p95": 254.94399666786194, + "p99": 355.5520102381706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 197.24799692630768, + "p90": 224.57599639892578, + "p95": 240.03200232982635, + "p99": 403.6479890346527 + }, + "combine": { + "p50": 51.16799846291542, + "p90": 54.88000065088272, + "p95": 59.487998485565186, + "p99": 66.23999774456024 + }, + "roundtrip": { + "p50": 229.5999974012375, + "p90": 
254.46400046348572, + "p95": 266.7199969291687, + "p99": 284.06399488449097 + }, + "isolatedSum": { + "p50": 248.4159953892231, + "p90": 279.4559970498085, + "p95": 299.52000081539154, + "p99": 469.88798677921295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 243.3280050754547, + "p90": 261.6640031337738, + "p95": 271.67999744415283, + "p99": 406.8480134010315 + }, + "combine": { + "p50": 79.16799932718277, + "p90": 82.59200304746628, + "p95": 86.5280032157898, + "p99": 95.23200243711472 + }, + "roundtrip": { + "p50": 305.85598945617676, + "p90": 327.32799649238586, + "p95": 336.12799644470215, + "p99": 1480.5760383605957 + }, + "isolatedSum": { + "p50": 322.4960044026375, + "p90": 344.2560061812401, + "p95": 358.2080006599426, + "p99": 502.0800158381462 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-93a775a3", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h200_e7b91752", + "comparisonKey": "4950f50f5fa61afa", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:47.547652+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 174.14399981498718, + "p90": 219.9680060148239, + "p95": 229.24800217151642, + "p99": 247.0400035381317 + }, + "combine": { + "p50": 38.14399987459183, + "p90": 52.12799832224846, + "p95": 61.824001371860504, + "p99": 96.57599776983261 + }, + "roundtrip": { + "p50": 204.57600057125092, + "p90": 248.9279955625534, + "p95": 256.8640112876892, + "p99": 273.6319899559021 + }, + "isolatedSum": { + "p50": 212.287999689579, + "p90": 272.0960043370724, + "p95": 291.0720035433769, + "p99": 343.6160013079643 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 169.79199647903442, + "p90": 205.31199872493744, + "p95": 224.2880016565323, + "p99": 253.12000513076782 + }, + "combine": { + "p50": 37.728000432252884, + "p90": 43.64800080657005, + "p95": 49.375999718904495, + "p99": 69.66400146484375 + }, + "roundtrip": { + "p50": 198.88000190258026, + "p90": 230.14399409294128, + "p95": 243.58400702476501, + "p99": 262.4000012874603 + }, + "isolatedSum": { + "p50": 207.5199969112873, + "p90": 248.9599995315075, + "p95": 273.6640013754368, + "p99": 322.7840065956116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 168.92799735069275, + "p90": 205.1520049571991, + "p95": 216.8000042438507, + "p99": 231.00799322128296 + }, + "combine": { + "p50": 41.280001401901245, + "p90": 49.79199916124344, + "p95": 58.400001376867294, + "p99": 74.52800124883652 + }, + "roundtrip": { + "p50": 200.9280025959015, + "p90": 236.28799617290497, + "p95": 243.71199309825897, + "p99": 268.19199323654175 + }, + "isolatedSum": { + "p50": 210.207998752594, + "p90": 254.94400411844254, + "p95": 275.200005620718, + "p99": 305.5359944701195 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 171.61600291728973, + "p90": 210.87999641895294, + "p95": 222.84799814224243, + "p99": 242.43199825286865 + }, + 
"combine": { + "p50": 42.17600077390671, + "p90": 48.25599864125252, + "p95": 55.39200082421303, + "p99": 68.03199648857117 + }, + "roundtrip": { + "p50": 205.53599298000336, + "p90": 244.32000517845154, + "p95": 255.19999861717224, + "p99": 273.0239927768707 + }, + "isolatedSum": { + "p50": 213.79200369119644, + "p90": 259.13599506020546, + "p95": 278.23999896645546, + "p99": 310.4639947414398 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 188.9919936656952, + "p90": 221.27999365329742, + "p95": 230.04800081253052, + "p99": 240.1919960975647 + }, + "combine": { + "p50": 43.74400153756142, + "p90": 54.59199845790863, + "p95": 60.70400029420853, + "p99": 68.7360018491745 + }, + "roundtrip": { + "p50": 219.61599588394165, + "p90": 259.16799902915955, + "p95": 264.5440101623535, + "p99": 299.8400032520294 + }, + "isolatedSum": { + "p50": 232.7359952032566, + "p90": 275.87199211120605, + "p95": 290.75200110673904, + "p99": 308.9279979467392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 231.23200237751007, + "p90": 264.22399282455444, + "p95": 276.3200104236603, + "p99": 301.2160062789917 + }, + "combine": { + "p50": 49.79199916124344, + "p90": 58.01599845290184, + "p95": 64.19199705123901, + "p99": 73.72800260782242 + }, + "roundtrip": { + "p50": 270.3680098056793, + "p90": 304.064005613327, + "p95": 317.56800413131714, + "p99": 740.4159903526306 + }, + "isolatedSum": { + "p50": 281.0240015387535, + "p90": 322.2399912774563, + "p95": 340.5120074748993, + "p99": 
374.9440088868141 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 317.984014749527, + "p90": 392.32000708580017, + "p95": 408.4799885749817, + "p99": 463.9039933681488 + }, + "combine": { + "p50": 66.52799993753433, + "p90": 70.20799815654755, + "p95": 75.26399940252304, + "p99": 83.42400193214417 + }, + "roundtrip": { + "p50": 367.0719861984253, + "p90": 396.7359960079193, + "p95": 411.0400080680847, + "p99": 509.2480182647705 + }, + "isolatedSum": { + "p50": 384.5120146870613, + "p90": 462.5280052423477, + "p95": 483.74398797750473, + "p99": 547.327995300293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 318.4959888458252, + "p90": 347.9039967060089, + "p95": 363.9039993286133, + "p99": 397.3119854927063 + }, + "combine": { + "p50": 88.99199962615967, + "p90": 97.82399982213974, + "p95": 103.84000092744827, + "p99": 109.92000252008438 + }, + "roundtrip": { + "p50": 402.3999869823456, + "p90": 430.65598607063293, + "p95": 439.2639994621277, + "p99": 461.95200085639954 + }, + "isolatedSum": { + "p50": 407.48798847198486, + "p90": 445.72799652814865, + "p95": 467.74400025606155, + "p99": 507.2319880127907 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-be40cbd4", + "identity": 
"h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h200_7b90a3ed", + "comparisonKey": "60f1d57542f66469", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:45.174673+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + 
"createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 173.88799786567688, + "p90": 199.77599382400513, + "p95": 213.05599808692932, + "p99": 296.60800099372864 + }, + "combine": { + "p50": 35.360001027584076, + "p90": 46.68800160288811, + "p95": 50.592001527547836, + "p99": 61.11999973654747 + }, + "roundtrip": { + "p50": 201.53599977493286, + "p90": 221.18400037288666, + "p95": 232.06399381160736, + "p99": 261.4080011844635 + }, + "isolatedSum": { + "p50": 209.24799889326096, + "p90": 246.46399542689323, + "p95": 263.64799961447716, + "p99": 357.7280007302761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 177.21599340438843, + "p90": 194.0159946680069, + "p95": 203.07199656963348, + "p99": 223.7440049648285 + }, + "combine": { + "p50": 37.82400116324425, + "p90": 44.224001467227936, + "p95": 47.10400104522705, + "p99": 50.49600079655647 + }, + "roundtrip": { + "p50": 216.41600131988525, + "p90": 241.34400486946106, + "p95": 251.45599246025085, + "p99": 278.81601452827454 + }, + "isolatedSum": { + "p50": 215.03999456763268, + "p90": 238.23999613523483, + "p95": 250.17599761486053, + "p99": 274.24000576138496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 187.19999492168427, + "p90": 212.8639966249466, + "p95": 223.80800545215607, + "p99": 253.63200902938843 + }, + "combine": { + "p50": 38.68800029158592, + "p90": 46.62400111556053, + "p95": 
50.36799982190132, + "p99": 64.7360011935234 + }, + "roundtrip": { + "p50": 215.64799547195435, + "p90": 239.1359955072403, + "p95": 249.40800666809082, + "p99": 288.63999247550964 + }, + "isolatedSum": { + "p50": 225.8879952132702, + "p90": 259.4879977405071, + "p95": 274.1760052740574, + "p99": 318.36801022291183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 188.960000872612, + "p90": 214.1759991645813, + "p95": 237.59999871253967, + "p99": 264.92801308631897 + }, + "combine": { + "p50": 46.01600021123886, + "p90": 51.392000168561935, + "p95": 54.46400120854378, + "p99": 59.84000116586685 + }, + "roundtrip": { + "p50": 217.50399470329285, + "p90": 239.6160066127777, + "p95": 252.22399830818176, + "p99": 280.64000606536865 + }, + "isolatedSum": { + "p50": 234.97600108385086, + "p90": 265.56799933314323, + "p95": 292.06399992108345, + "p99": 324.7680142521858 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ff7860d5", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h200_e0bae1dc", + "comparisonKey": "7128899edca2d97a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:24.909992+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 180.41600286960602, + "p90": 207.2640061378479, + "p95": 233.75999927520752, + "p99": 771.5520262718201 + }, + "combine": { + "p50": 36.3520011305809, + "p90": 46.52800038456917, + "p95": 51.64799839258194, + "p99": 272.0000147819519 + }, + "roundtrip": { + "p50": 202.91200280189514, + "p90": 221.88800573349, + "p95": 229.08799350261688, + "p99": 422.91200160980225 + }, + "isolatedSum": { + "p50": 216.76800400018692, + 
"p90": 253.79200652241707, + "p95": 285.40799766778946, + "p99": 1043.552041053772 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 174.9120056629181, + "p90": 194.72000002861023, + "p95": 200.3519982099533, + "p99": 223.87200593948364 + }, + "combine": { + "p50": 37.408001720905304, + "p90": 43.99999976158142, + "p95": 48.09600114822388, + "p99": 59.20000001788139 + }, + "roundtrip": { + "p50": 208.064004778862, + "p90": 232.86400735378265, + "p95": 286.0479950904846, + "p99": 809.1520071029663 + }, + "isolatedSum": { + "p50": 212.3200073838234, + "p90": 238.71999979019165, + "p95": 248.44799935817719, + "p99": 283.07200595736504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 173.567995429039, + "p90": 192.47999787330627, + "p95": 199.42399859428406, + "p99": 238.0799949169159 + }, + "combine": { + "p50": 37.376001477241516, + "p90": 44.12800073623657, + "p95": 47.93599992990494, + "p99": 56.543998420238495 + }, + "roundtrip": { + "p50": 209.9519968032837, + "p90": 233.88800024986267, + "p95": 241.34400486946106, + "p99": 344.86401081085205 + }, + "isolatedSum": { + "p50": 210.94399690628052, + "p90": 236.60799860954285, + "p95": 247.359998524189, + "p99": 294.6239933371544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 
175.35999417304993, + "p90": 199.23199713230133, + "p95": 211.5200012922287, + "p99": 258.6880028247833 + }, + "combine": { + "p50": 37.28000074625015, + "p90": 44.415999203920364, + "p95": 47.68000170588493, + "p99": 53.18399891257286 + }, + "roundtrip": { + "p50": 201.85600221157074, + "p90": 221.0559993982315, + "p95": 233.0559939146042, + "p99": 358.2080006599426 + }, + "isolatedSum": { + "p50": 212.63999491930008, + "p90": 243.6479963362217, + "p95": 259.20000299811363, + "p99": 311.8720017373562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 171.1679995059967, + "p90": 190.62399864196777, + "p95": 207.42399990558624, + "p99": 697.2159743309021 + }, + "combine": { + "p50": 37.856001406908035, + "p90": 44.256001710891724, + "p95": 48.48000034689903, + "p99": 58.14399942755699 + }, + "roundtrip": { + "p50": 202.65600085258484, + "p90": 219.80799734592438, + "p95": 225.95199942588806, + "p99": 256.79999589920044 + }, + "isolatedSum": { + "p50": 209.02400091290474, + "p90": 234.8800003528595, + "p95": 255.90400025248528, + "p99": 755.3599737584591 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 175.55199563503265, + "p90": 195.74399292469025, + "p95": 202.87999510765076, + "p99": 252.16001272201538 + }, + "combine": { + "p50": 40.28800129890442, + "p90": 45.69600149989128, + "p95": 48.70399832725525, + "p99": 54.07999828457832 + }, + "roundtrip": { + "p50": 203.90400290489197, + "p90": 222.75200486183167, + "p95": 232.7679991722107, + "p99": 302.91199684143066 
+ }, + "isolatedSum": { + "p50": 215.83999693393707, + "p90": 241.43999442458153, + "p95": 251.583993434906, + "p99": 306.2400110065937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 196.60800695419312, + "p90": 215.07200598716736, + "p95": 223.23200106620789, + "p99": 254.2400062084198 + }, + "combine": { + "p50": 46.14400118589401, + "p90": 51.392000168561935, + "p95": 55.135998874902725, + "p99": 60.127999633550644 + }, + "roundtrip": { + "p50": 227.9680073261261, + "p90": 244.54399943351746, + "p95": 254.55999374389648, + "p99": 311.8720054626465 + }, + "isolatedSum": { + "p50": 242.75200814008713, + "p90": 266.4640061557293, + "p95": 278.3679999411106, + "p99": 314.36800584197044 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 196.25599682331085, + "p90": 211.2320065498352, + "p95": 217.6000028848648, + "p99": 529.2800068855286 + }, + "combine": { + "p50": 54.048001766204834, + "p90": 58.04799869656563, + "p95": 61.344001442193985, + "p99": 68.9919963479042 + }, + "roundtrip": { + "p50": 236.00000143051147, + "p90": 249.1839975118637, + "p95": 254.7520101070404, + "p99": 302.623987197876 + }, + "isolatedSum": { + "p50": 250.3039985895157, + "p90": 269.28000524640083, + "p95": 278.9440043270588, + "p99": 598.2720032334328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] 
+ }, + { + "id": "cx-6bc89323", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h200_5867c702", + "comparisonKey": "efd6b5a06ab8ad9f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:02.126585+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 175.3920018672943, + "p90": 207.5520008802414, + "p95": 225.0880002975464, + "p99": 328.031986951828 + }, + "combine": { + "p50": 42.047999799251556, + "p90": 50.944000482559204, + "p95": 61.503998935222626, + "p99": 76.4160007238388 + }, + "roundtrip": { + "p50": 208.41600000858307, + "p90": 249.7600018978119, + "p95": 263.0079984664917, + "p99": 285.98400950431824 + }, + "isolatedSum": { + "p50": 217.44000166654587, + "p90": 258.4960013628006, + "p95": 286.591999232769, + "p99": 404.4479876756668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 210.04800498485565, + "p90": 227.61599719524384, + "p95": 243.93600225448608, + "p99": 261.6960108280182 + }, + "combine": { + "p50": 52.44800075888634, + "p90": 58.240000158548355, + "p95": 62.912002205848694, + "p99": 71.00799679756165 + }, + "roundtrip": { + "p50": 244.4159984588623, + "p90": 266.04801416397095, + "p95": 280.9920012950897, + "p99": 295.0719892978668 + }, + "isolatedSum": { + "p50": 262.496005743742, + "p90": 285.8559973537922, + "p95": 306.8480044603348, + "p99": 332.70400762557983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 271.2000012397766, + "p90": 303.8719892501831, + "p95": 313.6320114135742, + "p99": 349.4400084018707 + }, 
+ "combine": { + "p50": 77.60000228881836, + "p90": 84.32000130414963, + "p95": 93.79199892282486, + "p99": 98.68799895048141 + }, + "roundtrip": { + "p50": 338.20798993110657, + "p90": 370.4319894313812, + "p95": 385.50400733947754, + "p99": 448.06399941444397 + }, + "isolatedSum": { + "p50": 348.80000352859497, + "p90": 388.19199055433273, + "p95": 407.4240103363991, + "p99": 448.12800735235214 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8daf5579", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h200_a03e151a", + "comparisonKey": "9e2f189bf7b3d267", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:25.782937+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + 
"conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 176.38400197029114, + "p90": 194.68800723552704, + "p95": 201.08799636363983, + "p99": 239.87199366092682 + }, + "combine": { + "p50": 42.847998440265656, + "p90": 48.128001391887665, + "p95": 51.32799968123436, + "p99": 56.92800134420395 + }, + "roundtrip": { + "p50": 213.18399906158447, + "p90": 235.03999412059784, + "p95": 248.1279969215393, + "p99": 358.5599958896637 + }, + "isolatedSum": { + "p50": 219.2320004105568, + "p90": 242.8160086274147, + "p95": 252.4159960448742, + "p99": 296.79999500513077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 215.2000069618225, + "p90": 233.34400355815887, + "p95": 245.40799856185913, + "p99": 315.61601161956787 + }, + "combine": { + "p50": 52.41600051522255, + "p90": 56.992001831531525, + "p95": 60.5119988322258, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 248.83200228214264, + "p90": 268.5439884662628, + "p95": 
280.4799973964691, + "p99": 370.5280125141144 + }, + "isolatedSum": { + "p50": 267.61600747704506, + "p90": 290.3360053896904, + "p95": 305.91999739408493, + "p99": 389.72800970077515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 270.1759934425354, + "p90": 288.32000494003296, + "p95": 295.26400566101074, + "p99": 354.8800051212311 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 83.74399691820145, + "p95": 87.99999952316284, + "p99": 93.82399916648865 + }, + "roundtrip": { + "p50": 335.4879915714264, + "p90": 351.967990398407, + "p95": 359.8720133304596, + "p99": 467.74399280548096 + }, + "isolatedSum": { + "p50": 349.4719937443733, + "p90": 372.0640018582344, + "p95": 383.2640051841736, + "p99": 448.7040042877197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4eba9844", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h200_9f3e1387", + "comparisonKey": "c39c6d1de2e4080b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:49.696833+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 
8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 187.83999979496002, + "p90": 210.07999777793884, + "p95": 215.39199352264404, + "p99": 231.10400140285492 + }, + "combine": { + "p50": 43.23200136423111, + "p90": 48.8319993019104, + "p95": 51.96800082921982, + "p99": 59.776000678539276 + }, + "roundtrip": { + "p50": 222.56000339984894, + "p90": 365.9200072288513, + "p95": 376.9280016422272, + "p99": 403.3919870853424 + }, + "isolatedSum": { + "p50": 231.07200115919113, + "p90": 258.91199707984924, + "p95": 267.35999435186386, + "p99": 290.8800020813942 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 244.4480061531067, + "p90": 318.7200129032135, + "p95": 325.6959915161133, + "p99": 439.87199664115906 + }, + "combine": { + "p50": 55.16799911856651, + "p90": 81.05599880218506, + "p95": 86.20800077915192, + "p99": 96.19200229644775 + }, + "roundtrip": { + "p50": 251.93598866462708, + "p90": 288.9600098133087, + "p95": 351.1359989643097, + "p99": 410.2720022201538 + }, + "isolatedSum": { + "p50": 299.6160052716732, + "p90": 399.77601170539856, + "p95": 411.9039922952652, + "p99": 536.0639989376068 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 272.352010011673, + "p90": 287.58400678634644, + "p95": 295.80798745155334, + "p99": 398.97599816322327 + }, + "combine": { + "p50": 78.75200361013412, + "p90": 83.36000144481659, + "p95": 87.00799942016602, + "p99": 138.14400136470795 + }, + "roundtrip": { + "p50": 335.83998680114746, + "p90": 348.9280045032501, + "p95": 354.2400002479553, + "p99": 365.53600430488586 + }, + "isolatedSum": { + "p50": 351.1040136218071, + "p90": 370.944008231163, + "p95": 382.81598687171936, + "p99": 537.1199995279312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ef1953c1", + "identity": 
"h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h200_9e3e11f4", + "comparisonKey": "a68d30f2e9d4be5f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:12.990717+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 175.35999417304993, + "p90": 197.56799936294556, + "p95": 207.16799795627594, + "p99": 274.1119861602783 + }, + "combine": { + "p50": 42.14400053024292, + "p90": 49.72799867391586, + "p95": 61.95199862122536, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 212.38400042057037, + "p90": 241.43999814987183, + "p95": 260.3839933872223, + "p99": 349.7920036315918 + }, + "isolatedSum": { + "p50": 217.50399470329285, + "p90": 247.29599803686142, + "p95": 269.1199965775013, + "p99": 384.15998965501785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 216.67200326919556, + "p90": 232.35200345516205, + "p95": 240.38399755954742, + "p99": 257.4400007724762 + }, + "combine": { + "p50": 51.83999985456467, + "p90": 55.135998874902725, + "p95": 58.75200033187866, + "p99": 72.41600006818771 + }, + "roundtrip": { + "p50": 245.27999758720398, + "p90": 259.42400097846985, + "p95": 264.8000121116638, + "p99": 341.7919874191284 + }, + "isolatedSum": { + "p50": 268.5120031237602, + "p90": 287.4880023300648, + "p95": 299.1359978914261, + "p99": 329.8560008406639 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 267.4559950828552, + "p90": 281.0879945755005, + "p95": 286.8799865245819, + "p99": 338.9759957790375 + }, + "combine": { + "p50": 78.36800068616867, + "p90": 82.04799890518188, + "p95": 
85.11999994516373, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 335.4560136795044, + "p90": 352.83198952674866, + "p95": 363.1359934806824, + "p99": 413.567990064621 + }, + "isolatedSum": { + "p50": 345.8239957690239, + "p90": 363.1359934806824, + "p95": 371.99998646974564, + "p99": 430.91199547052383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6224bd2c", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h200_42ab3d00", + "comparisonKey": "dd458495387bf44c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:29.599873+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 175.55199563503265, + "p90": 223.51999580860138, + "p95": 231.3919961452484, + "p99": 254.4960081577301 + }, + "combine": { + "p50": 37.02399879693985, + "p90": 53.568001836538315, + "p95": 59.4559982419014, + "p99": 70.27199864387512 + }, + "roundtrip": { + "p50": 221.8559980392456, + "p90": 268.6080038547516, + "p95": 281.98400139808655, + "p99": 315.8400058746338 + }, + "isolatedSum": { + "p50": 212.5759944319725, + "p90": 277.0879976451397, + "p95": 290.8479943871498, + "p99": 324.7680068016052 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 173.92000555992126, + "p90": 224.57599639892578, + "p95": 233.50399732589722, + "p99": 253.4720003604889 + }, + "combine": { + "p50": 37.696000188589096, + "p90": 55.64799904823303, + "p95": 64.31999802589417, + "p99": 77.56800204515457 + }, + "roundtrip": { + "p50": 208.6080014705658, + "p90": 257.82400369644165, + "p95": 269.1839933395386, + "p99": 300.3840148448944 + }, + "isolatedSum": { + "p50": 211.61600574851036, + "p90": 
280.2239954471588, + "p95": 297.8239953517914, + "p99": 331.04000240564346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 182.8480064868927, + "p90": 238.304004073143, + "p95": 254.72000241279602, + "p99": 303.51999402046204 + }, + "combine": { + "p50": 40.70400074124336, + "p90": 58.62399935722351, + "p95": 67.03999638557434, + "p99": 77.88799703121185 + }, + "roundtrip": { + "p50": 218.78400444984436, + "p90": 270.27198672294617, + "p95": 287.9360020160675, + "p99": 309.53601002693176 + }, + "isolatedSum": { + "p50": 223.55200722813606, + "p90": 296.9280034303665, + "p95": 321.75999879837036, + "p99": 381.4079910516739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 187.77599930763245, + "p90": 237.59999871253967, + "p95": 248.416006565094, + "p99": 272.352010011673 + }, + "combine": { + "p50": 42.62400045990944, + "p90": 58.46399813890457, + "p95": 65.60000032186508, + "p99": 76.64000242948532 + }, + "roundtrip": { + "p50": 221.8559980392456, + "p90": 275.10398626327515, + "p95": 299.55199360847473, + "p99": 495.5520033836365 + }, + "isolatedSum": { + "p50": 230.39999976754189, + "p90": 296.06399685144424, + "p95": 314.0160068869591, + "p99": 348.9920124411583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + 
"dispatch": { + "p50": 185.31200289726257, + "p90": 232.44799673557281, + "p95": 244.4159984588623, + "p99": 272.0319926738739 + }, + "combine": { + "p50": 46.04800045490265, + "p90": 61.47199869155884, + "p95": 70.49600034952164, + "p99": 81.91999793052673 + }, + "roundtrip": { + "p50": 231.77599906921387, + "p90": 297.21599817276, + "p95": 315.61601161956787, + "p99": 385.21599769592285 + }, + "isolatedSum": { + "p50": 231.36000335216522, + "p90": 293.91999542713165, + "p95": 314.91199880838394, + "p99": 353.95199060440063 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 211.0079973936081, + "p90": 255.51998615264893, + "p95": 268.5120105743408, + "p99": 283.84000062942505 + }, + "combine": { + "p50": 48.54400083422661, + "p90": 59.13599953055382, + "p95": 64.7680014371872, + "p99": 73.11999797821045 + }, + "roundtrip": { + "p50": 247.00799584388733, + "p90": 285.2480113506317, + "p95": 293.5679852962494, + "p99": 335.1359963417053 + }, + "isolatedSum": { + "p50": 259.5519982278347, + "p90": 314.65598568320274, + "p95": 333.280012011528, + "p99": 356.9599986076355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 268.2879865169525, + "p90": 310.7199966907501, + "p95": 318.36798787117004, + "p99": 456.83199167251587 + }, + "combine": { + "p50": 58.01599845290184, + "p90": 68.76800209283829, + "p95": 74.27199929952621, + "p99": 85.79199761152267 + }, + "roundtrip": { + "p50": 324.95999336242676, + "p90": 364.51199650764465, + "p95": 
426.144003868103, + "p99": 548.7359762191772 + }, + "isolatedSum": { + "p50": 326.30398496985435, + "p90": 379.4879987835884, + "p95": 392.63998717069626, + "p99": 542.6239892840385 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 271.2959945201874, + "p90": 312.54398822784424, + "p95": 321.6319978237152, + "p99": 335.87199449539185 + }, + "combine": { + "p50": 76.35200023651123, + "p90": 88.48000317811966, + "p95": 93.47199648618698, + "p99": 99.35999661684036 + }, + "roundtrip": { + "p50": 344.2560136318207, + "p90": 387.4559998512268, + "p95": 395.10399103164673, + "p99": 414.68799114227295 + }, + "isolatedSum": { + "p50": 347.6479947566986, + "p90": 401.0239914059639, + "p95": 415.1039943099022, + "p99": 435.2319911122322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c9fa1065", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h200_0e44604e", + "comparisonKey": "341172c0c64d0c1b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:05.913334+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 179.51999604701996, + "p90": 210.87999641895294, + "p95": 218.62399578094482, + "p99": 239.48800563812256 + }, + "combine": { + "p50": 37.248000502586365, + "p90": 49.247998744249344, + "p95": 54.78399991989136, + "p99": 65.8240020275116 + }, + "roundtrip": { + "p50": 211.74399554729462, + "p90": 243.20000410079956, + "p95": 253.4080147743225, + "p99": 311.19999289512634 + }, + "isolatedSum": { + "p50": 216.76799654960632, + "p90": 260.1279951632023, + "p95": 273.4079957008362, + "p99": 
305.31200766563416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 181.08800053596497, + "p90": 210.62399446964264, + "p95": 219.58400309085846, + "p99": 234.46400463581085 + }, + "combine": { + "p50": 38.176000118255615, + "p90": 49.72799867391586, + "p95": 56.2559999525547, + "p99": 61.792001128196716 + }, + "roundtrip": { + "p50": 211.2320065498352, + "p90": 239.48800563812256, + "p95": 252.6719868183136, + "p99": 298.11200499534607 + }, + "isolatedSum": { + "p50": 219.26400065422058, + "p90": 260.3519931435585, + "p95": 275.84000304341316, + "p99": 296.25600576400757 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 181.63199722766876, + "p90": 210.24000644683838, + "p95": 221.18400037288666, + "p99": 239.29600417613983 + }, + "combine": { + "p50": 40.89599847793579, + "p90": 50.75199902057648, + "p95": 57.37600103020668, + "p99": 67.96800345182419 + }, + "roundtrip": { + "p50": 210.59200167655945, + "p90": 237.72799968719482, + "p95": 244.54399943351746, + "p99": 277.75999903678894 + }, + "isolatedSum": { + "p50": 222.52799570560455, + "p90": 260.99200546741486, + "p95": 278.56000140309334, + "p99": 307.264007627964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 190.2720034122467, + "p90": 
236.67199909687042, + "p95": 252.70399451255798, + "p99": 510.047972202301 + }, + "combine": { + "p50": 42.14400053024292, + "p90": 55.23199960589409, + "p95": 62.04799935221672, + "p99": 67.96800345182419 + }, + "roundtrip": { + "p50": 224.06400740146637, + "p90": 257.63198733329773, + "p95": 269.3760097026825, + "p99": 287.07200288772583 + }, + "isolatedSum": { + "p50": 232.41600394248962, + "p90": 291.9039987027645, + "p95": 314.7519938647747, + "p99": 578.0159756541252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 187.74400651454926, + "p90": 227.07200050354004, + "p95": 239.9040013551712, + "p99": 256.3199996948242 + }, + "combine": { + "p50": 45.152001082897186, + "p90": 55.80800026655197, + "p95": 61.85600161552429, + "p99": 69.47200000286102 + }, + "roundtrip": { + "p50": 219.200000166893, + "p90": 264.92801308631897, + "p95": 286.8160009384155, + "p99": 433.3440065383911 + }, + "isolatedSum": { + "p50": 232.89600759744644, + "p90": 282.880000770092, + "p95": 301.7600029706955, + "p99": 325.79199969768524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 212.0639979839325, + "p90": 248.31999838352203, + "p95": 259.71201062202454, + "p99": 303.6159873008728 + }, + "combine": { + "p50": 46.46399989724159, + "p90": 57.98399820923805, + "p95": 61.792001128196716, + "p99": 67.90400296449661 + }, + "roundtrip": { + "p50": 248.1279969215393, + "p90": 285.3440046310425, + "p95": 296.03201150894165, + "p99": 309.31198596954346 + }, + "isolatedSum": { + 
"p50": 258.5279978811741, + "p90": 306.3039965927601, + "p95": 321.50401175022125, + "p99": 371.5199902653694 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 272.4800109863281, + "p90": 301.34400725364685, + "p95": 315.744012594223, + "p99": 358.3360016345978 + }, + "combine": { + "p50": 58.6559996008873, + "p90": 66.97600334882736, + "p95": 71.80800288915634, + "p99": 77.15199887752533 + }, + "roundtrip": { + "p50": 311.74400448799133, + "p90": 338.27200531959534, + "p95": 344.4480001926422, + "p99": 358.5920035839081 + }, + "isolatedSum": { + "p50": 331.1360105872154, + "p90": 368.3200106024742, + "p95": 387.55201548337936, + "p99": 435.4880005121231 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 266.11199975013733, + "p90": 297.88801074028015, + "p95": 305.85598945617676, + "p99": 316.22400879859924 + }, + "combine": { + "p50": 72.7040022611618, + "p90": 82.07999914884567, + "p95": 88.41600269079208, + "p99": 91.32800251245499 + }, + "roundtrip": { + "p50": 335.2000117301941, + "p90": 373.50401282310486, + "p95": 387.10400462150574, + "p99": 431.10400438308716 + }, + "isolatedSum": { + "p50": 338.81600201129913, + "p90": 379.9680098891258, + "p95": 394.27199214696884, + "p99": 407.55201131105423 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } 
+ ] + }, + { + "id": "cx-14002e42", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h200_ef8c1dfa", + "comparisonKey": "375f564385b83b2c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:09.768508+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 170.01600563526154, + "p90": 217.53600239753723, + "p95": 233.18399488925934, + "p99": 291.29600524902344 + }, + "combine": { + "p50": 40.511999279260635, + "p90": 49.92000013589859, + "p95": 56.12799897789955, + "p99": 66.880002617836 + }, + "roundtrip": { + "p50": 209.08799767494202, + "p90": 255.5840015411377, + "p95": 266.27200841903687, + "p99": 365.4080033302307 + }, + "isolatedSum": { + "p50": 210.52800491452217, + "p90": 267.4560025334358, + "p95": 289.3119938671589, + "p99": 358.17600786685944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 231.1680018901825, + "p90": 269.6320116519928, + "p95": 282.81599283218384, + "p99": 321.1199939250946 + }, + "combine": { + "p50": 49.375999718904495, + "p90": 57.0559985935688, + "p95": 63.840001821517944, + "p99": 72.92799651622772 + }, + "roundtrip": { + "p50": 272.15999364852905, + "p90": 310.68798899650574, + "p95": 330.7200074195862, + "p99": 360.0960075855255 + }, + "isolatedSum": { + "p50": 280.544001609087, + "p90": 326.6880102455616, + "p95": 346.6559946537018, + "p99": 394.0479904413223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 266, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 258.30399990081787, + "p90": 292.1920120716095, + "p95": 302.7519881725311, + "p99": 325.6320059299469 + }, + 
"combine": { + "p50": 79.6160027384758, + "p90": 89.72799777984619, + "p95": 94.43199634552002, + "p99": 100.44799745082855 + }, + "roundtrip": { + "p50": 331.7759931087494, + "p90": 362.87999153137207, + "p95": 374.783992767334, + "p99": 395.35999298095703 + }, + "isolatedSum": { + "p50": 337.92000263929367, + "p90": 381.9200098514557, + "p95": 397.18398451805115, + "p99": 426.08000338077545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 917, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d099a62", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h200_b627a6b4", + "comparisonKey": "d8f87dce2bc35883", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:28.721658+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + 
"conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 171.48800194263458, + "p90": 213.82400393486023, + "p95": 220.5120027065277, + "p99": 232.96000063419342 + }, + "combine": { + "p50": 36.607999354600906, + "p90": 49.31199923157692, + "p95": 58.33600088953972, + "p99": 69.05599683523178 + }, + "roundtrip": { + "p50": 209.21599864959717, + "p90": 250.84799528121948, + "p95": 261.1840069293976, + "p99": 277.2800028324127 + }, + "isolatedSum": { + "p50": 208.0960012972355, + "p90": 263.13600316643715, + "p95": 278.84800359606743, + "p99": 302.0159974694252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 175.52000284194946, + "p90": 221.0559993982315, + "p95": 234.14400219917297, + "p99": 280.2880108356476 + }, + "combine": { + "p50": 37.567999213933945, + "p90": 54.048001766204834, + "p95": 63.71200084686279, + "p99": 75.23199915885925 + }, + "roundtrip": { + "p50": 215.71199595928192, + "p90": 257.1839988231659, + "p95": 
266.62400364875793, + "p99": 301.82400345802307 + }, + "isolatedSum": { + "p50": 213.0880020558834, + "p90": 275.10400116443634, + "p95": 297.85600304603577, + "p99": 355.52000999450684 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 181.43999576568604, + "p90": 224.63999688625336, + "p95": 236.12800240516663, + "p99": 248.9279955625534 + }, + "combine": { + "p50": 40.991999208927155, + "p90": 56.73599988222122, + "p95": 66.14399701356888, + "p99": 74.87999647855759 + }, + "roundtrip": { + "p50": 231.455996632576, + "p90": 281.66401386260986, + "p95": 294.43201422691345, + "p99": 331.0079872608185 + }, + "isolatedSum": { + "p50": 222.4319949746132, + "p90": 281.3759967684746, + "p95": 302.2719994187355, + "p99": 323.807992041111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 176.32000148296356, + "p90": 220.64000368118286, + "p95": 229.37600314617157, + "p99": 241.85599386692047 + }, + "combine": { + "p50": 39.84000161290169, + "p90": 52.73599922657013, + "p95": 60.416001826524734, + "p99": 67.58400052785873 + }, + "roundtrip": { + "p50": 204.8960030078888, + "p90": 248.9279955625534, + "p95": 258.4959864616394, + "p99": 272.67199754714966 + }, + "isolatedSum": { + "p50": 216.16000309586525, + "p90": 273.376002907753, + "p95": 289.7920049726963, + "p99": 309.4399943947792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 6, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 182.23999440670013, + "p90": 231.6800057888031, + "p95": 241.82400107383728, + "p99": 334.30400490760803 + }, + "combine": { + "p50": 43.136000633239746, + "p90": 54.687999188899994, + "p95": 61.216000467538834, + "p99": 71.84000313282013 + }, + "roundtrip": { + "p50": 215.55200219154358, + "p90": 255.45600056648254, + "p95": 267.36000180244446, + "p99": 341.21599793434143 + }, + "isolatedSum": { + "p50": 225.37599503993988, + "p90": 286.3680049777031, + "p95": 303.0400015413761, + "p99": 406.14400804042816 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 213.8880044221878, + "p90": 255.8079957962036, + "p95": 265.6320035457611, + "p99": 302.0800054073334 + }, + "combine": { + "p50": 46.94399982690811, + "p90": 57.66399949789047, + "p95": 64.19199705123901, + "p99": 72.25599884986877 + }, + "roundtrip": { + "p50": 245.95199525356293, + "p90": 287.7439856529236, + "p95": 304.1920065879822, + "p99": 330.52799105644226 + }, + "isolatedSum": { + "p50": 260.8320042490959, + "p90": 313.4719952940941, + "p95": 329.8240005970001, + "p99": 374.33600425720215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 269.3440020084381, + "p90": 310.04801392555237, + "p95": 317.1199858188629, + "p99": 345.5359935760498 + }, + "combine": { + "p50": 57.5999990105629, + "p90": 68.64000111818314, + "p95": 75.77600330114365, + "p99": 87.16800063848495 
+ }, + "roundtrip": { + "p50": 315.10400772094727, + "p90": 348.9600121974945, + "p95": 359.8079979419708, + "p99": 1157.3760509490967 + }, + "isolatedSum": { + "p50": 326.944001019001, + "p90": 378.6880150437355, + "p95": 392.89598912000656, + "p99": 432.70399421453476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 272.99201488494873, + "p90": 324.12800192832947, + "p95": 333.8559865951538, + "p99": 387.7759873867035 + }, + "combine": { + "p50": 73.47200065851212, + "p90": 86.14400029182434, + "p95": 90.14400094747543, + "p99": 95.29600292444229 + }, + "roundtrip": { + "p50": 335.29600501060486, + "p90": 378.30400466918945, + "p95": 390.9440040588379, + "p99": 477.31199860572815 + }, + "isolatedSum": { + "p50": 346.46401554346085, + "p90": 410.2720022201538, + "p95": 423.99998754262924, + "p99": 483.0719903111458 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ae1fe594", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_04db0b91", + "comparisonKey": "78245532b235248b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:33.693596+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", 
+ "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 174.112007021904, + "p90": 225.79200565814972, + "p95": 243.13600361347198, + "p99": 433.82400274276733 + }, + "combine": { + "p50": 36.25600039958954, + "p90": 47.45600000023842, + "p95": 56.063998490571976, + "p99": 71.58400118350983 + }, + "roundtrip": { + "p50": 205.88800311088562, + "p90": 280.4799973964691, + "p95": 297.5679934024811, + "p99": 326.55999064445496 + }, + "isolatedSum": { + "p50": 210.36800742149353, + "p90": 273.24800565838814, + "p95": 299.20000210404396, + "p99": 
505.40800392627716 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 176.15999281406403, + "p90": 216.22399985790253, + "p95": 226.59200429916382, + "p99": 252.9599964618683 + }, + "combine": { + "p50": 37.59999945759773, + "p90": 47.29599878191948, + "p95": 54.23999950289726, + "p99": 69.34399902820587 + }, + "roundtrip": { + "p50": 209.34399962425232, + "p90": 251.52000784873962, + "p95": 261.4400088787079, + "p99": 292.6720082759857 + }, + "isolatedSum": { + "p50": 213.75999227166176, + "p90": 263.519998639822, + "p95": 280.8320038020611, + "p99": 322.30399549007416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 184.64000523090363, + "p90": 237.7920001745224, + "p95": 251.3279914855957, + "p99": 277.8559923171997 + }, + "combine": { + "p50": 40.25600105524063, + "p90": 51.45600065588951, + "p95": 59.61599946022034, + "p99": 70.88000327348709 + }, + "roundtrip": { + "p50": 212.8639966249466, + "p90": 262.4639868736267, + "p95": 271.7440128326416, + "p99": 300.7360100746155 + }, + "isolatedSum": { + "p50": 224.89600628614426, + "p90": 289.2480008304119, + "p95": 310.94399094581604, + "p99": 348.7359955906868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 174.112007021904, + "p90": 216.67200326919556, + 
"p95": 230.46399652957916, + "p99": 272.15999364852905 + }, + "combine": { + "p50": 39.45599868893623, + "p90": 46.911999583244324, + "p95": 52.2879995405674, + "p99": 72.06399738788605 + }, + "roundtrip": { + "p50": 206.65599405765533, + "p90": 250.75200200080872, + "p95": 273.3120024204254, + "p99": 332.5439989566803 + }, + "isolatedSum": { + "p50": 213.56800571084023, + "p90": 263.5840028524399, + "p95": 282.75199607014656, + "p99": 344.2239910364151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 181.40800297260284, + "p90": 239.26399648189545, + "p95": 254.43199276924133, + "p99": 436.96001172065735 + }, + "combine": { + "p50": 43.39199885725975, + "p90": 56.51199817657471, + "p95": 63.45599889755249, + "p99": 78.40000092983246 + }, + "roundtrip": { + "p50": 214.78399634361267, + "p90": 265.0879919528961, + "p95": 276.1920094490051, + "p99": 298.5599935054779 + }, + "isolatedSum": { + "p50": 224.8000018298626, + "p90": 295.77599465847015, + "p95": 317.8879916667938, + "p99": 515.3600126504898 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 197.37599790096283, + "p90": 241.43999814987183, + "p95": 250.84799528121948, + "p99": 280.9920012950897 + }, + "combine": { + "p50": 48.31999912858009, + "p90": 59.487998485565186, + "p95": 67.16799736022949, + "p99": 75.03999769687653 + }, + "roundtrip": { + "p50": 234.52800512313843, + "p90": 273.53599667549133, + "p95": 285.7919931411743, + "p99": 322.6560056209564 + }, + "isolatedSum": { + "p50": 
245.69599702954292, + "p90": 300.927996635437, + "p95": 318.015992641449, + "p99": 356.03199899196625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 231.55200481414795, + "p90": 268.99200677871704, + "p95": 284.5120131969452, + "p99": 319.4560110569 + }, + "combine": { + "p50": 57.151999324560165, + "p90": 66.43199920654297, + "p95": 73.85600358247757, + "p99": 83.93599838018417 + }, + "roundtrip": { + "p50": 272.41599559783936, + "p90": 315.39198756217957, + "p95": 326.4319896697998, + "p99": 344.5119857788086 + }, + "isolatedSum": { + "p50": 288.7040041387081, + "p90": 335.42400598526, + "p95": 358.36801677942276, + "p99": 403.3920094370842 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 241.11999571323395, + "p90": 275.39199590682983, + "p95": 289.69600796699524, + "p99": 301.6960024833679 + }, + "combine": { + "p50": 78.43200117349625, + "p90": 90.36800265312195, + "p95": 96.12800180912018, + "p99": 101.27999633550644 + }, + "roundtrip": { + "p50": 301.4400005340576, + "p90": 345.5039858818054, + "p95": 354.52800989151, + "p99": 443.77601146698 + }, + "isolatedSum": { + "p50": 319.5519968867302, + "p90": 365.7599985599518, + "p95": 385.8240097761154, + "p99": 402.97599881887436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + 
"id": "cx-effe7b9f", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h200_dd5c8fab", + "comparisonKey": "c82e0f3d2d386bb8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:31.275653+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + 
"createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 150.33599734306335, + "p90": 168.5120016336441, + "p95": 174.78400468826294, + "p99": 205.08800446987152 + }, + "combine": { + "p50": 32.22399950027466, + "p90": 37.376001477241516, + "p95": 40.09599983692169, + "p99": 44.544000178575516 + }, + "roundtrip": { + "p50": 177.34399437904358, + "p90": 195.93599438667297, + "p95": 200.3519982099533, + "p99": 235.35999655723572 + }, + "isolatedSum": { + "p50": 182.559996843338, + "p90": 205.88800311088562, + "p95": 214.88000452518463, + "p99": 249.63200464844704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 169.18399930000305, + "p90": 212.70400285720825, + "p95": 220.768004655838, + "p99": 229.69600558280945 + }, + "combine": { + "p50": 34.36800092458725, + "p90": 44.03200000524521, + "p95": 52.44800075888634, + "p99": 67.03999638557434 + }, + "roundtrip": { + "p50": 198.40000569820404, + "p90": 238.5919988155365, + "p95": 246.3040053844452, + "p99": 259.552001953125 + }, + "isolatedSum": { + "p50": 203.5520002245903, + "p90": 256.73600286245346, + "p95": 273.21600541472435, + "p99": 296.7360019683838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 168.19199919700623, + "p90": 192.80000030994415, + "p95": 208.44799280166626, + "p99": 223.13599288463593 + }, + "combine": { + "p50": 36.12799942493439, + "p90": 43.07200014591217, + "p95": 
46.592000871896744, + "p99": 59.808000922203064 + }, + "roundtrip": { + "p50": 201.9519954919815, + "p90": 231.26399517059326, + "p95": 243.3920055627823, + "p99": 405.5359959602356 + }, + "isolatedSum": { + "p50": 204.3199986219406, + "p90": 235.87200045585632, + "p95": 255.039993673563, + "p99": 282.943993806839 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 172.38399386405945, + "p90": 237.66399919986725, + "p95": 248.48000705242157, + "p99": 271.0399925708771 + }, + "combine": { + "p50": 37.02399879693985, + "p90": 52.41600051522255, + "p95": 58.14399942755699, + "p99": 65.2799978852272 + }, + "roundtrip": { + "p50": 201.08799636363983, + "p90": 243.0720031261444, + "p95": 252.44799256324768, + "p99": 267.0080065727234 + }, + "isolatedSum": { + "p50": 209.4079926609993, + "p90": 290.0799997150898, + "p95": 306.62400647997856, + "p99": 336.3199904561043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 169.0559983253479, + "p90": 214.56000208854675, + "p95": 228.64000499248505, + "p99": 277.98399329185486 + }, + "combine": { + "p50": 39.45599868893623, + "p90": 44.83199864625931, + "p95": 50.4320003092289, + "p99": 60.28800085186958 + }, + "roundtrip": { + "p50": 198.40000569820404, + "p90": 231.07199370861053, + "p95": 248.03200364112854, + "p99": 275.7439911365509 + }, + "isolatedSum": { + "p50": 208.51199701428413, + "p90": 259.39200073480606, + "p95": 279.07200530171394, + "p99": 338.27199414372444 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 183.74399840831757, + "p90": 223.26399385929108, + "p95": 233.75999927520752, + "p99": 263.839989900589 + }, + "combine": { + "p50": 43.776001781225204, + "p90": 56.03199824690819, + "p95": 63.80800157785416, + "p99": 76.4160007238388 + }, + "roundtrip": { + "p50": 206.2080055475235, + "p90": 250.14400482177734, + "p95": 263.2319927215576, + "p99": 286.20800375938416 + }, + "isolatedSum": { + "p50": 227.52000018954277, + "p90": 279.29599210619926, + "p95": 297.5680008530617, + "p99": 340.2559906244278 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 186.43200397491455, + "p90": 207.519993185997, + "p95": 218.23999285697937, + "p99": 245.4719990491867 + }, + "combine": { + "p50": 51.64799839258194, + "p90": 55.93600124120712, + "p95": 61.28000095486641, + "p99": 71.00799679756165 + }, + "roundtrip": { + "p50": 225.37599503993988, + "p90": 244.73600089550018, + "p95": 256.25601410865784, + "p99": 275.9679853916168 + }, + "isolatedSum": { + "p50": 238.0800023674965, + "p90": 263.45599442720413, + "p95": 279.5199938118458, + "p99": 316.47999584674835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 199.20000433921814, + "p90": 238.71999979019165, + "p95": 247.96800315380096, 
+ "p99": 281.8880081176758 + }, + "combine": { + "p50": 71.00799679756165, + "p90": 81.91999793052673, + "p95": 87.07199990749359, + "p99": 92.73599833250046 + }, + "roundtrip": { + "p50": 281.3439965248108, + "p90": 316.864013671875, + "p95": 325.6959915161133, + "p99": 390.3680145740509 + }, + "isolatedSum": { + "p50": 270.2080011367798, + "p90": 320.6399977207184, + "p95": 335.04000306129456, + "p99": 374.62400645017624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aa19a12d", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h200_1923cf93", + "comparisonKey": "a7b032bf0d516331", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:07.977581+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + 
"resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 173.72800409793854, + "p90": 216.60800278186798, + "p95": 231.83999955654144, + "p99": 271.807998418808 + }, + "combine": { + "p50": 36.99199855327606, + "p90": 49.82399940490723, + "p95": 60.06399914622307, + "p99": 71.00799679756165 + }, + "roundtrip": { + "p50": 203.2639980316162, + "p90": 244.9920028448105, + "p95": 254.94399666786194, + "p99": 284.38401222229004 + }, + "isolatedSum": { + "p50": 210.7200026512146, + "p90": 266.4320021867752, + "p95": 291.9039987027645, + "p99": 342.81599521636963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 174.78400468826294, + "p90": 219.93599832057953, + "p95": 227.64800488948822, + "p99": 240.76800048351288 + }, + "combine": { + "p50": 37.18400001525879, + "p90": 53.15199866890907, + "p95": 60.256000608205795, + "p99": 72.86400347948074 + }, + "roundtrip": { + "p50": 216.2880003452301, + 
"p90": 259.74398851394653, + "p95": 270.9439992904663, + "p99": 294.14400458335876 + }, + "isolatedSum": { + "p50": 211.96800470352173, + "p90": 273.0879969894886, + "p95": 287.904005497694, + "p99": 313.6320039629936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 168.60799491405487, + "p90": 213.18399906158447, + "p95": 224.5119959115982, + "p99": 273.6639976501465 + }, + "combine": { + "p50": 39.744000881910324, + "p90": 49.15200173854828, + "p95": 58.111999183893204, + "p99": 68.31999868154526 + }, + "roundtrip": { + "p50": 204.76800203323364, + "p90": 253.53598594665527, + "p95": 267.13600754737854, + "p99": 329.53599095344543 + }, + "isolatedSum": { + "p50": 208.3519957959652, + "p90": 262.33600080013275, + "p95": 282.6239950954914, + "p99": 341.98399633169174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 173.12000691890717, + "p90": 233.88800024986267, + "p95": 247.3600059747696, + "p99": 267.39200949668884 + }, + "combine": { + "p50": 39.872001856565475, + "p90": 50.23999884724617, + "p95": 60.70400029420853, + "p99": 76.67200267314911 + }, + "roundtrip": { + "p50": 199.45600628852844, + "p90": 253.28001379966736, + "p95": 263.35999369621277, + "p99": 279.776006937027 + }, + "isolatedSum": { + "p50": 212.99200877547264, + "p90": 284.12799909710884, + "p95": 308.0640062689781, + "p99": 344.06401216983795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 
47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 176.03200674057007, + "p90": 194.7840005159378, + "p95": 214.84799683094025, + "p99": 237.15199530124664 + }, + "combine": { + "p50": 42.55999997258186, + "p90": 47.32799902558327, + "p95": 52.639998495578766, + "p99": 67.80800223350525 + }, + "roundtrip": { + "p50": 208.67200195789337, + "p90": 243.48799884319305, + "p95": 262.91200518608093, + "p99": 290.336012840271 + }, + "isolatedSum": { + "p50": 218.59200671315193, + "p90": 242.11199954152107, + "p95": 267.487995326519, + "p99": 304.9599975347519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 204.28800582885742, + "p90": 238.20799589157104, + "p95": 250.62400102615356, + "p99": 262.36799359321594 + }, + "combine": { + "p50": 44.92799937725067, + "p90": 51.7439991235733, + "p95": 61.59999966621399, + "p99": 74.0479975938797 + }, + "roundtrip": { + "p50": 239.19999599456787, + "p90": 277.69601345062256, + "p95": 287.29599714279175, + "p99": 335.2639973163605 + }, + "isolatedSum": { + "p50": 249.2160052061081, + "p90": 289.95199501514435, + "p95": 312.22400069236755, + "p99": 336.41599118709564 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 256.6399872303009, + "p90": 279.35999631881714, + "p95": 298.8159954547882, + "p99": 313.76001238822937 + }, + "combine": { + "p50": 55.424001067876816, + "p90": 59.36000123620033, + "p95": 
63.680000603199005, + "p99": 74.46400076150894 + }, + "roundtrip": { + "p50": 302.3039996623993, + "p90": 333.47201347351074, + "p95": 346.46400809288025, + "p99": 411.6159975528717 + }, + "isolatedSum": { + "p50": 312.0639882981777, + "p90": 338.71999755501747, + "p95": 362.4959960579872, + "p99": 388.2240131497383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 264.5440101623535, + "p90": 297.1520125865936, + "p95": 317.31200218200684, + "p99": 392.5119936466217 + }, + "combine": { + "p50": 71.19999825954437, + "p90": 76.38400048017502, + "p95": 81.60000294446945, + "p99": 91.93599969148636 + }, + "roundtrip": { + "p50": 321.9519853591919, + "p90": 344.7360098361969, + "p95": 359.8400056362152, + "p99": 381.98399543762207 + }, + "isolatedSum": { + "p50": 335.7440084218979, + "p90": 373.53601306676865, + "p95": 398.9120051264763, + "p99": 484.44799333810806 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dc762076", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h200_646b54fe", + "comparisonKey": "eaccacef4896bd24", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:14.351502+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 167.4879938364029, + "p90": 208.44799280166626, + "p95": 215.29600024223328, + "p99": 236.64000630378723 + }, + "combine": { + "p50": 37.02399879693985, + "p90": 56.8000003695488, + "p95": 63.040003180503845, + "p99": 74.11199808120728 + }, + "roundtrip": { + "p50": 202.84800231456757, + "p90": 251.10399723052979, + "p95": 263.3279860019684, + "p99": 312.22400069236755 + }, + "isolatedSum": { + "p50": 204.51199263334274, + "p90": 
265.24799317121506, + "p95": 278.3360034227371, + "p99": 310.7520043849945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 166.36799275875092, + "p90": 210.9760046005249, + "p95": 217.31199324131012, + "p99": 309.1520071029663 + }, + "combine": { + "p50": 37.376001477241516, + "p90": 51.16799846291542, + "p95": 58.75200033187866, + "p99": 69.24799829721451 + }, + "roundtrip": { + "p50": 198.94400238990784, + "p90": 247.99999594688416, + "p95": 257.6960027217865, + "p99": 341.2480056285858 + }, + "isolatedSum": { + "p50": 203.74399423599243, + "p90": 262.1440030634403, + "p95": 276.0639935731888, + "p99": 378.4000054001808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 169.63200271129608, + "p90": 215.71199595928192, + "p95": 224.73600506782532, + "p99": 472.57599234580994 + }, + "combine": { + "p50": 39.5519994199276, + "p90": 50.87999999523163, + "p95": 57.95200169086456, + "p99": 64.92800265550613 + }, + "roundtrip": { + "p50": 198.81600141525269, + "p90": 243.58400702476501, + "p95": 252.3840069770813, + "p99": 284.7679853439331 + }, + "isolatedSum": { + "p50": 209.18400213122368, + "p90": 266.59199595451355, + "p95": 282.6880067586899, + "p99": 537.5039950013161 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { 
+ "p50": 166.81599617004395, + "p90": 226.6560047864914, + "p95": 254.2400062084198, + "p99": 286.3360047340393 + }, + "combine": { + "p50": 39.93599861860275, + "p90": 49.56800118088722, + "p95": 57.440001517534256, + "p99": 65.08799642324448 + }, + "roundtrip": { + "p50": 205.47200739383698, + "p90": 250.43201446533203, + "p95": 265.76000452041626, + "p99": 320.16000151634216 + }, + "isolatedSum": { + "p50": 206.7519947886467, + "p90": 276.2240059673786, + "p95": 311.68000772595406, + "p99": 351.4240011572838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 172.2559928894043, + "p90": 218.9439982175827, + "p95": 225.18399357795715, + "p99": 259.71201062202454 + }, + "combine": { + "p50": 44.44799944758415, + "p90": 51.61599814891815, + "p95": 58.94400179386139, + "p99": 67.71200150251389 + }, + "roundtrip": { + "p50": 205.9199959039688, + "p90": 246.87999486923218, + "p95": 254.30399179458618, + "p99": 285.15198826789856 + }, + "isolatedSum": { + "p50": 216.70399233698845, + "p90": 270.55999636650085, + "p95": 284.12799537181854, + "p99": 327.4240121245384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 202.81599462032318, + "p90": 244.22399699687958, + "p95": 256.3520073890686, + "p99": 304.7040104866028 + }, + "combine": { + "p50": 48.48000034689903, + "p90": 57.760000228881836, + "p95": 63.93600255250931, + "p99": 69.31199878454208 + }, + "roundtrip": { + "p50": 237.69600689411163, + "p90": 278.9439857006073, + "p95": 290.49599170684814, + "p99": 
388.7360095977783 + }, + "isolatedSum": { + "p50": 251.2959949672222, + "p90": 301.9839972257614, + "p95": 320.2880099415779, + "p99": 374.01600927114487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 258.33600759506226, + "p90": 295.74400186538696, + "p95": 304.639995098114, + "p99": 317.8879916667938 + }, + "combine": { + "p50": 57.37600103020668, + "p90": 66.94400310516357, + "p95": 73.37599992752075, + "p99": 78.14399898052216 + }, + "roundtrip": { + "p50": 297.37600684165955, + "p90": 338.9120101928711, + "p95": 344.83200311660767, + "p99": 363.48798871040344 + }, + "isolatedSum": { + "p50": 315.71200862526894, + "p90": 362.68800497055054, + "p95": 378.01599502563477, + "p99": 396.031990647316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 254.65598702430725, + "p90": 291.7439937591553, + "p95": 297.7280020713806, + "p99": 310.3039860725403 + }, + "combine": { + "p50": 76.89599692821503, + "p90": 86.5280032157898, + "p95": 90.36800265312195, + "p99": 93.37600320577621 + }, + "roundtrip": { + "p50": 327.84000039100647, + "p90": 366.62399768829346, + "p95": 384.44799184799194, + "p99": 490.3680086135864 + }, + "isolatedSum": { + "p50": 331.5519839525223, + "p90": 378.27199697494507, + "p95": 388.09600472450256, + "p99": 403.6799892783165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-02b9e962", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h200_2f342df0", + "comparisonKey": "73128e3509d36684", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:39.904449+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 164.92800414562225, + "p90": 182.91200697422028, + "p95": 188.7039989233017, + "p99": 211.07199788093567 + }, + "combine": { + "p50": 36.768000572919846, + "p90": 42.847998440265656, + "p95": 46.560000628232956, + "p99": 55.80800026655197 + }, + "roundtrip": { + "p50": 196.73599302768707, + "p90": 214.49600160121918, + "p95": 219.26400065422058, + "p99": 231.9680005311966 + }, + "isolatedSum": { + "p50": 201.6960047185421, + "p90": 225.76000541448593, + "p95": 235.26399955153465, + "p99": 266.87999814748764 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 168.92799735069275, + "p90": 188.76799941062927, + "p95": 195.80799341201782, + "p99": 212.54399418830872 + }, + "combine": { + "p50": 35.96799820661545, + "p90": 41.82400181889534, + "p95": 44.704001396894455, + "p99": 53.599998354911804 + }, + "roundtrip": { + "p50": 198.2399970293045, + "p90": 218.30399334430695, + "p95": 226.52800381183624, + "p99": 282.8480005264282 + }, + "isolatedSum": { + "p50": 204.8959955573082, + "p90": 230.5920012295246, + "p95": 240.51199480891228, + "p99": 266.1439925432205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 169.5680022239685, + "p90": 187.32799589633942, + "p95": 196.60800695419312, + "p99": 
240.9600019454956 + }, + "combine": { + "p50": 39.45599868893623, + "p90": 43.74400153756142, + "p95": 46.911999583244324, + "p99": 51.77599936723709 + }, + "roundtrip": { + "p50": 205.28000593185425, + "p90": 225.055992603302, + "p95": 232.16000199317932, + "p99": 282.3359966278076 + }, + "isolatedSum": { + "p50": 209.02400091290474, + "p90": 231.07199743390083, + "p95": 243.52000653743744, + "p99": 292.7360013127327 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 170.9119975566864, + "p90": 192.22399592399597, + "p95": 197.24799692630768, + "p99": 213.24799954891205 + }, + "combine": { + "p50": 40.25600105524063, + "p90": 44.35199871659279, + "p95": 48.35199937224388, + "p99": 54.17599901556969 + }, + "roundtrip": { + "p50": 201.50400698184967, + "p90": 224.60800409317017, + "p95": 237.98400163650513, + "p99": 311.0719919204712 + }, + "isolatedSum": { + "p50": 211.16799861192703, + "p90": 236.57599464058876, + "p95": 245.59999629855156, + "p99": 267.42399856448174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 175.80799758434296, + "p90": 191.55199825763702, + "p95": 197.31199741363525, + "p99": 216.19200706481934 + }, + "combine": { + "p50": 45.3759990632534, + "p90": 48.64000156521797, + "p95": 52.15999856591225, + "p99": 59.13599953055382 + }, + "roundtrip": { + "p50": 209.1200053691864, + "p90": 229.88800704479218, + "p95": 238.36800456047058, + "p99": 263.93601298332214 + }, + "isolatedSum": { + "p50": 221.18399664759636, + "p90": 
240.191999822855, + "p95": 249.4719959795475, + "p99": 275.32800659537315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 203.61599326133728, + "p90": 219.29599344730377, + "p95": 224.0000069141388, + "p99": 245.2159970998764 + }, + "combine": { + "p50": 47.26399853825569, + "p90": 50.40000006556511, + "p95": 54.17599901556969, + "p99": 60.83200126886368 + }, + "roundtrip": { + "p50": 237.40799725055695, + "p90": 252.48000025749207, + "p95": 259.2960000038147, + "p99": 277.3120105266571 + }, + "isolatedSum": { + "p50": 250.87999179959297, + "p90": 269.6959935128689, + "p95": 278.1760059297085, + "p99": 306.0479983687401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 265.56798815727234, + "p90": 279.3920040130615, + "p95": 286.1120104789734, + "p99": 303.8719892501831 + }, + "combine": { + "p50": 58.43200162053108, + "p90": 61.824001371860504, + "p95": 64.92800265550613, + "p99": 72.38399982452393 + }, + "roundtrip": { + "p50": 307.3599934577942, + "p90": 323.168009519577, + "p95": 330.3360044956207, + "p99": 416.1919951438904 + }, + "isolatedSum": { + "p50": 323.9999897778034, + "p90": 341.216005384922, + "p95": 351.0400131344795, + "p99": 376.25598907470703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 264.16000723838806, + "p90": 281.21599555015564, + "p95": 287.32800483703613, + "p99": 351.77600383758545 + }, + "combine": { + "p50": 70.01599669456482, + "p90": 74.68800246715546, + "p95": 78.3040001988411, + "p99": 94.78399902582169 + }, + "roundtrip": { + "p50": 323.8080143928528, + "p90": 338.4320139884949, + "p95": 344.0000116825104, + "p99": 451.61598920822144 + }, + "isolatedSum": { + "p50": 334.1760039329529, + "p90": 355.9039980173111, + "p95": 365.6320050358772, + "p99": 446.56000286340714 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f740a9da", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_d3ac0b1d", + "comparisonKey": "353be41cdd03df51", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:11.687043+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 
null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 179.71199750900269, + "p90": 207.5520008802414, + "p95": 218.87999773025513, + "p99": 257.02399015426636 + }, + "combine": { + "p50": 37.98399865627289, + "p90": 47.32799902558327, + "p95": 54.88000065088272, + "p99": 67.90400296449661 + }, + "roundtrip": { + "p50": 219.80799734592438, + "p90": 249.7600018978119, + "p95": 260.6399953365326, + "p99": 296.8960106372833 + }, + "isolatedSum": { + "p50": 217.69599616527557, + "p90": 254.87999990582466, + "p95": 273.75999838113785, + "p99": 324.92799311876297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 180.25599420070648, + "p90": 203.90400290489197, + "p95": 213.53599429130554, + "p99": 281.2480032444 + }, + "combine": { + "p50": 38.14399987459183, + "p90": 52.671998739242554, + "p95": 59.42400172352791, + "p99": 
69.92000341415405 + }, + "roundtrip": { + "p50": 214.56000208854675, + "p90": 230.68800568580627, + "p95": 234.40000414848328, + "p99": 244.22399699687958 + }, + "isolatedSum": { + "p50": 218.3999940752983, + "p90": 256.5760016441345, + "p95": 272.95999601483345, + "p99": 351.1680066585541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 172.83199727535248, + "p90": 196.25599682331085, + "p95": 207.2640061378479, + "p99": 251.8720030784607 + }, + "combine": { + "p50": 39.93599861860275, + "p90": 44.92799937725067, + "p95": 50.464000552892685, + "p99": 61.69600039720535 + }, + "roundtrip": { + "p50": 206.11199736595154, + "p90": 235.48799753189087, + "p95": 246.97600305080414, + "p99": 293.4400141239166 + }, + "isolatedSum": { + "p50": 212.76799589395523, + "p90": 241.18399620056152, + "p95": 257.7280066907406, + "p99": 313.56800347566605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 180.35200238227844, + "p90": 204.352006316185, + "p95": 212.44800090789795, + "p99": 236.00000143051147 + }, + "combine": { + "p50": 40.47999903559685, + "p90": 46.04800045490265, + "p95": 50.97600072622299, + "p99": 65.63200056552887 + }, + "roundtrip": { + "p50": 215.16799926757812, + "p90": 243.71199309825897, + "p95": 257.9840123653412, + "p99": 310.7840120792389 + }, + "isolatedSum": { + "p50": 220.8320014178753, + "p90": 250.40000677108765, + "p95": 263.42400163412094, + "p99": 301.63200199604034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + 
"combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 180.7039976119995, + "p90": 202.62399315834045, + "p95": 214.39999341964722, + "p99": 269.98400688171387 + }, + "combine": { + "p50": 43.58400031924248, + "p90": 49.15200173854828, + "p95": 55.615998804569244, + "p99": 71.03999704122543 + }, + "roundtrip": { + "p50": 223.07200729846954, + "p90": 255.45600056648254, + "p95": 269.9519991874695, + "p99": 316.5439963340759 + }, + "isolatedSum": { + "p50": 224.287997931242, + "p90": 251.77599489688873, + "p95": 270.01599222421646, + "p99": 341.0240039229393 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 190.8479928970337, + "p90": 206.7520022392273, + "p95": 221.53599560260773, + "p99": 244.25600469112396 + }, + "combine": { + "p50": 48.35199937224388, + "p90": 52.06400156021118, + "p95": 58.14399942755699, + "p99": 69.34399902820587 + }, + "roundtrip": { + "p50": 229.0239930152893, + "p90": 247.16800451278687, + "p95": 258.14399123191833, + "p99": 281.8560004234314 + }, + "isolatedSum": { + "p50": 239.19999226927757, + "p90": 258.8160037994385, + "p95": 279.6799950301647, + "p99": 313.60000371932983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 229.12000119686127, + "p90": 247.74399399757385, + "p95": 253.08799743652344, + "p99": 309.7279965877533 + }, + 
"combine": { + "p50": 57.34400078654289, + "p90": 60.15999987721443, + "p95": 63.040003180503845, + "p99": 71.00799679756165 + }, + "roundtrip": { + "p50": 264.95999097824097, + "p90": 277.69601345062256, + "p95": 282.8800082206726, + "p99": 302.97601222991943 + }, + "isolatedSum": { + "p50": 286.46400198340416, + "p90": 307.9039938747883, + "p95": 316.1280006170273, + "p99": 380.73599338531494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 228.99200022220612, + "p90": 246.0480034351349, + "p95": 254.7520101070404, + "p99": 272.8959918022156 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 80.70400357246399, + "p95": 85.34400165081024, + "p99": 92.41600334644318 + }, + "roundtrip": { + "p50": 297.5679934024811, + "p90": 325.6320059299469, + "p95": 335.999995470047, + "p99": 385.79198718070984 + }, + "isolatedSum": { + "p50": 305.60000240802765, + "p90": 326.7520070075989, + "p95": 340.09601175785065, + "p99": 365.31199514865875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16f06342", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_4dac9421", + "comparisonKey": "375e195ab390a2b2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:37.288676+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 175.48799514770508, + "p90": 227.9680073261261, + "p95": 239.71199989318848, + "p99": 271.42399549484253 + }, + "combine": { + "p50": 37.18400001525879, + "p90": 57.760000228881836, + "p95": 64.38399851322174, + "p99": 72.7040022611618 + }, + "roundtrip": { + "p50": 208.76799523830414, + 
"p90": 256.22400641441345, + "p95": 270.4319953918457, + "p99": 299.6160089969635 + }, + "isolatedSum": { + "p50": 212.67199516296387, + "p90": 285.72800755500793, + "p95": 304.0959984064102, + "p99": 344.12799775600433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 177.40799486637115, + "p90": 229.72799837589264, + "p95": 243.6479926109314, + "p99": 275.04000067710876 + }, + "combine": { + "p50": 37.567999213933945, + "p90": 55.39200082421303, + "p95": 63.93600255250931, + "p99": 74.97599720954895 + }, + "roundtrip": { + "p50": 205.34400641918182, + "p90": 251.6160011291504, + "p95": 264.76800441741943, + "p99": 290.23998975753784 + }, + "isolatedSum": { + "p50": 214.9759940803051, + "p90": 285.11999920010567, + "p95": 307.5839951634407, + "p99": 350.0159978866577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 172.83199727535248, + "p90": 217.056006193161, + "p95": 228.67199778556824, + "p99": 288.1920039653778 + }, + "combine": { + "p50": 39.48799893260002, + "p90": 52.639998495578766, + "p95": 61.503998935222626, + "p99": 70.36799937486649 + }, + "roundtrip": { + "p50": 207.64799416065216, + "p90": 253.37600708007812, + "p95": 261.28000020980835, + "p99": 288.89599442481995 + }, + "isolatedSum": { + "p50": 212.3199962079525, + "p90": 269.6960046887398, + "p95": 290.17599672079086, + "p99": 358.5600033402443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 178.78399789333344, + "p90": 226.0800004005432, + "p95": 235.3920042514801, + "p99": 253.53598594665527 + }, + "combine": { + "p50": 40.89599847793579, + "p90": 54.207999259233475, + "p95": 59.808000922203064, + "p99": 69.63200122117996 + }, + "roundtrip": { + "p50": 208.22399854660034, + "p90": 250.20799040794373, + "p95": 258.59200954437256, + "p99": 274.6559977531433 + }, + "isolatedSum": { + "p50": 219.67999637126923, + "p90": 280.2879996597767, + "p95": 295.20000517368317, + "p99": 323.16798716783524 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 184.1599941253662, + "p90": 229.88800704479218, + "p95": 241.5039986371994, + "p99": 280.0639867782593 + }, + "combine": { + "p50": 45.69600149989128, + "p90": 56.543998420238495, + "p95": 62.431998550891876, + "p99": 73.63200187683105 + }, + "roundtrip": { + "p50": 217.75999665260315, + "p90": 263.68001103401184, + "p95": 273.0880081653595, + "p99": 292.60799288749695 + }, + "isolatedSum": { + "p50": 229.8559956252575, + "p90": 286.43200546503067, + "p95": 303.9359971880913, + "p99": 353.69598865509033 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 211.4879935979843, + "p90": 254.20799851417542, + "p95": 263.2000148296356, + "p99": 290.5600070953369 + }, + "combine": { + "p50": 48.25599864125252, + "p90": 59.23200026154518, + "p95": 
68.35199892520905, + "p99": 77.7600035071373 + }, + "roundtrip": { + "p50": 243.42399835586548, + "p90": 283.4559977054596, + "p95": 294.17601227760315, + "p99": 318.2080090045929 + }, + "isolatedSum": { + "p50": 259.74399223923683, + "p90": 313.4399987757206, + "p95": 331.55201375484467, + "p99": 368.3200106024742 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 266.04801416397095, + "p90": 300.31999945640564, + "p95": 311.8720054626465, + "p99": 333.9200019836426 + }, + "combine": { + "p50": 57.69599974155426, + "p90": 67.1359971165657, + "p95": 74.75200295448303, + "p99": 78.87999713420868 + }, + "roundtrip": { + "p50": 309.88800525665283, + "p90": 348.4799861907959, + "p95": 362.11198568344116, + "p99": 505.5040121078491 + }, + "isolatedSum": { + "p50": 323.7440139055252, + "p90": 367.45599657297134, + "p95": 386.6240084171295, + "p99": 412.79999911785126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 266.30398631095886, + "p90": 307.6159954071045, + "p95": 316.76799058914185, + "p99": 329.0880024433136 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 82.59200304746628, + "p95": 91.00800007581711, + "p99": 96.03200107812881 + }, + "roundtrip": { + "p50": 328.70399951934814, + "p90": 363.48798871040344, + "p95": 371.13600969314575, + "p99": 391.2320137023926 + }, + "isolatedSum": { + "p50": 338.3359834551811, + "p90": 390.20799845457077, + "p95": 407.77599066495895, + "p99": 425.1200035214424 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0d49db4e", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_179dbf5d", + "comparisonKey": "a3de2b670783ff63", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:17.483535+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 174.3679940700531, + "p90": 191.13600254058838, + "p95": 198.91199469566345, + "p99": 219.4879949092865 + }, + "combine": { + "p50": 37.63199970126152, + "p90": 43.2640016078949, + "p95": 46.14400118589401, + "p99": 52.25599929690361 + }, + "roundtrip": { + "p50": 205.9839963912964, + "p90": 222.59199619293213, + "p95": 226.33600234985352, + "p99": 267.10399985313416 + }, + "isolatedSum": { + "p50": 211.99999377131462, + "p90": 234.40000414848328, + "p95": 245.05599588155746, + "p99": 271.7439942061901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 172.8000044822693, + "p90": 191.48799777030945, + "p95": 199.23199713230133, + "p99": 243.13600361347198 + }, + "combine": { + "p50": 37.43999823927879, + "p90": 42.94399917125702, + "p95": 46.560000628232956, + "p99": 49.56800118088722 + }, + "roundtrip": { + "p50": 204.03200387954712, + "p90": 220.2560007572174, + "p95": 225.92000663280487, + "p99": 257.6639950275421 + }, + "isolatedSum": { + "p50": 210.24000272154808, + "p90": 234.43199694156647, + "p95": 245.7919977605343, + "p99": 292.7040047943592 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 172.92800545692444, + "p90": 188.28800320625305, + "p95": 192.89599359035492, + "p99": 204.51200008392334 + }, + "combine": { + "p50": 40.063999593257904, + "p90": 45.21600157022476, + "p95": 47.61600121855736, + "p99": 52.89600044488907 + }, + "roundtrip": { + "p50": 205.6960016489029, + "p90": 222.08000719547272, + "p95": 228.0000001192093, + "p99": 279.83999252319336 + }, + "isolatedSum": { + "p50": 212.99200505018234, + "p90": 233.5040047764778, + "p95": 240.51199480891228, + "p99": 257.4080005288124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 174.72000420093536, + "p90": 194.11200284957886, + "p95": 198.91199469566345, + "p99": 248.09600412845612 + }, + "combine": { + "p50": 41.37599840760231, + "p90": 47.58400097489357, + "p95": 51.872000098228455, + "p99": 59.13599953055382 + }, + "roundtrip": { + "p50": 210.4640007019043, + "p90": 231.1680018901825, + "p95": 239.58399891853333, + "p99": 261.6319954395294 + }, + "isolatedSum": { + "p50": 216.09600260853767, + "p90": 241.69600382447243, + "p95": 250.7839947938919, + "p99": 307.23200365900993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 183.3599954843521, + "p90": 197.9839950799942, + "p95": 204.6079933643341, + "p99": 215.93600511550903 + }, + "combine": { + "p50": 44.73600164055824, + "p90": 48.48000034689903, + "p95": 52.51200124621391, + "p99": 55.71199953556061 + }, + "roundtrip": { + "p50": 216.06400609016418, + "p90": 235.23199558258057, + 
"p95": 242.08000302314758, + "p99": 279.87200021743774 + }, + "isolatedSum": { + "p50": 228.09599712491035, + "p90": 246.46399542689323, + "p95": 257.119994610548, + "p99": 271.64800465106964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 207.2959989309311, + "p90": 225.63199698925018, + "p95": 239.6160066127777, + "p99": 294.14400458335876 + }, + "combine": { + "p50": 46.431999653577805, + "p90": 49.75999891757965, + "p95": 53.408000618219376, + "p99": 60.7680007815361 + }, + "roundtrip": { + "p50": 238.75199258327484, + "p90": 251.19999051094055, + "p95": 255.264014005661, + "p99": 267.2320008277893 + }, + "isolatedSum": { + "p50": 253.7279985845089, + "p90": 275.39199590682983, + "p95": 293.0240072309971, + "p99": 354.91200536489487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 266.07999205589294, + "p90": 280.67201375961304, + "p95": 288.672000169754, + "p99": 359.6479892730713 + }, + "combine": { + "p50": 57.24800005555153, + "p90": 60.67200005054474, + "p95": 64.57599997520447, + "p99": 70.36799937486649 + }, + "roundtrip": { + "p50": 304.03199791908264, + "p90": 319.16800141334534, + "p95": 326.07999444007874, + "p99": 382.30401277542114 + }, + "isolatedSum": { + "p50": 323.3279921114445, + "p90": 341.3440138101578, + "p95": 353.2480001449585, + "p99": 430.0159886479378 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 270.4960107803345, + "p90": 286.624014377594, + "p95": 297.5359857082367, + "p99": 358.97600650787354 + }, + "combine": { + "p50": 73.27999919652939, + "p90": 76.4480009675026, + "p95": 80.99199831485748, + "p99": 88.54400366544724 + }, + "roundtrip": { + "p50": 329.3119966983795, + "p90": 342.1120047569275, + "p95": 349.4400084018707, + "p99": 449.0559995174408 + }, + "isolatedSum": { + "p50": 343.77600997686386, + "p90": 363.0720153450966, + "p95": 378.5279840230942, + "p99": 447.52001017332077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-da96d67d", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_74ad67fa", + "comparisonKey": "f5022195e0ad2f2c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:21.478761+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 265.7279968261719, + "p90": 278.27200293540955, + "p95": 283.7119996547699, + "p99": 314.4960105419159 + }, + "combine": { + "p50": 70.592001080513, + "p90": 74.17599856853485, + "p95": 76.92799717187881, + "p99": 82.65600353479385 + }, + "roundtrip": { + "p50": 325.5999982357025, + "p90": 339.32799100875854, + "p95": 347.00798988342285, + "p99": 374.81600046157837 + }, + "isolatedSum": { + "p50": 336.3199979066849, + "p90": 352.4480015039444, + "p95": 360.6399968266487, + "p99": 397.15201407670975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 274.97598528862, + "p90": 291.1680042743683, + "p95": 298.8480031490326, + "p99": 
336.8639945983887 + }, + "combine": { + "p50": 99.84000027179718, + "p90": 102.91200131177902, + "p95": 105.6319996714592, + "p99": 108.35199803113937 + }, + "roundtrip": { + "p50": 369.7600066661835, + "p90": 382.30401277542114, + "p95": 386.01601123809814, + "p99": 410.75199842453003 + }, + "isolatedSum": { + "p50": 374.8159855604172, + "p90": 394.0800055861473, + "p95": 404.4800028204918, + "p99": 445.21599262952805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 293.2479977607727, + "p90": 307.45598673820496, + "p95": 311.3600015640259, + "p99": 324.0320086479187 + }, + "combine": { + "p50": 164.57599401474, + "p90": 167.80799627304077, + "p95": 169.5680022239685, + "p99": 178.71999740600586 + }, + "roundtrip": { + "p50": 452.9919922351837, + "p90": 465.37598967552185, + "p95": 470.5280065536499, + "p99": 498.6239969730377 + }, + "isolatedSum": { + "p50": 457.8239917755127, + "p90": 475.2639830112457, + "p95": 480.9280037879944, + "p99": 502.75200605392456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 437.3759925365448, + "p90": 452.60798931121826, + "p95": 459.03998613357544, + "p99": 519.2639827728271 + }, + "combine": { + "p50": 285.2480113506317, + "p90": 288.83200883865356, + "p95": 289.95200991630554, + "p99": 299.6160089969635 + }, + "roundtrip": { + "p50": 722.495973110199, + "p90": 736.5760207176208, + "p95": 742.6559925079346, + "p99": 815.4240250587463 + }, + "isolatedSum": { + "p50": 722.6240038871765, + 
"p90": 741.4399981498718, + "p95": 748.991996049881, + "p99": 818.8799917697906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 706.1439752578735, + "p90": 722.7200269699097, + "p95": 740.5440211296082, + "p99": 849.1520285606384 + }, + "combine": { + "p50": 530.7199954986572, + "p90": 535.2320075035095, + "p95": 537.4079942703247, + "p99": 570.8479881286621 + }, + "roundtrip": { + "p50": 1232.4479818344116, + "p90": 1253.4079551696777, + "p95": 1283.6799621582031, + "p99": 1361.9199991226196 + }, + "isolatedSum": { + "p50": 1236.8639707565308, + "p90": 1257.9520344734192, + "p95": 1277.9520153999329, + "p99": 1420.0000166893005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1255.8079957962036, + "p90": 1269.312024116516, + "p95": 1277.2159576416016, + "p99": 1351.9999980926514 + }, + "combine": { + "p50": 1019.8719501495361, + "p90": 1025.5039930343628, + "p95": 1029.7919511795044, + "p99": 1085.3760242462158 + }, + "roundtrip": { + "p50": 2270.7839012145996, + "p90": 2286.8800163269043, + "p95": 2299.5519638061523, + "p99": 2446.784019470215 + }, + "isolatedSum": { + "p50": 2275.6799459457397, + "p90": 2294.816017150879, + "p95": 2307.007908821106, + "p99": 2437.376022338867 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + } + ] + }, + { + "id": "cx-a09c55d4", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h200_e7b91752", + "comparisonKey": "8a75636d32916022", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:21.524561+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 319.3280100822449, + "p90": 336.2880051136017, + "p95": 346.015989780426, + "p99": 440.6079947948456 + }, + "combine": { + "p50": 90.11200070381165, + "p90": 93.75999867916107, + "p95": 95.93600034713745, + "p99": 103.42399775981903 + }, + "roundtrip": { + "p50": 403.74401211738586, + "p90": 460.86400747299194, + "p95": 500.9920001029968, + "p99": 561.5040063858032 + }, + "isolatedSum": { + "p50": 409.4400107860565, + "p90": 430.04800379276276, + "p95": 441.9519901275635, + "p99": 544.0319925546646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 327.32799649238586, + "p90": 351.1680066585541, + "p95": 367.2640025615692, + "p99": 390.49598574638367 + }, + "combine": { + "p50": 138.20800185203552, + "p90": 143.64799857139587, + "p95": 150.65599977970123, + "p99": 162.75200247764587 + }, + "roundtrip": { + "p50": 457.4719965457916, + "p90": 493.47200989723206, + "p95": 499.7439980506897, + "p99": 510.6559991836548 + }, + "isolatedSum": { + "p50": 465.5359983444214, + "p90": 494.81600522994995, + "p95": 517.9200023412704, + "p99": 553.2479882240295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 373.1200098991394, + "p90": 406.14399313926697, + "p95": 421.31200432777405, + "p99": 478.303998708725 + 
}, + "combine": { + "p50": 231.9359928369522, + "p90": 239.45599794387817, + "p95": 242.01600253582, + "p99": 248.54399263858795 + }, + "roundtrip": { + "p50": 603.5199761390686, + "p90": 635.6480121612549, + "p95": 648.639976978302, + "p99": 799.8080253601074 + }, + "isolatedSum": { + "p50": 605.0560027360916, + "p90": 645.5999910831451, + "p95": 663.328006863594, + "p99": 726.8479913473129 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 597.1519947052002, + "p90": 619.3919777870178, + "p95": 632.6720118522644, + "p99": 722.5919961929321 + }, + "combine": { + "p50": 418.62401366233826, + "p90": 426.65600776672363, + "p95": 430.400013923645, + "p99": 435.07200479507446 + }, + "roundtrip": { + "p50": 1011.6159915924072, + "p90": 1033.8560342788696, + "p95": 1046.2720394134521, + "p99": 1171.2960004806519 + }, + "isolatedSum": { + "p50": 1015.7760083675385, + "p90": 1046.0479855537415, + "p95": 1063.0720257759094, + "p99": 1157.6640009880066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1014.0800476074219, + "p90": 1041.0239696502686, + "p95": 1055.999994277954, + "p99": 1175.8719682693481 + }, + "combine": { + "p50": 792.352020740509, + "p90": 804.256021976471, + "p95": 811.1039996147156, + "p99": 899.7759819030762 + }, + "roundtrip": { + "p50": 1797.984004020691, + "p90": 1833.0880403518677, + "p95": 1855.5519580841064, + "p99": 2012.063980102539 + }, + "isolatedSum": { + "p50": 1806.432068347931, + "p90": 1845.2799916267395, + "p95": 
1867.1039938926697, + "p99": 2075.6479501724243 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1859.8079681396484, + "p90": 1896.8000411987305, + "p95": 1933.5999488830566, + "p99": 2082.7839374542236 + }, + "combine": { + "p50": 1534.1440439224243, + "p90": 1544.4159507751465, + "p95": 1552.832007408142, + "p99": 1652.83203125 + }, + "roundtrip": { + "p50": 3390.9120559692383, + "p90": 3466.207981109619, + "p95": 3578.6240100860596, + "p99": 3837.2480869293213 + }, + "isolatedSum": { + "p50": 3393.9520120620728, + "p90": 3441.215991973877, + "p95": 3486.4319562911987, + "p99": 3735.6159687042236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-331043c2", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h200_7b90a3ed", + "comparisonKey": "077753acf5274806", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:08.263328+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 172.89599776268005, + "p90": 192.32000410556793, + "p95": 209.50399339199066, + "p99": 242.8479939699173 + }, + "combine": { + "p50": 42.847998440265656, + "p90": 47.32799902558327, + "p95": 51.13599821925163, + "p99": 55.23199960589409 + }, + "roundtrip": { + "p50": 203.0079960823059, + "p90": 220.5120027065277, + "p95": 233.2800030708313, + "p99": 271.1679935455322 + }, + "isolatedSum": { + "p50": 215.7439962029457, + "p90": 239.6480031311512, + "p95": 260.6399916112423, + "p99": 298.0799935758114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 193.56800615787506, + "p90": 207.71199464797974, + "p95": 213.47199380397797, + "p99": 257.24801421165466 + }, + "combine": { + "p50": 60.06399914622307, + "p90": 64.70400094985962, + "p95": 68.28799843788147, + "p99": 71.55200093984604 + }, + "roundtrip": { + "p50": 248.44799935817719, + "p90": 312.51201033592224, + "p95": 334.1119885444641, + "p99": 387.5519931316376 + }, + "isolatedSum": { + "p50": 253.63200530409813, + "p90": 272.41599559783936, + "p95": 281.75999224185944, + "p99": 328.8000151515007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 325.3760039806366, + "p90": 342.49600768089294, + "p95": 356.6719889640808, + "p99": 418.4640049934387 + }, + "combine": { + "p50": 128.9599984884262, + "p90": 133.12000036239624, + "p95": 137.85600662231445, + "p99": 141.4719969034195 + }, + "roundtrip": { + "p50": 440.064013004303, + "p90": 452.32000946998596, + "p95": 460.4479968547821, + "p99": 503.4239888191223 + }, + "isolatedSum": { + "p50": 454.3360024690628, + "p90": 475.6160080432892, + "p95": 494.52799558639526, + "p99": 559.9360018968582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49b91a4f", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h200_e0bae1dc", + "comparisonKey": "60d61377bc0b0321", + "schemaVersion": 3, 
+ "generatedAt": "2026-07-02T10:44:52.560141+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 221.72799706459045, + "p90": 329.5679986476898, + "p95": 338.591992855072, + 
"p99": 353.2159924507141 + }, + "combine": { + "p50": 57.023998349905014, + "p90": 80.86399734020233, + "p95": 84.22400057315826, + "p99": 94.27200257778168 + }, + "roundtrip": { + "p50": 254.7520101070404, + "p90": 377.6960074901581, + "p95": 385.53598523139954, + "p99": 404.60801124572754 + }, + "isolatedSum": { + "p50": 278.75199541449547, + "p90": 410.43199598789215, + "p95": 422.8159934282303, + "p99": 447.4879950284958 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 200.44800639152527, + "p90": 232.2240024805069, + "p95": 239.19999599456787, + "p99": 255.23200631141663 + }, + "combine": { + "p50": 68.4799998998642, + "p90": 76.51200145483017, + "p95": 82.36800134181976, + "p99": 90.11200070381165 + }, + "roundtrip": { + "p50": 263.3279860019684, + "p90": 298.6559867858887, + "p95": 313.6320114135742, + "p99": 337.8239870071411 + }, + "isolatedSum": { + "p50": 268.92800629138947, + "p90": 308.73600393533707, + "p95": 321.56799733638763, + "p99": 345.34400701522827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 229.37600314617157, + "p90": 253.24800610542297, + "p95": 263.07201385498047, + "p99": 304.32000756263733 + }, + "combine": { + "p50": 98.94400089979172, + "p90": 109.8880022764206, + "p95": 113.76000195741653, + "p99": 121.08799815177917 + }, + "roundtrip": { + "p50": 328.4800052642822, + "p90": 358.2080006599426, + "p95": 369.4080114364624, + "p99": 404.7040045261383 + }, + "isolatedSum": { + "p50": 328.3200040459633, + "p90": 
363.13600838184357, + "p95": 376.832015812397, + "p99": 425.4080057144165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 308.1279993057251, + "p90": 331.0079872608185, + "p95": 337.47199177742004, + "p99": 385.2800130844116 + }, + "combine": { + "p50": 165.24800658226013, + "p90": 169.95200514793396, + "p95": 173.47200214862823, + "p99": 183.48799645900726 + }, + "roundtrip": { + "p50": 483.71198773384094, + "p90": 506.1439871788025, + "p95": 519.8079943656921, + "p99": 557.1839809417725 + }, + "isolatedSum": { + "p50": 473.37600588798523, + "p90": 500.95999240875244, + "p95": 510.9439939260483, + "p99": 568.7680095434189 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 464.89599347114563, + "p90": 486.2079918384552, + "p95": 496.5440034866333, + "p99": 659.3599915504456 + }, + "combine": { + "p50": 294.3359911441803, + "p90": 297.12000489234924, + "p95": 298.11200499534607, + "p99": 302.592009305954 + }, + "roundtrip": { + "p50": 765.887975692749, + "p90": 786.624014377594, + "p95": 795.7760095596313, + "p99": 913.375973701477 + }, + "isolatedSum": { + "p50": 759.2319846153259, + "p90": 783.3279967308044, + "p95": 794.6560084819794, + "p99": 961.9520008563995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, 
+ "dispatch": { + "p50": 758.6879730224609, + "p90": 774.3039727210999, + "p95": 785.152018070221, + "p99": 911.903977394104 + }, + "combine": { + "p50": 556.1280250549316, + "p90": 560.479998588562, + "p95": 562.6559853553772, + "p99": 581.9200277328491 + }, + "roundtrip": { + "p50": 1326.1120319366455, + "p90": 1349.4399785995483, + "p95": 1360.1919412612915, + "p99": 1433.9519739151 + }, + "isolatedSum": { + "p50": 1314.8159980773926, + "p90": 1334.7839713096619, + "p95": 1347.8080034255981, + "p99": 1493.8240051269531 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5ffbc240", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h200_42ab3d00", + "comparisonKey": "3aa95e7651bc00a7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:03.204579+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, 
+ "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 274.1439938545227, + "p90": 313.53598833084106, + "p95": 322.59199023246765, + "p99": 357.02401399612427 + }, + "combine": { + "p50": 79.32800054550171, + "p90": 95.32800316810608, + "p95": 99.29600358009338, + "p99": 105.3759977221489 + }, + "roundtrip": { + "p50": 345.5359935760498, + "p90": 384.768009185791, + "p95": 393.3440148830414, + "p99": 419.1040098667145 + }, + "isolatedSum": { + "p50": 353.4719944000244, + "p90": 408.86399149894714, + "p95": 421.88799381256104, + "p99": 462.40001171827316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 285.8560085296631, + "p90": 330.84800839424133, + "p95": 342.3359990119934, + "p99": 353.69598865509033 + }, + "combine": { + "p50": 121.15199863910675, + "p90": 134.5600038766861, + "p95": 
138.91200721263885, + "p99": 143.45599710941315 + }, + "roundtrip": { + "p50": 405.4720103740692, + "p90": 446.1440145969391, + "p95": 457.66401290893555, + "p99": 490.84800481796265 + }, + "isolatedSum": { + "p50": 407.00800716876984, + "p90": 465.40801227092743, + "p95": 481.24800622463226, + "p99": 497.1519857645035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 348.35198521614075, + "p90": 378.495991230011, + "p95": 388.2240056991577, + "p99": 460.2240025997162 + }, + "combine": { + "p50": 206.65599405765533, + "p90": 214.01600539684296, + "p95": 217.79200434684753, + "p99": 285.15198826789856 + }, + "roundtrip": { + "p50": 570.3359842300415, + "p90": 594.2400097846985, + "p95": 602.2719740867615, + "p99": 638.7199759483337 + }, + "isolatedSum": { + "p50": 555.0079792737961, + "p90": 592.5119966268539, + "p95": 606.0160100460052, + "p99": 745.3759908676147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 530.4639935493469, + "p90": 549.7599840164185, + "p95": 554.751992225647, + "p99": 609.0880036354065 + }, + "combine": { + "p50": 384.73600149154663, + "p90": 388.5439932346344, + "p95": 390.6559944152832, + "p99": 413.567990064621 + }, + "roundtrip": { + "p50": 948.4480023384094, + "p90": 977.2480130195618, + "p95": 983.7759733200073, + "p99": 1236.0639572143555 + }, + "isolatedSum": { + "p50": 915.1999950408936, + "p90": 938.3039772510529, + "p95": 945.4079866409302, + "p99": 1022.6559937000275 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 904.2239785194397, + "p90": 923.3279824256897, + "p95": 949.2480158805847, + "p99": 1091.1040306091309 + }, + "combine": { + "p50": 731.3920259475708, + "p90": 734.8480224609375, + "p95": 736.1279726028442, + "p99": 782.5919985771179 + }, + "roundtrip": { + "p50": 1664.9279594421387, + "p90": 1690.4319524765015, + "p95": 1705.7280540466309, + "p99": 1824.6400356292725 + }, + "isolatedSum": { + "p50": 1635.6160044670105, + "p90": 1658.1760048866272, + "p95": 1685.375988483429, + "p99": 1873.6960291862488 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1645.3759670257568, + "p90": 1663.424015045166, + "p95": 1676.8640279769897, + "p99": 1884.4799995422363 + }, + "combine": { + "p50": 1429.4719696044922, + "p90": 1434.4960451126099, + "p95": 1437.0239973068237, + "p99": 1466.0160541534424 + }, + "roundtrip": { + "p50": 3107.327938079834, + "p90": 3146.944046020508, + "p95": 3190.623998641968, + "p99": 3368.7679767608643 + }, + "isolatedSum": { + "p50": 3074.847936630249, + "p90": 3097.920060157776, + "p95": 3113.8880252838135, + "p99": 3350.4960536956787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-088cd827", + "identity": 
"h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h200_0e44604e", + "comparisonKey": "2f5137bc05963b6d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:38.566283+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 277.0560085773468, + "p90": 319.13599371910095, + "p95": 332.12798833847046, + "p99": 453.66400480270386 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 82.46400207281113, + "p95": 90.7839983701706, + "p99": 98.24000298976898 + }, + "roundtrip": { + "p50": 330.1439881324768, + "p90": 365.88799953460693, + "p95": 375.7439851760864, + "p99": 394.52800154685974 + }, + "isolatedSum": { + "p50": 351.6800105571747, + "p90": 401.5999957919121, + "p95": 422.91198670864105, + "p99": 551.9040077924728 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 290.3999984264374, + "p90": 334.81600880622864, + "p95": 342.6879942417145, + "p99": 363.71201276779175 + }, + "combine": { + "p50": 102.62399911880493, + "p90": 118.72000247240067, + "p95": 122.30399996042252, + "p99": 126.81600451469421 + }, + "roundtrip": { + "p50": 385.0879967212677, + "p90": 424.54400658607483, + "p95": 433.8879883289337, + "p99": 454.8479914665222 + }, + "isolatedSum": { + "p50": 393.0239975452423, + "p90": 453.5360112786293, + "p95": 464.991994202137, + "p99": 490.52801728248596 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 297.95199632644653, + "p90": 337.98399567604065, + "p95": 346.015989780426, + "p99": 
374.4319975376129 + }, + "combine": { + "p50": 165.18400609493256, + "p90": 178.30400168895721, + "p95": 181.63199722766876, + "p99": 187.83999979496002 + }, + "roundtrip": { + "p50": 470.97599506378174, + "p90": 507.23201036453247, + "p95": 514.6880149841309, + "p99": 548.2879877090454 + }, + "isolatedSum": { + "p50": 463.1360024213791, + "p90": 516.2879973649979, + "p95": 527.6479870080948, + "p99": 562.2719973325729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 455.58398962020874, + "p90": 494.9760138988495, + "p95": 504.96000051498413, + "p99": 566.3999915122986 + }, + "combine": { + "p50": 288.1920039653778, + "p90": 299.51998591423035, + "p95": 304.064005613327, + "p99": 311.2640082836151 + }, + "roundtrip": { + "p50": 740.447998046875, + "p90": 760.8640193939209, + "p95": 772.2880244255066, + "p99": 910.5280041694641 + }, + "isolatedSum": { + "p50": 743.7759935855865, + "p90": 794.4959998130798, + "p95": 809.0240061283112, + "p99": 877.6639997959137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 725.2159714698792, + "p90": 753.4400224685669, + "p95": 765.6319737434387, + "p99": 844.9280261993408 + }, + "combine": { + "p50": 530.9439897537231, + "p90": 540.8959984779358, + "p95": 545.4720258712769, + "p99": 565.4079914093018 + }, + "roundtrip": { + "p50": 1260.3839635849, + "p90": 1288.256049156189, + "p95": 1302.240014076233, + "p99": 1472.0959663391113 + }, + "isolatedSum": { + "p50": 1256.1599612236023, + 
"p90": 1294.3360209465027, + "p95": 1311.1039996147156, + "p99": 1410.3360176086426 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1279.1999578475952, + "p90": 1308.9920282363892, + "p95": 1325.3439664840698, + "p99": 1656.000018119812 + }, + "combine": { + "p50": 1024.0639448165894, + "p90": 1032.6720476150513, + "p95": 1038.815975189209, + "p99": 1245.4719543457031 + }, + "roundtrip": { + "p50": 2303.5519123077393, + "p90": 2333.888053894043, + "p95": 2351.7119884490967, + "p99": 2598.81591796875 + }, + "isolatedSum": { + "p50": 2303.2639026641846, + "p90": 2341.6640758514404, + "p95": 2364.159941673279, + "p99": 2901.471972465515 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f3980180", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h200_b627a6b4", + "comparisonKey": "2a62d1008e43eb93", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:00.260284+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · 
uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 274.59201216697693, + "p90": 309.1199994087219, + "p95": 318.9760148525238, + "p99": 334.84798669815063 + }, + "combine": { + "p50": 73.98399710655212, + "p90": 84.73599702119827, + "p95": 91.42400324344635, + "p99": 96.00000083446503 + }, + "roundtrip": { + "p50": 333.21601152420044, + "p90": 366.40000343322754, + "p95": 370.88000774383545, + "p99": 387.36000657081604 + }, + "isolatedSum": { + "p50": 348.57600927352905, + "p90": 393.8559964299202, + "p95": 410.40001809597015, + "p99": 430.84798753261566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + 
"combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 279.90400791168213, + "p90": 317.9199993610382, + "p95": 326.04798674583435, + "p99": 340.7680094242096 + }, + "combine": { + "p50": 100.16000270843506, + "p90": 114.07999694347382, + "p95": 119.07199770212173, + "p99": 123.55200201272964 + }, + "roundtrip": { + "p50": 374.752014875412, + "p90": 407.29600191116333, + "p95": 414.0799939632416, + "p99": 427.3279905319214 + }, + "isolatedSum": { + "p50": 380.0640106201172, + "p90": 431.999996304512, + "p95": 445.1199844479561, + "p99": 464.32001143693924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 295.6799864768982, + "p90": 333.98398756980896, + "p95": 340.4160141944885, + "p99": 349.15199875831604 + }, + "combine": { + "p50": 164.92800414562225, + "p90": 177.279993891716, + "p95": 181.7920058965683, + "p99": 210.7519954442978 + }, + "roundtrip": { + "p50": 471.96799516677856, + "p90": 501.1199712753296, + "p95": 508.12798738479614, + "p99": 522.1760272979736 + }, + "isolatedSum": { + "p50": 460.60799062252045, + "p90": 511.26398146152496, + "p95": 522.2080200910568, + "p99": 559.9039942026138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 450.6880044937134, + "p90": 478.1759977340698, + "p95": 485.6959879398346, + "p99": 
499.1999864578247 + }, + "combine": { + "p50": 285.8879864215851, + "p90": 300.31999945640564, + "p95": 303.5520017147064, + "p99": 316.0319924354553 + }, + "roundtrip": { + "p50": 736.1599802970886, + "p90": 763.6160254478455, + "p95": 772.8319764137268, + "p99": 925.6960153579712 + }, + "isolatedSum": { + "p50": 736.5759909152985, + "p90": 778.4959971904755, + "p95": 789.247989654541, + "p99": 815.23197889328 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 726.9759774208069, + "p90": 749.9520182609558, + "p95": 756.5760016441345, + "p99": 845.7599878311157 + }, + "combine": { + "p50": 530.4960012435913, + "p90": 540.6079888343811, + "p95": 543.8719987869263, + "p99": 552.6720285415649 + }, + "roundtrip": { + "p50": 1261.5360021591187, + "p90": 1292.512059211731, + "p95": 1299.9680042266846, + "p99": 1336.6719484329224 + }, + "isolatedSum": { + "p50": 1257.4719786643982, + "p90": 1290.560007095337, + "p95": 1300.4480004310608, + "p99": 1398.4320163726807 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1267.1040296554565, + "p90": 1294.20804977417, + "p95": 1306.9119453430176, + "p99": 1449.023962020874 + }, + "combine": { + "p50": 1019.8719501495361, + "p90": 1030.7199954986572, + "p95": 1035.1359844207764, + "p99": 1088.5440111160278 + }, + "roundtrip": { + "p50": 2292.1600341796875, + "p90": 2323.3280181884766, + "p95": 2342.3678874969482, + "p99": 2575.2639770507812 + }, + "isolatedSum": { + "p50": 
2286.9759798049927, + "p90": 2324.928045272827, + "p95": 2342.047929763794, + "p99": 2537.567973136902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a322111f", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_04db0b91", + "comparisonKey": "7e8020130ea7fadf", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:06.520936+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + 
"eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 232.89600014686584, + "p90": 251.10399723052979, + "p95": 269.6320116519928, + "p99": 288.1920039653778 + }, + "combine": { + "p50": 76.12799853086472, + "p90": 86.43200248479843, + "p95": 90.65599739551544, + "p99": 100.28800368309021 + }, + "roundtrip": { + "p50": 293.4400141239166, + "p90": 320.0640082359314, + "p95": 337.66400814056396, + "p99": 355.48800230026245 + }, + "isolatedSum": { + "p50": 309.02399867773056, + "p90": 337.5359997153282, + "p95": 360.28800904750824, + "p99": 388.480007648468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 245.2159970998764, + "p90": 269.50401067733765, + "p95": 275.4879891872406, + "p99": 290.6239926815033 + }, + "combine": { + "p50": 116.54400080442429, + "p90": 130.23999333381653, + "p95": 136.9599997997284, + "p99": 148.3840048313141 + }, + "roundtrip": { + "p50": 366.8479919433594, + "p90": 410.1119935512543, + "p95": 421.60001397132874, + "p99": 492.96000599861145 + }, + "isolatedSum": { + "p50": 361.7599979043007, + "p90": 399.7440040111542, + "p95": 412.447988986969, + "p99": 439.0079975128174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 
3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 331.5519988536835, + "p90": 357.60000348091125, + "p95": 379.42400574684143, + "p99": 402.6240110397339 + }, + "combine": { + "p50": 201.05600357055664, + "p90": 203.87199521064758, + "p95": 205.37599921226501, + "p99": 209.56799387931824 + }, + "roundtrip": { + "p50": 542.2400236129761, + "p90": 570.0160264968872, + "p95": 585.5039954185486, + "p99": 675.104022026062 + }, + "isolatedSum": { + "p50": 532.6080024242401, + "p90": 561.4719986915588, + "p95": 584.8000049591064, + "p99": 612.1920049190521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 517.1200037002563, + "p90": 538.2720232009888, + "p95": 547.6800203323364, + "p99": 606.3359975814819 + }, + "combine": { + "p50": 376.2879967689514, + "p90": 379.32801246643066, + "p95": 380.41600584983826, + "p99": 392.2879993915558 + }, + "roundtrip": { + "p50": 902.176022529602, + "p90": 932.3840141296387, + "p95": 943.8400268554688, + "p99": 964.9279713630676 + }, + "isolatedSum": { + "p50": 893.4080004692078, + "p90": 917.6000356674194, + "p95": 928.0960261821747, + "p99": 998.6239969730377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 871.392011642456, + "p90": 882.0160031318665, + "p95": 887.615978717804, + "p99": 993.5359954833984 + }, + "combine": { + "p50": 
715.2320146560669, + "p90": 718.2719707489014, + "p95": 719.327986240387, + "p99": 724.3199944496155 + }, + "roundtrip": { + "p50": 1603.7440299987793, + "p90": 1640.895962715149, + "p95": 1658.4320068359375, + "p99": 1764.3519639968872 + }, + "isolatedSum": { + "p50": 1586.624026298523, + "p90": 1600.2879738807678, + "p95": 1606.943964958191, + "p99": 1717.855989933014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1598.464012145996, + "p90": 1609.536051750183, + "p95": 1615.2960062026978, + "p99": 1969.8560237884521 + }, + "combine": { + "p50": 1390.8159732818604, + "p90": 1394.8479890823364, + "p95": 1396.6079950332642, + "p99": 1407.5520038604736 + }, + "roundtrip": { + "p50": 3018.7840461730957, + "p90": 3058.367967605591, + "p95": 3077.7599811553955, + "p99": 3357.4719429016113 + }, + "isolatedSum": { + "p50": 2989.2799854278564, + "p90": 3004.3840408325195, + "p95": 3011.904001235962, + "p99": 3377.408027648926 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a5fcad21", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h200_dd5c8fab", + "comparisonKey": "0cbe7dce2ff5c2c2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:04.001497+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 192.7040070295334, + "p90": 241.69600009918213, + "p95": 250.94398856163025, + "p99": 326.3680040836334 + }, + "combine": { + "p50": 73.18399846553802, + "p90": 86.2400010228157, + "p95": 91.51999652385712, + "p99": 94.43199634552002 + }, + "roundtrip": { + "p50": 265.5999958515167, + "p90": 313.24800848960876, + "p95": 
324.0639865398407, + "p99": 411.77600622177124 + }, + "isolatedSum": { + "p50": 265.8880054950714, + "p90": 327.93600112199783, + "p95": 342.46398508548737, + "p99": 420.80000042915344 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 233.63199830055237, + "p90": 263.96799087524414, + "p95": 273.79199862480164, + "p99": 290.97598791122437 + }, + "combine": { + "p50": 112.38399893045425, + "p90": 124.79999661445618, + "p95": 128.67200374603271, + "p99": 176.4799952507019 + }, + "roundtrip": { + "p50": 360.1599931716919, + "p90": 402.52798795700073, + "p95": 410.2720022201538, + "p99": 428.2880127429962 + }, + "isolatedSum": { + "p50": 346.0159972310066, + "p90": 388.7679874897003, + "p95": 402.46400237083435, + "p99": 467.45598316192627 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 325.0240087509155, + "p90": 359.0080142021179, + "p95": 372.6080060005188, + "p99": 392.67200231552124 + }, + "combine": { + "p50": 193.34399700164795, + "p90": 196.0960030555725, + "p95": 198.2720047235489, + "p99": 205.47200739383698 + }, + "roundtrip": { + "p50": 538.5919809341431, + "p90": 566.7840242385864, + "p95": 575.007975101471, + "p99": 600.928008556366 + }, + "isolatedSum": { + "p50": 518.3680057525635, + "p90": 555.1040172576904, + "p95": 570.8800107240677, + "p99": 598.1440097093582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 506.27201795578003, + "p90": 520.8960175514221, + "p95": 527.4239778518677, + "p99": 540.1279926300049 + }, + "combine": { + "p50": 361.7919981479645, + "p90": 364.6079897880554, + "p95": 365.88799953460693, + "p99": 373.4079897403717 + }, + "roundtrip": { + "p50": 897.3119854927063, + "p90": 919.264018535614, + "p95": 926.8159866333008, + "p99": 956.1280012130737 + }, + "isolatedSum": { + "p50": 868.0640161037445, + "p90": 885.5040073394775, + "p95": 893.3119773864746, + "p99": 913.5359823703766 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 868.3199882507324, + "p90": 884.3520283699036, + "p95": 890.0480270385742, + "p99": 1019.9999809265137 + }, + "combine": { + "p50": 691.968023777008, + "p90": 695.3279972076416, + "p95": 696.3840126991272, + "p99": 702.7199864387512 + }, + "roundtrip": { + "p50": 1588.1600379943848, + "p90": 1622.879981994629, + "p95": 1636.8319988250732, + "p99": 1778.2080173492432 + }, + "isolatedSum": { + "p50": 1560.2880120277405, + "p90": 1579.6800255775452, + "p95": 1586.4320397377014, + "p99": 1722.719967365265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1589.1200304031372, + "p90": 1606.2079668045044, + "p95": 1617.2159910202026, + "p99": 1950.4319429397583 + }, + "combine": { + "p50": 1353.376030921936, + "p90": 
1357.1200370788574, + "p95": 1358.8800430297852, + "p99": 1381.1520338058472 + }, + "roundtrip": { + "p50": 2977.0240783691406, + "p90": 2995.743989944458, + "p95": 3010.240077972412, + "p99": 3318.0160522460938 + }, + "isolatedSum": { + "p50": 2942.4960613250732, + "p90": 2963.328003883362, + "p95": 2976.096034049988, + "p99": 3331.5839767456055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-11188ae4", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h200_1923cf93", + "comparisonKey": "1efc0679933f7641", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:39.988553+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", 
+ "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 269.6639895439148, + "p90": 284.7039997577667, + "p95": 289.72798585891724, + "p99": 311.5519881248474 + }, + "combine": { + "p50": 75.39200037717819, + "p90": 78.72000336647034, + "p95": 82.24000036716461, + "p99": 87.67999708652496 + }, + "roundtrip": { + "p50": 332.41599798202515, + "p90": 347.8719890117645, + "p95": 354.14400696754456, + "p99": 433.21600556373596 + }, + "isolatedSum": { + "p50": 345.055989921093, + "p90": 363.42400312423706, + "p95": 371.96798622608185, + "p99": 399.2319852113724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 284.7679853439331, + "p90": 305.9520125389099, + "p95": 318.2080090045929, + "p99": 335.6480002403259 + }, + "combine": { + "p50": 103.84000092744827, + "p90": 109.47199910879135, + "p95": 115.03999680280685, + "p99": 125.05599856376648 + }, + "roundtrip": { + "p50": 378.3999979496002, + "p90": 406.75199031829834, + "p95": 
419.23201084136963, + "p99": 464.4800126552582 + }, + "isolatedSum": { + "p50": 388.6079862713814, + "p90": 415.42401164770126, + "p95": 433.24800580739975, + "p99": 460.7039988040924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 301.12001299858093, + "p90": 328.2879889011383, + "p95": 337.8559947013855, + "p99": 366.5280044078827 + }, + "combine": { + "p50": 169.27999258041382, + "p90": 174.01599884033203, + "p95": 181.8239986896515, + "p99": 191.71200692653656 + }, + "roundtrip": { + "p50": 462.2719883918762, + "p90": 493.6000108718872, + "p95": 501.8240213394165, + "p99": 521.4400291442871 + }, + "isolatedSum": { + "p50": 470.40000557899475, + "p90": 502.30398774147034, + "p95": 519.679993391037, + "p99": 558.2400113344193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 448.06399941444397, + "p90": 476.9279956817627, + "p95": 490.4319941997528, + "p99": 524.8960256576538 + }, + "combine": { + "p50": 296.51200771331787, + "p90": 303.1359910964966, + "p95": 306.62399530410767, + "p99": 313.6320114135742 + }, + "roundtrip": { + "p50": 746.944010257721, + "p90": 772.5440263748169, + "p95": 783.1680178642273, + "p99": 811.7759823799133 + }, + "isolatedSum": { + "p50": 744.5760071277618, + "p90": 780.0639867782593, + "p95": 797.0559895038605, + "p99": 838.528037071228 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 
5815, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 730.1120162010193, + "p90": 753.6320090293884, + "p95": 764.4479870796204, + "p99": 842.1440124511719 + }, + "combine": { + "p50": 552.4479746818542, + "p90": 557.3760271072388, + "p95": 559.2960119247437, + "p99": 564.8000240325928 + }, + "roundtrip": { + "p50": 1287.2960567474365, + "p90": 1319.0399408340454, + "p95": 1330.3040266036987, + "p99": 1479.7439575195312 + }, + "isolatedSum": { + "p50": 1282.5599908828735, + "p90": 1311.0080361366272, + "p95": 1323.743999004364, + "p99": 1406.9440364837646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1298.5600233078003, + "p90": 1320.896029472351, + "p95": 1337.2160196304321, + "p99": 2512.9599571228027 + }, + "combine": { + "p50": 1063.647985458374, + "p90": 1068.511962890625, + "p95": 1071.9679594039917, + "p99": 1193.2480335235596 + }, + "roundtrip": { + "p50": 2365.407943725586, + "p90": 2382.8799724578857, + "p95": 2408.576011657715, + "p99": 2678.5600185394287 + }, + "isolatedSum": { + "p50": 2362.2080087661743, + "p90": 2389.407992362976, + "p95": 2409.183979034424, + "p99": 3706.2079906463623 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a2bab9c3", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h200_646b54fe", + 
"comparisonKey": "cbb399edc40bb50f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:13.429893+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 257.60000944137573, + "p90": 
288.4480059146881, + "p95": 298.3039915561676, + "p99": 373.088002204895 + }, + "combine": { + "p50": 77.66400277614594, + "p90": 87.48800307512283, + "p95": 93.1520015001297, + "p99": 98.75199943780899 + }, + "roundtrip": { + "p50": 327.5519907474518, + "p90": 360.8640134334564, + "p95": 371.45599722862244, + "p99": 496.5119957923889 + }, + "isolatedSum": { + "p50": 335.26401221752167, + "p90": 375.93600898981094, + "p95": 391.4559930562973, + "p99": 471.840001642704 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 259.93600487709045, + "p90": 291.55200719833374, + "p95": 303.0720055103302, + "p99": 321.8879997730255 + }, + "combine": { + "p50": 117.47200042009354, + "p90": 125.59999525547028, + "p95": 132.1280002593994, + "p99": 140.09599387645721 + }, + "roundtrip": { + "p50": 381.18401169776917, + "p90": 408.03200006484985, + "p95": 417.1200096607208, + "p99": 430.55999279022217 + }, + "isolatedSum": { + "p50": 377.408005297184, + "p90": 417.152002453804, + "p95": 435.2000057697296, + "p99": 461.9839936494827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 335.61599254608154, + "p90": 366.33598804473877, + "p95": 377.1840035915375, + "p99": 399.1039991378784 + }, + "combine": { + "p50": 198.97599518299103, + "p90": 203.71200144290924, + "p95": 207.0080041885376, + "p99": 245.63199281692505 + }, + "roundtrip": { + "p50": 545.6960201263428, + "p90": 568.7360167503357, + "p95": 578.8480043411255, + "p99": 675.5840182304382 + }, 
+ "isolatedSum": { + "p50": 534.5919877290726, + "p90": 570.047989487648, + "p95": 584.1920077800751, + "p99": 644.7359919548035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 509.5040202140808, + "p90": 529.4719934463501, + "p95": 540.7040119171143, + "p99": 1079.807996749878 + }, + "combine": { + "p50": 373.1839954853058, + "p90": 376.3839900493622, + "p95": 378.87999415397644, + "p99": 433.6639940738678 + }, + "roundtrip": { + "p50": 911.9359850883484, + "p90": 940.4159784317017, + "p95": 949.567973613739, + "p99": 1243.9039945602417 + }, + "isolatedSum": { + "p50": 882.6880156993866, + "p90": 905.8559834957123, + "p95": 919.5840060710907, + "p99": 1513.4719908237457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 7855, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 868.511974811554, + "p90": 894.1760063171387, + "p95": 925.279974937439, + "p99": 1412.0320081710815 + }, + "combine": { + "p50": 700.2559900283813, + "p90": 703.5840153694153, + "p95": 706.5920233726501, + "p99": 788.5119915008545 + }, + "roundtrip": { + "p50": 1599.5839834213257, + "p90": 1636.1279487609863, + "p95": 1655.2000045776367, + "p99": 2021.440029144287 + }, + "isolatedSum": { + "p50": 1568.7679648399353, + "p90": 1597.760021686554, + "p95": 1631.871998310089, + "p99": 2200.543999671936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 3, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1565.3120279312134, + "p90": 1577.3760080337524, + "p95": 1582.3359489440918, + "p99": 1642.4959897994995 + }, + "combine": { + "p50": 1368.8640594482422, + "p90": 1375.3279447555542, + "p95": 1378.3040046691895, + "p99": 1411.8399620056152 + }, + "roundtrip": { + "p50": 2972.4481105804443, + "p90": 2998.271942138672, + "p95": 3020.8959579467773, + "p99": 3432.7681064605713 + }, + "isolatedSum": { + "p50": 2934.1760873794556, + "p90": 2952.7039527893066, + "p95": 2960.6399536132812, + "p99": 3054.3359518051147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-08595561", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h200_2f342df0", + "comparisonKey": "beaf180cb53f7ccb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:45.266649+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + 
"kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 263.07201385498047, + "p90": 291.26399755477905, + "p95": 296.4160144329071, + "p99": 322.27200269699097 + }, + "combine": { + "p50": 71.45600020885468, + "p90": 78.5600021481514, + "p95": 82.84799754619598, + "p99": 85.79199761152267 + }, + "roundtrip": { + "p50": 321.4719891548157, + "p90": 346.52799367904663, + "p95": 354.3359935283661, + "p99": 370.49600481987 + }, + "isolatedSum": { + "p50": 334.52801406383514, + "p90": 369.82399970293045, + "p95": 379.2640119791031, + "p99": 408.06400030851364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 269.1200077533722, + "p90": 
296.1919903755188, + "p95": 301.6960024833679, + "p99": 315.5199885368347 + }, + "combine": { + "p50": 98.08000177145004, + "p90": 106.97600245475769, + "p95": 112.15999722480774, + "p99": 115.64800143241882 + }, + "roundtrip": { + "p50": 362.87999153137207, + "p90": 387.2640132904053, + "p95": 392.9600119590759, + "p99": 438.01599740982056 + }, + "isolatedSum": { + "p50": 367.20000952482224, + "p90": 403.1679928302765, + "p95": 413.85599970817566, + "p99": 431.16798996925354 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 291.48799180984497, + "p90": 318.87999176979065, + "p95": 329.6000063419342, + "p99": 372.51201272010803 + }, + "combine": { + "p50": 164.5440012216568, + "p90": 173.0239987373352, + "p95": 176.28799378871918, + "p99": 200.15999674797058 + }, + "roundtrip": { + "p50": 450.6880044937134, + "p90": 472.3840057849884, + "p95": 480.70400953292847, + "p99": 536.1279845237732 + }, + "isolatedSum": { + "p50": 456.03199303150177, + "p90": 491.90399050712585, + "p95": 505.8880001306534, + "p99": 572.6720094680786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 440.2880072593689, + "p90": 466.048002243042, + "p95": 476.00001096725464, + "p99": 492.76798963546753 + }, + "combine": { + "p50": 284.92799401283264, + "p90": 294.49599981307983, + "p95": 297.5359857082367, + "p99": 309.1199994087219 + }, + "roundtrip": { + "p50": 731.2319874763489, + "p90": 762.2720003128052, + "p95": 782.4959754943848, + "p99": 
1014.4959688186646 + }, + "isolatedSum": { + "p50": 725.2160012722015, + "p90": 760.5440020561218, + "p95": 773.5359966754913, + "p99": 801.8879890441895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 712.3519778251648, + "p90": 733.6320281028748, + "p95": 744.6079850196838, + "p99": 802.2080063819885 + }, + "combine": { + "p50": 530.3360223770142, + "p90": 537.5999808311462, + "p95": 541.8239831924438, + "p99": 552.4479746818542 + }, + "roundtrip": { + "p50": 1249.3120431900024, + "p90": 1275.5199670791626, + "p95": 1284.8960161209106, + "p99": 1527.9359817504883 + }, + "isolatedSum": { + "p50": 1242.688000202179, + "p90": 1271.232008934021, + "p95": 1286.4319682121277, + "p99": 1354.6559810638428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 10955, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1270.4639434814453, + "p90": 1298.0159521102905, + "p95": 1313.31205368042, + "p99": 1468.0960178375244 + }, + "combine": { + "p50": 1020.2239751815796, + "p90": 1027.8079509735107, + "p95": 1030.9760570526123, + "p99": 1065.600037574768 + }, + "roundtrip": { + "p50": 2294.2399978637695, + "p90": 2326.8799781799316, + "p95": 2347.1360206604004, + "p99": 2567.5199031829834 + }, + "isolatedSum": { + "p50": 2290.687918663025, + "p90": 2325.8239030838013, + "p95": 2344.288110733032, + "p99": 2533.6960554122925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 
21864, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-dd4f7ee5", + "identity": "h200|deepep-hybrid|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_d3ac0b1d", + "comparisonKey": "7fe90354a9b4dcb4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:10.743924+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": 
{ + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 238.304004073143, + "p90": 273.44000339508057, + "p95": 281.2800109386444, + "p99": 296.54398560523987 + }, + "combine": { + "p50": 77.40800082683563, + "p90": 87.96799927949905, + "p95": 93.18400174379349, + "p99": 100.19200295209885 + }, + "roundtrip": { + "p50": 302.3679852485657, + "p90": 343.1999981403351, + "p95": 348.1599986553192, + "p99": 357.7280044555664 + }, + "isolatedSum": { + "p50": 315.71200489997864, + "p90": 361.4080026745796, + "p95": 374.4640126824379, + "p99": 396.7359885573387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 250.17601251602173, + "p90": 290.17600417137146, + "p95": 301.34400725364685, + "p99": 310.88000535964966 + }, + "combine": { + "p50": 116.06399714946747, + "p90": 125.08800625801086, + "p95": 132.51200318336487, + "p99": 140.4159963130951 + }, + "roundtrip": { + "p50": 361.7280125617981, + "p90": 399.83999729156494, + "p95": 408.03200006484985, + "p99": 425.6959855556488 + }, + "isolatedSum": { + "p50": 366.2400096654892, + "p90": 415.2640104293823, + "p95": 433.8560104370117, + "p99": 451.29600167274475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 337.5360071659088, + "p90": 375.328004360199, + 
"p95": 384.3519985675812, + "p99": 450.01599192619324 + }, + "combine": { + "p50": 200.83199441432953, + "p90": 210.1760059595108, + "p95": 215.39199352264404, + "p99": 220.96000611782074 + }, + "roundtrip": { + "p50": 548.416018486023, + "p90": 576.7359733581543, + "p95": 585.6639742851257, + "p99": 618.5600161552429 + }, + "isolatedSum": { + "p50": 538.3680015802383, + "p90": 585.5040103197098, + "p95": 599.7439920902252, + "p99": 670.975998044014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 527.9359817504883, + "p90": 565.5679702758789, + "p95": 573.9520192146301, + "p99": 644.7359919548035 + }, + "combine": { + "p50": 376.6399919986725, + "p90": 387.07199692726135, + "p95": 390.6880021095276, + "p99": 394.7199881076813 + }, + "roundtrip": { + "p50": 921.4400053024292, + "p90": 944.383978843689, + "p95": 952.7999758720398, + "p99": 1138.6879682540894 + }, + "isolatedSum": { + "p50": 904.5759737491608, + "p90": 952.6399672031403, + "p95": 964.6400213241577, + "p99": 1039.4559800624847 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 878.1120181083679, + "p90": 917.2160029411316, + "p95": 929.4400215148926, + "p99": 947.8399753570557 + }, + "combine": { + "p50": 715.1679992675781, + "p90": 726.3360023498535, + "p95": 728.7999987602234, + "p99": 734.8799705505371 + }, + "roundtrip": { + "p50": 1617.0239448547363, + "p90": 1641.1199569702148, + "p95": 1650.65598487854, + "p99": 1714.784026145935 + }, + "isolatedSum": { 
+ "p50": 1593.280017375946, + "p90": 1643.552005290985, + "p95": 1658.240020275116, + "p99": 1682.7199459075928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1628.1280517578125, + "p90": 1659.9680185317993, + "p95": 1668.287992477417, + "p99": 1838.2400274276733 + }, + "combine": { + "p50": 1396.9919681549072, + "p90": 1405.3759574890137, + "p95": 1410.048007965088, + "p99": 1498.91197681427 + }, + "roundtrip": { + "p50": 3034.976005554199, + "p90": 3072.1280574798584, + "p95": 3104.2559146881104, + "p99": 3447.6799964904785 + }, + "isolatedSum": { + "p50": 3025.1200199127197, + "p90": 3065.343976020813, + "p95": 3078.336000442505, + "p99": 3337.1520042419434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-250bdc95", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_4dac9421", + "comparisonKey": "19d61a4bd3b464e1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:42.117180+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 
· deepep-hybrid · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 266.11199975013733, + "p90": 290.8799946308136, + "p95": 301.63198709487915, + "p99": 322.27200269699097 + }, + "combine": { + "p50": 71.71200215816498, + "p90": 78.5600021481514, + "p95": 83.13599973917007, + "p99": 89.05600011348724 + }, + "roundtrip": { + "p50": 326.9760012626648, + "p90": 361.7599904537201, + "p95": 386.24000549316406, + "p99": 434.7519874572754 + }, + "isolatedSum": { + "p50": 337.8240019083023, + "p90": 369.439996778965, + "p95": 384.7679868340492, + "p99": 411.3280028104782 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 277.0879864692688, + "p90": 309.5040023326874, + "p95": 319.0079927444458, + "p99": 412.31998801231384 + }, + "combine": { + "p50": 98.36799651384354, + "p90": 109.79200154542923, + "p95": 114.81600254774094, + "p99": 121.11999839544296 + }, + "roundtrip": { + "p50": 370.33599615097046, + "p90": 399.07199144363403, + "p95": 408.4480106830597, + "p99": 447.9680061340332 + }, + "isolatedSum": { + "p50": 375.45598298311234, + "p90": 419.2960038781166, + "p95": 433.82399529218674, + "p99": 533.4399864077568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 291.456013917923, + "p90": 322.27200269699097, + "p95": 328.73600721359253, + "p99": 346.75198793411255 + }, + "combine": { + "p50": 162.33600676059723, + "p90": 175.26400089263916, + "p95": 178.56000363826752, + "p99": 185.72799861431122 + }, + "roundtrip": { + "p50": 456.1600089073181, + "p90": 482.36799240112305, + "p95": 490.7520115375519, + "p99": 537.280023097992 + }, + "isolatedSum": { + "p50": 453.7920206785202, + "p90": 497.5360035896301, + "p95": 507.29601085186005, + "p99": 532.4799865484238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 438.62399458885193, + "p90": 458.8479995727539, + 
"p95": 468.32001209259033, + "p99": 498.879998922348 + }, + "combine": { + "p50": 285.2480113506317, + "p90": 293.92001032829285, + "p95": 296.9920039176941, + "p99": 302.047997713089 + }, + "roundtrip": { + "p50": 724.7999906539917, + "p90": 752.4480223655701, + "p95": 765.0240063667297, + "p99": 881.9519877433777 + }, + "isolatedSum": { + "p50": 723.8720059394836, + "p90": 752.7680099010468, + "p95": 765.3120160102844, + "p99": 800.927996635437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 722.0479846000671, + "p90": 740.3519749641418, + "p95": 746.0799813270569, + "p99": 814.624011516571 + }, + "combine": { + "p50": 535.8399748802185, + "p90": 543.5839891433716, + "p95": 546.5279817581177, + "p99": 552.6400208473206 + }, + "roundtrip": { + "p50": 1258.2080364227295, + "p90": 1284.608006477356, + "p95": 1294.0160036087036, + "p99": 1389.6960020065308 + }, + "isolatedSum": { + "p50": 1257.8879594802856, + "p90": 1283.9359641075134, + "p95": 1292.6079630851746, + "p99": 1367.2640323638916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1278.4639596939087, + "p90": 1306.4320087432861, + "p95": 1322.208046913147, + "p99": 1416.159987449646 + }, + "combine": { + "p50": 1027.7440547943115, + "p90": 1036.352038383484, + "p95": 1042.9760217666626, + "p99": 1073.6639499664307 + }, + "roundtrip": { + "p50": 2307.744026184082, + "p90": 2343.967914581299, + "p95": 2380.511999130249, + "p99": 2450.4640102386475 + }, + 
"isolatedSum": { + "p50": 2306.20801448822, + "p90": 2342.78404712677, + "p95": 2365.1840686798096, + "p99": 2489.8239374160767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fe31bb31", + "identity": "h200|deepep-hybrid|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_179dbf5d", + "comparisonKey": "34fa27122542dfa0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:49.161237+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_9", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · deepep-hybrid · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": "hybrid-e0a5b1d", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 272.19200134277344, + "p90": 295.2960133552551, + "p95": 314.5599961280823, + "p99": 331.2639892101288 + }, + "combine": { + "p50": 73.37599992752075, + "p90": 78.40000092983246, + "p95": 86.01599931716919, + "p99": 99.04000163078308 + }, + "roundtrip": { + "p50": 328.6080062389374, + "p90": 350.75199604034424, + "p95": 364.4160032272339, + "p99": 385.1200044155121 + }, + "isolatedSum": { + "p50": 345.5680012702942, + "p90": 373.6960142850876, + "p95": 400.57599544525146, + "p99": 430.30399084091187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 281.5040051937103, + "p90": 322.07998633384705, + "p95": 331.58400654792786, + "p99": 349.40800070762634 + }, + "combine": { + "p50": 100.28800368309021, + "p90": 109.6000000834465, + "p95": 115.13599753379822, + "p99": 119.58400160074234 + }, + "roundtrip": { + "p50": 376.44800543785095, + "p90": 413.34399580955505, + "p95": 422.2399890422821, + "p99": 446.4319944381714 + }, + "isolatedSum": { + "p50": 381.79200887680054, + "p90": 431.67998641729355, + "p95": 446.7200040817261, + "p99": 468.9920023083687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 295.9359884262085, + "p90": 329.9199938774109, + "p95": 338.3040130138397, + "p99": 361.1519932746887 + }, + "combine": { + "p50": 163.5199934244156, + "p90": 170.8800047636032, + "p95": 174.78400468826294, + "p99": 180.60800433158875 + }, + "roundtrip": { + "p50": 456.7359983921051, + "p90": 478.4959852695465, + "p95": 489.47200179100037, + "p99": 509.18400287628174 + }, + "isolatedSum": { + "p50": 459.4559818506241, + "p90": 500.7999986410141, + "p95": 513.0880177021027, + "p99": 541.7599976062775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 437.44000792503357, + "p90": 465.5359983444214, + "p95": 474.7839868068695, + "p99": 490.55999517440796 + }, + "combine": { + "p50": 284.89598631858826, + "p90": 296.86400294303894, + "p95": 300.9600043296814, + "p99": 314.5599961280823 + }, + "roundtrip": { + "p50": 727.6800274848938, + "p90": 771.1039781570435, + "p95": 789.3760204315186, + "p99": 1013.0879878997803 + }, + "isolatedSum": { + "p50": 722.3359942436218, + "p90": 762.4000012874603, + "p95": 775.7439911365509, + "p99": 805.1199913024902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 723.2319712638855, + "p90": 741.8879866600037, + "p95": 749.9200105667114, 
+ "p99": 890.5280232429504 + }, + "combine": { + "p50": 532.7039957046509, + "p90": 541.5999889373779, + "p95": 544.0639853477478, + "p99": 550.5920052528381 + }, + "roundtrip": { + "p50": 1256.00004196167, + "p90": 1284.127950668335, + "p95": 1301.31196975708, + "p99": 1406.7840576171875 + }, + "isolatedSum": { + "p50": 1255.9359669685364, + "p90": 1283.4879755973816, + "p95": 1293.9839959144592, + "p99": 1441.1200284957886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1274.9439477920532, + "p90": 1300.7680177688599, + "p95": 1318.7839984893799, + "p99": 1474.6240377426147 + }, + "combine": { + "p50": 1026.9440412521362, + "p90": 1036.512017250061, + "p95": 1039.903998374939, + "p99": 1088.479995727539 + }, + "roundtrip": { + "p50": 2309.567928314209, + "p90": 2340.2559757232666, + "p95": 2361.2799644470215, + "p99": 2984.8320484161377 + }, + "isolatedSum": { + "p50": 2301.8879890441895, + "p90": 2337.280035018921, + "p95": 2358.687996864319, + "p99": 2563.104033470154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-695905ce", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_2b2aa3d0", + "comparisonKey": "b5fa6cfb0ebe706f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:53.049976+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": 
"decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 387.58400082588196, + "p90": 565.1519894599915, + "p95": 2675.391912460327, + "p99": 3969.2161083221436 + }, + "combine": { + "p50": 148.92800152301788, + "p90": 220.38400173187256, + "p95": 1800.2879619598389, + "p99": 3179.0719032287598 + }, + "roundtrip": { + "p50": 596.0959792137146, + "p90": 865.2799725532532, + "p95": 3072.2880363464355, + 
"p99": 4162.399768829346 + }, + "isolatedSum": { + "p50": 536.5120023488998, + "p90": 785.535991191864, + "p95": 4475.679874420166, + "p99": 7148.288011550903 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 354.3039858341217, + "p90": 500.7359981536865, + "p95": 2655.071973800659, + "p99": 3760.512113571167 + }, + "combine": { + "p50": 147.2959965467453, + "p90": 237.59999871253967, + "p95": 305.7920038700104, + "p99": 3451.200008392334 + }, + "roundtrip": { + "p50": 549.7599840164185, + "p90": 779.3279886245728, + "p95": 3145.8239555358887, + "p99": 4266.784191131592 + }, + "isolatedSum": { + "p50": 501.599982380867, + "p90": 738.3359968662262, + "p95": 2960.8639776706696, + "p99": 7211.712121963501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 416.3520038127899, + "p90": 579.2959928512573, + "p95": 1142.4640417099, + "p99": 3570.528030395508 + }, + "combine": { + "p50": 141.27999544143677, + "p90": 184.79999899864197, + "p95": 219.90400552749634, + "p99": 3007.999897003174 + }, + "roundtrip": { + "p50": 530.7199954986572, + "p90": 752.2879838943481, + "p95": 3194.6239471435547, + "p99": 4454.400062561035 + }, + "isolatedSum": { + "p50": 557.6319992542267, + "p90": 764.0959918498993, + "p95": 1362.3680472373962, + "p99": 6578.527927398682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 385.0879967212677, + "p90": 535.7760190963745, + "p95": 2036.1599922180176, + "p99": 3548.5119819641113 + }, + "combine": { + "p50": 140.47999680042267, + "p90": 209.53600108623505, + "p95": 251.55198574066162, + "p99": 3253.920078277588 + }, + "roundtrip": { + "p50": 559.1359734535217, + "p90": 771.4560031890869, + "p95": 3076.064109802246, + "p99": 4252.255916595459 + }, + "isolatedSum": { + "p50": 525.5679935216904, + "p90": 745.3120201826096, + "p95": 2287.711977958679, + "p99": 6802.432060241699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 395.2319920063019, + "p90": 524.3200063705444, + "p95": 1647.0719575881958, + "p99": 3619.8720932006836 + }, + "combine": { + "p50": 136.7039978504181, + "p90": 204.54399287700653, + "p95": 263.07201385498047, + "p99": 3016.0000324249268 + }, + "roundtrip": { + "p50": 573.0879902839661, + "p90": 776.0319709777832, + "p95": 2785.9199047088623, + "p99": 4136.064052581787 + }, + "isolatedSum": { + "p50": 531.93598985672, + "p90": 728.863999247551, + "p95": 1910.1439714431763, + "p99": 6635.87212562561 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 384.95999574661255, + "p90": 475.5519926548004, + "p95": 777.567982673645, + "p99": 3743.3600425720215 + }, + "combine": { + "p50": 145.53600549697876, + "p90": 191.42399728298187, + "p95": 265.8880054950714, + "p99": 3499.135971069336 + }, + "roundtrip": { + "p50": 
550.815999507904, + "p90": 693.343997001648, + "p95": 3156.2559604644775, + "p99": 3987.328052520752 + }, + "isolatedSum": { + "p50": 530.4960012435913, + "p90": 666.9759899377823, + "p95": 1043.4559881687164, + "p99": 7242.496013641357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 425.6640076637268, + "p90": 547.2319722175598, + "p95": 2365.664005279541, + "p99": 3462.5279903411865 + }, + "combine": { + "p50": 160.22400557994843, + "p90": 227.39200294017792, + "p95": 310.65601110458374, + "p99": 3039.9680137634277 + }, + "roundtrip": { + "p50": 620.1599836349487, + "p90": 792.5440073013306, + "p95": 3015.712022781372, + "p99": 3828.511953353882 + }, + "isolatedSum": { + "p50": 585.8880132436752, + "p90": 774.6239751577377, + "p95": 2676.3200163841248, + "p99": 6502.496004104614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 408.80000591278076, + "p90": 513.8880014419556, + "p95": 2771.199941635132, + "p99": 3774.7840881347656 + }, + "combine": { + "p50": 238.46399784088135, + "p90": 265.56798815727234, + "p95": 347.77599573135376, + "p99": 3013.792037963867 + }, + "roundtrip": { + "p50": 621.9199895858765, + "p90": 790.3680205345154, + "p95": 2961.6000652313232, + "p99": 3701.3120651245117 + }, + "isolatedSum": { + "p50": 647.2640037536621, + "p90": 779.4559895992279, + "p95": 3118.9759373664856, + "p99": 6788.576126098633 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 
5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2a07346c", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h200_56dfc1f4", + "comparisonKey": "7304c9ad6764d5ae", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:29.906621+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 399.616003036499, + "p90": 505.72800636291504, + "p95": 2256.0958862304688, + "p99": 3917.7279472351074 + }, + "combine": { + "p50": 150.65599977970123, + "p90": 214.30400013923645, + "p95": 397.5679874420166, + "p99": 3110.304117202759 + }, + "roundtrip": { + "p50": 575.2639770507812, + "p90": 706.9119811058044, + "p95": 2351.5520095825195, + "p99": 4073.535919189453 + }, + "isolatedSum": { + "p50": 550.2720028162003, + "p90": 720.0320065021515, + "p95": 2653.6638736724854, + "p99": 7028.032064437866 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 398.78401160240173, + "p90": 483.13599824905396, + "p95": 1514.016032218933, + "p99": 3865.6320571899414 + }, + "combine": { + "p50": 148.70400726795197, + "p90": 240.12799561023712, + "p95": 457.8239917755127, + "p99": 3020.128011703491 + }, + "roundtrip": { + "p50": 595.6159830093384, + "p90": 705.2800059318542, + "p95": 1597.0879793167114, + "p99": 3955.3279876708984 + }, + "isolatedSum": { + "p50": 547.4880188703537, + "p90": 723.2639938592911, + "p95": 1971.8400239944458, + "p99": 6885.760068893433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 448.5119879245758, + "p90": 
537.2160077095032, + "p95": 2705.024003982544, + "p99": 3610.975980758667 + }, + "combine": { + "p50": 267.520010471344, + "p90": 296.06398940086365, + "p95": 308.3840012550354, + "p99": 2251.4240741729736 + }, + "roundtrip": { + "p50": 699.7439861297607, + "p90": 766.048014163971, + "p95": 2089.440107345581, + "p99": 3441.8559074401855 + }, + "isolatedSum": { + "p50": 716.0319983959198, + "p90": 833.2799971103668, + "p95": 3013.4080052375793, + "p99": 5862.400054931641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9c8bd419", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h200_a846ca24", + "comparisonKey": "1919e744d0c7d05d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:54.031706+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + 
"configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 373.6959993839264, + "p90": 455.9679925441742, + "p95": 2873.8880157470703, + "p99": 3844.223976135254 + }, + "combine": { + "p50": 156.19200468063354, + "p90": 245.31200528144836, + "p95": 270.6240117549896, + "p99": 2768.6400413513184 + }, + "roundtrip": { + "p50": 543.8719987869263, + "p90": 739.8399710655212, + "p95": 3064.41593170166, + "p99": 4324.192047119141 + }, + "isolatedSum": { + "p50": 529.8880040645599, + "p90": 701.2799978256226, + "p95": 3144.51202750206, + "p99": 6612.864017486572 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 393.92000436782837, + "p90": 472.4160134792328, + "p95": 1713.1199836730957, + "p99": 3912.12797164917 + }, + "combine": { + "p50": 130.72000443935394, + "p90": 173.66400361061096, + "p95": 183.9359998703003, + "p99": 256.5760016441345 + }, + "roundtrip": { + 
"p50": 562.0480179786682, + "p90": 643.3600187301636, + "p95": 3122.1439838409424, + "p99": 4388.415813446045 + }, + "isolatedSum": { + "p50": 524.6400088071823, + "p90": 646.0800170898438, + "p95": 1897.055983543396, + "p99": 4168.703973293304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 444.7999894618988, + "p90": 729.0239930152893, + "p95": 2637.9199028015137, + "p99": 3711.967945098877 + }, + "combine": { + "p50": 264.0640139579773, + "p90": 291.20001196861267, + "p95": 1695.3599452972412, + "p99": 2477.9839515686035 + }, + "roundtrip": { + "p50": 691.2959814071655, + "p90": 773.0240225791931, + "p95": 2650.752067565918, + "p99": 3402.8480052948 + }, + "isolatedSum": { + "p50": 708.8640034198761, + "p90": 1020.224004983902, + "p95": 4333.279848098755, + "p99": 6189.9518966674805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9d227473", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h200_ab46cedd", + "comparisonKey": "54ce6b4a05aba9e8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:20.085779+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 377.47201323509216, + "p90": 443.7119960784912, + "p95": 1046.5279817581177, + "p99": 3911.1359119415283 + }, + "combine": { + "p50": 128.7039965391159, + "p90": 186.0799938440323, + "p95": 199.0080028772354, + "p99": 2747.648000717163 + }, + "roundtrip": { + "p50": 533.4399938583374, + "p90": 590.399980545044, + "p95": 884.0640187263489, + "p99": 4688.543796539307 + }, + "isolatedSum": { + "p50": 506.17600977420807, + "p90": 629.7919899225235, + "p95": 1245.535984635353, + "p99": 
6658.783912658691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 409.40800309181213, + "p90": 482.6880097389221, + "p95": 1120.7679510116577, + "p99": 3772.831916809082 + }, + "combine": { + "p50": 125.18399953842163, + "p90": 182.8799992799759, + "p95": 192.1280026435852, + "p99": 226.97600722312927 + }, + "roundtrip": { + "p50": 575.007975101471, + "p90": 667.3600077629089, + "p95": 2199.5840072631836, + "p99": 4604.191780090332 + }, + "isolatedSum": { + "p50": 534.5920026302338, + "p90": 665.568009018898, + "p95": 1312.895953655243, + "p99": 3999.8079240322113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 443.6799883842468, + "p90": 508.2560181617737, + "p95": 2138.6239528656006, + "p99": 3349.407911300659 + }, + "combine": { + "p50": 266.9439911842346, + "p90": 299.9359965324402, + "p95": 313.3760094642639, + "p99": 2639.967918395996 + }, + "roundtrip": { + "p50": 697.5039839744568, + "p90": 794.3040132522583, + "p95": 1160.9280109405518, + "p99": 3003.648042678833 + }, + "isolatedSum": { + "p50": 710.6239795684814, + "p90": 808.1920146942139, + "p95": 2451.9999623298645, + "p99": 5989.375829696655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2d7cac21", + "identity": 
"h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h200_aa46cd4a", + "comparisonKey": "7368892646533e08", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:44.589416+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", 
+ "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 351.4559864997864, + "p90": 465.88799357414246, + "p95": 3010.94388961792, + "p99": 3973.9840030670166 + }, + "combine": { + "p50": 133.37600231170654, + "p90": 164.86400365829468, + "p95": 193.27999651432037, + "p99": 3490.5600547790527 + }, + "roundtrip": { + "p50": 524.5760083198547, + "p90": 805.4400086402893, + "p95": 3004.319906234741, + "p99": 4401.279926300049 + }, + "isolatedSum": { + "p50": 484.8319888114929, + "p90": 630.7519972324371, + "p95": 3204.2238861322403, + "p99": 7464.544057846069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 133, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 378.59201431274414, + "p90": 491.9680058956146, + "p95": 2605.4720878601074, + "p99": 4142.144203186035 + }, + "combine": { + "p50": 125.02400577068329, + "p90": 155.90399503707886, + "p95": 174.8799979686737, + "p99": 1921.4080572128296 + }, + "roundtrip": { + "p50": 528.4799933433533, + "p90": 680.6399822235107, + "p95": 2995.1999187469482, + "p99": 4020.5440521240234 + }, + "isolatedSum": { + "p50": 503.61602008342743, + "p90": 647.8720009326935, + "p95": 2780.352085828781, + "p99": 6063.552260398865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 520, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 431.8400025367737, + "p90": 576.5439867973328, + "p95": 2622.368097305298, + "p99": 3454.2720317840576 + }, + "combine": { + "p50": 267.39200949668884, + "p90": 296.60800099372864, + "p95": 1296.3520288467407, + 
"p99": 2989.151954650879 + }, + "roundtrip": { + "p50": 672.7679967880249, + "p90": 776.8959999084473, + "p95": 2896.0320949554443, + "p99": 3517.1520709991455 + }, + "isolatedSum": { + "p50": 699.2320120334625, + "p90": 873.1519877910614, + "p95": 3918.7201261520386, + "p99": 6443.4239864349365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 2062, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-56f948f9", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h200_ba6963a8", + "comparisonKey": "c3a34cdeb518452a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:07.230620+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + 
"gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 333.7920010089874, + "p90": 660.4800224304199, + "p95": 684.3839883804321, + "p99": 3178.528070449829 + }, + "combine": { + "p50": 125.18399953842163, + "p90": 158.4320068359375, + "p95": 188.92799317836761, + "p99": 3862.879991531372 + }, + "roundtrip": { + "p50": 501.0560154914856, + "p90": 628.8319826126099, + "p95": 1019.10400390625, + "p99": 4394.112110137939 + }, + "isolatedSum": { + "p50": 458.97600054740906, + "p90": 818.9120292663574, + "p95": 873.3119815587997, + "p99": 7041.408061981201 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 358.97600650787354, + "p90": 470.7520008087158, + "p95": 723.4879732131958, + "p99": 3893.631935119629 + }, + "combine": { + "p50": 129.18399274349213, + "p90": 180.57599663734436, + "p95": 210.27199923992157, + "p99": 3554.8479557037354 + }, + "roundtrip": { + "p50": 503.35997343063354, + "p90": 595.7120060920715, + "p95": 824.9279856681824, + "p99": 4197.984218597412 + }, + "isolatedSum": { + "p50": 488.15999925136566, + "p90": 651.3279974460602, + "p95": 933.7599724531174, + "p99": 
7448.479890823364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 341.8560028076172, + "p90": 428.19198966026306, + "p95": 528.5120010375977, + "p99": 3716.12811088562 + }, + "combine": { + "p50": 128.4160017967224, + "p90": 170.20800709724426, + "p95": 608.672022819519, + "p99": 3213.5040760040283 + }, + "roundtrip": { + "p50": 517.8880095481873, + "p90": 924.9600172042847, + "p95": 1383.0080032348633, + "p99": 4136.096000671387 + }, + "isolatedSum": { + "p50": 470.2720046043396, + "p90": 598.3999967575073, + "p95": 1137.1840238571167, + "p99": 6929.632186889648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 349.66400265693665, + "p90": 434.3999922275543, + "p95": 513.4400129318237, + "p99": 3836.479902267456 + }, + "combine": { + "p50": 145.7280069589615, + "p90": 280.12800216674805, + "p95": 314.94399905204773, + "p99": 3347.16796875 + }, + "roundtrip": { + "p50": 611.6799712181091, + "p90": 939.5840167999268, + "p95": 1608.8320016860962, + "p99": 3665.4720306396484 + }, + "isolatedSum": { + "p50": 495.39200961589813, + "p90": 714.5279943943024, + "p95": 828.3840119838715, + "p99": 7183.647871017456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 349.2799997329712, + "p90": 446.24000787734985, + "p95": 
600.383996963501, + "p99": 3888.2880210876465 + }, + "combine": { + "p50": 129.56799566745758, + "p90": 177.44000256061554, + "p95": 194.59199905395508, + "p99": 3065.471887588501 + }, + "roundtrip": { + "p50": 521.2799906730652, + "p90": 610.4639768600464, + "p95": 1145.2159881591797, + "p99": 4219.456195831299 + }, + "isolatedSum": { + "p50": 478.8479954004288, + "p90": 623.6800104379654, + "p95": 794.975996017456, + "p99": 6953.7599086761475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 382.81598687171936, + "p90": 466.2399888038635, + "p95": 567.1359896659851, + "p99": 3648.7679481506348 + }, + "combine": { + "p50": 129.02399897575378, + "p90": 187.99999356269836, + "p95": 212.54399418830872, + "p99": 3026.5278816223145 + }, + "roundtrip": { + "p50": 561.7600083351135, + "p90": 674.3680238723755, + "p95": 1619.264006614685, + "p99": 4079.6799659729004 + }, + "isolatedSum": { + "p50": 511.83998584747314, + "p90": 654.2399823665619, + "p95": 779.6799838542938, + "p99": 6675.295829772949 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 363.16800117492676, + "p90": 436.73598766326904, + "p95": 654.9760103225708, + "p99": 3605.087995529175 + }, + "combine": { + "p50": 144.96000111103058, + "p90": 181.18399381637573, + "p95": 249.24799799919128, + "p99": 3012.320041656494 + }, + "roundtrip": { + "p50": 533.2800149917603, + "p90": 622.6239800453186, + "p95": 912.4799966812134, + "p99": 4104.000091552734 + }, + "isolatedSum": { + "p50": 508.12800228595734, + 
"p90": 617.9199814796448, + "p95": 904.2240083217621, + "p99": 6617.408037185669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 382.01600313186646, + "p90": 432.44799971580505, + "p95": 594.7520136833191, + "p99": 3593.0240154266357 + }, + "combine": { + "p50": 231.99999332427979, + "p90": 255.16799092292786, + "p95": 728.16002368927, + "p99": 2714.656114578247 + }, + "roundtrip": { + "p50": 608.5439920425415, + "p90": 727.7119755744934, + "p95": 2505.1839351654053, + "p99": 3603.519916534424 + }, + "isolatedSum": { + "p50": 614.0159964561462, + "p90": 687.6159906387329, + "p95": 1322.912037372589, + "p99": 6307.680130004883 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9ee39f42", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h200_01a7820b", + "comparisonKey": "60013af000514292", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:18.410452+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + 
"hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 359.0080142021179, + "p90": 422.1760034561157, + "p95": 2209.696054458618, + "p99": 4506.080150604248 + }, + "combine": { + "p50": 133.44000279903412, + "p90": 174.14399981498718, + "p95": 184.67199802398682, + "p99": 768.9599990844727 + }, + "roundtrip": { + "p50": 529.1200280189514, + "p90": 645.5039978027344, + "p95": 3316.8320655822754, + "p99": 5051.904201507568 + }, + "isolatedSum": { + "p50": 492.44801700115204, + "p90": 596.3200032711029, + "p95": 2394.368052482605, + "p99": 5275.040149688721 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 0, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 378.08001041412354, + "p90": 444.5439875125885, + "p95": 2753.5359859466553, + "p99": 4099.520206451416 + }, + "combine": { + "p50": 145.31199634075165, + "p90": 210.27199923992157, + "p95": 2921.6959476470947, + "p99": 3938.368082046509 + }, + "roundtrip": { + "p50": 539.8079752922058, + "p90": 628.000020980835, + "p95": 3074.079990386963, + "p99": 4831.615924835205 + }, + "isolatedSum": { + "p50": 523.3920067548752, + "p90": 654.8159867525101, + "p95": 5675.23193359375, + "p99": 8037.888288497925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 391.9360041618347, + "p90": 462.68799901008606, + "p95": 549.2799878120422, + "p99": 3050.5599975585938 + }, + "combine": { + "p50": 131.84000551700592, + "p90": 189.98399376869202, + "p95": 204.19199764728546, + "p99": 2889.2478942871094 + }, + "roundtrip": { + "p50": 564.8959875106812, + "p90": 664.0639901161194, + "p95": 960.0639939308167, + "p99": 3773.3120918273926 + }, + "isolatedSum": { + "p50": 523.7760096788406, + "p90": 652.6719927787781, + "p95": 753.4719854593277, + "p99": 5939.807891845703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 399.1360068321228, + "p90": 467.77600049972534, + "p95": 565.5679702758789, + "p99": 3587.87202835083 + }, + "combine": { + "p50": 235.74399948120117, + "p90": 269.1519856452942, + "p95": 1083.456039428711, + "p99": 3126.62410736084 + }, + 
"roundtrip": { + "p50": 630.2080154418945, + "p90": 701.5359997749329, + "p95": 2474.7519493103027, + "p99": 3676.640033721924 + }, + "isolatedSum": { + "p50": 634.880006313324, + "p90": 736.9279861450195, + "p95": 1649.0240097045898, + "p99": 6714.49613571167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d3606efb", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h200_fa518406", + "comparisonKey": "ed82f975a552fae3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:26.151255+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 321.79200649261475, + "p90": 424.19201135635376, + "p95": 3066.3681030273438, + "p99": 3948.575973510742 + }, + "combine": { + "p50": 133.12000036239624, + "p90": 196.383997797966, + "p95": 1781.7280292510986, + "p99": 3219.1998958587646 + }, + "roundtrip": { + "p50": 488.8960123062134, + "p90": 784.2239737510681, + "p95": 3622.015953063965, + "p99": 4689.919948577881 + }, + "isolatedSum": { + "p50": 454.912006855011, + "p90": 620.5760091543198, + "p95": 4848.096132278442, + "p99": 7167.775869369507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 9, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 330.9440016746521, + "p90": 466.65599942207336, + "p95": 3139.8398876190186, + "p99": 3955.104112625122 + }, + "combine": { + "p50": 129.18399274349213, + "p90": 166.9439971446991, + "p95": 193.7279999256134, + "p99": 3321.023941040039 + }, + "roundtrip": { + "p50": 493.6319887638092, + "p90": 778.7839770317078, + "p95": 3392.416000366211, + "p99": 4593.855857849121 + }, + "isolatedSum": { + "p50": 460.1279944181442, + "p90": 633.5999965667725, + "p95": 3333.567887544632, + "p99": 7276.128053665161 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 18, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 344.1280126571655, + "p90": 471.16801142692566, + "p95": 3046.7519760131836, + "p99": 3864.192008972168 + }, + "combine": { + "p50": 129.15199995040894, + "p90": 187.80800700187683, + "p95": 307.2640001773834, + "p99": 3070.6560611724854 + }, + "roundtrip": { + "p50": 500.44798851013184, + "p90": 767.0720219612122, + "p95": 3135.103940963745, + "p99": 4208.960056304932 + }, + "isolatedSum": { + "p50": 473.28001260757446, + "p90": 658.9760184288025, + "p95": 3354.015976190567, + "p99": 6934.848070144653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 36, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 337.69598603248596, + "p90": 431.2959909439087, + "p95": 535.8719825744629, + "p99": 3778.6879539489746 + }, + "combine": { + "p50": 126.81600451469421, + "p90": 186.39999628067017, + "p95": 234.55999791622162, + "p99": 3256.6399574279785 + }, + "roundtrip": { + "p50": 504.9920082092285, + "p90": 714.0799760818481, + "p95": 3048.288106918335, + "p99": 4184.095859527588 + }, + "isolatedSum": { + "p50": 464.5119905471802, + "p90": 617.6959872245789, + "p95": 770.4319804906845, + "p99": 7035.327911376953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 72, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 344.57600116729736, + "p90": 460.640013217926, + "p95": 562.7520084381104, + "p99": 
3632.159948348999 + }, + "combine": { + "p50": 129.50399518013, + "p90": 186.65599822998047, + "p95": 220.47999501228333, + "p99": 2929.56805229187 + }, + "roundtrip": { + "p50": 492.000013589859, + "p90": 698.5599994659424, + "p95": 1130.687952041626, + "p99": 4136.767864227295 + }, + "isolatedSum": { + "p50": 474.07999634742737, + "p90": 647.2960114479065, + "p95": 783.2320034503937, + "p99": 6561.728000640869 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 144, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 360.28799414634705, + "p90": 491.0399913787842, + "p95": 585.3440165519714, + "p99": 3722.0799922943115 + }, + "combine": { + "p50": 134.0160071849823, + "p90": 179.45599555969238, + "p95": 204.3839991092682, + "p99": 3112.1599674224854 + }, + "roundtrip": { + "p50": 498.59198927879333, + "p90": 681.7280054092407, + "p95": 3094.8479175567627, + "p99": 4187.263965606689 + }, + "isolatedSum": { + "p50": 494.30400133132935, + "p90": 670.4959869384766, + "p95": 789.7280156612396, + "p99": 6834.239959716797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 288, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 363.20000886917114, + "p90": 489.47200179100037, + "p95": 658.9760184288025, + "p99": 3581.34388923645 + }, + "combine": { + "p50": 144.41600441932678, + "p90": 169.11999881267548, + "p95": 217.50399470329285, + "p99": 3037.152051925659 + }, + "roundtrip": { + "p50": 513.759970664978, + "p90": 684.4800114631653, + "p95": 2476.2558937072754, + "p99": 4038.303852081299 + }, + "isolatedSum": { + "p50": 507.6160132884979, + "p90": 658.5920006036758, + "p95": 
876.4800131320953, + "p99": 6618.495941162109 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 576, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 389.8879885673523, + "p90": 494.0800070762634, + "p95": 651.2640118598938, + "p99": 3550.2400398254395 + }, + "combine": { + "p50": 236.95999383926392, + "p90": 252.79998779296875, + "p95": 272.15999364852905, + "p99": 3070.784091949463 + }, + "roundtrip": { + "p50": 606.1760187149048, + "p90": 725.600004196167, + "p95": 2753.024101257324, + "p99": 3666.3999557495117 + }, + "isolatedSum": { + "p50": 626.8479824066162, + "p90": 746.8799948692322, + "p95": 923.4240055084229, + "p99": 6621.024131774902 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-53b17fc9", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h200_3b856c30", + "comparisonKey": "e4097f1958a15c06", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:52.928494+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + 
"routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 395.9999978542328, + "p90": 482.56000876426697, + "p95": 1281.3440561294556, + "p99": 3698.431968688965 + }, + "combine": { + "p50": 148.80000054836273, + "p90": 223.64799678325653, + "p95": 248.79999458789825, + "p99": 3109.2801094055176 + }, + "roundtrip": { + "p50": 591.7760133743286, + "p90": 702.4319767951965, + "p95": 2599.5841026306152, + "p99": 3964.063882827759 + }, + "isolatedSum": { + "p50": 544.7999984025955, + "p90": 706.2080055475235, + "p95": 1530.1440507173538, + "p99": 6807.712078094482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 
3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 373.663991689682, + "p90": 475.23200511932373, + "p95": 835.0719809532166, + "p99": 3247.2000122070312 + }, + "combine": { + "p50": 130.8480054140091, + "p90": 187.9359930753708, + "p95": 212.47999370098114, + "p99": 3004.6401023864746 + }, + "roundtrip": { + "p50": 534.5280170440674, + "p90": 671.3280081748962, + "p95": 3005.631923675537, + "p99": 4181.4398765563965 + }, + "isolatedSum": { + "p50": 504.5119971036911, + "p90": 663.1679981946945, + "p95": 1047.5519746541977, + "p99": 6251.840114593506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 411.871999502182, + "p90": 489.3440008163452, + "p95": 579.3280005455017, + "p99": 3000.704050064087 + }, + "combine": { + "p50": 250.17601251602173, + "p90": 268.73600482940674, + "p95": 1844.383955001831, + "p99": 2979.6159267425537 + }, + "roundtrip": { + "p50": 670.0800061225891, + "p90": 747.9360103607178, + "p95": 2623.647928237915, + "p99": 3419.3599224090576 + }, + "isolatedSum": { + "p50": 662.0480120182037, + "p90": 758.080005645752, + "p95": 2423.7119555473328, + "p99": 5980.319976806641 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2a1fe097", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h200_a819a7f8", + "comparisonKey": "e8e0c8b57e3c53c4", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:17.262893+00:00", + "status": "valid", 
+ "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 337.119996547699, + "p90": 400.7039964199066, + "p95": 560.6399774551392, + "p99": 3756.864070892334 + }, + "combine": { + "p50": 129.31199371814728, + "p90": 
155.7759940624237, + "p95": 170.8800047636032, + "p99": 2955.80792427063 + }, + "roundtrip": { + "p50": 495.64799666404724, + "p90": 612.8960251808167, + "p95": 1469.3119525909424, + "p99": 4273.0560302734375 + }, + "isolatedSum": { + "p50": 466.43199026584625, + "p90": 556.4799904823303, + "p95": 731.5199822187424, + "p99": 6712.671995162964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 388.15999031066895, + "p90": 501.18398666381836, + "p95": 1277.3120403289795, + "p99": 3382.848024368286 + }, + "combine": { + "p50": 129.60000336170197, + "p90": 189.40800428390503, + "p95": 292.2559976577759, + "p99": 3326.5280723571777 + }, + "roundtrip": { + "p50": 513.8559937477112, + "p90": 612.9599809646606, + "p95": 796.064019203186, + "p99": 4232.800006866455 + }, + "isolatedSum": { + "p50": 517.7599936723709, + "p90": 690.5919909477234, + "p95": 1569.5680379867554, + "p99": 6709.376096725464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 406.9119989871979, + "p90": 456.7359983921051, + "p95": 596.0000157356262, + "p99": 3583.5840702056885 + }, + "combine": { + "p50": 250.43201446533203, + "p90": 268.5759961605072, + "p95": 2235.2640628814697, + "p99": 3073.8561153411865 + }, + "roundtrip": { + "p50": 636.4160180091858, + "p90": 725.600004196167, + "p95": 2734.879970550537, + "p99": 3718.3680534362793 + }, + "isolatedSum": { + "p50": 657.3440134525299, + "p90": 725.3119945526123, + "p95": 2831.264078617096, + "p99": 6657.440185546875 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bf6019b6", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h200_ab19acb1", + "comparisonKey": "54982b0e776566e0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:41.778736+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 355.55198788642883, + "p90": 472.4160134792328, + "p95": 646.2720036506653, + "p99": 3455.456018447876 + }, + "combine": { + "p50": 130.52800297737122, + "p90": 186.11200153827667, + "p95": 210.7200026512146, + "p99": 3366.8160438537598 + }, + "roundtrip": { + "p50": 524.7359871864319, + "p90": 697.2159743309021, + "p95": 2993.7920570373535, + "p99": 4280.064105987549 + }, + "isolatedSum": { + "p50": 486.07999086380005, + "p90": 658.5280150175095, + "p95": 856.9920063018799, + "p99": 6822.272062301636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 365.9200072288513, + "p90": 472.4160134792328, + "p95": 1098.1440544128418, + "p99": 3136.1920833587646 + }, + "combine": { + "p50": 136.7039978504181, + "p90": 188.6720061302185, + "p95": 208.22399854660034, + "p99": 2993.5359954833984 + }, + "roundtrip": { + "p50": 568.7680244445801, + "p90": 832.0000171661377, + "p95": 3103.327989578247, + "p99": 4027.359962463379 + }, + "isolatedSum": { + "p50": 502.6240050792694, + "p90": 661.0880196094513, + "p95": 1306.3680529594421, + "p99": 6129.728078842163 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 420.5760061740875, + "p90": 550.5599975585938, + "p95": 1233.3439588546753, + "p99": 3215.775966644287 + }, + "combine": { + "p50": 251.0400116443634, + "p90": 267.5839960575104, + "p95": 2086.944103240967, + "p99": 2817.6960945129395 + }, + "roundtrip": { + "p50": 651.6159772872925, + "p90": 777.02397108078, + "p95": 2690.272092819214, + "p99": 3476.736068725586 + }, + "isolatedSum": { + "p50": 671.6160178184509, + "p90": 818.1439936161041, + "p95": 3320.288062095642, + "p99": 6033.472061157227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5bbf4043", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h200_aa19ab1e", + "comparisonKey": "b2528268a7d2f616", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:05.546936+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 379.68000769615173, + "p90": 449.91999864578247, + "p95": 568.1920051574707, + "p99": 3784.7681045532227 + }, + "combine": { + "p50": 133.91999900341034, + "p90": 197.02400267124176, + "p95": 214.6880030632019, + "p99": 2875.9679794311523 + }, + "roundtrip": { + "p50": 544.6400046348572, + "p90": 691.1360025405884, + "p95": 3032.73606300354, + "p99": 4195.4240798950195 + }, + "isolatedSum": { + "p50": 513.6000066995621, + "p90": 646.9440013170242, + "p95": 782.8800082206726, + "p99": 6660.736083984375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 399.6480107307434, + "p90": 488.95999789237976, + "p95": 590.2720093727112, + "p99": 3333.8561058044434 + }, + "combine": { + "p50": 132.57600367069244, + "p90": 196.3520050048828, 
+ "p95": 214.30400013923645, + "p99": 2797.1839904785156 + }, + "roundtrip": { + "p50": 566.8799877166748, + "p90": 651.8080234527588, + "p95": 2972.8000164031982, + "p99": 4195.551872253418 + }, + "isolatedSum": { + "p50": 532.2240144014359, + "p90": 685.3120028972626, + "p95": 804.5760095119476, + "p99": 6131.040096282959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 410.91200709342957, + "p90": 480.44800758361816, + "p95": 582.4959874153137, + "p99": 3549.6959686279297 + }, + "combine": { + "p50": 252.25600600242615, + "p90": 271.4560031890869, + "p95": 1983.0399751663208, + "p99": 2995.5520629882812 + }, + "roundtrip": { + "p50": 662.7519726753235, + "p90": 729.0880084037781, + "p95": 2719.520092010498, + "p99": 3517.6639556884766 + }, + "isolatedSum": { + "p50": 663.1680130958557, + "p90": 751.9040107727051, + "p95": 2565.5359625816345, + "p99": 6545.248031616211 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7fa7639a", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h200_d556a532", + "comparisonKey": "c043817dc2c34d7e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:51.355549+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", 
+ "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 321.696013212204, + "p90": 376.8320083618164, + "p95": 2872.6720809936523, + "p99": 3989.856004714966 + }, + "combine": { + "p50": 132.35199451446533, + "p90": 176.38400197029114, + "p95": 458.3039879798889, + "p99": 3923.583984375 + }, + "roundtrip": { + "p50": 487.61600255966187, + "p90": 771.5520262718201, + "p95": 3282.20796585083, + "p99": 4341.375827789307 + }, + "isolatedSum": { + "p50": 454.0480077266693, + "p90": 
553.2160103321075, + "p95": 3330.9760689735413, + "p99": 7913.439989089966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 326.7199993133545, + "p90": 385.21599769592285, + "p95": 790.6559705734253, + "p99": 4129.471778869629 + }, + "combine": { + "p50": 135.16800105571747, + "p90": 193.6960071325302, + "p95": 243.3280050754547, + "p99": 3653.088092803955 + }, + "roundtrip": { + "p50": 509.3119740486145, + "p90": 698.7839937210083, + "p95": 1048.4800338745117, + "p99": 4417.727947235107 + }, + "isolatedSum": { + "p50": 461.88800036907196, + "p90": 578.9120048284531, + "p95": 1033.98397564888, + "p99": 7782.559871673584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 350.97599029541016, + "p90": 405.15199303627014, + "p95": 560.4479908943176, + "p99": 3676.896095275879 + }, + "combine": { + "p50": 128.9599984884262, + "p90": 161.24799847602844, + "p95": 249.95200335979462, + "p99": 3074.687957763672 + }, + "roundtrip": { + "p50": 503.9359927177429, + "p90": 706.4319849014282, + "p95": 1993.9839839935303, + "p99": 4182.528018951416 + }, + "isolatedSum": { + "p50": 479.93598878383636, + "p90": 566.3999915122986, + "p95": 810.3999942541122, + "p99": 6751.584053039551 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 50, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + 
"p50": 370.6879913806915, + "p90": 499.32798743247986, + "p95": 2100.480079650879, + "p99": 3897.887945175171 + }, + "combine": { + "p50": 138.62399756908417, + "p90": 207.61600136756897, + "p95": 236.32000386714935, + "p99": 2973.9840030670166 + }, + "roundtrip": { + "p50": 525.2799987792969, + "p90": 730.5279970169067, + "p95": 2846.04811668396, + "p99": 4257.567882537842 + }, + "isolatedSum": { + "p50": 509.3119889497757, + "p90": 706.9439888000488, + "p95": 2336.8000835180283, + "p99": 6871.8719482421875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 109, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 350.3679931163788, + "p90": 474.91198778152466, + "p95": 745.248019695282, + "p99": 3652.2560119628906 + }, + "combine": { + "p50": 135.71199774742126, + "p90": 202.4960070848465, + "p95": 258.7839961051941, + "p99": 3427.1678924560547 + }, + "roundtrip": { + "p50": 523.967981338501, + "p90": 746.720016002655, + "p95": 3107.8720092773438, + "p99": 4035.7117652893066 + }, + "isolatedSum": { + "p50": 486.07999086380005, + "p90": 677.4079948663712, + "p95": 1004.0320158004761, + "p99": 7079.423904418945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 224, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 355.29598593711853, + "p90": 420.8320081233978, + "p95": 535.8719825744629, + "p99": 3244.159936904907 + }, + "combine": { + "p50": 131.58400356769562, + "p90": 161.69600188732147, + "p95": 194.04800236225128, + "p99": 3204.15997505188 + }, + "roundtrip": { + "p50": 511.29597425460815, + "p90": 674.3040084838867, + "p95": 3130.2080154418945, + "p99": 
3813.728094100952 + }, + "isolatedSum": { + "p50": 486.87998950481415, + "p90": 582.5280100107193, + "p95": 729.9199849367142, + "p99": 6448.319911956787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 444, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 379.8399865627289, + "p90": 501.3759732246399, + "p95": 1407.423973083496, + "p99": 3405.503988265991 + }, + "combine": { + "p50": 156.54399991035461, + "p90": 223.7440049648285, + "p95": 259.42400097846985, + "p99": 3003.648042678833 + }, + "roundtrip": { + "p50": 543.39200258255, + "p90": 709.8559737205505, + "p95": 2898.0801105499268, + "p99": 3863.039970397949 + }, + "isolatedSum": { + "p50": 536.3839864730835, + "p90": 725.1199781894684, + "p95": 1666.847974061966, + "p99": 6409.152030944824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 925, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 403.1040072441101, + "p90": 516.4480209350586, + "p95": 2123.392105102539, + "p99": 3675.584077835083 + }, + "combine": { + "p50": 248.60799312591553, + "p90": 262.91200518608093, + "p95": 324.5759904384613, + "p99": 3030.04789352417 + }, + "roundtrip": { + "p50": 639.519989490509, + "p90": 726.2399792671204, + "p95": 2555.7758808135986, + "p99": 3536.479949951172 + }, + "isolatedSum": { + "p50": 651.7120003700256, + "p90": 779.3600261211395, + "p95": 2447.9680955410004, + "p99": 6705.631971359253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d9bfffc7", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h200_3ac0b8bc", + "comparisonKey": "878df5b4f875a07b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:43.836515+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + 
"url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 378.7519931793213, + "p90": 446.52798771858215, + "p95": 2906.719923019409, + "p99": 4372.191905975342 + }, + "combine": { + "p50": 149.56800639629364, + "p90": 204.96000349521637, + "p95": 235.9039932489395, + "p99": 3614.4320964813232 + }, + "roundtrip": { + "p50": 549.9200224876404, + "p90": 645.1839804649353, + "p95": 3073.791980743408, + "p99": 4202.816009521484 + }, + "isolatedSum": { + "p50": 528.3199995756149, + "p90": 651.4879912137985, + "p95": 3142.6239162683487, + "p99": 7986.624002456665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 374.87998604774475, + "p90": 450.5600035190582, + "p95": 2828.1280994415283, + "p99": 3897.4080085754395 + }, + "combine": { + "p50": 150.59199929237366, + "p90": 205.47200739383698, + "p95": 248.99199604988098, + "p99": 3403.1999111175537 + }, + "roundtrip": { + "p50": 568.4800148010254, + "p90": 742.3359751701355, + "p95": 3137.824058532715, + "p99": 4116.384029388428 + }, + "isolatedSum": { + "p50": 525.4719853401184, + "p90": 656.0320109128952, + "p95": 3077.1200954914093, + "p99": 7300.607919692993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 22, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 394.01599764823914, + "p90": 469.02400255203247, + "p95": 2651.808023452759, + "p99": 3749.216079711914 + }, + 
"combine": { + "p50": 128.9920061826706, + "p90": 197.60000705718994, + "p95": 220.09600698947906, + "p99": 2999.743938446045 + }, + "roundtrip": { + "p50": 564.1279816627502, + "p90": 752.9919743537903, + "p95": 2958.847999572754, + "p99": 4249.504089355469 + }, + "isolatedSum": { + "p50": 523.0080038309097, + "p90": 666.6240096092224, + "p95": 2871.904030442238, + "p99": 6748.960018157959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 396.3199853897095, + "p90": 486.8159890174866, + "p95": 2526.20792388916, + "p99": 3676.287889480591 + }, + "combine": { + "p50": 135.68000495433807, + "p90": 207.90399610996246, + "p95": 231.455996632576, + "p99": 3538.5279655456543 + }, + "roundtrip": { + "p50": 573.7599730491638, + "p90": 708.4159851074219, + "p95": 2034.6240997314453, + "p99": 4056.640148162842 + }, + "isolatedSum": { + "p50": 531.9999903440475, + "p90": 694.719985127449, + "p95": 2757.663920521736, + "p99": 7214.815855026245 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 79, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 385.1200044155121, + "p90": 471.0719883441925, + "p95": 685.7600212097168, + "p99": 3692.0320987701416 + }, + "combine": { + "p50": 140.03199338912964, + "p90": 209.6319943666458, + "p95": 242.17599630355835, + "p99": 3663.1040573120117 + }, + "roundtrip": { + "p50": 558.1439733505249, + "p90": 691.2000179290771, + "p95": 881.5039992332458, + "p99": 4036.7679595947266 + }, + "isolatedSum": { + "p50": 525.1519978046417, + "p90": 680.7039827108383, + "p95": 927.9360175132751, + 
"p99": 7355.136156082153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 134, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 397.0879912376404, + "p90": 467.51999855041504, + "p95": 571.1039900779724, + "p99": 3567.647933959961 + }, + "combine": { + "p50": 132.79999792575836, + "p90": 196.79999351501465, + "p95": 213.24799954891205, + "p99": 3558.3999156951904 + }, + "roundtrip": { + "p50": 591.5520191192627, + "p90": 757.1840286254883, + "p95": 3009.0880393981934, + "p99": 3970.0160026550293 + }, + "isolatedSum": { + "p50": 529.8879891633987, + "p90": 664.3199920654297, + "p95": 784.3519896268845, + "p99": 7126.047849655151 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 268, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 404.54399585723877, + "p90": 487.39200830459595, + "p95": 535.7440114021301, + "p99": 3050.368070602417 + }, + "combine": { + "p50": 149.02399480342865, + "p90": 201.56799256801605, + "p95": 217.66400337219238, + "p99": 3228.895902633667 + }, + "roundtrip": { + "p50": 587.0400071144104, + "p90": 693.6960220336914, + "p95": 2815.743923187256, + "p99": 3885.279893875122 + }, + "isolatedSum": { + "p50": 553.5679906606674, + "p90": 688.960000872612, + "p95": 753.4080147743225, + "p99": 6279.263973236084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 533, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
413.8239920139313, + "p90": 516.1920189857483, + "p95": 1003.4240484237671, + "p99": 3706.6240310668945 + }, + "combine": { + "p50": 235.1360023021698, + "p90": 269.6959972381592, + "p95": 1033.8239669799805, + "p99": 2620.8319664001465 + }, + "roundtrip": { + "p50": 666.4639711380005, + "p90": 766.4960026741028, + "p95": 2541.5680408477783, + "p99": 3503.904104232788 + }, + "isolatedSum": { + "p50": 648.9599943161011, + "p90": 785.8880162239075, + "p95": 2037.2480154037476, + "p99": 6327.455997467041 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 1027, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e63450e2", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·empty-rank|8|decode|normal|none|empty-rank|0|tuned||5621f0d4899ad7a", + "colorKey": "h200_16aa0583", + "comparisonKey": "8db957710f518a6d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:32.312987+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · uniform·empty-rank", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·empty-rank", + "routingStep": 0, + "unevenTokens": "empty-rank", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5621f0d4899ad7a", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 63, + "dispatch": { + "p50": 326.84800028800964, + "p90": 383.4879994392395, + "p95": 494.1120147705078, + "p99": 3815.392017364502 + }, + "combine": { + "p50": 129.88799810409546, + "p90": 173.15199971199036, + "p95": 548.9280223846436, + "p99": 3844.2559242248535 + }, + "roundtrip": { + "p50": 496.8000054359436, + "p90": 614.4959926605225, + "p95": 960.7359766960144, + "p99": 4777.632236480713 + }, + "isolatedSum": { + "p50": 456.7359983921051, + "p90": 556.6399991512299, + "p95": 1043.0400371551514, + "p99": 7659.6479415893555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4888576, + "combineLogicalBytes": 4888576, + "fanoutMean": 5.412698268890381, + "recvTokensMax": 74, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 252, + "dispatch": { + "p50": 361.11998558044434, + "p90": 421.85598611831665, + "p95": 522.3680138587952, + "p99": 2982.2399616241455 + }, + "combine": { + "p50": 135.5839967727661, + "p90": 189.40800428390503, + "p95": 348.4799861907959, + "p99": 
3885.9519958496094 + }, + "roundtrip": { + "p50": 519.3600058555603, + "p90": 614.3680214881897, + "p95": 1078.0160427093506, + "p99": 4406.0797691345215 + }, + "isolatedSum": { + "p50": 496.70398235321045, + "p90": 611.2639904022217, + "p95": 870.8480000495911, + "p99": 6868.191957473755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19396608, + "combineLogicalBytes": 19396608, + "fanoutMean": 5.36904764175415, + "recvTokensMax": 285, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1022, + "dispatch": { + "p50": 380.5760145187378, + "p90": 444.09599900245667, + "p95": 611.4879846572876, + "p99": 3778.9440155029297 + }, + "combine": { + "p50": 260.6079876422882, + "p90": 335.7119858264923, + "p95": 2376.447916030884, + "p99": 3220.2560901641846 + }, + "roundtrip": { + "p50": 635.5519890785217, + "p90": 784.1920256614685, + "p95": 2750.8161067962646, + "p99": 3872.1280097961426 + }, + "isolatedSum": { + "p50": 641.184002161026, + "p90": 779.807984828949, + "p95": 2987.9359006881714, + "p99": 6999.200105667114 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77529088, + "combineLogicalBytes": 77529088, + "fanoutMean": 5.2915849685668945, + "recvTokensMax": 1101, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f798156c", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h200_c0ac0920", + "comparisonKey": "ab2702eeaac7d379", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:08.258590+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 333.24798941612244, + "p90": 399.3600010871887, + "p95": 561.3440275192261, + "p99": 3139.008045196533 + }, + "combine": { + "p50": 126.08000636100769, + "p90": 155.16799688339233, + "p95": 177.44000256061554, + "p99": 3903.424024581909 + }, + "roundtrip": { + "p50": 483.0400049686432, + "p90": 561.9519948959351, + "p95": 781.5999984741211, + "p99": 4482.848167419434 + }, + "isolatedSum": { + "p50": 459.3279957771301, + "p90": 
554.527997970581, + "p95": 738.7840300798416, + "p99": 7042.432069778442 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 377.21601128578186, + "p90": 508.7360143661499, + "p95": 805.3119778633118, + "p99": 3634.144067764282 + }, + "combine": { + "p50": 130.72000443935394, + "p90": 166.27199947834015, + "p95": 231.48800432682037, + "p99": 3077.728033065796 + }, + "roundtrip": { + "p50": 505.9199929237366, + "p90": 676.4159798622131, + "p95": 1999.6800422668457, + "p99": 3661.6640090942383 + }, + "isolatedSum": { + "p50": 507.9360157251358, + "p90": 675.00801384449, + "p95": 1036.7999821901321, + "p99": 6711.872100830078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 388.5760009288788, + "p90": 456.09599351882935, + "p95": 1068.6399936676025, + "p99": 3611.743927001953 + }, + "combine": { + "p50": 286.3360047340393, + "p90": 304.4160008430481, + "p95": 964.3840193748474, + "p99": 2651.711940765381 + }, + "roundtrip": { + "p50": 658.9440107345581, + "p90": 721.6960191726685, + "p95": 2656.320095062256, + "p99": 3327.008008956909 + }, + "isolatedSum": { + "p50": 674.9120056629181, + "p90": 760.5119943618774, + "p95": 2033.02401304245, + "p99": 6263.455867767334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-21e60598", + "identity": 
"h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h200_8507665e", + "comparisonKey": "9151a34dcea6ad5d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:11.644268+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 362.94400691986084, + "p90": 422.1760034561157, + "p95": 1442.5920248031616, + "p99": 4768.352031707764 + }, + "combine": { + "p50": 146.2399959564209, + "p90": 201.12000405788422, + "p95": 213.21600675582886, + "p99": 3895.6799507141113 + }, + "roundtrip": { + "p50": 549.4080185890198, + "p90": 745.8879947662354, + "p95": 3020.6398963928223, + "p99": 4580.704212188721 + }, + "isolatedSum": { + "p50": 509.18400287628174, + "p90": 623.2960075139999, + "p95": 1655.8080315589905, + "p99": 8664.031982421875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 15, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 374.39998984336853, + "p90": 451.4879882335663, + "p95": 2820.8000659942627, + "p99": 3947.2639560699463 + }, + "combine": { + "p50": 163.7440025806427, + "p90": 228.35199534893036, + "p95": 2784.8000526428223, + "p99": 3730.4320335388184 + }, + "roundtrip": { + "p50": 553.0239939689636, + "p90": 673.5680103302002, + "p95": 2998.879909515381, + "p99": 4259.039878845215 + }, + "isolatedSum": { + "p50": 538.1439924240112, + "p90": 679.8399835824966, + "p95": 5605.600118637085, + "p99": 7677.695989608765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 368.9279854297638, + "p90": 471.74400091171265, + "p95": 601.1199951171875, + "p99": 4142.911911010742 + }, + "combine": { + "p50": 132.7359974384308, + "p90": 208.38400721549988, + "p95": 230.43200373649597, + "p99": 3453.9198875427246 + }, + 
"roundtrip": { + "p50": 573.3759999275208, + "p90": 675.5520105361938, + "p95": 2950.2079486846924, + "p99": 4417.439937591553 + }, + "isolatedSum": { + "p50": 501.6639828681946, + "p90": 680.1280081272125, + "p95": 831.5519988536835, + "p99": 7596.831798553467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 43, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 397.63200283050537, + "p90": 479.39199209213257, + "p95": 665.5679941177368, + "p99": 3889.888048171997 + }, + "combine": { + "p50": 126.65599584579468, + "p90": 194.72000002861023, + "p95": 206.496000289917, + "p99": 230.84799945354462 + }, + "roundtrip": { + "p50": 554.0480017662048, + "p90": 618.9759969711304, + "p95": 890.175998210907, + "p99": 4269.0558433532715 + }, + "isolatedSum": { + "p50": 524.2879986763, + "p90": 674.1119921207428, + "p95": 872.0639944076538, + "p99": 4120.736047625542 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 73, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 378.08001041412354, + "p90": 448.60801100730896, + "p95": 551.360011100769, + "p99": 3923.4559535980225 + }, + "combine": { + "p50": 126.36800110340118, + "p90": 191.23199582099915, + "p95": 206.2080055475235, + "p99": 3739.3600940704346 + }, + "roundtrip": { + "p50": 553.7279844284058, + "p90": 653.0560255050659, + "p95": 1825.4400491714478, + "p99": 4211.328029632568 + }, + "isolatedSum": { + "p50": 504.4480115175247, + "p90": 639.8400068283081, + "p95": 757.5680166482925, + "p99": 7662.816047668457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + 
"fanoutMean": 5.09375, + "recvTokensMax": 142, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 402.49601006507874, + "p90": 482.87999629974365, + "p95": 565.5040144920349, + "p99": 3659.168004989624 + }, + "combine": { + "p50": 136.9280070066452, + "p90": 209.72800254821777, + "p95": 227.10399329662323, + "p99": 2581.792116165161 + }, + "roundtrip": { + "p50": 577.2799849510193, + "p90": 652.9920101165771, + "p95": 1264.896035194397, + "p99": 3962.048053741455 + }, + "isolatedSum": { + "p50": 539.4240170717239, + "p90": 692.6079988479614, + "p95": 792.6080077886581, + "p99": 6240.960121154785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 274, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 382.6240003108978, + "p90": 463.3600115776062, + "p95": 515.8399939537048, + "p99": 3549.9520301818848 + }, + "combine": { + "p50": 146.5280055999756, + "p90": 211.29600703716278, + "p95": 333.8879942893982, + "p99": 3401.3121128082275 + }, + "roundtrip": { + "p50": 566.8799877166748, + "p90": 635.6800198554993, + "p95": 697.7599859237671, + "p99": 4076.223850250244 + }, + "isolatedSum": { + "p50": 529.1520059108734, + "p90": 674.656018614769, + "p95": 849.727988243103, + "p99": 6951.264142990112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 419.74401473999023, + "p90": 497.50399589538574, + "p95": 553.8560152053833, + "p99": 3614.8478984832764 + }, + "combine": { + "p50": 
235.35999655723572, + "p90": 264.2560005187988, + "p95": 277.40800380706787, + "p99": 3050.0481128692627 + }, + "roundtrip": { + "p50": 639.2639875411987, + "p90": 697.3119974136353, + "p95": 2483.1039905548096, + "p99": 3661.9200706481934 + }, + "isolatedSum": { + "p50": 655.104011297226, + "p90": 761.7599964141846, + "p95": 831.2640190124512, + "p99": 6664.896011352539 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 1042, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-173dcf7e", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_4443fc93", + "comparisonKey": "64ad3254bc96eabd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:18.738858+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 365.85599184036255, + "p90": 436.12799048423767, + "p95": 607.7119708061218, + "p99": 3936.511993408203 + }, + "combine": { + "p50": 139.48799669742584, + "p90": 198.94400238990784, + "p95": 311.67998909950256, + "p99": 3756.319999694824 + }, + "roundtrip": { + "p50": 547.327995300293, + "p90": 666.4000153541565, + "p95": 3631.999969482422, + "p99": 4570.911884307861 + }, + "isolatedSum": { + "p50": 505.3439885377884, + "p90": 635.0719928741455, + "p95": 919.3919599056244, + "p99": 7692.831993103027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 368.3199882507324, + "p90": 471.8399941921234, + "p95": 2882.4639320373535, + "p99": 4833.536148071289 + }, + "combine": { + "p50": 122.01599776744843, + "p90": 185.88800728321075, + "p95": 196.83200120925903, + "p99": 3601.9840240478516 + }, + "roundtrip": { + "p50": 533.0560207366943, + "p90": 730.0800085067749, + "p95": 3272.5439071655273, + "p99": 4460.127830505371 + }, + "isolatedSum": { + "p50": 490.33598601818085, + "p90": 
657.7280014753342, + "p95": 3079.2959332466125, + "p99": 8435.52017211914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 380.0959885120392, + "p90": 440.2239918708801, + "p95": 2655.263900756836, + "p99": 3714.3359184265137 + }, + "combine": { + "p50": 125.85599720478058, + "p90": 190.65600633621216, + "p95": 208.99200439453125, + "p99": 3606.7519187927246 + }, + "roundtrip": { + "p50": 549.95197057724, + "p90": 735.9679937362671, + "p95": 3041.9840812683105, + "p99": 4215.583801269531 + }, + "isolatedSum": { + "p50": 505.95198571681976, + "p90": 630.8799982070923, + "p95": 2864.255905151367, + "p99": 7321.087837219238 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 385.0240111351013, + "p90": 482.2719991207123, + "p95": 1362.5600337982178, + "p99": 4107.168197631836 + }, + "combine": { + "p50": 131.71200454235077, + "p90": 197.9839950799942, + "p95": 213.95200490951538, + "p99": 3580.8959007263184 + }, + "roundtrip": { + "p50": 548.2879877090454, + "p90": 636.9600296020508, + "p95": 922.2400188446045, + "p99": 4254.240036010742 + }, + "isolatedSum": { + "p50": 516.7360156774521, + "p90": 680.2559942007065, + "p95": 1576.5120387077332, + "p99": 7688.064098358154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": 
{ + "p50": 404.12798523902893, + "p90": 622.655987739563, + "p95": 2668.9279079437256, + "p99": 3756.704092025757 + }, + "combine": { + "p50": 130.14400005340576, + "p90": 187.99999356269836, + "p95": 208.00000429153442, + "p99": 2856.031894683838 + }, + "roundtrip": { + "p50": 542.8479909896851, + "p90": 653.4720063209534, + "p95": 2444.1280364990234, + "p99": 4306.6558837890625 + }, + "isolatedSum": { + "p50": 534.2719852924347, + "p90": 810.6559813022614, + "p95": 2876.92791223526, + "p99": 6612.735986709595 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 402.3680090904236, + "p90": 466.2080109119415, + "p95": 625.5999803543091, + "p99": 3512.768030166626 + }, + "combine": { + "p50": 143.0719941854477, + "p90": 187.29600310325623, + "p95": 202.68799364566803, + "p99": 3368.4799671173096 + }, + "roundtrip": { + "p50": 562.4960064888, + "p90": 715.5200242996216, + "p95": 2680.1280975341797, + "p99": 4160.064220428467 + }, + "isolatedSum": { + "p50": 545.4400032758713, + "p90": 653.5040140151978, + "p95": 828.2879739999771, + "p99": 6881.247997283936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 441.3119852542877, + "p90": 515.4240131378174, + "p95": 1044.1279411315918, + "p99": 3245.3439235687256 + }, + "combine": { + "p50": 212.73599565029144, + "p90": 226.30399465560913, + "p95": 367.35999584198, + "p99": 3054.1439056396484 + }, + "roundtrip": { + "p50": 662.6880168914795, + "p90": 732.9599857330322, + "p95": 2752.0320415496826, + "p99": 
3617.5360679626465 + }, + "isolatedSum": { + "p50": 654.0479809045792, + "p90": 741.7280077934265, + "p95": 1411.4879369735718, + "p99": 6299.487829208374 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 541.4400100708008, + "p90": 669.0559983253479, + "p95": 1957.535982131958, + "p99": 3127.903938293457 + }, + "combine": { + "p50": 371.5519905090332, + "p90": 395.29600739479065, + "p95": 1978.3040285110474, + "p99": 2400.320053100586 + }, + "roundtrip": { + "p50": 916.9600009918213, + "p90": 983.3279848098755, + "p95": 2619.4560527801514, + "p99": 2986.176013946533 + }, + "isolatedSum": { + "p50": 912.992000579834, + "p90": 1064.3520057201385, + "p95": 3935.8400106430054, + "p99": 5528.223991394043 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f516355d", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h200_52e1dea9", + "comparisonKey": "e0472f3938d2dc27", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:58.779022+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · 
nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 367.6159977912903, + "p90": 432.671993970871, + "p95": 2855.1039695739746, + "p99": 3913.5680198669434 + }, + "combine": { + "p50": 141.27999544143677, + "p90": 184.86399948596954, + "p95": 203.5519927740097, + "p99": 2725.3758907318115 + }, + "roundtrip": { + "p50": 551.0720014572144, + "p90": 794.4319844245911, + "p95": 3119.231939315796, + "p99": 4518.303871154785 + }, + "isolatedSum": { + "p50": 508.89599323272705, + "p90": 617.5359934568405, + "p95": 3058.6559623479843, + "p99": 6638.943910598755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 
1.5, + "recvTokensMax": 59, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 375.0399947166443, + "p90": 447.07199931144714, + "p95": 2961.4720344543457, + "p99": 3943.7758922576904 + }, + "combine": { + "p50": 137.31199502944946, + "p90": 191.03999435901642, + "p95": 214.04799818992615, + "p99": 2776.7040729522705 + }, + "roundtrip": { + "p50": 538.2720232009888, + "p90": 697.4400281906128, + "p95": 2808.511972427368, + "p99": 4403.3918380737305 + }, + "isolatedSum": { + "p50": 512.3519897460938, + "p90": 638.1119936704636, + "p95": 3175.520032644272, + "p99": 6720.479965209961 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 121, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 394.9120044708252, + "p90": 482.2719991207123, + "p95": 2735.8078956604004, + "p99": 3838.207960128784 + }, + "combine": { + "p50": 156.92800283432007, + "p90": 203.87199521064758, + "p95": 227.48799622058868, + "p99": 2942.0158863067627 + }, + "roundtrip": { + "p50": 555.1360249519348, + "p90": 683.4880113601685, + "p95": 2874.847888946533, + "p99": 4393.02396774292 + }, + "isolatedSum": { + "p50": 551.8400073051453, + "p90": 686.1439943313599, + "p95": 2963.295891880989, + "p99": 6780.223846435547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 244, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 389.0880048274994, + "p90": 468.3839976787567, + "p95": 2921.9839572906494, + "p99": 3907.2000980377197 + }, + "combine": { + "p50": 150.30400454998016, + "p90": 207.2640061378479, + 
"p95": 273.0560004711151, + "p99": 3580.928087234497 + }, + "roundtrip": { + "p50": 558.9439868927002, + "p90": 736.0000014305115, + "p95": 3202.6240825653076, + "p99": 4594.272136688232 + }, + "isolatedSum": { + "p50": 539.3920093774796, + "p90": 675.6480038166046, + "p95": 3195.0399577617645, + "p99": 7488.128185272217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 478, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 397.95199036598206, + "p90": 491.61601066589355, + "p95": 2819.2319869995117, + "p99": 3752.16007232666 + }, + "combine": { + "p50": 141.37600362300873, + "p90": 202.81599462032318, + "p95": 223.32799434661865, + "p99": 3041.599988937378 + }, + "roundtrip": { + "p50": 581.60001039505, + "p90": 707.6799869537354, + "p95": 3017.2479152679443, + "p99": 4172.224044799805 + }, + "isolatedSum": { + "p50": 539.3279939889908, + "p90": 694.4320052862167, + "p95": 3042.5599813461304, + "p99": 6793.760061264038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 953, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 421.82400822639465, + "p90": 537.3119711875916, + "p95": 2691.103935241699, + "p99": 3609.7280979156494 + }, + "combine": { + "p50": 160.51200032234192, + "p90": 208.44799280166626, + "p95": 2523.9040851593018, + "p99": 3419.584035873413 + }, + "roundtrip": { + "p50": 597.1199870109558, + "p90": 785.0880026817322, + "p95": 3001.120090484619, + "p99": 3944.9920654296875 + }, + "isolatedSum": { + "p50": 582.3360085487366, + "p90": 745.7599639892578, + "p95": 5215.008020401001, + "p99": 7029.3121337890625 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 1908, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 499.29600954055786, + "p90": 624.0000128746033, + "p95": 2412.64009475708, + "p99": 3117.3439025878906 + }, + "combine": { + "p50": 264.70398902893066, + "p90": 288.2879972457886, + "p95": 2032.6719284057617, + "p99": 2732.8639030456543 + }, + "roundtrip": { + "p50": 773.7600207328796, + "p90": 850.5280017852783, + "p95": 2574.3680000305176, + "p99": 3310.1119995117188 + }, + "isolatedSum": { + "p50": 763.9999985694885, + "p90": 912.2880101203918, + "p95": 4445.312023162842, + "p99": 5850.207805633545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 3804, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 635.8720064163208, + "p90": 794.7840094566345, + "p95": 2345.439910888672, + "p99": 3225.2800464630127 + }, + "combine": { + "p50": 471.71199321746826, + "p90": 503.52001190185547, + "p95": 1473.4079837799072, + "p99": 1951.6479969024658 + }, + "roundtrip": { + "p50": 1095.3279733657837, + "p90": 1752.7040243148804, + "p95": 2431.6160678863525, + "p99": 2817.215919494629 + }, + "isolatedSum": { + "p50": 1107.583999633789, + "p90": 1298.30402135849, + "p95": 3818.847894668579, + "p99": 5176.9280433654785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a87f026b", + "identity": 
"h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h200_13ea7365", + "comparisonKey": "64f1dced9c2b7c4a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:29.194443+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 332.35201239585876, + "p90": 395.2000141143799, + "p95": 3095.072031021118, + "p99": 4119.775772094727 + }, + "combine": { + "p50": 134.14399325847626, + "p90": 212.25599944591522, + "p95": 300.86401104927063, + "p99": 3804.192066192627 + }, + "roundtrip": { + "p50": 490.81599712371826, + "p90": 800.383985042572, + "p95": 3628.7360191345215, + "p99": 4506.591796875 + }, + "isolatedSum": { + "p50": 466.496005654335, + "p90": 607.4560135602951, + "p95": 3395.936042070389, + "p99": 7923.9678382873535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 12, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 324.38400387763977, + "p90": 387.1360123157501, + "p95": 3454.1759490966797, + "p99": 4072.319984436035 + }, + "combine": { + "p50": 131.3599944114685, + "p90": 164.8319959640503, + "p95": 282.5919985771179, + "p99": 3888.6399269104004 + }, + "roundtrip": { + "p50": 489.1839921474457, + "p90": 750.5599856376648, + "p95": 3597.759962081909, + "p99": 4449.79190826416 + }, + "isolatedSum": { + "p50": 455.7439982891083, + "p90": 551.9680082798004, + "p95": 3736.7679476737976, + "p99": 7960.959911346436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 340.1600122451782, + "p90": 442.75200366973877, + "p95": 1156.3199758529663, + "p99": 3806.0479164123535 + }, + "combine": { + "p50": 132.57600367069244, + "p90": 171.83999717235565, + "p95": 197.6960003376007, + 
"p99": 3647.808074951172 + }, + "roundtrip": { + "p50": 517.1520113945007, + "p90": 708.2560062408447, + "p95": 3243.072032928467, + "p99": 4343.776226043701 + }, + "isolatedSum": { + "p50": 472.73601591587067, + "p90": 614.5920008420944, + "p95": 1354.015976190567, + "p99": 7453.855991363525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 40, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 337.5360071659088, + "p90": 405.40799498558044, + "p95": 609.4719767570496, + "p99": 3937.983989715576 + }, + "combine": { + "p50": 126.78399682044983, + "p90": 157.1200042963028, + "p95": 192.28799641132355, + "p99": 3668.8640117645264 + }, + "roundtrip": { + "p50": 490.1120066642761, + "p90": 685.375988483429, + "p95": 3030.8799743652344, + "p99": 4494.751930236816 + }, + "isolatedSum": { + "p50": 464.32000398635864, + "p90": 562.5279992818832, + "p95": 801.7599731683731, + "p99": 7606.8480014801025 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 71, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 343.58400106430054, + "p90": 418.4960126876831, + "p95": 650.6239771842957, + "p99": 3943.6800479888916 + }, + "combine": { + "p50": 131.99999928474426, + "p90": 163.7440025806427, + "p95": 221.98399901390076, + "p99": 3570.591926574707 + }, + "roundtrip": { + "p50": 493.5680031776428, + "p90": 606.1440110206604, + "p95": 979.1359901428223, + "p99": 4263.9360427856445 + }, + "isolatedSum": { + "p50": 475.5840003490448, + "p90": 582.2400152683258, + "p95": 872.6079761981964, + "p99": 7514.271974563599 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + 
"combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 143, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 362.527996301651, + "p90": 444.9920058250427, + "p95": 559.7760081291199, + "p99": 3821.1519718170166 + }, + "combine": { + "p50": 133.37600231170654, + "p90": 165.12000560760498, + "p95": 205.9839963912964, + "p99": 3743.488073348999 + }, + "roundtrip": { + "p50": 518.2399749755859, + "p90": 831.8719863891602, + "p95": 3428.5120964050293, + "p99": 4343.520164489746 + }, + "isolatedSum": { + "p50": 495.90399861335754, + "p90": 610.1120114326477, + "p95": 765.7600045204163, + "p99": 7564.640045166016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 266, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 375.64799189567566, + "p90": 498.01599979400635, + "p95": 683.0719709396362, + "p99": 3782.9439640045166 + }, + "combine": { + "p50": 146.7519998550415, + "p90": 187.9359930753708, + "p95": 257.6960027217865, + "p99": 3417.5679683685303 + }, + "roundtrip": { + "p50": 529.0240049362183, + "p90": 662.4640226364136, + "p95": 2965.0559425354004, + "p99": 4090.5919075012207 + }, + "isolatedSum": { + "p50": 522.3999917507172, + "p90": 685.9519928693771, + "p95": 940.7679736614227, + "p99": 7200.511932373047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 534, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 390.1439905166626, + "p90": 502.27200984954834, + "p95": 2604.095935821533, + "p99": 3687.4239444732666 + }, + 
"combine": { + "p50": 231.9680005311966, + "p90": 255.16799092292786, + "p95": 319.2319869995117, + "p99": 3042.7520275115967 + }, + "roundtrip": { + "p50": 598.5919833183289, + "p90": 706.4319849014282, + "p95": 3269.439935684204, + "p99": 3744.9920177459717 + }, + "isolatedSum": { + "p50": 622.1119910478592, + "p90": 757.4400007724762, + "p95": 2923.327922821045, + "p99": 6730.175971984863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1044, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-aef30bf4", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h200_e57b73c4", + "comparisonKey": "988d5b104c1a7a96", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:54.742020+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + 
"fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 332.70400762557983, + "p90": 410.5600118637085, + "p95": 3033.2798957824707, + "p99": 4104.159832000732 + }, + "combine": { + "p50": 128.4160017967224, + "p90": 193.85600090026855, + "p95": 713.2160067558289, + "p99": 3928.0319213867188 + }, + "roundtrip": { + "p50": 516.8319940567017, + "p90": 755.9679746627808, + "p95": 3237.6320362091064, + "p99": 4314.623832702637 + }, + "isolatedSum": { + "p50": 461.12000942230225, + "p90": 604.416012763977, + "p95": 3746.4959025382996, + "p99": 8032.191753387451 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 20, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 349.40800070762634, + "p90": 423.6159920692444, + "p95": 2955.3279876708984, + "p99": 4942.240238189697 + }, + "combine": { + "p50": 126.24000012874603, + "p90": 189.34400379657745, + "p95": 207.16799795627594, + "p99": 3088.0959033966064 + }, + "roundtrip": { + "p50": 534.9760055541992, + "p90": 705.4719924926758, + "p95": 3138.047933578491, + "p99": 4239.583969116211 + }, + 
"isolatedSum": { + "p50": 475.6480008363724, + "p90": 612.9599958658218, + "p95": 3162.4959856271744, + "p99": 8030.336141586304 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 40, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 338.1119966506958, + "p90": 408.7679982185364, + "p95": 645.4079747200012, + "p99": 3979.2959690093994 + }, + "combine": { + "p50": 125.08800625801086, + "p90": 166.04800522327423, + "p95": 180.38399517536163, + "p99": 3241.568088531494 + }, + "roundtrip": { + "p50": 521.8240022659302, + "p90": 716.5120244026184, + "p95": 3096.6720581054688, + "p99": 4399.328231811523 + }, + "isolatedSum": { + "p50": 463.20000290870667, + "p90": 574.8160034418106, + "p95": 825.7919698953629, + "p99": 7220.864057540894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 81, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 365.4719889163971, + "p90": 452.35198736190796, + "p95": 638.5599970817566, + "p99": 3787.839889526367 + }, + "combine": { + "p50": 123.6800029873848, + "p90": 198.11199605464935, + "p95": 271.67999744415283, + "p99": 3016.2880420684814 + }, + "roundtrip": { + "p50": 543.2320237159729, + "p90": 668.6080098152161, + "p95": 1925.7919788360596, + "p99": 4050.367832183838 + }, + "isolatedSum": { + "p50": 489.1519919037819, + "p90": 650.4639834165573, + "p95": 910.2399945259094, + "p99": 6804.127931594849 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 346.8480110168457, + "p90": 448.7040042877197, + "p95": 551.3920187950134, + "p99": 3744.0640926361084 + }, + "combine": { + "p50": 128.7039965391159, + "p90": 181.8239986896515, + "p95": 237.2799962759018, + "p99": 3332.6399326324463 + }, + "roundtrip": { + "p50": 519.3600058555603, + "p90": 703.4879922866821, + "p95": 3099.6479988098145, + "p99": 4281.248092651367 + }, + "isolatedSum": { + "p50": 475.5520075559616, + "p90": 630.5280029773712, + "p95": 788.6720150709152, + "p99": 7076.704025268555 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + "combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 339, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 376.25598907470703, + "p90": 457.5999975204468, + "p95": 573.9200115203857, + "p99": 3641.5040493011475 + }, + "combine": { + "p50": 134.24000144004822, + "p90": 183.4239959716797, + "p95": 215.16799926757812, + "p99": 3466.752052307129 + }, + "roundtrip": { + "p50": 533.951997756958, + "p90": 645.3440189361572, + "p95": 1037.11998462677, + "p99": 4301.760196685791 + }, + "isolatedSum": { + "p50": 510.49599051475525, + "p90": 641.0239934921265, + "p95": 789.0880107879639, + "p99": 7108.256101608276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 676, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 381.98399543762207, + "p90": 459.9680006504059, + "p95": 565.5360221862793, + "p99": 3468.640089035034 + }, + "combine": { + "p50": 167.39200055599213, + "p90": 182.11199343204498, + "p95": 200.1280039548874, + "p99": 3308.288097381592 + }, + "roundtrip": { + "p50": 545.8559989929199, + "p90": 
681.0240149497986, + "p95": 3036.128044128418, + "p99": 3910.207986831665 + }, + "isolatedSum": { + "p50": 549.3759959936142, + "p90": 642.0799940824509, + "p95": 765.6640261411667, + "p99": 6776.928186416626 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 1328, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 428.6400079727173, + "p90": 481.6960096359253, + "p95": 623.583972454071, + "p99": 3305.471897125244 + }, + "combine": { + "p50": 282.46399760246277, + "p90": 300.9920120239258, + "p95": 1317.9199695587158, + "p99": 2841.279983520508 + }, + "roundtrip": { + "p50": 708.1599831581116, + "p90": 832.5120210647583, + "p95": 2678.208112716675, + "p99": 3385.792016983032 + }, + "isolatedSum": { + "p50": 711.10400557518, + "p90": 782.6880216598511, + "p95": 1941.5039420127869, + "p99": 6146.751880645752 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0bf0706c", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h200_07baaf6a", + "comparisonKey": "99876a46387b6727", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:24.566305+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + 
"worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 334.9759876728058, + "p90": 432.76798725128174, + "p95": 1869.5679903030396, + "p99": 4237.152099609375 + }, + "combine": { + "p50": 129.02399897575378, + "p90": 166.9439971446991, + "p95": 199.13600385189056, + "p99": 2791.2960052490234 + }, + "roundtrip": { + "p50": 488.2560074329376, + "p90": 617.0560121536255, + "p95": 913.1199717521667, + "p99": 4337.728023529053 + }, + "isolatedSum": { + "p50": 463.99998664855957, + "p90": 599.7119843959808, + "p95": 2068.70399415493, + "p99": 7028.448104858398 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 339.9040102958679, + "p90": 407.9039990901947, + "p95": 1037.503957748413, + "p99": 3969.5680141448975 + }, + "combine": { + "p50": 122.20799922943115, + "p90": 169.91999745368958, + "p95": 199.10399615764618, + "p99": 3049.823999404907 + }, + "roundtrip": { + "p50": 506.24001026153564, + "p90": 697.0880031585693, + "p95": 1400.0639915466309, + "p99": 4624.7358322143555 + }, + "isolatedSum": { + "p50": 462.1120095252991, + "p90": 577.8239965438843, + "p95": 1236.6079539060593, + "p99": 7019.392013549805 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 22, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 352.09599137306213, + "p90": 428.6719858646393, + "p95": 533.1839919090271, + "p99": 3570.0480937957764 + }, + "combine": { + "p50": 125.91999769210815, + "p90": 164.22399878501892, + "p95": 173.63199591636658, + "p99": 274.27199482917786 + }, + "roundtrip": { + "p50": 513.5679841041565, + "p90": 635.0719928741455, + "p95": 828.9279937744141, + "p99": 4348.447799682617 + }, + "isolatedSum": { + "p50": 478.0159890651703, + "p90": 592.8959846496582, + "p95": 706.8159878253937, + "p99": 3844.320088624954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 338.46399188041687, + "p90": 435.42400002479553, + "p95": 
557.2159886360168, + "p99": 3905.791997909546 + }, + "combine": { + "p50": 125.21600723266602, + "p90": 160.44799983501434, + "p95": 177.69600450992584, + "p99": 250.14400482177734 + }, + "roundtrip": { + "p50": 501.8879771232605, + "p90": 687.0080232620239, + "p95": 1593.7919616699219, + "p99": 4596.960067749023 + }, + "isolatedSum": { + "p50": 463.6799991130829, + "p90": 595.8719998598099, + "p95": 734.9119931459427, + "p99": 4155.936002731323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 73, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 341.63200855255127, + "p90": 413.536012172699, + "p95": 532.0320129394531, + "p99": 3852.8640270233154 + }, + "combine": { + "p50": 124.06399846076965, + "p90": 152.51199901103973, + "p95": 165.0560051202774, + "p99": 2036.0960960388184 + }, + "roundtrip": { + "p50": 505.21600246429443, + "p90": 690.1440024375916, + "p95": 1178.3679723739624, + "p99": 4322.495937347412 + }, + "isolatedSum": { + "p50": 465.6960070133209, + "p90": 566.0480111837387, + "p95": 697.0880180597305, + "p99": 5888.960123062134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 138, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 391.1679983139038, + "p90": 603.6159992218018, + "p95": 857.9840064048767, + "p99": 3813.0240440368652 + }, + "combine": { + "p50": 132.28799402713776, + "p90": 215.58399498462677, + "p95": 261.59998774528503, + "p99": 2760.672092437744 + }, + "roundtrip": { + "p50": 530.6879878044128, + "p90": 728.6720275878906, + "p95": 1255.1039457321167, + "p99": 4139.071941375732 + }, + "isolatedSum": { + "p50": 
523.4559923410416, + "p90": 819.1999942064285, + "p95": 1119.5839941501617, + "p99": 6573.696136474609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 273, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 371.42398953437805, + "p90": 440.8639967441559, + "p95": 524.9599814414978, + "p99": 3490.2079105377197 + }, + "combine": { + "p50": 148.25600385665894, + "p90": 189.2160028219223, + "p95": 214.7199958562851, + "p99": 2713.1199836730957 + }, + "roundtrip": { + "p50": 538.4960174560547, + "p90": 655.135989189148, + "p95": 949.4400024414062, + "p99": 4180.8319091796875 + }, + "isolatedSum": { + "p50": 519.679993391037, + "p90": 630.0799995660782, + "p95": 739.6799772977829, + "p99": 6203.327894210815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 532, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 393.8559889793396, + "p90": 465.05600214004517, + "p95": 599.232017993927, + "p99": 3708.767890930176 + }, + "combine": { + "p50": 234.8479926586151, + "p90": 255.74401021003723, + "p95": 267.67998933792114, + "p99": 2909.9199771881104 + }, + "roundtrip": { + "p50": 610.5279922485352, + "p90": 679.0080070495605, + "p95": 1436.4479780197144, + "p99": 3666.111946105957 + }, + "isolatedSum": { + "p50": 628.7039816379547, + "p90": 720.8000123500824, + "p95": 866.9120073318481, + "p99": 6618.687868118286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 1041, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + 
"id": "cx-dc799e80", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_8ebb21f3", + "comparisonKey": "3e5f6d278813c98d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:34.893354+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 388.8320028781891, + "p90": 485.3120148181915, + "p95": 2956.5439224243164, + "p99": 3959.6800804138184 + }, + "combine": { + "p50": 152.25599706172943, + "p90": 218.23999285697937, + "p95": 2522.495985031128, + "p99": 3571.647882461548 + }, + "roundtrip": { + "p50": 549.3760108947754, + "p90": 759.3920230865479, + "p95": 3220.896005630493, + "p99": 4283.26416015625 + }, + "isolatedSum": { + "p50": 541.0879999399185, + "p90": 703.5520076751709, + "p95": 5479.039907455444, + "p99": 7531.327962875366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 36, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 383.8079869747162, + "p90": 467.20001101493835, + "p95": 2922.9440689086914, + "p99": 3748.9919662475586 + }, + "combine": { + "p50": 131.23199343681335, + "p90": 192.03199446201324, + "p95": 207.519993185997, + "p99": 2258.8798999786377 + }, + "roundtrip": { + "p50": 574.720025062561, + "p90": 878.6560297012329, + "p95": 3433.6960315704346, + "p99": 4573.887825012207 + }, + "isolatedSum": { + "p50": 515.0399804115295, + "p90": 659.2320054769516, + "p95": 3130.4640620946884, + "p99": 6007.871866226196 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 383.07198882102966, + "p90": 454.0160000324249, + "p95": 2427.1039962768555, + "p99": 3817.919969558716 + }, + "combine": { + "p50": 125.791996717453, + "p90": 191.64800643920898, + "p95": 202.72000133991241, + 
"p99": 2941.0240650177 + }, + "roundtrip": { + "p50": 551.3920187950134, + "p90": 737.887978553772, + "p95": 2861.151933670044, + "p99": 4601.151943206787 + }, + "isolatedSum": { + "p50": 508.86398553848267, + "p90": 645.6640064716339, + "p95": 2629.823997616768, + "p99": 6758.944034576416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 154, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 395.07201313972473, + "p90": 474.91198778152466, + "p95": 672.8320121765137, + "p99": 3463.9999866485596 + }, + "combine": { + "p50": 136.73600554466248, + "p90": 209.98400449752808, + "p95": 245.37600576877594, + "p99": 3182.784080505371 + }, + "roundtrip": { + "p50": 573.5359787940979, + "p90": 694.3039894104004, + "p95": 2984.5759868621826, + "p99": 4347.936153411865 + }, + "isolatedSum": { + "p50": 531.8080186843872, + "p90": 684.8959922790527, + "p95": 918.2080179452896, + "p99": 6646.784067153931 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 310, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 402.43199467658997, + "p90": 489.27998542785645, + "p95": 594.6879982948303, + "p99": 3494.9440956115723 + }, + "combine": { + "p50": 140.19200205802917, + "p90": 212.44800090789795, + "p95": 237.40799725055695, + "p99": 3433.311939239502 + }, + "roundtrip": { + "p50": 568.5759782791138, + "p90": 701.5359997749329, + "p95": 1445.8880424499512, + "p99": 4161.471843719482 + }, + "isolatedSum": { + "p50": 542.6239967346191, + "p90": 701.7279863357544, + "p95": 832.0959955453873, + "p99": 6928.256034851074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + 
"combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 624, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 420.6080138683319, + "p90": 499.61599707603455, + "p95": 840.9919738769531, + "p99": 3382.5600147247314 + }, + "combine": { + "p50": 147.48799800872803, + "p90": 215.64799547195435, + "p95": 460.1919949054718, + "p99": 2984.992027282715 + }, + "roundtrip": { + "p50": 580.0639986991882, + "p90": 726.3360023498535, + "p95": 2673.0239391326904, + "p99": 3965.60001373291 + }, + "isolatedSum": { + "p50": 568.0960118770599, + "p90": 715.2639925479889, + "p95": 1301.183968782425, + "p99": 6367.552042007446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 1255, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 452.5440037250519, + "p90": 569.3439841270447, + "p95": 2144.9921131134033, + "p99": 3132.0960521698 + }, + "combine": { + "p50": 213.85599672794342, + "p90": 226.46400332450867, + "p95": 714.0160202980042, + "p99": 2966.0799503326416 + }, + "roundtrip": { + "p50": 668.7999963760376, + "p90": 746.8159794807434, + "p95": 2756.160020828247, + "p99": 3423.9039421081543 + }, + "isolatedSum": { + "p50": 666.4000004529953, + "p90": 795.8079874515533, + "p95": 2859.0081334114075, + "p99": 6098.176002502441 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 2526, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 538.1119847297668, + "p90": 644.1599726676941, + "p95": 2287.328004837036, + "p99": 3411.7441177368164 + }, + 
"combine": { + "p50": 373.7280070781708, + "p90": 390.78399538993835, + "p95": 666.4000153541565, + "p99": 2300.8639812469482 + }, + "roundtrip": { + "p50": 946.6559886932373, + "p90": 1069.8879957199097, + "p95": 2374.176025390625, + "p99": 3006.432056427002 + }, + "isolatedSum": { + "p50": 911.8399918079376, + "p90": 1034.9439680576324, + "p95": 2953.7280201911926, + "p99": 5712.608098983765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-df73c808", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_02bb352b", + "comparisonKey": "0ce59e743bb9c250", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:04.314180+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 329.3760120868683, + "p90": 417.1839952468872, + "p95": 3050.816059112549, + "p99": 4149.631977081299 + }, + "combine": { + "p50": 125.31200051307678, + "p90": 172.38399386405945, + "p95": 2590.0158882141113, + "p99": 3871.488094329834 + }, + "roundtrip": { + "p50": 503.4880042076111, + "p90": 763.1040215492249, + "p95": 3455.264091491699, + "p99": 4534.976005554199 + }, + "isolatedSum": { + "p50": 454.68801259994507, + "p90": 589.5679891109467, + "p95": 5640.83194732666, + "p99": 8021.120071411133 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 346.6559946537018, + "p90": 425.1840114593506, + "p95": 3005.5038928985596, + "p99": 3931.9679737091064 + }, + "combine": { + "p50": 128.31999361515045, + "p90": 187.9359930753708, + "p95": 637.3119950294495, + "p99": 3797.0879077911377 + }, + "roundtrip": { + "p50": 511.680006980896, + "p90": 694.9120163917542, + "p95": 
3214.207887649536, + "p99": 4287.07218170166 + }, + "isolatedSum": { + "p50": 474.97598826885223, + "p90": 613.1200045347214, + "p95": 3642.815887928009, + "p99": 7729.055881500244 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 346.3999927043915, + "p90": 438.7519955635071, + "p95": 772.8319764137268, + "p99": 3826.1759281158447 + }, + "combine": { + "p50": 126.88000500202179, + "p90": 194.11200284957886, + "p95": 328.4800052642822, + "p99": 3708.415985107422 + }, + "roundtrip": { + "p50": 517.1840190887451, + "p90": 703.1040191650391, + "p95": 2443.4878826141357, + "p99": 4396.383762359619 + }, + "isolatedSum": { + "p50": 473.27999770641327, + "p90": 632.8639984130859, + "p95": 1101.311981678009, + "p99": 7534.591913223267 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 366.4320111274719, + "p90": 436.5760087966919, + "p95": 583.4239721298218, + "p99": 3893.631935119629 + }, + "combine": { + "p50": 123.74400347471237, + "p90": 169.91999745368958, + "p95": 186.11200153827667, + "p99": 3658.751964569092 + }, + "roundtrip": { + "p50": 531.7440032958984, + "p90": 679.4880032539368, + "p95": 2614.9439811706543, + "p99": 4255.839824676514 + }, + "isolatedSum": { + "p50": 490.1760146021843, + "p90": 606.4960062503815, + "p95": 769.5359736680984, + "p99": 7552.383899688721 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 2, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 368.9279854297638, + "p90": 603.4240126609802, + "p95": 2539.232015609741, + "p99": 3809.6001148223877 + }, + "combine": { + "p50": 148.70400726795197, + "p90": 240.92799425125122, + "p95": 285.15198826789856, + "p99": 3542.56010055542 + }, + "roundtrip": { + "p50": 520.5439925193787, + "p90": 685.5360269546509, + "p95": 2958.751916885376, + "p99": 4151.199817657471 + }, + "isolatedSum": { + "p50": 517.6319926977158, + "p90": 844.3520069122314, + "p95": 2824.3840038776398, + "p99": 7352.160215377808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 375.8719861507416, + "p90": 484.41600799560547, + "p95": 2046.112060546875, + "p99": 3801.7919063568115 + }, + "combine": { + "p50": 130.97600638866425, + "p90": 193.24800372123718, + "p95": 252.48000025749207, + "p99": 3440.9279823303223 + }, + "roundtrip": { + "p50": 550.0479936599731, + "p90": 717.2799706459045, + "p95": 2669.5361137390137, + "p99": 4280.00020980835 + }, + "isolatedSum": { + "p50": 506.8479925394058, + "p90": 677.6640117168427, + "p95": 2298.592060804367, + "p99": 7242.719888687134 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 379.7439932823181, + "p90": 441.536009311676, + "p95": 560.0000023841858, + "p99": 3603.6479473114014 + }, + "combine": { + "p50": 147.71200716495514, + "p90": 186.0799938440323, + "p95": 211.67999505996704, + "p99": 3597.4719524383545 + }, + 
"roundtrip": { + "p50": 550.4000186920166, + "p90": 660.3519916534424, + "p95": 2746.7520236968994, + "p99": 4097.343921661377 + }, + "isolatedSum": { + "p50": 527.4560004472733, + "p90": 627.6160031557083, + "p95": 771.6799974441528, + "p99": 7201.119899749756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 392.5760090351105, + "p90": 467.26399660110474, + "p95": 1544.7360277175903, + "p99": 3788.383960723877 + }, + "combine": { + "p50": 232.67200589179993, + "p90": 257.56800174713135, + "p95": 315.96800684928894, + "p99": 3159.9040031433105 + }, + "roundtrip": { + "p50": 620.639979839325, + "p90": 701.4080286026001, + "p95": 2874.3999004364014, + "p99": 3642.1120166778564 + }, + "isolatedSum": { + "p50": 625.2480149269104, + "p90": 724.8319983482361, + "p95": 1860.7040345668793, + "p99": 6948.2879638671875 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6280ee68", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_c1740acb", + "comparisonKey": "361a49708a2669d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:40.779030+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 328.5120129585266, + "p90": 449.3759870529175, + "p95": 3140.415906906128, + "p99": 3801.1839389801025 + }, + "combine": { + "p50": 127.74400413036346, + "p90": 161.9199961423874, + "p95": 205.6960016489029, + "p99": 3170.3040599823 + }, + "roundtrip": { + "p50": 473.6959934234619, + "p90": 734.7840070724487, + "p95": 3377.023935317993, + "p99": 4014.75191116333 + }, + "isolatedSum": { + "p50": 456.2560170888901, + "p90": 611.2959831953049, + "p95": 3346.111908555031, + "p99": 
6971.487998962402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 356.7039966583252, + "p90": 444.92799043655396, + "p95": 2828.0320167541504, + "p99": 3751.231908798218 + }, + "combine": { + "p50": 132.1599930524826, + "p90": 211.45600080490112, + "p95": 239.48800563812256, + "p99": 3067.0080184936523 + }, + "roundtrip": { + "p50": 536.4159941673279, + "p90": 746.5919852256775, + "p95": 3144.6080207824707, + "p99": 3912.9281044006348 + }, + "isolatedSum": { + "p50": 488.8639897108078, + "p90": 656.3839912414551, + "p95": 3067.520022392273, + "p99": 6818.23992729187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 346.6559946537018, + "p90": 469.215989112854, + "p95": 530.7199954986572, + "p99": 3320.768117904663 + }, + "combine": { + "p50": 127.83999741077423, + "p90": 180.35200238227844, + "p95": 202.5279998779297, + "p99": 2894.047975540161 + }, + "roundtrip": { + "p50": 508.0639719963074, + "p90": 626.1759996414185, + "p95": 1263.5200023651123, + "p99": 4122.9119300842285 + }, + "isolatedSum": { + "p50": 474.495992064476, + "p90": 649.5679914951324, + "p95": 733.2479953765869, + "p99": 6214.816093444824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 37, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 373.7280070781708, + "p90": 462.43199706077576, + 
"p95": 2709.088087081909, + "p99": 3646.5280055999756 + }, + "combine": { + "p50": 130.43199479579926, + "p90": 185.02399325370789, + "p95": 196.3520050048828, + "p99": 2952.3520469665527 + }, + "roundtrip": { + "p50": 558.8160157203674, + "p90": 761.3440155982971, + "p95": 2779.1359424591064, + "p99": 3853.1839847564697 + }, + "isolatedSum": { + "p50": 504.16000187397003, + "p90": 647.4559903144836, + "p95": 2905.440092086792, + "p99": 6598.880052566528 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 73, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 362.87999153137207, + "p90": 462.97600865364075, + "p95": 683.4880113601685, + "p99": 3788.0640029907227 + }, + "combine": { + "p50": 127.23200023174286, + "p90": 181.11999332904816, + "p95": 202.62399315834045, + "p99": 3129.2479038238525 + }, + "roundtrip": { + "p50": 557.2800040245056, + "p90": 680.2240014076233, + "p95": 1279.1999578475952, + "p99": 4221.280097961426 + }, + "isolatedSum": { + "p50": 490.11199176311493, + "p90": 644.0960019826889, + "p95": 886.1120045185089, + "p99": 6917.311906814575 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 144, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 401.0559916496277, + "p90": 499.1999864578247, + "p95": 1157.3760509490967, + "p99": 3433.8560104370117 + }, + "combine": { + "p50": 135.45599579811096, + "p90": 197.56799936294556, + "p95": 254.17599081993103, + "p99": 2911.4561080932617 + }, + "roundtrip": { + "p50": 563.103973865509, + "p90": 726.0479927062988, + "p95": 2939.296007156372, + "p99": 4249.023914337158 + }, + "isolatedSum": { + "p50": 
536.5119874477386, + "p90": 696.7679858207703, + "p95": 1411.5520417690277, + "p99": 6345.312118530273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 267, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 389.5680010318756, + "p90": 472.47999906539917, + "p95": 560.8639717102051, + "p99": 3374.9120235443115 + }, + "combine": { + "p50": 145.9520012140274, + "p90": 199.26400482654572, + "p95": 218.62399578094482, + "p99": 2810.7199668884277 + }, + "roundtrip": { + "p50": 569.920003414154, + "p90": 758.2079768180847, + "p95": 2393.9199447631836, + "p99": 3760.607957839966 + }, + "isolatedSum": { + "p50": 535.520002245903, + "p90": 671.7440038919449, + "p95": 779.4879674911499, + "p99": 6185.631990432739 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 529, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 381.3759982585907, + "p90": 459.3920111656189, + "p95": 686.240017414093, + "p99": 3556.9920539855957 + }, + "combine": { + "p50": 234.30399596691132, + "p90": 264.92801308631897, + "p95": 751.9680261611938, + "p99": 3101.5679836273193 + }, + "roundtrip": { + "p50": 597.2480177879333, + "p90": 672.6719737052917, + "p95": 1330.5920362472534, + "p99": 3640.511989593506 + }, + "isolatedSum": { + "p50": 615.679994225502, + "p90": 724.3200242519379, + "p95": 1438.2080435752869, + "p99": 6658.560037612915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 1038, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + 
{ + "id": "cx-f992f1c3", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_2b2aa3d0", + "comparisonKey": "18fc640c2fa7a4d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:38.392769+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 387.32799887657166, + "p90": 432.671993970871, + "p95": 538.5599732398987, + "p99": 3119.9679374694824 + }, + "combine": { + "p50": 234.75199937820435, + "p90": 246.36800587177277, + "p95": 296.7360019683838, + "p99": 2694.175958633423 + }, + "roundtrip": { + "p50": 609.4719767570496, + "p90": 710.6239795684814, + "p95": 2667.3600673675537, + "p99": 3734.7519397735596 + }, + "isolatedSum": { + "p50": 622.079998254776, + "p90": 679.0399998426437, + "p95": 835.2959752082825, + "p99": 5814.143896102905 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 449.8240053653717, + "p90": 528.8320183753967, + "p95": 1401.1839628219604, + "p99": 2872.6398944854736 + }, + "combine": { + "p50": 410.0799858570099, + "p90": 449.66399669647217, + "p95": 1725.9520292282104, + "p99": 2374.016046524048 + }, + "roundtrip": { + "p50": 844.5119857788086, + "p90": 915.1359796524048, + "p95": 2581.4080238342285, + "p99": 3150.3360271453857 + }, + "isolatedSum": { + "p50": 859.9039912223816, + "p90": 978.4960150718689, + "p95": 3127.135992050171, + "p99": 5246.6559410095215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 541.375994682312, + "p90": 611.1680269241333, + "p95": 1959.5199823379517, + "p99": 3090.0800228118896 + }, + "combine": { + "p50": 741.1199808120728, + "p90": 936.735987663269, + "p95": 1441.5359497070312, 
+ "p99": 1848.0639457702637 + }, + "roundtrip": { + "p50": 1267.8719758987427, + "p90": 1549.8239994049072, + "p95": 2418.1759357452393, + "p99": 3017.9519653320312 + }, + "isolatedSum": { + "p50": 1282.4959754943848, + "p90": 1547.9040145874023, + "p95": 3401.055932044983, + "p99": 4938.143968582153 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 4198, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 755.5519938468933, + "p90": 843.0399894714355, + "p95": 1629.5039653778076, + "p99": 2508.5439682006836 + }, + "combine": { + "p50": 1395.967960357666, + "p90": 1468.127965927124, + "p95": 1575.32799243927, + "p99": 1788.640022277832 + }, + "roundtrip": { + "p50": 2108.896017074585, + "p90": 2280.479907989502, + "p95": 2821.3438987731934, + "p99": 3093.1520462036133 + }, + "isolatedSum": { + "p50": 2151.5199542045593, + "p90": 2311.1679553985596, + "p95": 3204.8319578170776, + "p99": 4297.183990478516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8294, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1103.8719415664673, + "p90": 1471.2320566177368, + "p95": 2003.5839080810547, + "p99": 2511.392116546631 + }, + "combine": { + "p50": 2680.1600456237793, + "p90": 2708.224058151245, + "p95": 2719.072103500366, + "p99": 2752.8319358825684 + }, + "roundtrip": { + "p50": 3711.2319469451904, + "p90": 3787.584066390991, + "p95": 3922.2400188446045, + "p99": 4298.175811767578 + }, + "isolatedSum": { + "p50": 3784.0319871902466, + "p90": 4179.456114768982, + "p95": 4722.656011581421, + "p99": 5264.224052429199 + }, + "roundtripMeasured": true, 
+ "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1861.1199855804443, + "p90": 2001.4400482177734, + "p95": 2101.599931716919, + "p99": 2638.592004776001 + }, + "combine": { + "p50": 5239.999771118164, + "p90": 5303.232192993164, + "p95": 5328.896045684814, + "p99": 5510.303974151611 + }, + "roundtrip": { + "p50": 6975.359916687012, + "p90": 7049.888134002686, + "p95": 7074.336051940918, + "p99": 7342.751979827881 + }, + "isolatedSum": { + "p50": 7101.119756698608, + "p90": 7304.6722412109375, + "p95": 7430.495977401733, + "p99": 8148.895978927612 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 32967, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fb462f1e", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h200_ba6963a8", + "comparisonKey": "ff77707c6c308a75", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:51.458884+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": 
"balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 414.88000750541687, + "p90": 497.0560073852539, + "p95": 924.4800209999084, + "p99": 3019.7761058807373 + }, + "combine": { + "p50": 233.024001121521, + "p90": 261.85598969459534, + "p95": 297.791987657547, + "p99": 2691.0719871520996 + }, + "roundtrip": { + "p50": 642.5920128822327, + "p90": 717.7600264549255, + "p95": 2473.3119010925293, + "p99": 3550.20809173584 + }, + "isolatedSum": { + "p50": 647.9040086269379, + "p90": 758.9119970798492, + "p95": 1222.2720086574554, + "p99": 5710.848093032837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 
2048, + "dispatch": { + "p50": 461.85600757598877, + "p90": 577.2160291671753, + "p95": 1432.8320026397705, + "p99": 2944.0319538116455 + }, + "combine": { + "p50": 426.2399971485138, + "p90": 490.3680086135864, + "p95": 1759.935975074768, + "p99": 2325.5999088287354 + }, + "roundtrip": { + "p50": 875.2959966659546, + "p90": 1064.255952835083, + "p95": 2406.1439037323, + "p99": 3099.1039276123047 + }, + "isolatedSum": { + "p50": 888.0960047245026, + "p90": 1067.5840377807617, + "p95": 3192.7679777145386, + "p99": 5269.631862640381 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 583.6799740791321, + "p90": 644.5440053939819, + "p95": 1648.3839750289917, + "p99": 2741.6958808898926 + }, + "combine": { + "p50": 748.7999796867371, + "p90": 969.0880179405212, + "p95": 1462.2399806976318, + "p99": 1904.0640592575073 + }, + "roundtrip": { + "p50": 1302.4959564208984, + "p90": 1604.0960550308228, + "p95": 2465.183973312378, + "p99": 2900.8638858795166 + }, + "isolatedSum": { + "p50": 1332.4799537658691, + "p90": 1613.6320233345032, + "p95": 3110.6239557266235, + "p99": 4645.7599401474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 791.8720245361328, + "p90": 880.9279799461365, + "p95": 1928.928017616272, + "p99": 2483.776092529297 + }, + "combine": { + "p50": 1395.7439661026, + "p90": 1438.1439685821533, + "p95": 1520.3839540481567, + "p99": 1912.160038948059 + }, + "roundtrip": { + "p50": 2153.280019760132, + "p90": 2333.120107650757, + "p95": 
2720.031976699829, + "p99": 3256.9921016693115 + }, + "isolatedSum": { + "p50": 2187.615990638733, + "p90": 2319.07194852829, + "p95": 3449.3119716644287, + "p99": 4395.936131477356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1147.7760076522827, + "p90": 1299.9039888381958, + "p95": 1853.216052055359, + "p99": 2366.1439418792725 + }, + "combine": { + "p50": 2678.4958839416504, + "p90": 2716.991901397705, + "p95": 2729.5680046081543, + "p99": 2801.2800216674805 + }, + "roundtrip": { + "p50": 3744.2240715026855, + "p90": 3823.7760066986084, + "p95": 3941.8880939483643, + "p99": 4311.423778533936 + }, + "isolatedSum": { + "p50": 3826.271891593933, + "p90": 4016.895890235901, + "p95": 4582.784056663513, + "p99": 5167.423963546753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1896.2559700012207, + "p90": 2021.5039253234863, + "p95": 2253.82399559021, + "p99": 2591.104030609131 + }, + "combine": { + "p50": 5244.575977325439, + "p90": 5299.935817718506, + "p95": 5330.687999725342, + "p99": 5409.535884857178 + }, + "roundtrip": { + "p50": 6994.368076324463, + "p90": 7073.855876922607, + "p95": 7114.912033081055, + "p99": 7324.3842124938965 + }, + "isolatedSum": { + "p50": 7140.83194732666, + "p90": 7321.439743041992, + "p95": 7584.511995315552, + "p99": 8000.639915466309 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 
4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-5c4a2404", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h200_01a7820b", + "comparisonKey": "ce82927e73a99c4d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:48.733565+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 375.0720024108887, + "p90": 416.3840115070343, + "p95": 537.6960039138794, + "p99": 2837.1520042419434 + }, + "combine": { + "p50": 229.5680046081543, + "p90": 243.58400702476501, + "p95": 258.08000564575195, + "p99": 3075.4239559173584 + }, + "roundtrip": { + "p50": 582.8480124473572, + "p90": 659.2640280723572, + "p95": 1677.8559684753418, + "p99": 3727.1358966827393 + }, + "isolatedSum": { + "p50": 604.640007019043, + "p90": 659.9680185317993, + "p95": 795.7760095596313, + "p99": 5912.575960159302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 515.9040093421936, + "p90": 594.5919752120972, + "p95": 807.6159954071045, + "p99": 3068.511962890625 + }, + "combine": { + "p50": 731.9999933242798, + "p90": 797.6959943771362, + "p95": 1420.415997505188, + "p99": 2011.0080242156982 + }, + "roundtrip": { + "p50": 1224.5440483093262, + "p90": 1314.0480518341064, + "p95": 2412.447929382324, + "p99": 2907.5839519500732 + }, + "isolatedSum": { + "p50": 1247.9040026664734, + "p90": 1392.2879695892334, + "p95": 2228.0319929122925, + "p99": 5079.519987106323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1092.0000076293945, + "p90": 1269.6959972381592, + "p95": 2095.936059951782, + "p99": 2520.416021347046 + 
}, + "combine": { + "p50": 2669.408082962036, + "p90": 2698.3680725097656, + "p95": 2716.1600589752197, + "p99": 2877.1519660949707 + }, + "roundtrip": { + "p50": 3689.7599697113037, + "p90": 3802.9439449310303, + "p95": 3951.4238834381104, + "p99": 4344.480037689209 + }, + "isolatedSum": { + "p50": 3761.4080905914307, + "p90": 3968.064069747925, + "p95": 4812.096118927002, + "p99": 5397.567987442017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 16384, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-df31e675", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h200_fa518406", + "comparisonKey": "24b40dd01fd02639", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:11.380638+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": 
"best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 400.7039964199066, + "p90": 445.1200067996979, + "p95": 577.6320099830627, + "p99": 2938.6560916900635 + }, + "combine": { + "p50": 239.07199501991272, + "p90": 256.51198625564575, + "p95": 291.6159927845001, + "p99": 2621.119976043701 + }, + "roundtrip": { + "p50": 614.2399907112122, + "p90": 685.9840154647827, + "p95": 806.9120049476624, + "p99": 3304.9280643463135 + }, + "isolatedSum": { + "p50": 639.7759914398193, + "p90": 701.6319930553436, + "p95": 869.2480027675629, + "p99": 5559.776067733765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 1152, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 456.09599351882935, + "p90": 529.2159914970398, + "p95": 1363.968014717102, + "p99": 2897.3119258880615 + }, + "combine": { + "p50": 414.94399309158325, + "p90": 434.04799699783325, + "p95": 1763.4880542755127, + "p99": 2364.192008972168 + }, + "roundtrip": { + "p50": 856.6719889640808, + "p90": 934.656023979187, + "p95": 2498.8479614257812, + "p99": 3064.5759105682373 
+ }, + "isolatedSum": { + "p50": 871.0399866104126, + "p90": 963.263988494873, + "p95": 3127.4560689926147, + "p99": 5261.5039348602295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 2304, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 555.7760000228882, + "p90": 639.3280029296875, + "p95": 1863.327980041504, + "p99": 2834.752082824707 + }, + "combine": { + "p50": 750.495970249176, + "p90": 1079.584002494812, + "p95": 1469.0879583358765, + "p99": 1831.4239978790283 + }, + "roundtrip": { + "p50": 1293.4080362319946, + "p90": 1870.9440231323242, + "p95": 2473.088026046753, + "p99": 2905.3120613098145 + }, + "isolatedSum": { + "p50": 1306.2719702720642, + "p90": 1718.9120054244995, + "p95": 3332.4159383773804, + "p99": 4666.176080703735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 4608, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 784.5759987831116, + "p90": 893.2480216026306, + "p95": 1970.6560373306274, + "p99": 2343.264102935791 + }, + "combine": { + "p50": 1409.600019454956, + "p90": 1442.8160190582275, + "p95": 1493.183970451355, + "p99": 1756.8320035934448 + }, + "roundtrip": { + "p50": 2175.8079528808594, + "p90": 2301.471948623657, + "p95": 2764.575958251953, + "p99": 3102.7519702911377 + }, + "isolatedSum": { + "p50": 2194.1760182380676, + "p90": 2336.064040660858, + "p95": 3463.8400077819824, + "p99": 4100.096106529236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 9216, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1189.568042755127, + "p90": 1370.8159923553467, + "p95": 1843.7119722366333, + "p99": 2262.0160579681396 + }, + "combine": { + "p50": 2718.6241149902344, + "p90": 2756.5441131591797, + "p95": 2767.712116241455, + "p99": 2799.743890762329 + }, + "roundtrip": { + "p50": 3854.559898376465, + "p90": 3931.583881378174, + "p95": 4024.576187133789, + "p99": 4502.336025238037 + }, + "isolatedSum": { + "p50": 3908.1921577453613, + "p90": 4127.360105514526, + "p95": 4611.424088478088, + "p99": 5061.759948730469 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 18432, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2026.047945022583, + "p90": 2212.671995162964, + "p95": 2306.2078952789307, + "p99": 2589.344024658203 + }, + "combine": { + "p50": 5336.415767669678, + "p90": 5410.943984985352, + "p95": 5451.776027679443, + "p99": 5495.071887969971 + }, + "roundtrip": { + "p50": 7256.51216506958, + "p90": 7364.6721839904785, + "p95": 7397.247791290283, + "p99": 7516.448020935059 + }, + "isolatedSum": { + "p50": 7362.463712692261, + "p90": 7623.615980148315, + "p95": 7757.983922958374, + "p99": 8084.415912628174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 36864, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-89f4d9be", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h200_d556a532", + "comparisonKey": "28d65d4c47b78e0c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:42.768796+00:00", + "status": 
"valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 427.6160001754761, + "p90": 497.18400835990906, + "p95": 925.7599711418152, + "p99": 3381.1841011047363 + }, + "combine": { + "p50": 254.68799471855164, + "p90": 
272.8640139102936, + "p95": 434.87998843193054, + "p99": 2874.9759197235107 + }, + "roundtrip": { + "p50": 682.6879978179932, + "p90": 744.2560195922852, + "p95": 2640.160083770752, + "p99": 3513.5040283203125 + }, + "isolatedSum": { + "p50": 682.3039948940277, + "p90": 770.0480222702026, + "p95": 1360.6399595737457, + "p99": 6256.160020828247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1874, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 492.6399886608124, + "p90": 542.4320101737976, + "p95": 1122.3360300064087, + "p99": 2831.9039344787598 + }, + "combine": { + "p50": 460.7039988040924, + "p90": 476.639986038208, + "p95": 1566.912055015564, + "p99": 2427.295923233032 + }, + "roundtrip": { + "p50": 966.5279984474182, + "p90": 1032.4159860610962, + "p95": 2437.9520416259766, + "p99": 3036.1599922180176 + }, + "isolatedSum": { + "p50": 953.3439874649048, + "p90": 1019.0719962120056, + "p95": 2689.2480850219727, + "p99": 5259.199857711792 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 3755, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 657.2800278663635, + "p90": 745.631992816925, + "p95": 2010.6561183929443, + "p99": 2605.664014816284 + }, + "combine": { + "p50": 854.2400002479553, + "p90": 1018.3680057525635, + "p95": 1399.1999626159668, + "p99": 1779.263973236084 + }, + "roundtrip": { + "p50": 1510.815978050232, + "p90": 2108.736038208008, + "p95": 2581.727981567383, + "p99": 3029.6640396118164 + }, + "isolatedSum": { + "p50": 1511.5200281143188, + "p90": 1763.9999985694885, + "p95": 3409.856081008911, + "p99": 4384.927988052368 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 7556, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 977.183997631073, + "p90": 1157.7279567718506, + "p95": 1896.448016166687, + "p99": 2405.280113220215 + }, + "combine": { + "p50": 1636.2559795379639, + "p90": 1655.6799411773682, + "p95": 1662.719964981079, + "p99": 1772.447943687439 + }, + "roundtrip": { + "p50": 2586.8160724639893, + "p90": 2656.575918197632, + "p95": 2917.9840087890625, + "p99": 3412.7678871154785 + }, + "isolatedSum": { + "p50": 2613.439977169037, + "p90": 2813.4078979492188, + "p95": 3559.167981147766, + "p99": 4177.728056907654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 15163, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1539.680004119873, + "p90": 1719.8400497436523, + "p95": 1824.0959644317627, + "p99": 2271.967887878418 + }, + "combine": { + "p50": 3192.512035369873, + "p90": 3220.2560901641846, + "p95": 3228.447914123535, + "p99": 3263.0081176757812 + }, + "roundtrip": { + "p50": 4650.591850280762, + "p90": 4798.431873321533, + "p95": 4863.3599281311035, + "p99": 5076.543807983398 + }, + "isolatedSum": { + "p50": 4732.192039489746, + "p90": 4940.096139907837, + "p95": 5052.543878555298, + "p99": 5534.976005554199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 30215, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 
2942.5599575042725, + "p90": 3006.432056427002, + "p95": 3051.7759323120117, + "p99": 3222.71990776062 + }, + "combine": { + "p50": 6238.495826721191, + "p90": 6395.199775695801, + "p95": 6426.33581161499, + "p99": 6462.592124938965 + }, + "roundtrip": { + "p50": 8867.839813232422, + "p90": 9034.111976623535, + "p95": 9072.863578796387, + "p99": 9174.464225769043 + }, + "isolatedSum": { + "p50": 9181.055784225464, + "p90": 9401.631832122803, + "p95": 9478.111743927002, + "p99": 9685.312032699585 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 60512, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3eb371f5", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h200_3ac0b8bc", + "comparisonKey": "370a32d562036f7f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:28.579039+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, 
+ "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 383.39200615882874, + "p90": 429.76000905036926, + "p95": 547.872006893158, + "p99": 2915.9998893737793 + }, + "combine": { + "p50": 235.83999276161194, + "p90": 249.05599653720856, + "p95": 332.5119912624359, + "p99": 3186.3040924072266 + }, + "roundtrip": { + "p50": 589.4719958305359, + "p90": 679.7119975090027, + "p95": 2920.4800128936768, + "p99": 3755.840063095093 + }, + "isolatedSum": { + "p50": 619.2319989204407, + "p90": 678.8160055875778, + "p95": 880.3839981555939, + "p99": 6102.303981781006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 1080, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 420.6719994544983, + "p90": 464.5119905471802, + "p95": 602.7200222015381, + "p99": 3031.712055206299 + }, + "combine": { + "p50": 406.39999508857727, + "p90": 423.16800355911255, + "p95": 
1745.568037033081, + "p99": 2466.8478965759277 + }, + "roundtrip": { + "p50": 810.5279803276062, + "p90": 879.7119855880737, + "p95": 2702.336072921753, + "p99": 3286.144018173218 + }, + "isolatedSum": { + "p50": 827.0719945430756, + "p90": 887.6799941062927, + "p95": 2348.288059234619, + "p99": 5498.559951782227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 2102, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 512.1920108795166, + "p90": 587.9359841346741, + "p95": 1853.279948234558, + "p99": 3165.6320095062256 + }, + "combine": { + "p50": 736.191987991333, + "p90": 1124.9920129776, + "p95": 1555.6800365447998, + "p99": 2200.831890106201 + }, + "roundtrip": { + "p50": 1227.0079851150513, + "p90": 1339.2959833145142, + "p95": 2509.183883666992, + "p99": 2989.3438816070557 + }, + "isolatedSum": { + "p50": 1248.3839988708496, + "p90": 1712.9279971122742, + "p95": 3408.959984779358, + "p99": 5366.463899612427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 4207, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 730.239987373352, + "p90": 847.3600149154663, + "p95": 1730.847954750061, + "p99": 2638.9119625091553 + }, + "combine": { + "p50": 1387.7760171890259, + "p90": 1425.6960153579712, + "p95": 1508.895993232727, + "p99": 2087.455987930298 + }, + "roundtrip": { + "p50": 2082.5600624084473, + "p90": 2218.463897705078, + "p95": 2917.4718856811523, + "p99": 3305.504083633423 + }, + "isolatedSum": { + "p50": 2118.016004562378, + "p90": 2273.0560302734375, + "p95": 3239.743947982788, + "p99": 4726.367950439453 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 8365, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1102.07998752594, + "p90": 1495.743989944458, + "p95": 2016.8960094451904, + "p99": 2617.471933364868 + }, + "combine": { + "p50": 2674.5920181274414, + "p90": 2705.8560848236084, + "p95": 2712.7039432525635, + "p99": 2765.3119564056396 + }, + "roundtrip": { + "p50": 3696.51198387146, + "p90": 3797.9838848114014, + "p95": 3933.3438873291016, + "p99": 4463.808059692383 + }, + "isolatedSum": { + "p50": 3776.6720056533813, + "p90": 4201.600074768066, + "p95": 4729.599952697754, + "p99": 5382.783889770508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 16483, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1831.2959671020508, + "p90": 2006.688117980957, + "p95": 2247.136116027832, + "p99": 2635.1358890533447 + }, + "combine": { + "p50": 5236.544132232666, + "p90": 5288.959980010986, + "p95": 5310.207843780518, + "p99": 5373.40784072876 + }, + "roundtrip": { + "p50": 6957.856178283691, + "p90": 7030.752182006836, + "p95": 7078.7200927734375, + "p99": 7367.040157318115 + }, + "isolatedSum": { + "p50": 7067.840099334717, + "p90": 7295.648097991943, + "p95": 7557.34395980835, + "p99": 8008.5437297821045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 32777, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-37005ff2", + "identity": 
"h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h200_8507665e", + "comparisonKey": "51f5aad39a343d82", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:57.221327+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 389.15199041366577, + "p90": 430.84800243377686, + "p95": 558.0800175666809, + "p99": 2786.56005859375 + }, + "combine": { + "p50": 233.37599635124207, + "p90": 252.73600220680237, + "p95": 2040.0640964508057, + "p99": 3183.39204788208 + }, + "roundtrip": { + "p50": 602.2400259971619, + "p90": 680.7039976119995, + "p95": 2835.0400924682617, + "p99": 3640.7999992370605 + }, + "isolatedSum": { + "p50": 622.5279867649078, + "p90": 683.5840046405792, + "p95": 2598.1441140174866, + "p99": 5969.95210647583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 1064, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 439.2000138759613, + "p90": 523.4879851341248, + "p95": 2003.999948501587, + "p99": 3044.3201065063477 + }, + "combine": { + "p50": 409.7599983215332, + "p90": 438.975989818573, + "p95": 1930.3679466247559, + "p99": 2445.215940475464 + }, + "roundtrip": { + "p50": 833.1199884414673, + "p90": 938.2719993591309, + "p95": 2542.304039001465, + "p99": 3300.704002380371 + }, + "isolatedSum": { + "p50": 848.9600121974945, + "p90": 962.4639749526978, + "p95": 3934.367895126343, + "p99": 5489.5360469818115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 2081, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 537.5040173530579, + "p90": 641.5359973907471, + "p95": 1811.743974685669, + "p99": 2912.992000579834 + }, + "combine": { + "p50": 739.4559979438782, + "p90": 1160.9280109405518, 
+ "p95": 1483.456015586853, + "p99": 1974.6559858322144 + }, + "roundtrip": { + "p50": 1259.9680423736572, + "p90": 1952.8640508651733, + "p95": 2517.1520709991455, + "p99": 2956.7999839782715 + }, + "isolatedSum": { + "p50": 1276.960015296936, + "p90": 1802.4640083312988, + "p95": 3295.199990272522, + "p99": 4887.647986412048 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 4153, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 744.2880272865295, + "p90": 849.3120074272156, + "p95": 2033.600091934204, + "p99": 2574.176073074341 + }, + "combine": { + "p50": 1394.1760063171387, + "p90": 1440.7039880752563, + "p95": 1674.4639873504639, + "p99": 2024.3520736694336 + }, + "roundtrip": { + "p50": 2111.1679077148438, + "p90": 2217.4720764160156, + "p95": 2770.848035812378, + "p99": 3176.2239933013916 + }, + "isolatedSum": { + "p50": 2138.464033603668, + "p90": 2290.015995502472, + "p95": 3708.064079284668, + "p99": 4598.528146743774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 8313, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1113.7280464172363, + "p90": 1262.3679637908936, + "p95": 2037.1520519256592, + "p99": 2453.439950942993 + }, + "combine": { + "p50": 2676.5758991241455, + "p90": 2706.1119079589844, + "p95": 2721.3759422302246, + "p99": 2755.359888076782 + }, + "roundtrip": { + "p50": 3718.8479900360107, + "p90": 3804.6720027923584, + "p95": 3898.207902908325, + "p99": 4362.207889556885 + }, + "isolatedSum": { + "p50": 3790.303945541382, + "p90": 3968.479871749878, + "p95": 4758.527994155884, + "p99": 5208.799839019775 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 16581, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1833.024024963379, + "p90": 1969.1200256347656, + "p95": 2133.471965789795, + "p99": 2644.831895828247 + }, + "combine": { + "p50": 5237.40816116333, + "p90": 5282.879829406738, + "p95": 5296.0638999938965, + "p99": 5342.27180480957 + }, + "roundtrip": { + "p50": 6959.743976593018, + "p90": 7024.415969848633, + "p95": 7065.152168273926, + "p99": 7310.944080352783 + }, + "isolatedSum": { + "p50": 7070.432186126709, + "p90": 7251.999855041504, + "p95": 7429.535865783691, + "p99": 7987.103700637817 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 32887, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-67797711", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_4443fc93", + "comparisonKey": "1e8d14e4c7fc77e9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:28.398529+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + 
"routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 508.8319778442383, + "p90": 569.2160129547119, + "p95": 764.6399736404419, + "p99": 2863.136053085327 + }, + "combine": { + "p50": 371.39201164245605, + "p90": 387.5519931316376, + "p95": 1826.751947402954, + "p99": 2250.0159740448 + }, + "roundtrip": { + "p50": 874.5920062065125, + "p90": 979.7760248184204, + "p95": 2448.0960369110107, + "p99": 2974.976062774658 + }, + "isolatedSum": { + "p50": 880.2239894866943, + "p90": 956.7680060863495, + "p95": 2591.391921043396, + "p99": 5113.152027130127 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + 
"globalTokens": 2048, + "dispatch": { + "p50": 695.3279972076416, + "p90": 854.7199964523315, + "p95": 1716.9280052185059, + "p99": 2463.0401134490967 + }, + "combine": { + "p50": 694.0799951553345, + "p90": 732.7039837837219, + "p95": 1137.2480392456055, + "p99": 1522.4640369415283 + }, + "roundtrip": { + "p50": 1381.6959857940674, + "p90": 1771.9039916992188, + "p95": 2261.023998260498, + "p99": 2831.808090209961 + }, + "isolatedSum": { + "p50": 1389.407992362976, + "p90": 1587.4239802360535, + "p95": 2854.1760444641113, + "p99": 3985.504150390625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 1061.3759756088257, + "p90": 1391.8720483779907, + "p95": 1721.5360403060913, + "p99": 2211.2319469451904 + }, + "combine": { + "p50": 1325.4079818725586, + "p90": 1343.9680337905884, + "p95": 1348.8960266113281, + "p99": 1370.1759576797485 + }, + "roundtrip": { + "p50": 2328.9918899536133, + "p90": 2427.743911743164, + "p95": 2640.9919261932373, + "p99": 2898.5280990600586 + }, + "isolatedSum": { + "p50": 2386.7839574813843, + "p90": 2735.840082168579, + "p95": 3070.4320669174194, + "p99": 3581.407904624939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1789.5679473876953, + "p90": 1883.3279609680176, + "p95": 1962.2399806976318, + "p99": 2099.8079776763916 + }, + "combine": { + "p50": 2463.200092315674, + "p90": 2585.2160453796387, + "p95": 2597.440004348755, + "p99": 2610.81600189209 + }, + "roundtrip": { + "p50": 
4215.712070465088, + "p90": 4339.327812194824, + "p95": 4366.27197265625, + "p99": 4420.159816741943 + }, + "isolatedSum": { + "p50": 4252.768039703369, + "p90": 4468.544006347656, + "p95": 4559.679985046387, + "p99": 4710.623979568481 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 40211, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 3239.0079498291016, + "p90": 3299.9680042266846, + "p95": 3353.1200885772705, + "p99": 3514.080047607422 + }, + "combine": { + "p50": 4919.77596282959, + "p90": 5004.576206207275, + "p95": 5012.703895568848, + "p99": 5031.519889831543 + }, + "roundtrip": { + "p50": 8021.696090698242, + "p90": 8077.312469482422, + "p95": 8120.351791381836, + "p99": 8394.68765258789 + }, + "isolatedSum": { + "p50": 8158.783912658691, + "p90": 8304.54421043396, + "p95": 8365.823984146118, + "p99": 8545.599937438965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 6750.6561279296875, + "p90": 6866.720199584961, + "p95": 7064.352035522461, + "p99": 7120.895862579346 + }, + "combine": { + "p50": 10303.359985351562, + "p90": 10380.99193572998, + "p95": 10399.264335632324, + "p99": 10443.488121032715 + }, + "roundtrip": { + "p50": 16895.16830444336, + "p90": 16946.624755859375, + "p95": 16966.91131591797, + "p99": 17006.656646728516 + }, + "isolatedSum": { + "p50": 17054.01611328125, + "p90": 17247.71213531494, + "p95": 17463.616371154785, + "p99": 17564.38398361206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + 
"combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6fb9347f", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h200_52e1dea9", + "comparisonKey": "b7a35de37a347834", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:21.974071+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 608.3840131759644, + "p90": 728.7039756774902, + "p95": 1978.0479669570923, + "p99": 2648.3840942382812 + }, + "combine": { + "p50": 471.0400104522705, + "p90": 486.5280091762543, + "p95": 1481.9519519805908, + "p99": 1964.2879962921143 + }, + "roundtrip": { + "p50": 1041.8879985809326, + "p90": 1134.1760158538818, + "p95": 2300.096035003662, + "p99": 2878.688097000122 + }, + "isolatedSum": { + "p50": 1079.4240236282349, + "p90": 1215.2319848537445, + "p95": 3459.999918937683, + "p99": 4612.6720905303955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 7584, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 886.7520093917847, + "p90": 1136.031985282898, + "p95": 1662.4959707260132, + "p99": 2234.4000339508057 + }, + "combine": { + "p50": 895.8399891853333, + "p90": 929.9520254135132, + "p95": 942.9439902305603, + "p99": 1307.2960376739502 + }, + "roundtrip": { + "p50": 1723.3920097351074, + "p90": 1893.3440446853638, + "p95": 2284.320116043091, + "p99": 2785.2160930633545 + }, + "isolatedSum": { + "p50": 1782.591998577118, + "p90": 2065.984010696411, + "p95": 2605.4399609565735, + "p99": 3541.696071624756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + "combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 15151, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + 
"p50": 1428.544044494629, + "p90": 1638.1440162658691, + "p95": 1768.1599855422974, + "p99": 1996.1600303649902 + }, + "combine": { + "p50": 1720.0000286102295, + "p90": 1758.239984512329, + "p95": 1772.4159955978394, + "p99": 1799.7119426727295 + }, + "roundtrip": { + "p50": 2998.016119003296, + "p90": 3160.1920127868652, + "p95": 3204.3840885162354, + "p99": 3397.5040912628174 + }, + "isolatedSum": { + "p50": 3148.5440731048584, + "p90": 3396.3840007781982, + "p95": 3540.5759811401367, + "p99": 3795.8719730377197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 30290, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 2520.3840732574463, + "p90": 2619.3599700927734, + "p95": 2648.0000019073486, + "p99": 2746.687889099121 + }, + "combine": { + "p50": 3213.3119106292725, + "p90": 3386.944055557251, + "p95": 3404.47998046875, + "p99": 3441.632032394409 + }, + "roundtrip": { + "p50": 5608.960151672363, + "p90": 5825.7598876953125, + "p95": 5864.160060882568, + "p99": 5964.57576751709 + }, + "isolatedSum": { + "p50": 5733.695983886719, + "p90": 6006.304025650024, + "p95": 6052.479982376099, + "p99": 6188.31992149353 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 60548, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 4566.8158531188965, + "p90": 4607.232093811035, + "p95": 4646.592140197754, + "p99": 4879.744052886963 + }, + "combine": { + "p50": 6367.743968963623, + "p90": 6501.247882843018, + "p95": 6516.287803649902, + "p99": 6533.952236175537 + }, + "roundtrip": { + "p50": 10811.296463012695, + "p90": 10897.024154663086, + "p95": 
10930.335998535156, + "p99": 11137.887954711914 + }, + "isolatedSum": { + "p50": 10934.55982208252, + "p90": 11108.479976654053, + "p95": 11162.879943847656, + "p99": 11413.6962890625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 121046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 8818.592071533203, + "p90": 8854.975700378418, + "p95": 8872.639656066895, + "p99": 8952.320098876953 + }, + "combine": { + "p50": 12663.680076599121, + "p90": 12723.008155822754, + "p95": 12740.73600769043, + "p99": 12774.1117477417 + }, + "roundtrip": { + "p50": 21279.808044433594, + "p90": 21388.927459716797, + "p95": 21421.31233215332, + "p99": 21530.975341796875 + }, + "isolatedSum": { + "p50": 21482.272148132324, + "p90": 21577.983856201172, + "p95": 21613.375663757324, + "p99": 21726.431846618652 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 242154, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3127a362", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h200_13ea7365", + "comparisonKey": "cf4cfb3cb2c94ba2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:14.365211+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": 
"nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 402.17599272727966, + "p90": 475.5519926548004, + "p95": 1424.1600036621094, + "p99": 3256.351947784424 + }, + "combine": { + "p50": 233.8239997625351, + "p90": 255.71200251579285, + "p95": 292.2559976577759, + "p99": 2880.1920413970947 + }, + "roundtrip": { + "p50": 628.928005695343, + "p90": 706.5920233726501, + "p95": 2724.2560386657715, + "p99": 3503.391981124878 + }, + "isolatedSum": { + "p50": 635.9999924898148, + "p90": 731.2639951705933, + "p95": 1716.4160013198853, + "p99": 6136.543989181519 
+ }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 1049, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 461.40798926353455, + "p90": 540.1279926300049, + "p95": 1124.4479417800903, + "p99": 2815.48810005188 + }, + "combine": { + "p50": 412.7039909362793, + "p90": 439.5520091056824, + "p95": 1724.2239713668823, + "p99": 2529.439926147461 + }, + "roundtrip": { + "p50": 857.7600121498108, + "p90": 962.6560211181641, + "p95": 2671.6160774230957, + "p99": 3120.192050933838 + }, + "isolatedSum": { + "p50": 874.1119801998138, + "p90": 979.6800017356873, + "p95": 2848.6719131469727, + "p99": 5344.928026199341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 2084, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 567.8719878196716, + "p90": 763.0079984664917, + "p95": 2129.120111465454, + "p99": 2721.4720249176025 + }, + "combine": { + "p50": 749.2480278015137, + "p90": 960.864007472992, + "p95": 1543.8400506973267, + "p99": 2104.0639877319336 + }, + "roundtrip": { + "p50": 1283.136010169983, + "p90": 1403.39195728302, + "p95": 2369.920015335083, + "p99": 2842.6880836486816 + }, + "isolatedSum": { + "p50": 1317.1200156211853, + "p90": 1723.8720059394836, + "p95": 3672.9601621627808, + "p99": 4825.536012649536 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 4126, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 767.5840258598328, 
+ "p90": 867.2000169754028, + "p95": 2096.3199138641357, + "p99": 2519.9038982391357 + }, + "combine": { + "p50": 1396.5760469436646, + "p90": 1444.543957710266, + "p95": 1630.2080154418945, + "p99": 1883.2639455795288 + }, + "roundtrip": { + "p50": 2143.6800956726074, + "p90": 2275.8400440216064, + "p95": 2960.8960151672363, + "p99": 3250.080108642578 + }, + "isolatedSum": { + "p50": 2164.1600728034973, + "p90": 2311.743974685669, + "p95": 3726.5279293060303, + "p99": 4403.167843818665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 8234, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1139.1680240631104, + "p90": 1559.775948524475, + "p95": 2088.8640880584717, + "p99": 2571.4240074157715 + }, + "combine": { + "p50": 2682.111978530884, + "p90": 2712.2559547424316, + "p95": 2725.0239849090576, + "p99": 2843.35994720459 + }, + "roundtrip": { + "p50": 3746.7520236968994, + "p90": 3834.7840309143066, + "p95": 4198.976039886475, + "p99": 4509.535789489746 + }, + "isolatedSum": { + "p50": 3821.280002593994, + "p90": 4272.031903266907, + "p95": 4813.888072967529, + "p99": 5414.783954620361 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 16480, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1864.2239570617676, + "p90": 1995.3279495239258, + "p95": 2159.008026123047, + "p99": 2644.7041034698486 + }, + "combine": { + "p50": 5241.663932800293, + "p90": 5296.351909637451, + "p95": 5309.055805206299, + "p99": 5354.720115661621 + }, + "roundtrip": { + "p50": 6986.623764038086, + "p90": 7045.152187347412, + "p95": 7077.023983001709, 
+ "p99": 7285.888195037842 + }, + "isolatedSum": { + "p50": 7105.887889862061, + "p90": 7291.679859161377, + "p95": 7468.063831329346, + "p99": 7999.42421913147 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 32889, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1832f795", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h200_e57b73c4", + "comparisonKey": "ab6ea51726bc5763", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:19.458163+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": 
null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 422.432005405426, + "p90": 510.97601652145386, + "p95": 653.6960005760193, + "p99": 3102.303981781006 + }, + "combine": { + "p50": 281.8560004234314, + "p90": 292.9919958114624, + "p95": 344.7999954223633, + "p99": 2614.8159503936768 + }, + "roundtrip": { + "p50": 697.8880167007446, + "p90": 790.880024433136, + "p95": 2392.064094543457, + "p99": 3250.0479221343994 + }, + "isolatedSum": { + "p50": 704.2880058288574, + "p90": 803.9680123329163, + "p95": 998.4959959983826, + "p99": 5717.119932174683 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 2676, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 530.784010887146, + "p90": 614.4639849662781, + "p95": 1429.2160272598267, + "p99": 2646.656036376953 + }, + "combine": { + "p50": 512.5759840011597, + "p90": 534.7200036048889, + "p95": 1565.0559663772583, + "p99": 2243.648052215576 + }, + "roundtrip": { + "p50": 1036.9600057601929, + "p90": 1134.3040466308594, + "p95": 2177.056074142456, + "p99": 2867.072105407715 + }, + "isolatedSum": { + "p50": 1043.3599948883057, + "p90": 1149.183988571167, + "p95": 2994.271993637085, + "p99": 4890.304088592529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 
140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 5302, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 723.8720059394836, + "p90": 902.1440148353577, + "p95": 2055.295944213867, + "p99": 2505.5999755859375 + }, + "combine": { + "p50": 963.1999731063843, + "p90": 1024.191975593567, + "p95": 1287.4239683151245, + "p99": 1618.9119815826416 + }, + "roundtrip": { + "p50": 1671.3919639587402, + "p90": 1904.5439958572388, + "p95": 2340.735912322998, + "p99": 2830.2719593048096 + }, + "isolatedSum": { + "p50": 1687.071979045868, + "p90": 1926.3359904289246, + "p95": 3342.7199125289917, + "p99": 4124.511957168579 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 10587, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1156.8000316619873, + "p90": 1446.943998336792, + "p95": 1828.544020652771, + "p99": 2199.039936065674 + }, + "combine": { + "p50": 1847.8080034255981, + "p90": 1866.6239976882935, + "p95": 1871.359944343567, + "p99": 1891.2639617919922 + }, + "roundtrip": { + "p50": 2915.9679412841797, + "p90": 3050.8480072021484, + "p95": 3184.8959922790527, + "p99": 3547.424077987671 + }, + "isolatedSum": { + "p50": 3004.6080350875854, + "p90": 3313.5679960250854, + "p95": 3699.903964996338, + "p99": 4090.303897857666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + "recvTokensMax": 21014, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1886.7520093917847, + "p90": 2011.5840435028076, + "p95": 2150.4321098327637, + "p99": 
2269.695997238159 + }, + "combine": { + "p50": 3530.911922454834, + "p90": 3638.6559009552, + "p95": 3655.9360027313232, + "p99": 3704.3519020080566 + }, + "roundtrip": { + "p50": 5339.0398025512695, + "p90": 5509.312152862549, + "p95": 5553.376197814941, + "p99": 5832.608222961426 + }, + "isolatedSum": { + "p50": 5417.663931846619, + "p90": 5650.239944458008, + "p95": 5806.368112564087, + "p99": 5974.047899246216 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 41814, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 3844.928026199341, + "p90": 3924.448013305664, + "p95": 3955.872058868408, + "p99": 4040.4481887817383 + }, + "combine": { + "p50": 7122.848033905029, + "p90": 7214.943885803223, + "p95": 7229.983806610107, + "p99": 7271.999835968018 + }, + "roundtrip": { + "p50": 10601.408004760742, + "p90": 10648.703575134277, + "p95": 10674.752235412598, + "p99": 10909.248352050781 + }, + "isolatedSum": { + "p50": 10967.77606010437, + "p90": 11139.391899108887, + "p95": 11185.855865478516, + "p99": 11312.448024749756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 83417, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bb19cdf3", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h200_07baaf6a", + "comparisonKey": "0fcdae155d8decda", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:04.831408+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": 
"prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 417.279988527298, + "p90": 476.25601291656494, + "p95": 598.9440083503723, + "p99": 3212.8961086273193 + }, + "combine": { + "p50": 234.8800003528595, + "p90": 260.1599991321564, + "p95": 279.3920040130615, + "p99": 2458.49609375 + }, + "roundtrip": { + "p50": 
651.0400176048279, + "p90": 703.5840153694153, + "p95": 2525.887966156006, + "p99": 3498.624086380005 + }, + "isolatedSum": { + "p50": 652.1599888801575, + "p90": 736.4160120487213, + "p95": 878.3360123634338, + "p99": 5671.392202377319 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 1067, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 464.89599347114563, + "p90": 527.2639989852905, + "p95": 589.1199707984924, + "p99": 2868.096113204956 + }, + "combine": { + "p50": 413.37600350379944, + "p90": 442.9120123386383, + "p95": 1470.2399969100952, + "p99": 2283.519983291626 + }, + "roundtrip": { + "p50": 868.4160113334656, + "p90": 919.871985912323, + "p95": 2303.0080795288086, + "p99": 3102.720022201538 + }, + "isolatedSum": { + "p50": 878.2719969749451, + "p90": 970.1760113239288, + "p95": 2059.3599677085876, + "p99": 5151.616096496582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 2097, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 566.6239857673645, + "p90": 622.111976146698, + "p95": 1332.4480056762695, + "p99": 2621.3440895080566 + }, + "combine": { + "p50": 746.3359832763672, + "p90": 1058.8159561157227, + "p95": 1453.8559913635254, + "p99": 1986.016035079956 + }, + "roundtrip": { + "p50": 1297.7279424667358, + "p90": 1420.2239513397217, + "p95": 2377.7918815612793, + "p99": 2917.7279472351074 + }, + "isolatedSum": { + "p50": 1312.9599690437317, + "p90": 1680.9279322624207, + "p95": 2786.303997039795, + "p99": 4607.360124588013 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 
312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 4163, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 789.4080281257629, + "p90": 881.2800049781799, + "p95": 1921.887993812561, + "p99": 2574.1119384765625 + }, + "combine": { + "p50": 1400.2879858016968, + "p90": 1425.0240325927734, + "p95": 1454.2720317840576, + "p99": 1852.1599769592285 + }, + "roundtrip": { + "p50": 2149.5039463043213, + "p90": 2223.4559059143066, + "p95": 2619.488000869751, + "p99": 3110.0800037384033 + }, + "isolatedSum": { + "p50": 2189.6960139274597, + "p90": 2306.3040375709534, + "p95": 3376.1600255966187, + "p99": 4426.271915435791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 8305, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1136.064052581787, + "p90": 1302.847981452942, + "p95": 1963.1359577178955, + "p99": 2407.423973083496 + }, + "combine": { + "p50": 2679.424047470093, + "p90": 2707.808017730713, + "p95": 2718.208074569702, + "p99": 2741.0879135131836 + }, + "roundtrip": { + "p50": 3753.7600994110107, + "p90": 3820.064067840576, + "p95": 3994.5919513702393, + "p99": 4385.119915008545 + }, + "isolatedSum": { + "p50": 3815.48810005188, + "p90": 4010.655999183655, + "p95": 4681.344032287598, + "p99": 5148.51188659668 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 16529, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1885.2479457855225, + "p90": 1970.080018043518, + "p95": 2076.7040252685547, + "p99": 
2508.0320835113525 + }, + "combine": { + "p50": 5243.264198303223, + "p90": 5295.263767242432, + "p95": 5317.376136779785, + "p99": 5360.864162445068 + }, + "roundtrip": { + "p50": 6996.895790100098, + "p90": 7057.34395980835, + "p95": 7089.280128479004, + "p99": 7292.031764984131 + }, + "isolatedSum": { + "p50": 7128.512144088745, + "p90": 7265.34378528595, + "p95": 7394.08016204834, + "p99": 7868.896245956421 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 32880, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a93fb823", + "identity": "h200|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_8ebb21f3", + "comparisonKey": "2b4f9108c41f95d3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:14.163559+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 529.7600030899048, + "p90": 634.335994720459, + "p95": 1312.4799728393555, + "p99": 2593.7600135803223 + }, + "combine": { + "p50": 372.4159896373749, + "p90": 391.64799451828003, + "p95": 1743.391990661621, + "p99": 2143.968105316162 + }, + "roundtrip": { + "p50": 885.2159976959229, + "p90": 971.2640047073364, + "p95": 2527.5518894195557, + "p99": 2829.375982284546 + }, + "isolatedSum": { + "p50": 902.1759927272797, + "p90": 1025.983989238739, + "p95": 3055.8719635009766, + "p99": 4737.728118896484 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 5046, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 702.8800249099731, + "p90": 840.5439853668213, + "p95": 1805.2480220794678, + "p99": 2297.760009765625 + }, + "combine": { + "p50": 696.7679858207703, + "p90": 878.2079815864563, + "p95": 1242.2080039978027, + "p99": 1669.0560579299927 + }, + "roundtrip": { + "p50": 1358.7520122528076, + "p90": 1693.7919855117798, + "p95": 
2244.7359561920166, + "p99": 2751.807928085327 + }, + "isolatedSum": { + "p50": 1399.6480107307434, + "p90": 1718.7519669532776, + "p95": 3047.4560260772705, + "p99": 3966.8160676956177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 10049, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 1066.4639472961426, + "p90": 1369.8879480361938, + "p95": 1632.7040195465088, + "p99": 2184.3841075897217 + }, + "combine": { + "p50": 1328.8320302963257, + "p90": 1344.5440530776978, + "p95": 1351.9999980926514, + "p99": 1406.2399864196777 + }, + "roundtrip": { + "p50": 2297.600030899048, + "p90": 2435.4240894317627, + "p95": 2585.7601165771484, + "p99": 2937.056064605713 + }, + "isolatedSum": { + "p50": 2395.2959775924683, + "p90": 2714.4320011138916, + "p95": 2984.70401763916, + "p99": 3590.6240940093994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 20154, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 1802.2079467773438, + "p90": 1883.072018623352, + "p95": 1966.1760330200195, + "p99": 2129.9519538879395 + }, + "combine": { + "p50": 2500.4799365997314, + "p90": 2596.1599349975586, + "p95": 2607.9039573669434, + "p99": 2625.407934188843 + }, + "roundtrip": { + "p50": 4197.472095489502, + "p90": 4342.048168182373, + "p95": 4379.648208618164, + "p99": 4489.952087402344 + }, + "isolatedSum": { + "p50": 4302.687883377075, + "p90": 4479.231953620911, + "p95": 4574.079990386963, + "p99": 4755.359888076782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + 
"recvTokensMax": 40211, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 3245.7919120788574, + "p90": 3314.271926879883, + "p95": 3349.4720458984375, + "p99": 3501.375913619995 + }, + "combine": { + "p50": 4923.3598709106445, + "p90": 5007.008075714111, + "p95": 5015.200138092041, + "p99": 5051.072120666504 + }, + "roundtrip": { + "p50": 8052.000045776367, + "p90": 8122.464179992676, + "p95": 8163.359642028809, + "p99": 8491.168022155762 + }, + "isolatedSum": { + "p50": 8169.151782989502, + "p90": 8321.280002593994, + "p95": 8364.672183990479, + "p99": 8552.448034286499 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 80302, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 6749.023914337158, + "p90": 6896.448135375977, + "p95": 7079.264163970947, + "p99": 7154.560089111328 + }, + "combine": { + "p50": 10312.607765197754, + "p90": 10384.160041809082, + "p95": 10406.399726867676, + "p99": 10464.096069335938 + }, + "roundtrip": { + "p50": 16934.335708618164, + "p90": 16987.712860107422, + "p95": 17009.7599029541, + "p99": 17038.591384887695 + }, + "isolatedSum": { + "p50": 17061.631679534912, + "p90": 17280.60817718506, + "p95": 17485.663890838623, + "p99": 17618.656158447266 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 160408, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d73f8e9f", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": 
"h200_02bb352b", + "comparisonKey": "185a38ed90884195", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:59.247815+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 384.0639889240265, + "p90": 457.92001485824585, + "p95": 578.5920023918152, + "p99": 3222.368001937866 + }, + "combine": { + "p50": 232.9919934272766, + "p90": 256.73601031303406, + "p95": 289.216011762619, + "p99": 3072.7360248565674 + }, + "roundtrip": { + "p50": 601.3439893722534, + "p90": 688.256025314331, + "p95": 2013.375997543335, + "p99": 3821.280002593994 + }, + "isolatedSum": { + "p50": 617.0559823513031, + "p90": 714.6560251712799, + "p95": 867.8080141544342, + "p99": 6295.104026794434 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 439.5520091056824, + "p90": 527.4879932403564, + "p95": 605.4080128669739, + "p99": 3115.839958190918 + }, + "combine": { + "p50": 407.71201252937317, + "p90": 433.56800079345703, + "p95": 1650.496006011963, + "p99": 2577.9519081115723 + }, + "roundtrip": { + "p50": 830.24001121521, + "p90": 930.4320216178894, + "p95": 2554.784059524536, + "p99": 3201.0560035705566 + }, + "isolatedSum": { + "p50": 847.2640216350555, + "p90": 961.0559940338135, + "p95": 2255.9040188789368, + "p99": 5693.79186630249 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 533.7280035018921, + "p90": 632.4160099029541, + "p95": 1741.536021232605, + "p99": 3213.088035583496 + }, + "combine": { + "p50": 737.0240092277527, + "p90": 803.2640218734741, + "p95": 1394.11199092865, + "p99": 1832.9919576644897 + }, + "roundtrip": { + "p50": 1258.3999633789062, + "p90": 1557.9839944839478, + "p95": 
2516.671895980835, + "p99": 3060.2879524230957 + }, + "isolatedSum": { + "p50": 1270.7520127296448, + "p90": 1435.6800317764282, + "p95": 3135.648012161255, + "p99": 5046.079993247986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 745.855987071991, + "p90": 863.5839819908142, + "p95": 1998.8479614257812, + "p99": 2573.3439922332764 + }, + "combine": { + "p50": 1389.855980873108, + "p90": 1424.1600036621094, + "p95": 1554.6560287475586, + "p99": 1818.4319734573364 + }, + "roundtrip": { + "p50": 2122.944116592407, + "p90": 2291.5520668029785, + "p95": 2663.520097732544, + "p99": 3170.8478927612305 + }, + "isolatedSum": { + "p50": 2135.711967945099, + "p90": 2287.7439856529236, + "p95": 3553.50399017334, + "p99": 4391.775965690613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1124.5440244674683, + "p90": 1268.447995185852, + "p95": 1900.320053100586, + "p99": 2492.4800395965576 + }, + "combine": { + "p50": 2676.095962524414, + "p90": 2721.5681076049805, + "p95": 2746.1440563201904, + "p99": 2834.0160846710205 + }, + "roundtrip": { + "p50": 3716.639995574951, + "p90": 3822.4639892578125, + "p95": 3965.888023376465, + "p99": 4340.191841125488 + }, + "isolatedSum": { + "p50": 3800.6399869918823, + "p90": 3990.0161027908325, + "p95": 4646.464109420776, + "p99": 5326.496124267578 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + 
"recvTokensMax": 16479, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1853.279948234558, + "p90": 2032.2558879852295, + "p95": 2327.7759552001953, + "p99": 2620.863914489746 + }, + "combine": { + "p50": 5241.343975067139, + "p90": 5297.632217407227, + "p95": 5313.183784484863, + "p99": 5376.095771789551 + }, + "roundtrip": { + "p50": 6977.63204574585, + "p90": 7064.96000289917, + "p95": 7152.224063873291, + "p99": 7352.1599769592285 + }, + "isolatedSum": { + "p50": 7094.623923301697, + "p90": 7329.888105392456, + "p95": 7640.959739685059, + "p99": 7996.959686279297 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8b9707ff", + "identity": "h200|nccl-ep|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_c1740acb", + "comparisonKey": "8ac68ddfdb8e063e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:25.293890+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_3", + "sku": "h200", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · nccl-ep · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", 
+ "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 372.99200892448425, + "p90": 415.6480133533478, + "p95": 539.3279790878296, + "p99": 3072.864055633545 + }, + "combine": { + "p50": 231.77599906921387, + "p90": 244.159996509552, + "p95": 255.5840015411377, + "p99": 2771.712064743042 + }, + "roundtrip": { + "p50": 593.1199789047241, + "p90": 665.2799844741821, + "p95": 2690.272092819214, + "p99": 3654.5279026031494 + }, + "isolatedSum": { + "p50": 604.7680079936981, + "p90": 659.8080098628998, + "p95": 794.9119806289673, + "p99": 5844.576120376587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 1062, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 425.4719913005829, + "p90": 502.1759867668152, + 
"p95": 608.8320016860962, + "p99": 2980.89599609375 + }, + "combine": { + "p50": 406.75199031829834, + "p90": 425.4719913005829, + "p95": 1669.4719791412354, + "p99": 2475.9039878845215 + }, + "roundtrip": { + "p50": 811.2959861755371, + "p90": 880.19198179245, + "p95": 2595.00789642334, + "p99": 3250.5600452423096 + }, + "isolatedSum": { + "p50": 832.2239816188812, + "p90": 927.6479780673981, + "p95": 2278.3039808273315, + "p99": 5456.7999839782715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 2106, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 513.9200091362, + "p90": 568.9600110054016, + "p95": 742.7520155906677, + "p99": 3016.4480209350586 + }, + "combine": { + "p50": 733.7920069694519, + "p90": 992.6720261573792, + "p95": 1599.519968032837, + "p99": 2058.3040714263916 + }, + "roundtrip": { + "p50": 1228.8639545440674, + "p90": 1827.9999494552612, + "p95": 2387.455940246582, + "p99": 3003.5200119018555 + }, + "isolatedSum": { + "p50": 1247.7120161056519, + "p90": 1561.6320371627808, + "p95": 2342.2719836235046, + "p99": 5074.75209236145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 4154, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 728.7359833717346, + "p90": 845.7599878311157, + "p95": 2085.4079723358154, + "p99": 2598.2398986816406 + }, + "combine": { + "p50": 1385.9200477600098, + "p90": 1418.239951133728, + "p95": 1573.855996131897, + "p99": 1864.416003227234 + }, + "roundtrip": { + "p50": 2082.7200412750244, + "p90": 2281.8241119384766, + "p95": 2843.456029891968, + "p99": 3299.7119426727295 + }, + "isolatedSum": 
{ + "p50": 2114.6560311317444, + "p90": 2263.9999389648438, + "p95": 3659.2639684677124, + "p99": 4462.6559019088745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 8278, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1091.8079614639282, + "p90": 1257.4080228805542, + "p95": 1998.91197681427, + "p99": 2385.535955429077 + }, + "combine": { + "p50": 2670.3040599823, + "p90": 2701.4400959014893, + "p95": 2711.616039276123, + "p99": 2805.567979812622 + }, + "roundtrip": { + "p50": 3704.7040462493896, + "p90": 3771.359920501709, + "p95": 4045.407772064209, + "p99": 4445.119857788086 + }, + "isolatedSum": { + "p50": 3762.112021446228, + "p90": 3958.8481187820435, + "p95": 4710.528016090393, + "p99": 5191.103935241699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 16479, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1826.5279531478882, + "p90": 1938.2719993591309, + "p95": 2050.3039360046387, + "p99": 2595.2959060668945 + }, + "combine": { + "p50": 5241.119861602783, + "p90": 5294.623851776123, + "p95": 5319.104194641113, + "p99": 5393.02396774292 + }, + "roundtrip": { + "p50": 6956.992149353027, + "p90": 7035.776138305664, + "p95": 7119.487762451172, + "p99": 7365.087985992432 + }, + "isolatedSum": { + "p50": 7067.647814750671, + "p90": 7232.895851135254, + "p95": 7369.408130645752, + "p99": 7988.319873809814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 32986, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-16bf2d34", + "identity": "h200|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_cd151ec9", + "comparisonKey": "c79e876bec93baf9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:32.439336+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 111.61600053310394, + "p90": 157.4079990386963, + "p95": 169.95200514793396, + "p99": 185.66399812698364 + }, + "combine": { + "p50": 69.15199756622314, + "p90": 84.03199911117554, + "p95": 92.92799979448318, + "p99": 101.88800096511841 + }, + "roundtrip": { + "p50": 171.55200242996216, + "p90": 213.59999477863312, + "p95": 220.64000368118286, + "p99": 247.48800694942474 + }, + "isolatedSum": { + "p50": 180.7679980993271, + "p90": 241.43999814987183, + "p95": 262.88000494241714, + "p99": 287.55199909210205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 108.06400328874588, + "p90": 154.81600165367126, + "p95": 164.57599401474, + "p99": 174.27200078964233 + }, + "combine": { + "p50": 69.08799707889557, + "p90": 82.17599987983704, + "p95": 93.1520015001297, + "p99": 102.1760031580925 + }, + "roundtrip": { + "p50": 168.57600212097168, + "p90": 217.43999421596527, + "p95": 225.95199942588806, + "p99": 247.871994972229 + }, + "isolatedSum": { + "p50": 177.15200036764145, + "p90": 236.9920015335083, + "p95": 257.7279955148697, + "p99": 276.44800394773483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 116.64000153541565, + "p90": 160.22400557994843, + "p95": 171.9679981470108, + "p99": 189.56799805164337 + }, + "combine": { + "p50": 72.03199714422226, + "p90": 91.13600105047226, + "p95": 99.90400075912476, + 
"p99": 110.01600325107574 + }, + "roundtrip": { + "p50": 181.2479943037033, + "p90": 226.68799757957458, + "p95": 242.17599630355835, + "p99": 278.7199914455414 + }, + "isolatedSum": { + "p50": 188.6719986796379, + "p90": 251.36000663042068, + "p95": 271.87199890613556, + "p99": 299.5840013027191 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 108.92800241708755, + "p90": 152.73599326610565, + "p95": 164.5440012216568, + "p99": 203.19999754428864 + }, + "combine": { + "p50": 72.83200323581696, + "p90": 85.21600067615509, + "p95": 96.38399630784988, + "p99": 102.81600058078766 + }, + "roundtrip": { + "p50": 172.03199863433838, + "p90": 220.2879935503006, + "p95": 234.14400219917297, + "p99": 266.84799790382385 + }, + "isolatedSum": { + "p50": 181.7600056529045, + "p90": 237.95199394226074, + "p95": 260.9279975295067, + "p99": 306.0159981250763 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 118.49600076675415, + "p90": 177.3120015859604, + "p95": 193.82399320602417, + "p99": 218.23999285697937 + }, + "combine": { + "p50": 74.62400197982788, + "p90": 89.50400352478027, + "p95": 98.9760011434555, + "p99": 104.41599786281586 + }, + "roundtrip": { + "p50": 171.1360067129135, + "p90": 221.47199511528015, + "p95": 233.5679978132248, + "p99": 276.2239873409271 + }, + "isolatedSum": { + "p50": 193.12000274658203, + "p90": 266.81600511074066, + "p95": 292.7999943494797, + "p99": 322.6559907197952 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 107.84000158309937, + "p90": 148.99200201034546, + "p95": 155.07200360298157, + "p99": 185.2159947156906 + }, + "combine": { + "p50": 75.6160020828247, + "p90": 98.4639972448349, + "p95": 101.05600208044052, + "p99": 117.0559972524643 + }, + "roundtrip": { + "p50": 174.3679940700531, + "p90": 219.2319929599762, + "p95": 225.0880002975464, + "p99": 239.80799317359924 + }, + "isolatedSum": { + "p50": 183.45600366592407, + "p90": 247.45599925518036, + "p95": 256.1280056834221, + "p99": 302.2719919681549 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 125.66399574279785, + "p90": 171.48800194263458, + "p95": 184.57600474357605, + "p99": 228.99200022220612 + }, + "combine": { + "p50": 86.5280032157898, + "p90": 108.5439994931221, + "p95": 115.61600118875504, + "p99": 130.20800054073334 + }, + "roundtrip": { + "p50": 196.57599925994873, + "p90": 247.48800694942474, + "p95": 263.7760043144226, + "p99": 279.00800108909607 + }, + "isolatedSum": { + "p50": 212.19199895858765, + "p90": 280.0320014357567, + "p95": 300.1920059323311, + "p99": 359.20000076293945 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.1280002593994, + "p90": 167.04000532627106, + "p95": 180.95999956130981, + "p99": 
211.19999885559082 + }, + "combine": { + "p50": 95.36000341176987, + "p90": 117.08799749612808, + "p95": 124.4800016283989, + "p99": 136.4160031080246 + }, + "roundtrip": { + "p50": 197.82400131225586, + "p90": 247.6480007171631, + "p95": 256.9279968738556, + "p99": 266.7520046234131 + }, + "isolatedSum": { + "p50": 227.48800367116928, + "p90": 284.12800282239914, + "p95": 305.4400011897087, + "p99": 347.6160019636154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a8993639", + "identity": "h200|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_cd151ec9", + "comparisonKey": "a4daf86eba51c6f9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:25.814540+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, 
+ "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 114.20799791812897, + "p90": 136.51199638843536, + "p95": 156.54399991035461, + "p99": 213.6639952659607 + }, + "combine": { + "p50": 74.27199929952621, + "p90": 79.48800176382065, + "p95": 89.56799656152725, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 180.83199858665466, + "p90": 221.79199755191803, + "p95": 237.37600445747375, + "p99": 256.9600045681 + }, + "isolatedSum": { + "p50": 188.47999721765518, + "p90": 215.999998152256, + "p95": 246.11199647188187, + "p99": 319.583997130394 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 116.35199934244156, + "p90": 155.90399503707886, + "p95": 170.17599940299988, + "p99": 190.72000682353973 + }, + "combine": { + "p50": 74.97599720954895, + "p90": 80.51200211048126, + "p95": 95.32800316810608, + "p99": 107.71200060844421 + }, + "roundtrip": { + "p50": 181.5679967403412, + "p90": 218.176007270813, + "p95": 228.7680059671402, + "p99": 247.0400035381317 + }, + "isolatedSum": { + "p50": 
191.3279965519905, + "p90": 236.41599714756012, + "p95": 265.50400257110596, + "p99": 298.43200743198395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.14399743080139, + "p90": 147.23199605941772, + "p95": 157.0879966020584, + "p99": 172.2559928894043 + }, + "combine": { + "p50": 76.4480009675026, + "p90": 85.02399921417236, + "p95": 98.01600128412247, + "p99": 105.92000186443329 + }, + "roundtrip": { + "p50": 180.92800676822662, + "p90": 207.8080028295517, + "p95": 221.76000475883484, + "p99": 251.0719895362854 + }, + "isolatedSum": { + "p50": 190.59199839830399, + "p90": 232.2559952735901, + "p95": 255.10399788618088, + "p99": 278.1759947538376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 111.68000102043152, + "p90": 146.65600657463074, + "p95": 156.76799416542053, + "p99": 168.38400065898895 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 85.4720026254654, + "p95": 100.47999769449234, + "p99": 110.04800349473953 + }, + "roundtrip": { + "p50": 186.97600066661835, + "p90": 228.86399924755096, + "p95": 245.56800723075867, + "p99": 261.8879973888397 + }, + "isolatedSum": { + "p50": 188.06400150060654, + "p90": 232.12800920009613, + "p95": 257.2479918599129, + "p99": 278.4320041537285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 125.98399817943573, + "p90": 169.8240041732788, + "p95": 181.18399381637573, + "p99": 192.28799641132355 + }, + "combine": { + "p50": 78.49600166082382, + "p90": 99.2640033364296, + "p95": 103.55199873447418, + "p99": 112.2559979557991 + }, + "roundtrip": { + "p50": 179.55200374126434, + "p90": 208.48000049591064, + "p95": 220.0320065021515, + "p99": 237.7920001745224 + }, + "isolatedSum": { + "p50": 204.47999984025955, + "p90": 269.0880075097084, + "p95": 284.7359925508499, + "p99": 304.54399436712265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.20799922943115, + "p90": 156.99200332164764, + "p95": 177.12000012397766, + "p99": 191.20000302791595 + }, + "combine": { + "p50": 83.26400071382523, + "p90": 104.86400127410889, + "p95": 109.79200154542923, + "p99": 115.77600240707397 + }, + "roundtrip": { + "p50": 183.58400464057922, + "p90": 221.5680032968521, + "p95": 235.71200668811798, + "p99": 277.8880000114441 + }, + "isolatedSum": { + "p50": 205.47199994325638, + "p90": 261.85600459575653, + "p95": 286.9120016694069, + "p99": 306.97600543498993 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 130.17599284648895, + "p90": 153.24799716472626, + "p95": 160.47999262809753, + "p99": 171.26399278640747 + }, + "combine": { + "p50": 90.94399958848953, + "p90": 102.84800082445145, + "p95": 116.06399714946747, + "p99": 124.35200065374374 + }, + "roundtrip": { + "p50": 196.22400403022766, + "p90": 
252.0959973335266, + "p95": 272.5119888782501, + "p99": 317.9199993610382 + }, + "isolatedSum": { + "p50": 221.11999243497849, + "p90": 256.0959979891777, + "p95": 276.543989777565, + "p99": 295.6159934401512 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 137.85600662231445, + "p90": 159.10400450229645, + "p95": 170.30400037765503, + "p99": 186.5919977426529 + }, + "combine": { + "p50": 102.20800340175629, + "p90": 113.88800293207169, + "p95": 124.03199821710587, + "p99": 132.32000172138214 + }, + "roundtrip": { + "p50": 205.85599541664124, + "p90": 241.37599766254425, + "p95": 251.3279914855957, + "p99": 262.4639868736267 + }, + "isolatedSum": { + "p50": 240.06401002407074, + "p90": 272.99200743436813, + "p95": 294.3359985947609, + "p99": 318.91199946403503 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4e614384", + "identity": "h200|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_cd151ec9", + "comparisonKey": "0bfcb88588112580", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:21.441741+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 
8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 110.11199653148651, + "p90": 125.85599720478058, + "p95": 131.48799538612366, + "p99": 152.38399803638458 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 81.7599967122078, + "p95": 84.22400057315826, + "p99": 91.87199920415878 + }, + "roundtrip": { + "p50": 176.1920005083084, + "p90": 190.20800292491913, + "p95": 194.5600062608719, + "p99": 260.19200682640076 + }, + "isolatedSum": { + "p50": 185.69599837064743, + "p90": 207.61599391698837, + "p95": 215.71199595928192, + "p99": 244.25599724054337 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 
540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.80800265073776, + "p90": 131.00799918174744, + "p95": 137.05599308013916, + "p99": 155.35999834537506 + }, + "combine": { + "p50": 77.53600180149078, + "p90": 84.06399935483932, + "p95": 87.67999708652496, + "p99": 104.2879968881607 + }, + "roundtrip": { + "p50": 179.967999458313, + "p90": 195.77600061893463, + "p95": 202.2400051355362, + "p99": 235.77600717544556 + }, + "isolatedSum": { + "p50": 193.34400445222855, + "p90": 215.07199853658676, + "p95": 224.73599016666412, + "p99": 259.64799523353577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.52800035476685, + "p90": 137.11999356746674, + "p95": 168.64000260829926, + "p99": 230.3680032491684 + }, + "combine": { + "p50": 76.60800218582153, + "p90": 82.84799754619598, + "p95": 84.41600203514099, + "p99": 95.36000341176987 + }, + "roundtrip": { + "p50": 179.23200130462646, + "p90": 193.88799369335175, + "p95": 198.88000190258026, + "p99": 209.34399962425232 + }, + "isolatedSum": { + "p50": 191.13600254058838, + "p90": 219.96799111366272, + "p95": 253.05600464344025, + "p99": 325.72800666093826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 118.27199906110764, + "p90": 135.1040005683899, + "p95": 141.59999787807465, + "p99": 178.56000363826752 + }, + "combine": { + "p50": 77.40800082683563, + 
"p90": 84.25600081682205, + "p95": 88.41600269079208, + "p99": 93.31200271844864 + }, + "roundtrip": { + "p50": 185.02399325370789, + "p90": 248.09600412845612, + "p95": 307.71198868751526, + "p99": 339.83999490737915 + }, + "isolatedSum": { + "p50": 195.67999988794327, + "p90": 219.36000138521194, + "p95": 230.01600056886673, + "p99": 271.87200635671616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 117.24799871444702, + "p90": 139.90400731563568, + "p95": 178.8800060749054, + "p99": 241.56799912452698 + }, + "combine": { + "p50": 81.53600245714188, + "p90": 88.54400366544724, + "p95": 94.2080020904541, + "p99": 122.04799801111221 + }, + "roundtrip": { + "p50": 184.2239946126938, + "p90": 200.06400346755981, + "p95": 206.14400506019592, + "p99": 219.67999637126923 + }, + "isolatedSum": { + "p50": 198.7840011715889, + "p90": 228.44801098108292, + "p95": 273.0880081653595, + "p99": 363.6159971356392 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 121.44000083208084, + "p90": 130.43199479579926, + "p95": 135.16800105571747, + "p99": 145.60000598430634 + }, + "combine": { + "p50": 84.32000130414963, + "p90": 92.22400188446045, + "p95": 100.54399818181992, + "p99": 117.44000017642975 + }, + "roundtrip": { + "p50": 188.1600022315979, + "p90": 208.51199328899384, + "p95": 217.15199947357178, + "p99": 304.22401428222656 + }, + "isolatedSum": { + "p50": 205.76000213623047, + "p90": 222.6559966802597, + "p95": 235.71199923753738, + "p99": 263.0400061607361 + 
}, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 131.9040060043335, + "p90": 142.91200041770935, + "p95": 148.22399616241455, + "p99": 157.4079990386963 + }, + "combine": { + "p50": 93.63199770450592, + "p90": 100.70399940013885, + "p95": 106.27199709415436, + "p99": 126.75200402736664 + }, + "roundtrip": { + "p50": 206.62400126457214, + "p90": 268.38400959968567, + "p95": 318.91199946403503, + "p99": 338.9439880847931 + }, + "isolatedSum": { + "p50": 225.53600370883942, + "p90": 243.6159998178482, + "p95": 254.4959932565689, + "p99": 284.1600030660629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 147.5519984960556, + "p90": 157.50400722026825, + "p95": 160.38399934768677, + "p99": 170.33599317073822 + }, + "combine": { + "p50": 109.24799740314484, + "p90": 117.24799871444702, + "p95": 122.01599776744843, + "p99": 133.88800621032715 + }, + "roundtrip": { + "p50": 218.46400201320648, + "p90": 234.65600609779358, + "p95": 241.05599522590637, + "p99": 317.56800413131714 + }, + "isolatedSum": { + "p50": 256.79999589920044, + "p90": 274.75200593471527, + "p95": 282.3999971151352, + "p99": 304.22399938106537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-74afc74f", + "identity": 
"h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_05be9f49", + "comparisonKey": "8bbd7f30d0bdbd11", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:22.183677+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": 
[ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 120.64000219106674, + "p90": 162.432000041008, + "p95": 174.23999309539795, + "p99": 199.13600385189056 + }, + "combine": { + "p50": 83.00799876451492, + "p90": 97.69599884748459, + "p95": 104.06400263309479, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 184.38400328159332, + "p90": 225.11999309062958, + "p95": 233.47200453281403, + "p99": 246.39999866485596 + }, + "isolatedSum": { + "p50": 203.64800095558167, + "p90": 260.1279988884926, + "p95": 278.30399572849274, + "p99": 312.3840019106865 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.46399986743927, + "p90": 157.27999806404114, + "p95": 164.44799304008484, + "p99": 175.74399709701538 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 95.61599791049957, + "p95": 106.6880002617836, + "p99": 111.42399907112122 + }, + "roundtrip": { + "p50": 181.11999332904816, + "p90": 222.27199375629425, + "p95": 231.9680005311966, + "p99": 252.0959973335266 + }, + "isolatedSum": { + "p50": 195.48799842596054, + "p90": 252.8959959745407, + "p95": 271.13599330186844, + "p99": 287.1679961681366 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 117.5680011510849, + "p90": 158.04800391197205, + "p95": 166.4319932460785, + "p99": 188.9919936656952 + }, + "combine": { + "p50": 85.11999994516373, + "p90": 100.96000134944916, + "p95": 108.38399827480316, + "p99": 114.62400108575821 + }, + "roundtrip": { + "p50": 194.59199905395508, + "p90": 
237.15199530124664, + "p95": 248.25599789619446, + "p99": 279.7439992427826 + }, + "isolatedSum": { + "p50": 202.68800109624863, + "p90": 259.0080052614212, + "p95": 274.81599152088165, + "p99": 303.6159947514534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 117.60000139474869, + "p90": 154.7520011663437, + "p95": 162.9759967327118, + "p99": 173.75999689102173 + }, + "combine": { + "p50": 85.05599945783615, + "p90": 105.15200346708298, + "p95": 110.91200262308121, + "p99": 118.68800222873688 + }, + "roundtrip": { + "p50": 191.93600118160248, + "p90": 240.06399512290955, + "p95": 251.64800882339478, + "p99": 278.78400683403015 + }, + "isolatedSum": { + "p50": 202.65600085258484, + "p90": 259.90400463342667, + "p95": 273.887999355793, + "p99": 292.4479991197586 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 115.93600362539291, + "p90": 157.50400722026825, + "p95": 168.60799491405487, + "p99": 188.9919936656952 + }, + "combine": { + "p50": 88.48000317811966, + "p90": 108.15999656915665, + "p95": 117.37599968910217, + "p99": 128.89599800109863 + }, + "roundtrip": { + "p50": 189.85599279403687, + "p90": 232.54400491714478, + "p95": 247.0719963312149, + "p99": 276.92800760269165 + }, + "isolatedSum": { + "p50": 204.41600680351257, + "p90": 265.6640037894249, + "p95": 285.98399460315704, + "p99": 317.8879916667938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + 
"recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.86399710178375, + "p90": 160.89600324630737, + "p95": 167.32800006866455, + "p99": 193.95199418067932 + }, + "combine": { + "p50": 92.25600212812424, + "p90": 113.63200098276138, + "p95": 119.00799721479416, + "p99": 126.94400548934937 + }, + "roundtrip": { + "p50": 200.44800639152527, + "p90": 247.00799584388733, + "p95": 257.79199600219727, + "p99": 282.20799565315247 + }, + "isolatedSum": { + "p50": 217.119999229908, + "p90": 274.52800422906876, + "p95": 286.3359972834587, + "p99": 320.8959996700287 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.4479959011078, + "p90": 162.6559942960739, + "p95": 169.5680022239685, + "p99": 188.9919936656952 + }, + "combine": { + "p50": 102.20800340175629, + "p90": 122.81599640846252, + "p95": 128.51199507713318, + "p99": 137.7280056476593 + }, + "roundtrip": { + "p50": 205.4399996995926, + "p90": 246.5279996395111, + "p95": 255.5840015411377, + "p99": 291.3599908351898 + }, + "isolatedSum": { + "p50": 238.65599930286407, + "p90": 285.47199070453644, + "p95": 298.0799973011017, + "p99": 326.7199993133545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.70399367809296, + "p90": 180.35200238227844, + "p95": 188.38399648666382, + "p99": 200.70399343967438 + }, + "combine": { + "p50": 119.61600184440613, + "p90": 
135.6479972600937, + "p95": 144.28800344467163, + "p99": 151.5199989080429 + }, + "roundtrip": { + "p50": 236.95999383926392, + "p90": 268.67198944091797, + "p95": 284.5439910888672, + "p99": 334.9440097808838 + }, + "isolatedSum": { + "p50": 276.3199955224991, + "p90": 315.99999964237213, + "p95": 332.67199993133545, + "p99": 352.2239923477173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-569d979b", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_cd151ec9", + "comparisonKey": "a4ad8ade2f646e7d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:14.845549+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + 
"nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 120.09599804878235, + "p90": 156.92800283432007, + "p95": 168.09600591659546, + "p99": 186.71999871730804 + }, + "combine": { + "p50": 81.79199695587158, + "p90": 99.93600100278854, + "p95": 108.89600217342377, + "p99": 115.52000045776367 + }, + "roundtrip": { + "p50": 187.42400407791138, + "p90": 236.83199286460876, + "p95": 251.55198574066162, + "p99": 277.40800380706787 + }, + "isolatedSum": { + "p50": 201.88799500465393, + "p90": 256.8640038371086, + "p95": 276.9920080900192, + "p99": 302.2399991750717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.69600212574005, + "p90": 153.98399531841278, + "p95": 161.47199273109436, + "p99": 170.9440052509308 + }, + "combine": { + "p50": 81.60000294446945, + "p90": 95.45599669218063, + "p95": 106.6880002617836, + "p99": 112.35199868679047 + }, + "roundtrip": { + "p50": 190.0479942560196, + "p90": 237.98400163650513, + "p95": 250.43201446533203, + "p99": 287.1679961681366 + }, + "isolatedSum": { + "p50": 199.2960050702095, + "p90": 249.43999201059341, + "p95": 
268.15999299287796, + "p99": 283.29600393772125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 122.68800288438797, + "p90": 151.71200037002563, + "p95": 164.35199975967407, + "p99": 210.1760059595108 + }, + "combine": { + "p50": 84.35200154781342, + "p90": 92.41600334644318, + "p95": 99.42399710416794, + "p99": 117.69600212574005 + }, + "roundtrip": { + "p50": 190.33600389957428, + "p90": 212.0320051908493, + "p95": 228.38400304317474, + "p99": 244.6720004081726 + }, + "isolatedSum": { + "p50": 207.04000443220139, + "p90": 244.1280037164688, + "p95": 263.775996863842, + "p99": 327.87200808525085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 119.9679970741272, + "p90": 239.07199501991272, + "p95": 247.5840002298355, + "p99": 262.33598589897156 + }, + "combine": { + "p50": 85.21600067615509, + "p90": 121.18399888277054, + "p95": 126.24000012874603, + "p99": 134.24000144004822 + }, + "roundtrip": { + "p50": 204.73599433898926, + "p90": 322.4639892578125, + "p95": 330.7200074195862, + "p99": 348.25599193573 + }, + "isolatedSum": { + "p50": 205.1839977502823, + "p90": 360.25599390268326, + "p95": 373.82400035858154, + "p99": 396.5759873390198 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 
115.87200313806534, + "p90": 159.45599973201752, + "p95": 180.28800189495087, + "p99": 233.63199830055237 + }, + "combine": { + "p50": 85.53600311279297, + "p90": 98.94400089979172, + "p95": 113.66400122642517, + "p99": 120.4800009727478 + }, + "roundtrip": { + "p50": 199.2959976196289, + "p90": 246.8159943819046, + "p95": 265.21599292755127, + "p99": 290.71998596191406 + }, + "isolatedSum": { + "p50": 201.4080062508583, + "p90": 258.40000063180923, + "p95": 293.95200312137604, + "p99": 354.11199927330017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 126.46399438381195, + "p90": 161.76000237464905, + "p95": 178.6240041255951, + "p99": 202.01599597930908 + }, + "combine": { + "p50": 91.10400080680847, + "p90": 100.8640006184578, + "p95": 114.33599889278412, + "p99": 123.03999811410904 + }, + "roundtrip": { + "p50": 197.24799692630768, + "p90": 237.47199773788452, + "p95": 248.79999458789825, + "p99": 263.808012008667 + }, + "isolatedSum": { + "p50": 217.56799519062042, + "p90": 262.62400299310684, + "p95": 292.9600030183792, + "p99": 325.0559940934181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.36799561977386, + "p90": 228.7680059671402, + "p95": 238.27199637889862, + "p99": 251.8720030784607 + }, + "combine": { + "p50": 100.41599720716476, + "p90": 108.64000022411346, + "p95": 117.60000139474869, + "p99": 135.77599823474884 + }, + "roundtrip": { + "p50": 204.6400010585785, + "p90": 232.09600150585175, + "p95": 246.94399535655975, + "p99": 
278.4000039100647 + }, + "isolatedSum": { + "p50": 238.78399282693863, + "p90": 337.40800619125366, + "p95": 355.8719977736473, + "p99": 387.64800131320953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 153.43999862670898, + "p90": 166.36799275875092, + "p95": 179.55200374126434, + "p99": 201.02399587631226 + }, + "combine": { + "p50": 119.64800208806992, + "p90": 128.1919926404953, + "p95": 137.43999600410461, + "p99": 156.89599514007568 + }, + "roundtrip": { + "p50": 236.35199666023254, + "p90": 261.05600595474243, + "p95": 277.24799513816833, + "p99": 328.7999927997589 + }, + "isolatedSum": { + "p50": 273.0880007147789, + "p90": 294.5599853992462, + "p95": 316.99199974536896, + "p99": 357.91999101638794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-49d4bd2a", + "identity": "h200|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_cd151ec9", + "comparisonKey": "97181392dbd47343", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:35.613961+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + 
"model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 118.17599833011627, + "p90": 161.72799468040466, + "p95": 172.41600155830383, + "p99": 212.2880071401596 + }, + "combine": { + "p50": 80.76799660921097, + "p90": 94.30400282144547, + "p95": 106.36799782514572, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 185.5040043592453, + "p90": 227.90400683879852, + "p95": 248.99199604988098, + "p99": 291.04000329971313 + }, + "isolatedSum": { + "p50": 198.94399493932724, + "p90": 256.0319975018501, + "p95": 278.78399938344955, + "p99": 328.12801003456116 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + 
"stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.68000167608261, + "p90": 153.47200632095337, + "p95": 162.78399527072906, + "p99": 173.72800409793854 + }, + "combine": { + "p50": 81.11999928951263, + "p90": 100.54399818181992, + "p95": 105.76000064611435, + "p99": 114.9120032787323 + }, + "roundtrip": { + "p50": 185.18400192260742, + "p90": 225.0239998102188, + "p95": 234.65600609779358, + "p99": 352.4160087108612 + }, + "isolatedSum": { + "p50": 196.80000096559525, + "p90": 254.01600450277328, + "p95": 268.5439959168434, + "p99": 288.64000737667084 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 113.88800293207169, + "p90": 159.42400693893433, + "p95": 167.29600727558136, + "p99": 223.32799434661865 + }, + "combine": { + "p50": 82.17599987983704, + "p90": 96.57599776983261, + "p95": 107.45599865913391, + "p99": 114.07999694347382 + }, + "roundtrip": { + "p50": 186.5919977426529, + "p90": 227.00800001621246, + "p95": 234.1119945049286, + "p99": 257.4079930782318 + }, + "isolatedSum": { + "p50": 196.06400281190872, + "p90": 256.00000470876694, + "p95": 274.75200593471527, + "p99": 337.40799129009247 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 118.17599833011627, + "p90": 157.3439985513687, + "p95": 164.8319959640503, + "p99": 179.9039989709854 + }, + "combine": { + "p50": 83.13599973917007, + "p90": 106.20799660682678, + "p95": 110.46399921178818, 
+ "p99": 124.06399846076965 + }, + "roundtrip": { + "p50": 190.20800292491913, + "p90": 231.455996632576, + "p95": 239.51999843120575, + "p99": 262.2399926185608 + }, + "isolatedSum": { + "p50": 201.31199806928635, + "p90": 263.5519951581955, + "p95": 275.29599517583847, + "p99": 303.96799743175507 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 113.63200098276138, + "p90": 154.7199934720993, + "p95": 159.64800119400024, + "p99": 171.10399901866913 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 102.68799960613251, + "p95": 108.35199803113937, + "p99": 115.13599753379822 + }, + "roundtrip": { + "p50": 186.94399297237396, + "p90": 229.8240065574646, + "p95": 237.0239943265915, + "p99": 264.5759880542755 + }, + "isolatedSum": { + "p50": 198.11200350522995, + "p90": 257.4079930782318, + "p95": 267.9999992251396, + "p99": 286.23999655246735 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.83199685811996, + "p90": 162.88000345230103, + "p95": 170.8800047636032, + "p99": 186.65599822998047 + }, + "combine": { + "p50": 91.42400324344635, + "p90": 110.20799726247787, + "p95": 118.23999881744385, + "p99": 126.14400684833527 + }, + "roundtrip": { + "p50": 196.1279958486557, + "p90": 237.12000250816345, + "p95": 247.77600169181824, + "p99": 258.65599513053894 + }, + "isolatedSum": { + "p50": 216.25600010156631, + "p90": 273.0880007147789, + "p95": 289.12000358104706, + "p99": 312.80000507831573 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 140.70400595664978, + "p90": 177.66399681568146, + "p95": 183.6480051279068, + "p99": 194.72000002861023 + }, + "combine": { + "p50": 104.41599786281586, + "p90": 125.88800489902496, + "p95": 132.89600610733032, + "p99": 143.0719941854477 + }, + "roundtrip": { + "p50": 208.8959962129593, + "p90": 254.40001487731934, + "p95": 266.400009393692, + "p99": 288.86398673057556 + }, + "isolatedSum": { + "p50": 245.12000381946564, + "p90": 303.5520017147064, + "p95": 316.5440112352371, + "p99": 337.7919942140579 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.23999726772308, + "p90": 182.0800006389618, + "p95": 192.03199446201324, + "p99": 219.52000260353088 + }, + "combine": { + "p50": 118.97599697113037, + "p90": 140.09599387645721, + "p95": 149.08799529075623, + "p99": 158.4639996290207 + }, + "roundtrip": { + "p50": 239.58399891853333, + "p90": 279.9359858036041, + "p95": 300.1280128955841, + "p99": 338.27200531959534 + }, + "isolatedSum": { + "p50": 273.21599423885345, + "p90": 322.175994515419, + "p95": 341.11998975276947, + "p99": 377.9840022325516 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7125338b", + "identity": 
"h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups|8|decode|normal|none|none|0|tuned||3cd13eac5b27759", + "colorKey": "h200_986b204b", + "comparisonKey": "3c2add86e47cf124", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:14.432081+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · alternating-groups", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 117.0559972524643, + "p90": 132.06399977207184, + "p95": 137.1839940547943, + "p99": 207.07200467586517 + }, + "combine": { + "p50": 82.07999914884567, + "p90": 90.40000289678574, + "p95": 94.24000233411789, + "p99": 105.72800040245056 + }, + "roundtrip": { + "p50": 192.9280012845993, + "p90": 212.3199999332428, + "p95": 220.22399306297302, + "p99": 267.4559950828552 + }, + "isolatedSum": { + "p50": 199.13599640130997, + "p90": 222.46400266885757, + "p95": 231.4239963889122, + "p99": 312.80000507831573 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 126.3359934091568, + "p90": 138.5280042886734, + "p95": 144.06399428844452, + "p99": 151.296004652977 + }, + "combine": { + "p50": 91.96799993515015, + "p90": 99.10400211811066, + "p95": 101.08800232410431, + "p99": 108.44799876213074 + }, + "roundtrip": { + "p50": 203.13599705696106, + "p90": 220.09600698947906, + "p95": 230.6559979915619, + "p99": 287.87198662757874 + }, + "isolatedSum": { + "p50": 218.30399334430695, + "p90": 237.63200640678406, + "p95": 245.15199661254883, + "p99": 259.7440034151077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.27200627326965, + "p90": 174.9120056629181, + "p95": 182.01600015163422, + "p99": 224.86400604248047 + }, + "combine": { + "p50": 126.14400684833527, + "p90": 132.4480026960373, + "p95": 137.9839926958084, + "p99": 
145.1520025730133 + }, + "roundtrip": { + "p50": 248.06399643421173, + "p90": 258.14399123191833, + "p95": 263.96799087524414, + "p99": 322.9759931564331 + }, + "isolatedSum": { + "p50": 288.4160131216049, + "p90": 307.3600083589554, + "p95": 319.9999928474426, + "p99": 370.0160086154938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8f5f35c0", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s1|8|decode|normal|none|none|1|tuned||f8662de0b3559f9", + "colorKey": "h200_3937e815", + "comparisonKey": "bc6c7414cffa2212", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:06:38.512828+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · alternating-groups@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": 
"packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 112.2559979557991, + "p90": 127.13600695133209, + "p95": 130.52800297737122, + "p99": 138.36799561977386 + }, + "combine": { + "p50": 82.68799632787704, + "p90": 88.99199962615967, + "p95": 96.22400254011154, + "p99": 124.22399967908859 + }, + "roundtrip": { + "p50": 184.4480037689209, + "p90": 201.21599733829498, + "p95": 214.20800685882568, + "p99": 229.44000363349915 + }, + "isolatedSum": { + "p50": 194.94399428367615, + "p90": 216.12800657749176, + "p95": 226.75200551748276, + "p99": 262.59199529886246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 120.86399644613266, + "p90": 234.047994017601, + "p95": 246.5279996395111, + "p99": 264.6079957485199 + }, + "combine": { + "p50": 90.97599983215332, + "p90": 97.08800166845322, + "p95": 99.48799759149551, + "p99": 106.55999928712845 + }, + "roundtrip": { + "p50": 193.05600225925446, + "p90": 206.9759964942932, + "p95": 210.4959934949875, + "p99": 223.07200729846954 + }, + "isolatedSum": { + "p50": 211.83999627828598, + "p90": 331.13599568605423, + 
"p95": 346.0159972310066, + "p99": 371.16799503564835 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.8319946527481, + "p90": 166.6560024023056, + "p95": 170.43200135231018, + "p99": 179.1359931230545 + }, + "combine": { + "p50": 126.11199915409088, + "p90": 133.27999413013458, + "p95": 138.40000331401825, + "p99": 145.63199877738953 + }, + "roundtrip": { + "p50": 245.08799612522125, + "p90": 257.0880055427551, + "p95": 262.04800605773926, + "p99": 273.3759880065918 + }, + "isolatedSum": { + "p50": 282.943993806839, + "p90": 299.9359965324402, + "p95": 308.83200466632843, + "p99": 324.76799190044403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c6967392", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s2|8|decode|normal|none|none|2|tuned||3cd13eac5b27759", + "colorKey": "h200_3637e35c", + "comparisonKey": "7f31bcb5bbff8193", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:02.089579+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · alternating-groups@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 
7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "3cd13eac5b27759", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 113.72800171375275, + "p90": 147.8399932384491, + "p95": 156.09599649906158, + "p99": 163.83999586105347 + }, + "combine": { + "p50": 82.5280025601387, + "p90": 91.839998960495, + "p95": 105.34399747848511, + "p99": 116.57600104808807 + }, + "roundtrip": { + "p50": 185.88800728321075, + "p90": 216.73600375652313, + "p95": 226.33600234985352, + "p99": 249.95200335979462 + }, + "isolatedSum": { + "p50": 196.25600427389145, + "p90": 239.6799921989441, + "p95": 261.4399939775467, + "p99": 280.41599690914154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 4, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 118.81600320339203, + "p90": 141.66399836540222, + "p95": 154.1759967803955, + "p99": 168.2880073785782 + }, + "combine": { + "p50": 90.84799885749817, + "p90": 103.13600301742554, + "p95": 114.72000181674957, + "p99": 125.63200294971466 + }, + "roundtrip": { + "p50": 201.24800503253937, + "p90": 241.69600009918213, + "p95": 256.25601410865784, + "p99": 320.73599100112915 + }, + "isolatedSum": { + "p50": 209.6640020608902, + "p90": 244.80000138282776, + "p95": 268.8959985971451, + "p99": 293.92001032829285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.432000041008, + "p90": 210.59200167655945, + "p95": 222.6240038871765, + "p99": 244.76799368858337 + }, + "combine": { + "p50": 127.07200646400452, + "p90": 139.42399621009827, + "p95": 148.5760062932968, + "p99": 162.9440039396286 + }, + "roundtrip": { + "p50": 247.16800451278687, + "p90": 270.04799246788025, + "p95": 279.776006937027, + "p99": 307.2640001773834 + }, + "isolatedSum": { + "p50": 289.5040065050125, + "p90": 350.0159978866577, + "p95": 371.2000101804733, + "p99": 407.711997628212 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-77682718", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|alternating-groups@s3|8|decode|normal|none|none|3|tuned||f8662de0b3559f9", + "colorKey": "h200_3737e4ef", + "comparisonKey": "5dc35f2d3b7418fa", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T11:07:25.561254+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · alternating-groups@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "alternating-groups", + "routingLabel": "alternating-groups@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f8662de0b3559f9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 125.40799379348755, + "p90": 146.27200365066528, + "p95": 157.44000673294067, + "p99": 
245.2159970998764 + }, + "combine": { + "p50": 88.639996945858, + "p90": 115.64800143241882, + "p95": 117.66400188207626, + "p99": 129.02399897575378 + }, + "roundtrip": { + "p50": 193.85600090026855, + "p90": 309.2159926891327, + "p95": 315.61601161956787, + "p99": 325.0240087509155 + }, + "isolatedSum": { + "p50": 214.04799073934555, + "p90": 261.9200050830841, + "p95": 275.10400861501694, + "p99": 374.2399960756302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3297280, + "combineLogicalBytes": 3297280, + "fanoutMean": 3.59375, + "recvTokensMax": 61, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 118.01599711179733, + "p90": 142.46399700641632, + "p95": 234.68799889087677, + "p99": 250.91201066970825 + }, + "combine": { + "p50": 95.10400146245956, + "p90": 125.2799928188324, + "p95": 127.10399925708771, + "p99": 137.15200126171112 + }, + "roundtrip": { + "p50": 206.91199600696564, + "p90": 326.24000310897827, + "p95": 334.6239924430847, + "p99": 342.0479893684387 + }, + "isolatedSum": { + "p50": 213.1199985742569, + "p90": 267.7439898252487, + "p95": 361.7919981479645, + "p99": 388.0640119314194 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13275136, + "combineLogicalBytes": 13275136, + "fanoutMean": 3.6171875, + "recvTokensMax": 236, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.44800662994385, + "p90": 240.22400379180908, + "p95": 247.3600059747696, + "p99": 258.432000875473 + }, + "combine": { + "p50": 125.85599720478058, + "p90": 133.31200182437897, + "p95": 136.57599687576294, + "p99": 143.51999759674072 + }, + "roundtrip": { + "p50": 245.34399807453156, + "p90": 255.71200251579285, + "p95": 261.31200790405273, + "p99": 312.6719892024994 + }, + "isolatedSum": { + "p50": 282.3040038347244, + "p90": 
373.53600561618805, + "p95": 383.93600285053253, + "p99": 401.95199847221375 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 53172224, + "combineLogicalBytes": 53172224, + "fanoutMean": 3.6220703125, + "recvTokensMax": 934, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ef25d276", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "h200_5814bbeb", + "comparisonKey": "ff38db2e67939012", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:16.811539+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 118.14399808645248, + "p90": 141.4719969034195, + "p95": 151.07199549674988, + "p99": 168.7680035829544 + }, + "combine": { + "p50": 84.89599823951721, + "p90": 92.83199906349182, + "p95": 97.50399738550186, + "p99": 109.98400300741196 + }, + "roundtrip": { + "p50": 190.91199338436127, + "p90": 211.8079960346222, + "p95": 222.4320024251938, + "p99": 238.97600173950195 + }, + "isolatedSum": { + "p50": 203.0399963259697, + "p90": 234.30399596691132, + "p95": 248.57599288225174, + "p99": 278.75200659036636 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 116.44800007343292, + "p90": 147.8399932384491, + "p95": 156.38400614261627, + "p99": 167.26399958133698 + }, + "combine": { + "p50": 85.4400023818016, + "p90": 95.90400010347366, + "p95": 108.38399827480316, + "p99": 118.04799735546112 + }, + "roundtrip": { + "p50": 188.1600022315979, + "p90": 219.29599344730377, + "p95": 230.27199506759644, + "p99": 248.3839988708496 + }, + "isolatedSum": { + "p50": 201.88800245523453, + "p90": 243.74399334192276, + "p95": 264.76800441741943, + "p99": 285.3119969367981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 
+ }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.62400108575821, + "p90": 146.81600034236908, + "p95": 157.05600380897522, + "p99": 186.91200017929077 + }, + "combine": { + "p50": 85.9839990735054, + "p90": 98.14400225877762, + "p95": 109.95200276374817, + "p99": 124.86399710178375 + }, + "roundtrip": { + "p50": 193.08799505233765, + "p90": 226.33600234985352, + "p95": 240.9919947385788, + "p99": 274.1760015487671 + }, + "isolatedSum": { + "p50": 200.6080001592636, + "p90": 244.9600026011467, + "p95": 267.0080065727234, + "p99": 311.7759972810745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 121.47200107574463, + "p90": 144.86399292945862, + "p95": 155.74400126934052, + "p99": 176.67199671268463 + }, + "combine": { + "p50": 87.2960016131401, + "p90": 94.01600062847137, + "p95": 101.02400183677673, + "p99": 120.80000340938568 + }, + "roundtrip": { + "p50": 191.83999300003052, + "p90": 210.14399826526642, + "p95": 222.9440063238144, + "p99": 238.97600173950195 + }, + "isolatedSum": { + "p50": 208.76800268888474, + "p90": 238.87999355793, + "p95": 256.76800310611725, + "p99": 297.4720001220703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 119.48800086975098, + "p90": 138.20800185203552, + "p95": 152.8960019350052, + "p99": 169.3439930677414 + }, + "combine": { + "p50": 89.9839997291565, + "p90": 97.37599641084671, + "p95": 105.12000322341919, + "p99": 114.56000059843063 + }, + "roundtrip": { + "p50": 196.4160054922104, + "p90": 
219.39200162887573, + "p95": 233.5679978132248, + "p99": 253.34399938583374 + }, + "isolatedSum": { + "p50": 209.47200059890747, + "p90": 235.58399826288223, + "p95": 258.0160051584244, + "p99": 283.903993666172 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 134.5279961824417, + "p90": 167.55199432373047, + "p95": 180.7360053062439, + "p99": 199.64799284934998 + }, + "combine": { + "p50": 97.15200215578079, + "p90": 112.35199868679047, + "p95": 124.12799894809723, + "p99": 136.03200018405914 + }, + "roundtrip": { + "p50": 202.27199792861938, + "p90": 236.9920015335083, + "p95": 259.2960000038147, + "p99": 290.94401001930237 + }, + "isolatedSum": { + "p50": 231.6799983382225, + "p90": 279.90399301052094, + "p95": 304.8640042543411, + "p99": 335.6799930334091 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 147.10399508476257, + "p90": 171.48800194263458, + "p95": 181.0240000486374, + "p99": 199.77599382400513 + }, + "combine": { + "p50": 109.53599959611893, + "p90": 120.67200243473053, + "p95": 127.68000364303589, + "p99": 139.52000439167023 + }, + "roundtrip": { + "p50": 219.52000260353088, + "p90": 248.1600046157837, + "p95": 260.19200682640076, + "p99": 279.1680097579956 + }, + "isolatedSum": { + "p50": 256.6399946808815, + "p90": 292.1600043773651, + "p95": 308.7040036916733, + "p99": 339.29599821567535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 171.26399278640747, + "p90": 195.8719938993454, + "p95": 208.19200575351715, + "p99": 219.13599967956543 + }, + "combine": { + "p50": 138.49599659442902, + "p90": 158.87999534606934, + "p95": 165.27999937534332, + "p99": 178.3359944820404 + }, + "roundtrip": { + "p50": 269.8880136013031, + "p90": 300.7679879665375, + "p95": 308.0959916114807, + "p99": 334.9759876728058 + }, + "isolatedSum": { + "p50": 309.7599893808365, + "p90": 354.75198924541473, + "p95": 373.4720051288605, + "p99": 397.47199416160583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0a469427", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||d02a66236b524b8", + "colorKey": "h200_5dd7099a", + "comparisonKey": "c0b015d3fbf99206", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:14.240842+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d02a66236b524b8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.45600062608719, + "p90": 162.33600676059723, + "p95": 177.08800733089447, + "p99": 194.46399807929993 + }, + "combine": { + "p50": 75.71200281381607, + "p90": 84.79999750852585, + "p95": 99.42399710416794, + "p99": 112.19199746847153 + }, + "roundtrip": { + "p50": 184.06400084495544, + "p90": 230.6559979915619, + "p95": 241.4720058441162, + "p99": 322.36799597740173 + }, + "isolatedSum": { + "p50": 195.16800343990326, + "p90": 247.13600426912308, + "p95": 276.5120044350624, + "p99": 306.65599554777145 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 113.37599903345108, + "p90": 142.04800128936768, + "p95": 149.75999295711517, + "p99": 
157.151997089386 + }, + "combine": { + "p50": 72.51200079917908, + "p90": 80.92799782752991, + "p95": 90.36800265312195, + "p99": 110.78400164842606 + }, + "roundtrip": { + "p50": 177.40799486637115, + "p90": 207.58399367332458, + "p95": 214.7199958562851, + "p99": 225.50399601459503 + }, + "isolatedSum": { + "p50": 185.88799983263016, + "p90": 222.97599911689758, + "p95": 240.12799561023712, + "p99": 267.93599873781204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 129.82399761676788, + "p90": 249.6960014104843, + "p95": 258.976012468338, + "p99": 277.5999903678894 + }, + "combine": { + "p50": 82.56000280380249, + "p90": 94.14400160312653, + "p95": 106.88000172376633, + "p99": 121.98399752378464 + }, + "roundtrip": { + "p50": 181.2800019979477, + "p90": 212.73599565029144, + "p95": 228.7999987602234, + "p99": 255.5519938468933 + }, + "isolatedSum": { + "p50": 212.38400042057037, + "p90": 343.84000301361084, + "p95": 365.85601419210434, + "p99": 399.58398789167404 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 118.94399672746658, + "p90": 147.74399995803833, + "p95": 155.93600273132324, + "p99": 172.89599776268005 + }, + "combine": { + "p50": 83.8719978928566, + "p90": 106.175996363163, + "p95": 109.95200276374817, + "p99": 117.08799749612808 + }, + "roundtrip": { + "p50": 187.8719925880432, + "p90": 224.83199834823608, + "p95": 241.43999814987183, + "p99": 261.50399446487427 + }, + "isolatedSum": { + "p50": 202.81599462032318, + "p90": 253.91999632120132, + 
"p95": 265.8880054950714, + "p99": 289.98399525880814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e55a8c42", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|decode|normal|none|none|0|tuned||f0e66a15078595b", + "colorKey": "h200_e170f653", + "comparisonKey": "2df7e4b3dbea209a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:03.624003+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f0e66a15078595b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + 
"backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 116.83200299739838, + "p90": 131.26400113105774, + "p95": 136.28800213336945, + "p99": 148.80000054836273 + }, + "combine": { + "p50": 75.48800110816956, + "p90": 82.71999657154083, + "p95": 87.10400015115738, + "p99": 93.31200271844864 + }, + "roundtrip": { + "p50": 176.79999768733978, + "p90": 192.1280026435852, + "p95": 197.31199741363525, + "p99": 219.07199919223785 + }, + "isolatedSum": { + "p50": 192.32000410556793, + "p90": 213.98399770259857, + "p95": 223.39200228452682, + "p99": 242.11200326681137 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 2, + "recvTokensMax": 3, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.46399986743927, + "p90": 129.50399518013, + "p95": 135.3600025177002, + "p99": 151.2639969587326 + }, + "combine": { + "p50": 75.1039981842041, + "p90": 80.03199845552444, + "p95": 85.40800213813782, + "p99": 93.24800223112106 + }, + "roundtrip": { + "p50": 176.7680048942566, + "p90": 192.00000166893005, + "p95": 195.90400159358978, + "p99": 203.96800339221954 + }, + "isolatedSum": { + "p50": 189.56799805164337, + "p90": 209.53599363565445, + "p95": 220.768004655838, + "p99": 244.51199918985367 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 2, + "recvTokensMax": 6, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + 
{ + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.88000303506851, + "p90": 130.8480054140091, + "p95": 139.0720009803772, + "p99": 161.5999937057495 + }, + "combine": { + "p50": 75.83999633789062, + "p90": 81.98399841785431, + "p95": 86.01599931716919, + "p99": 99.2640033364296 + }, + "roundtrip": { + "p50": 181.31199479103088, + "p90": 196.16000354290009, + "p95": 201.50400698184967, + "p99": 219.9999988079071 + }, + "isolatedSum": { + "p50": 190.71999937295914, + "p90": 212.8320038318634, + "p95": 225.0880002975464, + "p99": 260.8639970421791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 2, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 117.98399686813354, + "p90": 136.09600067138672, + "p95": 145.9520012140274, + "p99": 213.76000344753265 + }, + "combine": { + "p50": 76.25599950551987, + "p90": 83.23200047016144, + "p95": 85.9839990735054, + "p99": 94.55999732017517 + }, + "roundtrip": { + "p50": 181.15200102329254, + "p90": 202.72000133991241, + "p95": 211.93599700927734, + "p99": 240.38399755954742 + }, + "isolatedSum": { + "p50": 194.2399963736534, + "p90": 219.32800114154816, + "p95": 231.9360002875328, + "p99": 308.3200007677078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 2, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 115.61600118875504, + "p90": 129.34400141239166, + "p95": 133.88800621032715, + "p99": 142.97600090503693 + }, + "combine": { + "p50": 77.44000107049942, + "p90": 83.29600095748901, + "p95": 85.85599809885025, + "p99": 94.14400160312653 + }, + "roundtrip": { + "p50": 190.2720034122467, + "p90": 
211.32799983024597, + "p95": 223.1999933719635, + "p99": 288.2240116596222 + }, + "isolatedSum": { + "p50": 193.05600225925446, + "p90": 212.64000236988068, + "p95": 219.7440043091774, + "p99": 237.12000250816345 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 2, + "recvTokensMax": 48, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 115.10399729013443, + "p90": 128.9920061826706, + "p95": 133.27999413013458, + "p99": 141.2159949541092 + }, + "combine": { + "p50": 78.11199873685837, + "p90": 83.99999886751175, + "p95": 86.43200248479843, + "p99": 92.67199784517288 + }, + "roundtrip": { + "p50": 184.25600230693817, + "p90": 198.4959989786148, + "p95": 204.99199628829956, + "p99": 255.87201118469238 + }, + "isolatedSum": { + "p50": 193.2159960269928, + "p90": 212.99200505018234, + "p95": 219.711996614933, + "p99": 233.88799279928207 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 2, + "recvTokensMax": 96, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 125.31200051307678, + "p90": 144.51199769973755, + "p95": 154.6880006790161, + "p99": 191.80800020694733 + }, + "combine": { + "p50": 85.4720026254654, + "p90": 94.4959968328476, + "p95": 99.45599734783173, + "p99": 113.15199732780457 + }, + "roundtrip": { + "p50": 190.36799669265747, + "p90": 211.8079960346222, + "p95": 221.18400037288666, + "p99": 244.63999271392822 + }, + "isolatedSum": { + "p50": 210.78400313854218, + "p90": 239.00799453258514, + "p95": 254.14399802684784, + "p99": 304.9599975347519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 2, + "recvTokensMax": 192, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 136.76799833774567, + "p90": 147.23199605941772, + "p95": 154.36799824237823, + "p99": 164.86400365829468 + }, + "combine": { + "p50": 98.33600372076035, + "p90": 104.2879968881607, + "p95": 106.97600245475769, + "p99": 119.10399794578552 + }, + "roundtrip": { + "p50": 203.74399423599243, + "p90": 219.67999637126923, + "p95": 226.8799990415573, + "p99": 247.3600059747696 + }, + "isolatedSum": { + "p50": 235.104002058506, + "p90": 251.51999294757843, + "p95": 261.3440006971359, + "p99": 283.9680016040802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-63ac211e", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving|8|decode|normal|none|none|0|tuned||90042e0db6a8297", + "colorKey": "h200_b85b5933", + "comparisonKey": "4669063dcaf7aacd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:40.044884+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · hotspot-moving", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + 
"activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "90042e0db6a8297", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 111.93600296974182, + "p90": 150.43200552463531, + "p95": 159.42400693893433, + "p99": 177.279993891716 + }, + "combine": { + "p50": 83.61600339412689, + "p90": 100.12800246477127, + "p95": 109.02400314807892, + "p99": 115.7120019197464 + }, + "roundtrip": { + "p50": 181.92000687122345, + "p90": 219.84000504016876, + "p95": 226.9439995288849, + "p99": 263.2319927215576 + }, + "isolatedSum": { + "p50": 195.5520063638687, + "p90": 250.56000798940659, + "p95": 268.44801008701324, + "p99": 292.9919958114624 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.27199971675873, + "p90": 158.81599485874176, + "p95": 195.10400295257568, + "p99": 241.7600005865097 + 
}, + "combine": { + "p50": 92.44800359010696, + "p90": 105.69600015878677, + "p95": 116.03199690580368, + "p99": 124.41600114107132 + }, + "roundtrip": { + "p50": 196.76800072193146, + "p90": 307.0720136165619, + "p95": 317.6319897174835, + "p99": 348.9919900894165 + }, + "isolatedSum": { + "p50": 214.7200033068657, + "p90": 264.51199501752853, + "p95": 311.13599985837936, + "p99": 366.176001727581 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.72799468040466, + "p90": 176.96000635623932, + "p95": 183.55199694633484, + "p99": 194.91200149059296 + }, + "combine": { + "p50": 128.63999605178833, + "p90": 140.06400108337402, + "p95": 147.67999947071075, + "p99": 156.47999942302704 + }, + "roundtrip": { + "p50": 254.68799471855164, + "p90": 276.8320143222809, + "p95": 285.43999791145325, + "p99": 318.39999556541443 + }, + "isolatedSum": { + "p50": 290.367990732193, + "p90": 317.02400743961334, + "p95": 331.2319964170456, + "p99": 351.39200091362 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-be4fb9e2", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s1|8|decode|normal|none|none|1|tuned||6288a1aa76c20e7", + "colorKey": "h200_11b8f32d", + "comparisonKey": "ae2ef6de2c73847c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:04.537151+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": 
"tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · hotspot-moving@s1", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s1", + "routingStep": 1, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "6288a1aa76c20e7", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 130.91200590133667, + "p90": 247.55200743675232, + "p95": 254.40001487731934, + "p99": 265.4080092906952 + }, + "combine": { + "p50": 84.44800227880478, + "p90": 125.791996717453, + "p95": 132.51200318336487, + "p99": 143.23200285434723 + }, + "roundtrip": { + "p50": 200.95999538898468, + "p90": 332.99198746681213, + "p95": 338.4000062942505, + "p99": 
357.02401399612427 + }, + "isolatedSum": { + "p50": 215.36000818014145, + "p90": 373.3440041542053, + "p95": 386.9120180606842, + "p99": 408.6400121450424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 125.69600343704224, + "p90": 234.047994017601, + "p95": 247.55200743675232, + "p99": 264.70398902893066 + }, + "combine": { + "p50": 92.38400310277939, + "p90": 101.75999999046326, + "p95": 111.96800321340561, + "p99": 123.32800030708313 + }, + "roundtrip": { + "p50": 195.26399672031403, + "p90": 234.592005610466, + "p95": 248.9279955625534, + "p99": 280.92798590660095 + }, + "isolatedSum": { + "p50": 218.08000653982162, + "p90": 335.80799400806427, + "p95": 359.52001065015793, + "p99": 388.0319893360138 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 160.99199652671814, + "p90": 180.1919937133789, + "p95": 190.97599387168884, + "p99": 211.8079960346222 + }, + "combine": { + "p50": 130.49599528312683, + "p90": 138.94400000572205, + "p95": 150.4960060119629, + "p99": 163.71199488639832 + }, + "roundtrip": { + "p50": 257.63198733329773, + "p90": 288.60801458358765, + "p95": 301.1839985847473, + "p99": 325.0240087509155 + }, + "isolatedSum": { + "p50": 291.48799180984497, + "p90": 319.13599371910095, + "p95": 341.47199988365173, + "p99": 375.5199909210205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": 
true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-140ddd7f", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s2|8|decode|normal|none|none|2|tuned||675e15b52e37958", + "colorKey": "h200_0eb8ee74", + "comparisonKey": "9b3d6b4d44be0c3d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:28.004710+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · hotspot-moving@s2", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s2", + "routingStep": 2, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "675e15b52e37958", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 113.6000007390976, + "p90": 142.752006649971, + "p95": 156.8319946527481, + "p99": 193.7599927186966 + }, + "combine": { + "p50": 83.74399691820145, + "p90": 91.39200299978256, + "p95": 102.08000242710114, + "p99": 123.9359974861145 + }, + "roundtrip": { + "p50": 185.66399812698364, + "p90": 217.69599616527557, + "p95": 243.77599358558655, + "p99": 298.68799448013306 + }, + "isolatedSum": { + "p50": 197.34399765729904, + "p90": 234.14400964975357, + "p95": 258.91199707984924, + "p99": 317.6959902048111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 126.88000500202179, + "p90": 239.1040027141571, + "p95": 247.48800694942474, + "p99": 265.50400257110596 + }, + "combine": { + "p50": 92.16000139713287, + "p90": 97.4079966545105, + "p95": 104.60799932479858, + "p99": 116.7680025100708 + }, + "roundtrip": { + "p50": 192.80000030994415, + "p90": 212.6079946756363, + "p95": 224.2240011692047, + "p99": 243.68000030517578 + }, + "isolatedSum": { + "p50": 219.04000639915466, + "p90": 336.5119993686676, + "p95": 352.0960062742233, + "p99": 382.27200508117676 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.93600404262543, + "p90": 175.84000527858734, + "p95": 181.47200345993042, + "p99": 
190.72000682353973 + }, + "combine": { + "p50": 129.34400141239166, + "p90": 136.9280070066452, + "p95": 143.42400431632996, + "p99": 160.38399934768677 + }, + "roundtrip": { + "p50": 259.00799036026, + "p90": 274.7519910335541, + "p95": 280.16000986099243, + "p99": 294.40000653266907 + }, + "isolatedSum": { + "p50": 293.2800054550171, + "p90": 312.76801228523254, + "p95": 324.8960077762604, + "p99": 351.1040061712265 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-320f4cc2", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-moving@s3|8|decode|normal|none|none|3|tuned||82b2963fc322419", + "colorKey": "h200_0fb8f007", + "comparisonKey": "a6244b6f1d6e7a9b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:05:51.407681+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · hotspot-moving@s3", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-moving", + "routingLabel": "hotspot-moving@s3", + "routingStep": 3, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "82b2963fc322419", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 110.84800213575363, + "p90": 126.24000012874603, + "p95": 131.96800649166107, + "p99": 141.82400703430176 + }, + "combine": { + "p50": 83.39200168848038, + "p90": 88.95999938249588, + "p95": 92.99200028181076, + "p99": 101.1200025677681 + }, + "roundtrip": { + "p50": 182.3039948940277, + "p90": 198.17599654197693, + "p95": 203.13599705696106, + "p99": 219.9680060148239 + }, + "isolatedSum": { + "p50": 194.240003824234, + "p90": 215.1999995112419, + "p95": 224.96000677347183, + "p99": 242.94400960206985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 120.28799951076508, + "p90": 135.13599336147308, + "p95": 141.02399349212646, + "p99": 157.50400722026825 + }, + "combine": { + "p50": 92.6079973578453, + "p90": 100.19200295209885, + "p95": 104.47999835014343, + "p99": 118.59200149774551 + }, + "roundtrip": { + "p50": 196.3520050048828, + "p90": 217.18400716781616, + "p95": 
224.86400604248047, + "p99": 287.7439856529236 + }, + "isolatedSum": { + "p50": 212.89599686861038, + "p90": 235.32799631357193, + "p95": 245.5039918422699, + "p99": 276.09600871801376 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 162.49600052833557, + "p90": 171.9679981470108, + "p95": 177.3120015859604, + "p99": 197.79199361801147 + }, + "combine": { + "p50": 128.83199751377106, + "p90": 136.35200262069702, + "p95": 141.7279988527298, + "p99": 156.5759927034378 + }, + "roundtrip": { + "p50": 254.11200523376465, + "p90": 265.1199996471405, + "p95": 269.4399952888489, + "p99": 291.9999957084656 + }, + "isolatedSum": { + "p50": 291.3279980421066, + "p90": 308.3200007677078, + "p95": 319.0400004386902, + "p99": 354.3679863214493 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8295372d", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "h200_06b1dacd", + "comparisonKey": "f0f99aa545aaee59", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:05.565931+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, 
+ "label": "H200 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 112.83200234174728, + "p90": 147.61599898338318, + "p95": 157.21599757671356, + "p99": 173.0560064315796 + }, + "combine": { + "p50": 78.91199737787247, + "p90": 89.31200206279755, + "p95": 100.38399696350098, + "p99": 111.68000102043152 + }, + "roundtrip": { + "p50": 183.6480051279068, + "p90": 219.10400688648224, + "p95": 228.67199778556824, + "p99": 243.42399835586548 + }, + "isolatedSum": { + "p50": 191.74399971961975, + "p90": 236.92800104618073, + "p95": 257.59999454021454, + "p99": 284.7360074520111 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + 
"combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.9120032787323, + "p90": 151.71200037002563, + "p95": 159.36000645160675, + "p99": 173.24799299240112 + }, + "combine": { + "p50": 81.91999793052673, + "p90": 96.67199850082397, + "p95": 105.40799796581268, + "p99": 116.67200177907944 + }, + "roundtrip": { + "p50": 183.20000171661377, + "p90": 219.93599832057953, + "p95": 229.5680046081543, + "p99": 249.439999461174 + }, + "isolatedSum": { + "p50": 196.83200120925903, + "p90": 248.3839988708496, + "p95": 264.76800441741943, + "p99": 289.91999477148056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 112.09599673748016, + "p90": 148.15999567508698, + "p95": 157.1200042963028, + "p99": 192.671999335289 + }, + "combine": { + "p50": 83.00799876451492, + "p90": 92.79999881982803, + "p95": 105.43999820947647, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 183.84000658988953, + "p90": 219.9680060148239, + "p95": 229.72799837589264, + "p99": 261.4400088787079 + }, + "isolatedSum": { + "p50": 195.1039955019951, + "p90": 240.959994494915, + "p95": 262.56000250577927, + "p99": 308.51200222969055 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 111.64800077676773, + "p90": 148.25600385665894, + "p95": 153.6960005760193, + "p99": 170.9119975566864 + }, + "combine": { + "p50": 
83.61600339412689, + "p90": 97.43999689817429, + "p95": 107.77600109577179, + "p99": 117.34399944543839 + }, + "roundtrip": { + "p50": 185.37600338459015, + "p90": 218.9120054244995, + "p95": 230.24000227451324, + "p99": 246.49600684642792 + }, + "isolatedSum": { + "p50": 195.26400417089462, + "p90": 245.69600075483322, + "p95": 261.4720016717911, + "p99": 288.2559970021248 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 113.98400366306305, + "p90": 154.08000349998474, + "p95": 160.38399934768677, + "p99": 185.95199286937714 + }, + "combine": { + "p50": 84.19200032949448, + "p90": 99.74399954080582, + "p95": 108.47999900579453, + "p99": 114.33599889278412 + }, + "roundtrip": { + "p50": 189.88800048828125, + "p90": 234.0800017118454, + "p95": 244.28799748420715, + "p99": 279.58399057388306 + }, + "isolatedSum": { + "p50": 198.17600399255753, + "p90": 253.82400304079056, + "p95": 268.8639983534813, + "p99": 300.28799176216125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 125.47199428081512, + "p90": 156.25600516796112, + "p95": 161.53599321842194, + "p99": 176.4799952507019 + }, + "combine": { + "p50": 93.12000125646591, + "p90": 110.01600325107574, + "p95": 117.0559972524643, + "p99": 128.12800705432892 + }, + "roundtrip": { + "p50": 196.22400403022766, + "p90": 235.61599850654602, + "p95": 247.00799584388733, + "p99": 260.3839933872223 + }, + "isolatedSum": { + "p50": 218.59199553728104, + "p90": 266.27200841903687, + "p95": 278.59199047088623, + 
"p99": 304.6080023050308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 144.51199769973755, + "p90": 169.3120002746582, + "p95": 180.1919937133789, + "p99": 198.71999323368073 + }, + "combine": { + "p50": 103.5199984908104, + "p90": 124.4800016283989, + "p95": 132.7359974384308, + "p99": 142.87999272346497 + }, + "roundtrip": { + "p50": 213.24799954891205, + "p90": 260.47998666763306, + "p95": 273.0239927768707, + "p99": 291.9360101222992 + }, + "isolatedSum": { + "p50": 248.03199619054794, + "p90": 293.7920019030571, + "p95": 312.9279911518097, + "p99": 341.5999859571457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 165.0560051202774, + "p90": 185.31200289726257, + "p95": 191.29599630832672, + "p99": 207.20000565052032 + }, + "combine": { + "p50": 129.43999469280243, + "p90": 147.71200716495514, + "p95": 154.7520011663437, + "p99": 165.24800658226013 + }, + "roundtrip": { + "p50": 262.7840042114258, + "p90": 290.97598791122437, + "p95": 303.74398827552795, + "p99": 325.6320059299469 + }, + "isolatedSum": { + "p50": 294.49599981307983, + "p90": 333.0240100622177, + "p95": 346.0479974746704, + "p99": 372.44801223278046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a887b96e", + "identity": 
"h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|decode|normal|none|none|0|tuned||e41f5099a9733ac", + "colorKey": "h200_0d9e9091", + "comparisonKey": "8358e3376b741cc0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:03:43.876761+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "e41f5099a9733ac", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.830078125, + "eplbImbalanceAfter": 1.0007595486111112, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": 
"2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 116.70400202274323, + "p90": 153.98399531841278, + "p95": 160.64000129699707, + "p99": 167.7439957857132 + }, + "combine": { + "p50": 80.89599758386612, + "p90": 95.83999961614609, + "p95": 107.10400342941284, + "p99": 117.21599847078323 + }, + "roundtrip": { + "p50": 186.0799938440323, + "p90": 219.42399442195892, + "p95": 228.99200022220612, + "p99": 254.2400062084198 + }, + "isolatedSum": { + "p50": 197.59999960660934, + "p90": 249.82399493455887, + "p95": 267.7440047264099, + "p99": 284.95999425649643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.66400188207626, + "p90": 156.51200711727142, + "p95": 160.96000373363495, + "p99": 171.7119961977005 + }, + "combine": { + "p50": 81.28000050783157, + "p90": 99.61599856615067, + "p95": 105.85600137710571, + "p99": 111.39199882745743 + }, + "roundtrip": { + "p50": 185.34399569034576, + "p90": 221.343994140625, + "p95": 232.2559952735901, + "p99": 242.17599630355835 + }, + "isolatedSum": { + "p50": 198.94400238990784, + "p90": 256.1280056834221, + "p95": 266.81600511074066, + "p99": 283.10399502515793 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.56000059843063, + "p90": 153.6960005760193, + "p95": 157.60000050067902, + "p99": 166.6560024023056 + }, + "combine": { + "p50": 83.93599838018417, + "p90": 101.50399804115295, + "p95": 
108.92800241708755, + "p99": 115.07199704647064 + }, + "roundtrip": { + "p50": 185.44000387191772, + "p90": 222.88000583648682, + "p95": 232.2240024805069, + "p99": 287.00798749923706 + }, + "isolatedSum": { + "p50": 198.4959989786148, + "p90": 255.19999861717224, + "p95": 266.52800291776657, + "p99": 281.72799944877625 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 23, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 127.29600071907043, + "p90": 172.92800545692444, + "p95": 186.68800592422485, + "p99": 209.02399718761444 + }, + "combine": { + "p50": 86.01599931716919, + "p90": 102.20800340175629, + "p95": 111.07199639081955, + "p99": 118.65600198507309 + }, + "roundtrip": { + "p50": 194.62400674819946, + "p90": 237.37600445747375, + "p95": 250.4960000514984, + "p99": 269.6959972381592 + }, + "isolatedSum": { + "p50": 213.31200003623962, + "p90": 275.1360088586807, + "p95": 297.7600023150444, + "p99": 327.67999917268753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4730880, + "combineLogicalBytes": 4730880, + "fanoutMean": 5.15625, + "recvTokensMax": 44, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 121.69600278139114, + "p90": 159.743994474411, + "p95": 169.8240041732788, + "p99": 186.5600049495697 + }, + "combine": { + "p50": 85.95199882984161, + "p90": 103.64799946546555, + "p95": 111.42399907112122, + "p99": 123.1359988451004 + }, + "roundtrip": { + "p50": 192.25600361824036, + "p90": 230.3680032491684, + "p95": 236.7040067911148, + "p99": 269.6000039577484 + }, + "isolatedSum": { + "p50": 207.64800161123276, + "p90": 263.39199393987656, + "p95": 281.2480032444, + "p99": 309.6960037946701 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 9691136, + "combineLogicalBytes": 9691136, + "fanoutMean": 5.28125, + "recvTokensMax": 88, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 126.65599584579468, + "p90": 157.1200042963028, + "p95": 164.8000031709671, + "p99": 180.16000092029572 + }, + "combine": { + "p50": 92.47999638319016, + "p90": 105.56799918413162, + "p95": 115.87200313806534, + "p99": 123.32800030708313 + }, + "roundtrip": { + "p50": 196.06399536132812, + "p90": 235.80799996852875, + "p95": 243.52000653743744, + "p99": 257.79199600219727 + }, + "isolatedSum": { + "p50": 219.13599222898483, + "p90": 262.6880034804344, + "p95": 280.67200630903244, + "p99": 303.48800122737885 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19568640, + "combineLogicalBytes": 19568640, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 137.7599984407425, + "p90": 168.03200542926788, + "p95": 175.7120043039322, + "p99": 189.5039975643158 + }, + "combine": { + "p50": 102.33599692583084, + "p90": 119.61600184440613, + "p95": 127.93600559234619, + "p99": 137.66400516033173 + }, + "roundtrip": { + "p50": 206.81600272655487, + "p90": 248.03200364112854, + "p95": 254.97600436210632, + "p99": 267.7119970321655 + }, + "isolatedSum": { + "p50": 240.09599536657333, + "p90": 287.648007273674, + "p95": 303.6480098962784, + "p99": 327.1680027246475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38750208, + "combineLogicalBytes": 38750208, + "fanoutMean": 5.279296875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.7520011663437, + "p90": 178.0800074338913, + "p95": 
187.3600035905838, + "p99": 203.13599705696106 + }, + "combine": { + "p50": 119.71200257539749, + "p90": 133.85599851608276, + "p95": 143.16800236701965, + "p99": 150.91200172901154 + }, + "roundtrip": { + "p50": 237.08799481391907, + "p90": 262.36799359321594, + "p95": 270.6559896469116, + "p99": 290.52799940109253 + }, + "isolatedSum": { + "p50": 274.4640037417412, + "p90": 311.93600594997406, + "p95": 330.52800595760345, + "p99": 354.0479987859726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77342720, + "combineLogicalBytes": 77342720, + "fanoutMean": 5.2685546875, + "recvTokensMax": 687, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7e041aaf", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform·linear|8|decode|normal|none|linear|0|tuned||b029c1a6fded400", + "colorKey": "h200_46d1dd0b", + "comparisonKey": "37adc058c75d56ce", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:07:49.064588+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · uniform·linear", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform·linear", + "routingStep": 0, + "unevenTokens": "linear", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": 
"backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b029c1a6fded400", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 113.79200220108032, + "p90": 151.87199413776398, + "p95": 158.81599485874176, + "p99": 177.85599827766418 + }, + "combine": { + "p50": 83.13599973917007, + "p90": 97.21600264310837, + "p95": 108.47999900579453, + "p99": 117.0559972524643 + }, + "roundtrip": { + "p50": 187.19999492168427, + "p90": 227.64800488948822, + "p95": 240.6720072031021, + "p99": 282.6560139656067 + }, + "isolatedSum": { + "p50": 196.9280019402504, + "p90": 249.08799678087234, + "p95": 267.2959938645363, + "p99": 294.9119955301285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.64000284671783, + "p90": 162.62400150299072, + "p95": 169.8240041732788, + "p99": 192.3840045928955 + }, + "combine": { + "p50": 91.45600348711014, + "p90": 106.81600123643875, + "p95": 115.4559999704361, + "p99": 122.079998254776 + }, + "roundtrip": { + "p50": 191.0720020532608, + "p90": 230.81600666046143, + 
"p95": 235.9039932489395, + "p99": 244.86400187015533 + }, + "isolatedSum": { + "p50": 216.09600633382797, + "p90": 269.4400027394295, + "p95": 285.2800041437149, + "p99": 314.4640028476715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 160.8320027589798, + "p90": 184.51200425624847, + "p95": 196.3520050048828, + "p99": 212.6079946756363 + }, + "combine": { + "p50": 131.23199343681335, + "p90": 144.44799721240997, + "p95": 148.8640010356903, + "p99": 158.87999534606934 + }, + "roundtrip": { + "p50": 245.31200528144836, + "p90": 271.93599939346313, + "p95": 279.10399436950684, + "p99": 300.3840148448944 + }, + "isolatedSum": { + "p50": 292.06399619579315, + "p90": 328.96000146865845, + "p95": 345.2160060405731, + "p99": 371.4879900217056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8ca97c4b", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|decode|normal|none|none|0|tuned||73351bbcd4d02de", + "colorKey": "h200_ff073305", + "comparisonKey": "847ae6bbd754a4ef", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:04.130889+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 
8, + "label": "H200 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "73351bbcd4d02de", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.078125, + "eplbImbalanceAfter": 1.00048828125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 108.67200046777725, + "p90": 123.10399860143661, + "p95": 128.00000607967377, + "p99": 139.0720009803772 + }, + "combine": { + "p50": 76.92799717187881, + "p90": 82.75199681520462, + "p95": 85.21600067615509, + "p99": 92.3520028591156 + }, + "roundtrip": { + "p50": 180.41600286960602, + "p90": 195.6160068511963, + "p95": 201.12000405788422, + "p99": 208.8959962129593 + }, + "isolatedSum": { + "p50": 185.59999763965607, + "p90": 205.85599541664124, + "p95": 213.21600675582886, + "p99": 231.4240038394928 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
516096, + "combineLogicalBytes": 516096, + "fanoutMean": 4.5, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 112.22399771213531, + "p90": 127.36000120639801, + "p95": 131.48799538612366, + "p99": 144.54400539398193 + }, + "combine": { + "p50": 79.03999835252762, + "p90": 84.79999750852585, + "p95": 88.54400366544724, + "p99": 97.31200337409973 + }, + "roundtrip": { + "p50": 178.3680021762848, + "p90": 191.96799397468567, + "p95": 195.8719938993454, + "p99": 208.80000293254852 + }, + "isolatedSum": { + "p50": 191.26399606466293, + "p90": 212.15999871492386, + "p95": 220.0319990515709, + "p99": 241.85600876808167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1089536, + "combineLogicalBytes": 1089536, + "fanoutMean": 4.75, + "recvTokensMax": 11, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 110.23999750614166, + "p90": 127.58399546146393, + "p95": 139.0399932861328, + "p99": 203.77600193023682 + }, + "combine": { + "p50": 81.79199695587158, + "p90": 86.30400151014328, + "p95": 90.27200192213058, + "p99": 98.27200323343277 + }, + "roundtrip": { + "p50": 186.49600446224213, + "p90": 207.8399956226349, + "p95": 213.47199380397797, + "p99": 233.5360050201416 + }, + "isolatedSum": { + "p50": 192.03199446201324, + "p90": 213.8879969716072, + "p95": 229.3119952082634, + "p99": 302.0480051636696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2207744, + "combineLogicalBytes": 2207744, + "fanoutMean": 4.8125, + "recvTokensMax": 23, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 110.59200018644333, + "p90": 126.88000500202179, + "p95": 138.65600526332855, + "p99": 175.3920018672943 + }, + "combine": { + 
"p50": 83.00799876451492, + "p90": 88.22400122880936, + "p95": 91.90399944782257, + "p99": 100.5759984254837 + }, + "roundtrip": { + "p50": 183.52000415325165, + "p90": 199.93600249290466, + "p95": 208.03199708461761, + "p99": 253.12000513076782 + }, + "isolatedSum": { + "p50": 193.59999895095825, + "p90": 215.10400623083115, + "p95": 230.56000471115112, + "p99": 275.968000292778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4558848, + "combineLogicalBytes": 4558848, + "fanoutMean": 4.96875, + "recvTokensMax": 46, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 109.43999886512756, + "p90": 123.80799651145935, + "p95": 127.51999497413635, + "p99": 142.33599603176117 + }, + "combine": { + "p50": 83.8719978928566, + "p90": 88.44800293445587, + "p95": 92.6079973578453, + "p99": 102.36799716949463 + }, + "roundtrip": { + "p50": 184.35199558734894, + "p90": 199.8720020055771, + "p95": 210.04800498485565, + "p99": 249.5039999485016 + }, + "isolatedSum": { + "p50": 193.31199675798416, + "p90": 212.25599944591522, + "p95": 220.12799233198166, + "p99": 244.7039932012558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9347072, + "combineLogicalBytes": 9347072, + "fanoutMean": 5.09375, + "recvTokensMax": 86, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 119.10399794578552, + "p90": 131.3599944114685, + "p95": 134.783998131752, + "p99": 140.8960074186325 + }, + "combine": { + "p50": 90.55999666452408, + "p90": 94.97600048780441, + "p95": 98.84800016880035, + "p99": 106.81600123643875 + }, + "roundtrip": { + "p50": 192.7040070295334, + "p90": 209.47200059890747, + "p95": 218.01599860191345, + "p99": 271.67999744415283 + }, + "isolatedSum": { + "p50": 209.6639946103096, + "p90": 226.33599489927292, + "p95": 233.63199830055237, + 
"p99": 247.71200865507126 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 18995200, + "combineLogicalBytes": 18995200, + "fanoutMean": 5.17578125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 133.215993642807, + "p90": 143.68000626564026, + "p95": 149.3760049343109, + "p99": 162.1440052986145 + }, + "combine": { + "p50": 100.16000270843506, + "p90": 105.50399869680405, + "p95": 108.47999900579453, + "p99": 117.24799871444702 + }, + "roundtrip": { + "p50": 202.33599841594696, + "p90": 219.7439968585968, + "p95": 228.96000742912292, + "p99": 271.232008934021 + }, + "isolatedSum": { + "p50": 233.37599635124207, + "p90": 249.1840049624443, + "p95": 257.85600394010544, + "p99": 279.3920040130615 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38291456, + "combineLogicalBytes": 38291456, + "fanoutMean": 5.216796875, + "recvTokensMax": 348, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 152.22400426864624, + "p90": 161.53599321842194, + "p95": 164.76799547672272, + "p99": 217.72800385951996 + }, + "combine": { + "p50": 117.47200042009354, + "p90": 123.9359974861145, + "p95": 126.65599584579468, + "p99": 135.3279948234558 + }, + "roundtrip": { + "p50": 232.70399868488312, + "p90": 241.7600005865097, + "p95": 246.07999622821808, + "p99": 279.87200021743774 + }, + "isolatedSum": { + "p50": 269.6960046887398, + "p90": 285.47199070453644, + "p95": 291.4239913225174, + "p99": 353.05599868297577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77113344, + "combineLogicalBytes": 77113344, + "fanoutMean": 5.2529296875, + "recvTokensMax": 685, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b35d9c39", + "identity": 
"h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_d0051910", + "comparisonKey": "594ae1b009f30387", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:07.977536+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ 
+ { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 115.29599875211716, + "p90": 131.9040060043335, + "p95": 137.472003698349, + "p99": 190.8479928970337 + }, + "combine": { + "p50": 79.9039974808693, + "p90": 85.31200140714645, + "p95": 88.70399743318558, + "p99": 100.38399696350098 + }, + "roundtrip": { + "p50": 183.67999792099, + "p90": 195.93599438667297, + "p95": 200.6399929523468, + "p99": 209.05600488185883 + }, + "isolatedSum": { + "p50": 195.19999623298645, + "p90": 217.21600741147995, + "p95": 226.17600113153458, + "p99": 291.23198986053467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 119.80800330638885, + "p90": 136.7039978504181, + "p95": 144.03200149536133, + "p99": 162.9440039396286 + }, + "combine": { + "p50": 83.00799876451492, + "p90": 89.56799656152725, + "p95": 92.54399687051773, + "p99": 104.47999835014343 + }, + "roundtrip": { + "p50": 191.48799777030945, + "p90": 208.95999670028687, + "p95": 218.59200298786163, + "p99": 235.26400327682495 + }, + "isolatedSum": { + "p50": 202.81600207090378, + "p90": 226.27199441194534, + "p95": 236.57599836587906, + "p99": 267.42400228977203 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 113.34399878978729, + "p90": 128.1919926404953, + "p95": 134.62400436401367, + "p99": 189.11999464035034 + }, + "combine": { + "p50": 80.09599894285202, + "p90": 86.43200248479843, + "p95": 93.66399794816971, + "p99": 105.95200210809708 + }, + "roundtrip": { + "p50": 181.69599771499634, + "p90": 
193.63200664520264, + "p95": 200.6399929523468, + "p99": 234.75199937820435 + }, + "isolatedSum": { + "p50": 193.4399977326393, + "p90": 214.62399512529373, + "p95": 228.28800231218338, + "p99": 295.0719967484474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 116.15999788045883, + "p90": 135.26399433612823, + "p95": 146.40000462532043, + "p99": 204.0639966726303 + }, + "combine": { + "p50": 81.08799904584885, + "p90": 87.90399879217148, + "p95": 91.23200178146362, + "p99": 99.80800002813339 + }, + "roundtrip": { + "p50": 183.03999304771423, + "p90": 197.05599546432495, + "p95": 201.08799636363983, + "p99": 209.82399582862854 + }, + "isolatedSum": { + "p50": 197.24799692630768, + "p90": 223.1679931282997, + "p95": 237.63200640678406, + "p99": 303.8719967007637 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 113.15199732780457, + "p90": 127.36000120639801, + "p95": 133.05599987506866, + "p99": 146.94400131702423 + }, + "combine": { + "p50": 84.83199775218964, + "p90": 89.66399729251862, + "p95": 92.25600212812424, + "p99": 102.08000242710114 + }, + "roundtrip": { + "p50": 184.60799753665924, + "p90": 197.4399983882904, + "p95": 202.72000133991241, + "p99": 221.27999365329742 + }, + "isolatedSum": { + "p50": 197.9839950799942, + "p90": 217.02399849891663, + "p95": 225.3120020031929, + "p99": 249.02400374412537 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 
127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 124.35200065374374, + "p90": 134.65599715709686, + "p95": 138.2720023393631, + "p99": 151.8079936504364 + }, + "combine": { + "p50": 90.17600119113922, + "p90": 97.31200337409973, + "p95": 100.38399696350098, + "p99": 113.27999830245972 + }, + "roundtrip": { + "p50": 192.7040070295334, + "p90": 208.76799523830414, + "p95": 212.51200139522552, + "p99": 227.39200294017792 + }, + "isolatedSum": { + "p50": 214.52800184488297, + "p90": 231.9680005311966, + "p95": 238.65599930286407, + "p99": 265.0879919528961 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.2399946451187, + "p90": 149.02399480342865, + "p95": 153.60000729560852, + "p99": 186.75200641155243 + }, + "combine": { + "p50": 100.70399940013885, + "p90": 107.55199939012527, + "p95": 112.96000331640244, + "p99": 127.16799974441528 + }, + "roundtrip": { + "p50": 202.97600328922272, + "p90": 218.23999285697937, + "p95": 222.75200486183167, + "p99": 274.3679881095886 + }, + "isolatedSum": { + "p50": 238.94399404525757, + "p90": 256.5759941935539, + "p95": 266.56001061201096, + "p99": 313.9200061559677 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 160.47999262809753, + "p90": 170.20800709724426, + "p95": 175.80799758434296, + "p99": 207.13600516319275 + }, + "combine": { + "p50": 124.9919980764389, + "p90": 
130.43199479579926, + "p95": 133.85599851608276, + "p99": 143.327996134758 + }, + "roundtrip": { + "p50": 249.34400618076324, + "p90": 259.64799523353577, + "p95": 263.5200023651123, + "p99": 296.79998755455017 + }, + "isolatedSum": { + "p50": 285.47199070453644, + "p90": 300.6400018930435, + "p95": 309.6639961004257, + "p99": 350.46400129795074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-60196b92", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "h200_0e782e5a", + "comparisonKey": "3c7fa686daa9b504", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:06.942670+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 123.32800030708313, + "p90": 165.12000560760498, + "p95": 177.63200402259827, + "p99": 194.87999379634857 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 88.92799913883209, + "p95": 96.92800045013428, + "p99": 101.98400169610977 + }, + "roundtrip": { + "p50": 183.26400220394135, + "p90": 230.68800568580627, + "p95": 250.84799528121948, + "p99": 325.0240087509155 + }, + "isolatedSum": { + "p50": 199.71200078725815, + "p90": 254.04800474643707, + "p95": 274.56000447273254, + "p99": 296.86399549245834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.63200163841248, + "p90": 167.07199811935425, + "p95": 193.1840032339096, + "p99": 225.92000663280487 + }, + "combine": { + "p50": 77.08799839019775, + "p90": 92.00000017881393, + "p95": 98.78399968147278, + "p99": 106.04800283908844 + }, + "roundtrip": { + "p50": 190.0160014629364, + "p90": 229.91999983787537, + "p95": 245.2159970998764, + "p99": 264.8000121116638 + }, + "isolatedSum": { + "p50": 194.72000002861023, + "p90": 
259.0719982981682, + "p95": 291.9680029153824, + "p99": 331.9680094718933 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 112.31999844312668, + "p90": 155.03999590873718, + "p95": 159.93599593639374, + "p99": 171.03999853134155 + }, + "combine": { + "p50": 75.9039968252182, + "p90": 89.59999680519104, + "p95": 98.04800152778625, + "p99": 106.65600001811981 + }, + "roundtrip": { + "p50": 179.1040003299713, + "p90": 224.06400740146637, + "p95": 237.56800591945648, + "p99": 273.21600914001465 + }, + "isolatedSum": { + "p50": 188.22399526834488, + "p90": 244.63999271392822, + "p95": 257.98399746418, + "p99": 277.69599854946136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 114.14399743080139, + "p90": 154.7520011663437, + "p95": 162.1440052986145, + "p99": 183.23199450969696 + }, + "combine": { + "p50": 76.38400048017502, + "p90": 95.93600034713745, + "p95": 102.08000242710114, + "p99": 114.1119971871376 + }, + "roundtrip": { + "p50": 180.57599663734436, + "p90": 225.3440022468567, + "p95": 234.20800268650055, + "p99": 250.30401349067688 + }, + "isolatedSum": { + "p50": 190.5279979109764, + "p90": 250.68800151348114, + "p95": 264.22400772571564, + "p99": 297.34399169683456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { 
+ "p50": 113.15199732780457, + "p90": 156.25600516796112, + "p95": 166.01599752902985, + "p99": 187.6479983329773 + }, + "combine": { + "p50": 77.79199630022049, + "p90": 95.0080007314682, + "p95": 102.24000364542007, + "p99": 120.67200243473053 + }, + "roundtrip": { + "p50": 179.1040003299713, + "p90": 216.48000180721283, + "p95": 223.4880030155182, + "p99": 231.6800057888031 + }, + "isolatedSum": { + "p50": 190.94399362802505, + "p90": 251.26400589942932, + "p95": 268.2560011744499, + "p99": 308.3200007677078 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, + "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 118.46400052309036, + "p90": 153.60000729560852, + "p95": 162.9440039396286, + "p99": 174.9120056629181 + }, + "combine": { + "p50": 84.51200276613235, + "p90": 100.832000374794, + "p95": 106.97600245475769, + "p99": 114.94400352239609 + }, + "roundtrip": { + "p50": 185.15199422836304, + "p90": 227.64800488948822, + "p95": 243.58400702476501, + "p99": 287.3919904232025 + }, + "isolatedSum": { + "p50": 202.97600328922272, + "p90": 254.43200767040253, + "p95": 269.9200063943863, + "p99": 289.8560091853142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 136.51199638843536, + "p90": 163.26400637626648, + "p95": 173.21600019931793, + "p99": 191.93600118160248 + }, + "combine": { + "p50": 95.07200121879578, + "p90": 112.73600161075592, + "p95": 120.31999975442886, + "p99": 127.3919939994812 + }, + "roundtrip": { + "p50": 197.11999595165253, + "p90": 239.16800320148468, + "p95": 246.91200256347656, 
+ "p99": 258.0159902572632 + }, + "isolatedSum": { + "p50": 231.58399760723114, + "p90": 276.0000079870224, + "p95": 293.5359999537468, + "p99": 319.3279951810837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.38400614261627, + "p90": 185.37600338459015, + "p95": 193.50400567054749, + "p99": 208.70399475097656 + }, + "combine": { + "p50": 119.23199892044067, + "p90": 138.08000087738037, + "p95": 141.56800508499146, + "p99": 148.99200201034546 + }, + "roundtrip": { + "p50": 241.85599386692047, + "p90": 269.3760097026825, + "p95": 280.7359993457794, + "p99": 332.12798833847046 + }, + "isolatedSum": { + "p50": 275.61600506305695, + "p90": 323.4560042619705, + "p95": 335.07201075553894, + "p99": 357.695996761322 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ee662072", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|decode|normal|none|none|0|tuned||5a3054422534366", + "colorKey": "h200_dd7af994", + "comparisonKey": "9650d8e397a5f8a5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:02:44.950990+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": 
"H200 EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "5a3054422534366", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.40625, + "eplbImbalanceAfter": 1.0004417782738093, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 121.44000083208084, + "p90": 161.43999993801117, + "p95": 173.24799299240112, + "p99": 192.44800508022308 + }, + "combine": { + "p50": 79.00799810886383, + "p90": 94.24000233411789, + "p95": 105.8880016207695, + "p99": 111.51999980211258 + }, + "roundtrip": { + "p50": 184.09599363803864, + "p90": 229.79199886322021, + "p95": 244.54399943351746, + "p99": 266.01600646972656 + }, + "isolatedSum": { + "p50": 200.44799894094467, + "p90": 255.68000227212906, + "p95": 279.1359946131706, + "p99": 303.96800488233566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
559104, + "combineLogicalBytes": 559104, + "fanoutMean": 4.875, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.42399972677231, + "p90": 154.36799824237823, + "p95": 160.47999262809753, + "p99": 168.5439944267273 + }, + "combine": { + "p50": 79.83999699354172, + "p90": 89.79199826717377, + "p95": 101.40799731016159, + "p99": 113.37599903345108 + }, + "roundtrip": { + "p50": 195.19999623298645, + "p90": 241.5039986371994, + "p95": 251.71199440956116, + "p99": 285.2480113506317 + }, + "isolatedSum": { + "p50": 195.26399672031403, + "p90": 244.159996509552, + "p95": 261.8879899382591, + "p99": 281.9199934601784 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1175552, + "combineLogicalBytes": 1175552, + "fanoutMean": 5.125, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 115.84000289440155, + "p90": 155.90399503707886, + "p95": 164.48000073432922, + "p99": 183.52000415325165 + }, + "combine": { + "p50": 82.97599852085114, + "p90": 99.55199807882309, + "p95": 109.69600081443787, + "p99": 118.23999881744385 + }, + "roundtrip": { + "p50": 192.47999787330627, + "p90": 236.86400055885315, + "p95": 252.6719868183136, + "p99": 301.91999673843384 + }, + "isolatedSum": { + "p50": 198.81600141525269, + "p90": 255.45599311590195, + "p95": 274.1760015487671, + "p99": 301.7600029706955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2465792, + "combineLogicalBytes": 2465792, + "fanoutMean": 5.375, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 116.09599739313126, + "p90": 157.21599757671356, + "p95": 169.18399930000305, + "p99": 189.95200097560883 + }, + "combine": 
{ + "p50": 83.55200290679932, + "p90": 98.36799651384354, + "p95": 108.25599730014801, + "p99": 114.62400108575821 + }, + "roundtrip": { + "p50": 187.04000115394592, + "p90": 228.96000742912292, + "p95": 236.9920015335083, + "p99": 252.83199548721313 + }, + "isolatedSum": { + "p50": 199.64800029993057, + "p90": 255.5839940905571, + "p95": 277.43999660015106, + "p99": 304.57600206136703 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4988928, + "combineLogicalBytes": 4988928, + "fanoutMean": 5.4375, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 113.24799805879593, + "p90": 155.10399639606476, + "p95": 162.52799332141876, + "p99": 177.18400061130524 + }, + "combine": { + "p50": 85.40800213813782, + "p90": 105.56799918413162, + "p95": 110.62400043010712, + "p99": 120.31999975442886 + }, + "roundtrip": { + "p50": 189.43999707698822, + "p90": 229.69600558280945, + "p95": 241.56799912452698, + "p99": 283.03998708724976 + }, + "isolatedSum": { + "p50": 198.65600019693375, + "p90": 260.6719955801964, + "p95": 273.1519937515259, + "p99": 297.5040003657341 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9791488, + "combineLogicalBytes": 9791488, + "fanoutMean": 5.3359375, + "recvTokensMax": 94, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 123.00799787044525, + "p90": 156.25600516796112, + "p95": 162.04799711704254, + "p99": 168.70400309562683 + }, + "combine": { + "p50": 91.5519967675209, + "p90": 105.15200346708298, + "p95": 112.67200112342834, + "p99": 123.61600250005722 + }, + "roundtrip": { + "p50": 192.44800508022308, + "p90": 228.09599339962006, + "p95": 234.17599499225616, + "p99": 244.73600089550018 + }, + "isolatedSum": { + "p50": 214.55999463796616, + "p90": 261.4080086350441, + "p95": 
274.7199982404709, + "p99": 292.32000559568405 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19410944, + "combineLogicalBytes": 19410944, + "fanoutMean": 5.2890625, + "recvTokensMax": 178, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 134.5279961824417, + "p90": 162.33600676059723, + "p95": 170.01600563526154, + "p99": 193.9840018749237 + }, + "combine": { + "p50": 101.21600329875946, + "p90": 118.97599697113037, + "p95": 126.75200402736664, + "p99": 134.65599715709686 + }, + "roundtrip": { + "p50": 209.21599864959717, + "p90": 254.7839879989624, + "p95": 263.2960081100464, + "p99": 297.5359857082367 + }, + "isolatedSum": { + "p50": 235.74399948120117, + "p90": 281.3120037317276, + "p95": 296.7680096626282, + "p99": 328.63999903202057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38678528, + "combineLogicalBytes": 38678528, + "fanoutMean": 5.26953125, + "recvTokensMax": 360, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 153.34400534629822, + "p90": 174.14399981498718, + "p95": 182.11199343204498, + "p99": 196.8960016965866 + }, + "combine": { + "p50": 118.27199906110764, + "p90": 134.88000631332397, + "p95": 142.14399456977844, + "p99": 151.58399939537048 + }, + "roundtrip": { + "p50": 236.4480048418045, + "p90": 255.93599677085876, + "p95": 265.3439939022064, + "p99": 281.40801191329956 + }, + "isolatedSum": { + "p50": 271.61600440740585, + "p90": 309.02400612831116, + "p95": 324.2559880018234, + "p99": 348.4800010919571 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 77285376, + "fanoutMean": 5.2646484375, + "recvTokensMax": 704, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-501492ad", + "identity": 
"h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|decode|normal|none|none|0|tuned||f3df51be7d5c32b", + "colorKey": "h200_eb9b77cd", + "comparisonKey": "bdbb6dfc90739722", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:53.492221+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "f3df51be7d5c32b", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 110.01600325107574, + "p90": 148.51200580596924, + "p95": 154.1759967803955, + "p99": 168.2240068912506 + }, + "combine": { + "p50": 78.84799689054489, + "p90": 91.74399822950363, + "p95": 101.43999755382538, + "p99": 115.90400338172913 + }, + "roundtrip": { + "p50": 179.87200617790222, + "p90": 216.60800278186798, + "p95": 225.50399601459503, + "p99": 242.11199581623077 + }, + "isolatedSum": { + "p50": 188.86400014162064, + "p90": 240.25600403547287, + "p95": 255.6159943342209, + "p99": 284.12801027297974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 587776, + "combineLogicalBytes": 587776, + "fanoutMean": 5.125, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 109.18399691581726, + "p90": 145.1520025730133, + "p95": 151.32799744606018, + "p99": 170.20800709724426 + }, + "combine": { + "p50": 78.04799824953079, + "p90": 88.48000317811966, + "p95": 98.52799773216248, + "p99": 108.57599973678589 + }, + "roundtrip": { + "p50": 178.9119988679886, + "p90": 212.3199999332428, + "p95": 218.176007270813, + "p99": 229.18400168418884 + }, + "isolatedSum": { + "p50": 187.23199516534805, + "p90": 233.63200575113297, + "p95": 249.85599517822266, + "p99": 278.78400683403015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1103872, + "combineLogicalBytes": 1103872, + "fanoutMean": 4.8125, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 119.39200013875961, + "p90": 159.36000645160675, + "p95": 168.73599588871002, + "p99": 183.84000658988953 + }, + "combine": { + "p50": 82.68799632787704, + "p90": 102.01600193977356, + "p95": 110.49599945545197, + "p99": 
118.84800344705582 + }, + "roundtrip": { + "p50": 184.09599363803864, + "p90": 226.17599368095398, + "p95": 235.61599850654602, + "p99": 247.48800694942474 + }, + "isolatedSum": { + "p50": 202.07999646663666, + "p90": 261.3760083913803, + "p95": 279.231995344162, + "p99": 302.68801003694534 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2250752, + "combineLogicalBytes": 2250752, + "fanoutMean": 4.90625, + "recvTokensMax": 31, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 110.36799848079681, + "p90": 146.08000218868256, + "p95": 151.5520066022873, + "p99": 160.60799360275269 + }, + "combine": { + "p50": 82.75199681520462, + "p90": 92.44800359010696, + "p95": 106.59199953079224, + "p99": 116.80000275373459 + }, + "roundtrip": { + "p50": 181.60000443458557, + "p90": 211.8079960346222, + "p95": 218.52800250053406, + "p99": 230.335995554924 + }, + "isolatedSum": { + "p50": 193.11999529600143, + "p90": 238.52800577878952, + "p95": 258.14400613307953, + "p99": 277.4079963564873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4472832, + "combineLogicalBytes": 4472832, + "fanoutMean": 4.875, + "recvTokensMax": 62, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 119.07199770212173, + "p90": 159.36000645160675, + "p95": 178.81600558757782, + "p99": 208.8959962129593 + }, + "combine": { + "p50": 85.08799970149994, + "p90": 101.21600329875946, + "p95": 109.0560033917427, + "p99": 117.3119992017746 + }, + "roundtrip": { + "p50": 185.02399325370789, + "p90": 225.92000663280487, + "p95": 236.03199422359467, + "p99": 256.0639977455139 + }, + "isolatedSum": { + "p50": 204.15999740362167, + "p90": 260.5760097503662, + "p95": 287.8720089793205, + "p99": 326.2079954147339 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8888320, + 
"combineLogicalBytes": 8888320, + "fanoutMean": 4.84375, + "recvTokensMax": 124, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 122.27199971675873, + "p90": 151.8400013446808, + "p95": 157.27999806404114, + "p99": 174.112007021904 + }, + "combine": { + "p50": 91.07200056314468, + "p90": 102.7199998497963, + "p95": 111.42399907112122, + "p99": 118.30399930477142 + }, + "roundtrip": { + "p50": 194.04800236225128, + "p90": 230.9119999408722, + "p95": 239.16800320148468, + "p99": 260.19200682640076 + }, + "isolatedSum": { + "p50": 213.3440002799034, + "p90": 254.56000119447708, + "p95": 268.70399713516235, + "p99": 292.4160063266754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 17733632, + "combineLogicalBytes": 17733632, + "fanoutMean": 4.83203125, + "recvTokensMax": 248, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.84800672531128, + "p90": 164.8000031709671, + "p95": 178.3359944820404, + "p99": 197.4399983882904 + }, + "combine": { + "p50": 102.27199643850327, + "p90": 117.24799871444702, + "p95": 126.17599964141846, + "p99": 138.68799805641174 + }, + "roundtrip": { + "p50": 211.29600703716278, + "p90": 248.09600412845612, + "p95": 261.6960108280182, + "p99": 275.6800055503845 + }, + "isolatedSum": { + "p50": 241.12000316381454, + "p90": 282.0480018854141, + "p95": 304.51199412345886, + "p99": 336.12799644470215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 35424256, + "combineLogicalBytes": 35424256, + "fanoutMean": 4.826171875, + "recvTokensMax": 492, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.79199516773224, + "p90": 186.65599822998047, + "p95": 197.08800315856934, + "p99": 209.6959948539734 
+ }, + "combine": { + "p50": 126.17599964141846, + "p90": 142.2719955444336, + "p95": 149.4079977273941, + "p99": 152.6080071926117 + }, + "roundtrip": { + "p50": 252.00000405311584, + "p90": 271.5519964694977, + "p95": 277.3759961128235, + "p99": 287.1040105819702 + }, + "isolatedSum": { + "p50": 287.9679948091507, + "p90": 328.92799377441406, + "p95": 346.49600088596344, + "p99": 362.3040020465851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e805717", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|decode|normal|none|none|0|tuned||16babcaf4204243", + "colorKey": "h200_a11c2791", + "comparisonKey": "5bb561a4abbf55a2", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:18.497526+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "16babcaf4204243", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.61328125, + "eplbImbalanceAfter": 1.0009114583333334, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 111.29599809646606, + "p90": 124.79999661445618, + "p95": 127.96799838542938, + "p99": 133.12000036239624 + }, + "combine": { + "p50": 79.29600030183792, + "p90": 85.50400286912918, + "p95": 88.99199962615967, + "p99": 94.91200000047684 + }, + "roundtrip": { + "p50": 181.8239986896515, + "p90": 194.84800100326538, + "p95": 200.70399343967438, + "p99": 214.33599293231964 + }, + "isolatedSum": { + "p50": 190.59199839830399, + "p90": 210.30399948358536, + "p95": 216.95999801158905, + "p99": 228.03200036287308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 115.23199826478958, + "p90": 132.4159950017929, + "p95": 140.00000059604645, + "p99": 166.62399470806122 + }, + "combine": { + "p50": 81.82399719953537, + "p90": 93.72799843549728, + "p95": 98.36799651384354, + "p99": 107.35999792814255 + }, + "roundtrip": { + "p50": 181.37599527835846, + "p90": 200.28799772262573, + "p95": 223.10400009155273, 
+ "p99": 276.095986366272 + }, + "isolatedSum": { + "p50": 197.05599546432495, + "p90": 226.1439934372902, + "p95": 238.36799710988998, + "p99": 273.98399263620377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1189888, + "combineLogicalBytes": 1189888, + "fanoutMean": 5.1875, + "recvTokensMax": 12, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 114.68800157308578, + "p90": 129.82399761676788, + "p95": 137.7599984407425, + "p99": 156.95999562740326 + }, + "combine": { + "p50": 81.88799768686295, + "p90": 88.128000497818, + "p95": 93.08800101280212, + "p99": 100.0640019774437 + }, + "roundtrip": { + "p50": 184.03199315071106, + "p90": 200.28799772262573, + "p95": 204.12799715995789, + "p99": 215.42400121688843 + }, + "isolatedSum": { + "p50": 196.57599925994873, + "p90": 217.95199811458588, + "p95": 230.84799945354462, + "p99": 257.02399760484695 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 23, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 112.64000087976456, + "p90": 127.48800218105316, + "p95": 132.35199451446533, + "p99": 143.71199905872345 + }, + "combine": { + "p50": 83.77599716186523, + "p90": 89.63199704885483, + "p95": 91.87199920415878, + "p99": 101.34399682283401 + }, + "roundtrip": { + "p50": 183.80799889564514, + "p90": 195.68000733852386, + "p95": 202.72000133991241, + "p99": 229.8559993505478 + }, + "isolatedSum": { + "p50": 196.4159980416298, + "p90": 217.119999229908, + "p95": 224.22399371862411, + "p99": 245.05599588155746 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 113.56800049543381, + "p90": 129.60000336170197, + "p95": 138.11199367046356, + "p99": 208.25600624084473 + }, + "combine": { + "p50": 85.79199761152267, + "p90": 94.4959968328476, + "p95": 101.05600208044052, + "p99": 115.84000289440155 + }, + "roundtrip": { + "p50": 189.98399376869202, + "p90": 204.99199628829956, + "p95": 210.91200411319733, + "p99": 235.74399948120117 + }, + "isolatedSum": { + "p50": 199.35999810695648, + "p90": 224.09600019454956, + "p95": 239.16799575090408, + "p99": 324.0960091352463 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9605120, + "combineLogicalBytes": 9605120, + "fanoutMean": 5.234375, + "recvTokensMax": 93, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 120.41600048542023, + "p90": 133.59999656677246, + "p95": 136.19199395179749, + "p99": 146.84799313545227 + }, + "combine": { + "p50": 90.87999910116196, + "p90": 94.78399902582169, + "p95": 98.88000041246414, + "p99": 106.4319983124733 + }, + "roundtrip": { + "p50": 190.528005361557, + "p90": 204.25599813461304, + "p95": 209.1200053691864, + "p99": 219.04000639915466 + }, + "isolatedSum": { + "p50": 211.29599958658218, + "p90": 228.38399559259415, + "p95": 235.07199436426163, + "p99": 253.27999144792557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19367936, + "combineLogicalBytes": 19367936, + "fanoutMean": 5.27734375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 134.68800485134125, + "p90": 148.6400067806244, + "p95": 159.13599729537964, + "p99": 175.26400089263916 + }, + "combine": { + "p50": 102.08000242710114, + "p90": 113.63200098276138, + "p95": 123.64800274372101, + "p99": 
164.06400501728058 + }, + "roundtrip": { + "p50": 215.00800549983978, + "p90": 235.35999655723572, + "p95": 243.6479926109314, + "p99": 268.22400093078613 + }, + "isolatedSum": { + "p50": 236.76800727844238, + "p90": 262.2720077633858, + "p95": 282.78400003910065, + "p99": 339.32800590991974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38535168, + "combineLogicalBytes": 38535168, + "fanoutMean": 5.25, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.11199629306793, + "p90": 166.6879951953888, + "p95": 174.81599748134613, + "p99": 199.39200580120087 + }, + "combine": { + "p50": 119.07199770212173, + "p90": 127.03999876976013, + "p95": 132.06399977207184, + "p99": 141.95199310779572 + }, + "roundtrip": { + "p50": 237.59999871253967, + "p90": 249.2160052061081, + "p95": 253.82399559020996, + "p99": 266.07999205589294 + }, + "isolatedSum": { + "p50": 273.18399399518967, + "p90": 293.7279939651489, + "p95": 306.87999725341797, + "p99": 341.3439989089966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76869632, + "combineLogicalBytes": 76869632, + "fanoutMean": 5.236328125, + "recvTokensMax": 688, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7bc8079d", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "h200_cdce4762", + "comparisonKey": "8603e7156be7b7a1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:48.517681+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 111.23199760913849, + "p90": 125.08800625801086, + "p95": 129.88799810409546, + "p99": 142.91200041770935 + }, + "combine": { + "p50": 77.47200131416321, + "p90": 83.03999900817871, + "p95": 86.43200248479843, + "p99": 96.3520035147667 + }, + "roundtrip": { + "p50": 183.87199938297272, + "p90": 200.6399929523468, + "p95": 211.10400557518005, + "p99": 283.4559977054596 + }, + "isolatedSum": { + "p50": 188.7039989233017, + "p90": 208.12800526618958, + "p95": 
216.3200005888939, + "p99": 239.26400393247604 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 111.7120012640953, + "p90": 125.31200051307678, + "p95": 129.37599420547485, + "p99": 138.75199854373932 + }, + "combine": { + "p50": 79.6160027384758, + "p90": 84.60800349712372, + "p95": 88.639996945858, + "p99": 98.4639972448349 + }, + "roundtrip": { + "p50": 182.01600015163422, + "p90": 195.80799341201782, + "p95": 201.1840045452118, + "p99": 244.9920028448105 + }, + "isolatedSum": { + "p50": 191.3280040025711, + "p90": 209.9200040102005, + "p95": 218.01599115133286, + "p99": 237.21599578857422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 115.99999666213989, + "p90": 129.08799946308136, + "p95": 134.20799374580383, + "p99": 142.752006649971 + }, + "combine": { + "p50": 79.52000200748444, + "p90": 84.79999750852585, + "p95": 89.40800279378891, + "p99": 99.04000163078308 + }, + "roundtrip": { + "p50": 181.34400248527527, + "p90": 193.1840032339096, + "p95": 199.64799284934998, + "p99": 223.26399385929108 + }, + "isolatedSum": { + "p50": 195.51999866962433, + "p90": 213.8879969716072, + "p95": 223.61599653959274, + "p99": 241.7920082807541 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 113.37599903345108, + 
"p90": 127.20000743865967, + "p95": 133.12000036239624, + "p99": 159.87199544906616 + }, + "combine": { + "p50": 80.25600016117096, + "p90": 85.34400165081024, + "p95": 89.88799899816513, + "p99": 99.23200309276581 + }, + "roundtrip": { + "p50": 182.14400112628937, + "p90": 196.83200120925903, + "p95": 201.75999402999878, + "p99": 210.62399446964264 + }, + "isolatedSum": { + "p50": 193.63199919462204, + "p90": 212.5440090894699, + "p95": 223.00799936056137, + "p99": 259.10399854183197 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 111.7120012640953, + "p90": 128.7360042333603, + "p95": 134.5600038766861, + "p99": 151.0079950094223 + }, + "combine": { + "p50": 82.71999657154083, + "p90": 88.95999938249588, + "p95": 92.28800237178802, + "p99": 99.20000284910202 + }, + "roundtrip": { + "p50": 183.6480051279068, + "p90": 201.9840031862259, + "p95": 210.24000644683838, + "p99": 294.20799016952515 + }, + "isolatedSum": { + "p50": 194.43199783563614, + "p90": 217.69600361585617, + "p95": 226.84800624847412, + "p99": 250.20799785852432 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 123.52000176906586, + "p90": 134.0479999780655, + "p95": 139.93600010871887, + "p99": 158.4639996290207 + }, + "combine": { + "p50": 89.47200328111649, + "p90": 94.46399658918381, + "p95": 99.0080013871193, + "p99": 128.9599984884262 + }, + "roundtrip": { + "p50": 190.5599981546402, + "p90": 206.36799931526184, + "p95": 211.67999505996704, + "p99": 261.6960108280182 + }, + 
"isolatedSum": { + "p50": 212.99200505018234, + "p90": 228.5119965672493, + "p95": 238.94400149583817, + "p99": 287.4239981174469 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 138.36799561977386, + "p90": 148.54399859905243, + "p95": 153.31199765205383, + "p99": 158.01599621772766 + }, + "combine": { + "p50": 101.05600208044052, + "p90": 107.61599987745285, + "p95": 110.59200018644333, + "p99": 121.56800180673599 + }, + "roundtrip": { + "p50": 203.10400426387787, + "p90": 219.26400065422058, + "p95": 224.60800409317017, + "p99": 233.3119958639145 + }, + "isolatedSum": { + "p50": 239.4239977002144, + "p90": 256.1599984765053, + "p95": 263.90399783849716, + "p99": 279.58399802446365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 161.8880033493042, + "p90": 176.7680048942566, + "p95": 185.2799952030182, + "p99": 205.72799444198608 + }, + "combine": { + "p50": 125.76000392436981, + "p90": 134.17600095272064, + "p95": 139.3280029296875, + "p99": 157.4079990386963 + }, + "roundtrip": { + "p50": 248.09600412845612, + "p90": 259.42400097846985, + "p95": 262.30400800704956, + "p99": 355.77601194381714 + }, + "isolatedSum": { + "p50": 287.648007273674, + "p90": 310.94400584697723, + "p95": 324.6079981327057, + "p99": 363.1359934806824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6795bc7b", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_2c30082c", + "comparisonKey": "0037d124ecf82a54", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:13.769909+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 111.23199760913849, + "p90": 130.8159977197647, + "p95": 165.66400229930878, + "p99": 534.0160131454468 + }, + "combine": { + "p50": 79.03999835252762, + "p90": 93.63199770450592, + "p95": 107.80800133943558, + "p99": 277.0879864692688 + }, + "roundtrip": { + "p50": 184.4159960746765, + "p90": 200.73600113391876, + "p95": 212.8639966249466, + "p99": 268.70399713516235 + }, + "isolatedSum": { + "p50": 190.2719959616661, + "p90": 224.44799542427063, + "p95": 273.47200363874435, + "p99": 811.1039996147156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 117.27999895811081, + "p90": 150.39999783039093, + "p95": 159.96800363063812, + "p99": 173.34400117397308 + }, + "combine": { + "p50": 79.58400249481201, + "p90": 88.35200220346451, + "p95": 99.42399710416794, + "p99": 113.15199732780457 + }, + "roundtrip": { + "p50": 183.00800025463104, + "p90": 216.95999801158905, + "p95": 231.04000091552734, + "p99": 261.75999641418457 + }, + "isolatedSum": { + "p50": 196.86400145292282, + "p90": 238.75200003385544, + "p95": 259.39200073480606, + "p99": 286.49599850177765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 115.68000167608261, + "p90": 135.6160044670105, + "p95": 152.41600573062897, + "p99": 181.72800540924072 + }, + 
"combine": { + "p50": 82.0159986615181, + "p90": 91.0400003194809, + "p95": 101.31199657917023, + "p99": 116.48000031709671 + }, + "roundtrip": { + "p50": 185.31200289726257, + "p90": 215.87200462818146, + "p95": 228.5439968109131, + "p99": 246.5600073337555 + }, + "isolatedSum": { + "p50": 197.6960003376007, + "p90": 226.6560047864914, + "p95": 253.7280023097992, + "p99": 298.20800572633743 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 116.67200177907944, + "p90": 155.68000078201294, + "p95": 169.5999950170517, + "p99": 189.2160028219223 + }, + "combine": { + "p50": 82.49600231647491, + "p90": 92.25600212812424, + "p95": 104.86400127410889, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 190.17599523067474, + "p90": 234.30399596691132, + "p95": 244.4159984588623, + "p99": 261.1519992351532 + }, + "isolatedSum": { + "p50": 199.16800409555435, + "p90": 247.93600291013718, + "p95": 274.4639962911606, + "p99": 302.17600613832474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + "fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 110.3999987244606, + "p90": 125.37600100040436, + "p95": 127.87200510501862, + "p99": 138.33600282669067 + }, + "combine": { + "p50": 84.57600325345993, + "p90": 93.53599697351456, + "p95": 101.6319990158081, + "p99": 124.83199685811996 + }, + "roundtrip": { + "p50": 192.1599954366684, + "p90": 211.74399554729462, + "p95": 234.49599742889404, + "p99": 258.2719922065735 + }, + "isolatedSum": { + "p50": 194.97600197792053, + "p90": 218.91199797391891, + "p95": 
229.50400412082672, + "p99": 263.16799968481064 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 127.55200266838074, + "p90": 161.1199975013733, + "p95": 175.10400712490082, + "p99": 199.23199713230133 + }, + "combine": { + "p50": 91.00800007581711, + "p90": 103.45599800348282, + "p95": 115.00799655914307, + "p99": 128.76799702644348 + }, + "roundtrip": { + "p50": 193.82399320602417, + "p90": 231.07199370861053, + "p95": 244.22399699687958, + "p99": 270.81599831581116 + }, + "isolatedSum": { + "p50": 218.56000274419785, + "p90": 264.5759955048561, + "p95": 290.1120036840439, + "p99": 327.9999941587448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 132.83200562000275, + "p90": 148.54399859905243, + "p95": 165.3439998626709, + "p99": 185.15199422836304 + }, + "combine": { + "p50": 100.28800368309021, + "p90": 107.16799646615982, + "p95": 115.4559999704361, + "p99": 125.2480000257492 + }, + "roundtrip": { + "p50": 207.61600136756897, + "p90": 232.06399381160736, + "p95": 243.80800127983093, + "p99": 291.8719947338104 + }, + "isolatedSum": { + "p50": 233.12000930309296, + "p90": 255.71199506521225, + "p95": 280.799999833107, + "p99": 310.39999425411224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + 
"dispatch": { + "p50": 153.43999862670898, + "p90": 165.27999937534332, + "p95": 172.83199727535248, + "p99": 198.04799556732178 + }, + "combine": { + "p50": 118.43200027942657, + "p90": 126.71999633312225, + "p95": 133.98399949073792, + "p99": 151.96800231933594 + }, + "roundtrip": { + "p50": 235.1360023021698, + "p90": 244.73600089550018, + "p95": 256.5760016441345, + "p99": 272.5760042667389 + }, + "isolatedSum": { + "p50": 271.87199890613556, + "p90": 291.9999957084656, + "p95": 306.8159967660904, + "p99": 350.0159978866577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-072c4cb9", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|decode|normal|none|none|0|tuned||a8f501af7004836", + "colorKey": "h200_9ffc4a1e", + "comparisonKey": "00e18303b790f307", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:57.539954+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + 
"configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "a8f501af7004836", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.927734375, + "eplbImbalanceAfter": 1.0006103515625, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 119.07199770212173, + "p90": 154.14400398731232, + "p95": 171.32799327373505, + "p99": 188.09600174427032 + }, + "combine": { + "p50": 79.19999957084656, + "p90": 89.24800157546997, + "p95": 99.58399832248688, + "p99": 108.0000028014183 + }, + "roundtrip": { + "p50": 181.7920058965683, + "p90": 211.61599457263947, + "p95": 229.5680046081543, + "p99": 244.63999271392822 + }, + "isolatedSum": { + "p50": 198.2719972729683, + "p90": 243.3920055627823, + "p95": 270.9119915962219, + "p99": 296.09600454568863 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 5.375, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 114.656001329422, + "p90": 153.3759981393814, + "p95": 162.91199624538422, + "p99": 186.62400543689728 + }, + "combine": { + "p50": 81.02399855852127, + "p90": 95.58399766683578, + "p95": 105.66399991512299, + "p99": 114.46399986743927 + }, + 
"roundtrip": { + "p50": 180.06399273872375, + "p90": 216.8000042438507, + "p95": 225.72800517082214, + "p99": 245.08799612522125 + }, + "isolatedSum": { + "p50": 195.67999988794327, + "p90": 248.9599958062172, + "p95": 268.5759961605072, + "p99": 301.08800530433655 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 1204224, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 121.8239963054657, + "p90": 159.7760021686554, + "p95": 169.72799599170685, + "p99": 195.26399672031403 + }, + "combine": { + "p50": 82.78399705886841, + "p90": 99.74399954080582, + "p95": 108.83200168609619, + "p99": 161.40800714492798 + }, + "roundtrip": { + "p50": 184.03199315071106, + "p90": 218.81599724292755, + "p95": 234.17599499225616, + "p99": 246.848002076149 + }, + "isolatedSum": { + "p50": 204.6079933643341, + "p90": 259.5200017094612, + "p95": 278.55999767780304, + "p99": 356.672003865242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2394112, + "combineLogicalBytes": 2394112, + "fanoutMean": 5.21875, + "recvTokensMax": 24, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 110.75200140476227, + "p90": 149.9519944190979, + "p95": 155.71199357509613, + "p99": 163.83999586105347 + }, + "combine": { + "p50": 83.10399949550629, + "p90": 98.2080027461052, + "p95": 107.87200182676315, + "p99": 142.7839994430542 + }, + "roundtrip": { + "p50": 197.63199985027313, + "p90": 240.6720072031021, + "p95": 258.976012468338, + "p99": 285.0239872932434 + }, + "isolatedSum": { + "p50": 193.85600090026855, + "p90": 248.1599971652031, + "p95": 263.5839954018593, + "p99": 306.62399530410767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4630528, + "combineLogicalBytes": 4630528, + 
"fanoutMean": 5.046875, + "recvTokensMax": 45, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 114.49600011110306, + "p90": 151.96800231933594, + "p95": 162.9440039396286, + "p99": 175.58400332927704 + }, + "combine": { + "p50": 84.48000252246857, + "p90": 93.40800344944, + "p95": 106.75200074911118, + "p99": 112.2559979557991 + }, + "roundtrip": { + "p50": 188.4479969739914, + "p90": 223.4240025281906, + "p95": 233.8559925556183, + "p99": 245.05600333213806 + }, + "isolatedSum": { + "p50": 198.97600263357162, + "p90": 245.37600576877594, + "p95": 269.6960046887398, + "p99": 287.84000128507614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9447424, + "combineLogicalBytes": 9447424, + "fanoutMean": 5.1484375, + "recvTokensMax": 91, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 123.96799772977829, + "p90": 158.36800634860992, + "p95": 166.87999665737152, + "p99": 187.71199882030487 + }, + "combine": { + "p50": 90.52799642086029, + "p90": 105.59999942779541, + "p95": 115.13599753379822, + "p99": 119.1679984331131 + }, + "roundtrip": { + "p50": 194.14399564266205, + "p90": 237.88799345493317, + "p95": 252.51200795173645, + "p99": 680.1279783248901 + }, + "isolatedSum": { + "p50": 214.49599415063858, + "p90": 263.96800577640533, + "p95": 282.01599419116974, + "p99": 306.87999725341797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19023872, + "combineLogicalBytes": 19023872, + "fanoutMean": 5.18359375, + "recvTokensMax": 178, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 134.783998131752, + "p90": 159.90400314331055, + "p95": 168.38400065898895, + "p99": 204.19199764728546 + }, + "combine": { + "p50": 
102.30399668216705, + "p90": 115.03999680280685, + "p95": 126.91199779510498, + "p99": 135.93600690364838 + }, + "roundtrip": { + "p50": 211.64800226688385, + "p90": 257.9840123653412, + "p95": 268.5759961605072, + "p99": 293.0560111999512 + }, + "isolatedSum": { + "p50": 237.08799481391907, + "p90": 274.9439999461174, + "p95": 295.29599845409393, + "p99": 340.12800455093384 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38148096, + "combineLogicalBytes": 38148096, + "fanoutMean": 5.197265625, + "recvTokensMax": 350, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 152.41600573062897, + "p90": 175.135999917984, + "p95": 183.80799889564514, + "p99": 215.00800549983978 + }, + "combine": { + "p50": 118.43200027942657, + "p90": 134.91199910640717, + "p95": 141.40799641609192, + "p99": 148.12800288200378 + }, + "roundtrip": { + "p50": 241.7600005865097, + "p90": 281.5360128879547, + "p95": 298.11200499534607, + "p99": 349.5360016822815 + }, + "isolatedSum": { + "p50": 270.84800601005554, + "p90": 310.0479990243912, + "p95": 325.21599531173706, + "p99": 363.13600838184357 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 76955648, + "combineLogicalBytes": 76955648, + "fanoutMean": 5.2421875, + "recvTokensMax": 687, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-62819b2f", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_04cde55d", + "comparisonKey": "caea5048190ffb84", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:18.058387+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": 
"backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 89.50400352478027, + "p90": 124.1919994354248, + "p95": 133.18400084972382, + "p99": 154.01600301265717 + }, + "combine": { + "p50": 81.98399841785431, + "p90": 103.90400141477585, + "p95": 109.56799983978271, + "p99": 125.76000392436981 + }, + "roundtrip": { + "p50": 160.3199988603592, + "p90": 195.64799964427948, + "p95": 204.16000485420227, + "p99": 231.3919961452484 + }, + "isolatedSum": { + 
"p50": 171.48800194263458, + "p90": 228.09600085020065, + "p95": 242.75200068950653, + "p99": 279.776006937027 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 92.51199662685394, + "p90": 127.48800218105316, + "p95": 133.08799266815186, + "p99": 149.02399480342865 + }, + "combine": { + "p50": 82.68799632787704, + "p90": 98.27200323343277, + "p95": 107.42399841547012, + "p99": 118.46400052309036 + }, + "roundtrip": { + "p50": 161.5999937057495, + "p90": 198.2399970293045, + "p95": 206.65599405765533, + "p99": 220.38400173187256 + }, + "isolatedSum": { + "p50": 175.199992954731, + "p90": 225.76000541448593, + "p95": 240.51199108362198, + "p99": 267.487995326519 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 90.17600119113922, + "p90": 118.20799857378006, + "p95": 125.40799379348755, + "p99": 133.44000279903412 + }, + "combine": { + "p50": 83.99999886751175, + "p90": 93.21600198745728, + "p95": 101.82400047779083, + "p99": 113.47199976444244 + }, + "roundtrip": { + "p50": 164.60800170898438, + "p90": 191.48799777030945, + "p95": 202.4639993906021, + "p99": 218.87999773025513 + }, + "isolatedSum": { + "p50": 174.17600005865097, + "p90": 211.42400056123734, + "p95": 227.23199427127838, + "p99": 246.91200256347656 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, 
+ "globalTokens": 64, + "dispatch": { + "p50": 98.75199943780899, + "p90": 130.97600638866425, + "p95": 142.14399456977844, + "p99": 161.15200519561768 + }, + "combine": { + "p50": 84.927998483181, + "p90": 105.6319996714592, + "p95": 110.91200262308121, + "p99": 120.80000340938568 + }, + "roundtrip": { + "p50": 165.6319946050644, + "p90": 203.77600193023682, + "p95": 216.09599888324738, + "p99": 238.43200504779816 + }, + "isolatedSum": { + "p50": 183.67999792099, + "p90": 236.60800606012344, + "p95": 253.05599719285965, + "p99": 281.95200860500336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 90.2400016784668, + "p90": 118.9119964838028, + "p95": 129.56799566745758, + "p99": 144.0960019826889 + }, + "combine": { + "p50": 84.86399799585342, + "p90": 92.73599833250046, + "p95": 104.00000214576721, + "p99": 115.77600240707397 + }, + "roundtrip": { + "p50": 165.50399363040924, + "p90": 193.95199418067932, + "p95": 204.25599813461304, + "p99": 224.09600019454956 + }, + "isolatedSum": { + "p50": 175.10399967432022, + "p90": 211.64799481630325, + "p95": 233.5679978132248, + "p99": 259.8720043897629 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 101.21600329875946, + "p90": 129.12000715732574, + "p95": 138.8159990310669, + "p99": 159.87199544906616 + }, + "combine": { + "p50": 92.47999638319016, + "p90": 112.60800063610077, + "p95": 118.97599697113037, + "p99": 132.54399597644806 + }, + "roundtrip": { + "p50": 172.92800545692444, + "p90": 
210.78400313854218, + "p95": 222.30400145053864, + "p99": 248.99199604988098 + }, + "isolatedSum": { + "p50": 193.69599968194962, + "p90": 241.7280077934265, + "p95": 257.79199600219727, + "p99": 292.4159914255142 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 116.92799627780914, + "p90": 158.84800255298615, + "p95": 169.3439930677414, + "p99": 183.20000171661377 + }, + "combine": { + "p50": 102.68799960613251, + "p90": 125.85599720478058, + "p95": 128.28800082206726, + "p99": 146.55999839305878 + }, + "roundtrip": { + "p50": 190.20800292491913, + "p90": 263.3279860019684, + "p95": 275.29600262641907, + "p99": 319.64799761772156 + }, + "isolatedSum": { + "p50": 219.61599588394165, + "p90": 284.7039997577667, + "p95": 297.63199388980865, + "p99": 329.76000010967255 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 133.12000036239624, + "p90": 143.74400675296783, + "p95": 150.59199929237366, + "p99": 162.30399906635284 + }, + "combine": { + "p50": 119.23199892044067, + "p90": 131.8719983100891, + "p95": 138.40000331401825, + "p99": 148.8959938287735 + }, + "roundtrip": { + "p50": 214.59199488162994, + "p90": 236.67199909687042, + "p95": 245.85600197315216, + "p99": 265.02400636672974 + }, + "isolatedSum": { + "p50": 252.3519992828369, + "p90": 275.61600506305695, + "p95": 288.9920026063919, + "p99": 311.19999289512634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, 
+ "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-927a2eb0", + "identity": "h200|uccl|n-a|7168|8|256|bf16|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_5c7c1624", + "comparisonKey": "18c4732f19d26d5c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:10.401426+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 49.95200037956238, + "p90": 57.24800005555153, + "p95": 65.15199691057205, + "p99": 77.02399790287018 + }, + "combine": { + "p50": 40.09599983692169, + "p90": 42.55999997258186, + "p95": 49.12000149488449, + "p99": 59.67999994754791 + }, + "roundtrip": { + "p50": 65.85600227117538, + "p90": 74.40000027418137, + "p95": 81.727996468544, + "p99": 106.65600001811981 + }, + "isolatedSum": { + "p50": 90.04800021648407, + "p90": 99.80800002813339, + "p95": 114.27199840545654, + "p99": 136.7039978504181 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.15200173854828, + "p90": 57.82400071620941, + "p95": 67.4239993095398, + "p99": 78.8159966468811 + }, + "combine": { + "p50": 40.031999349594116, + "p90": 42.87999868392944, + "p95": 50.52800104022026, + "p99": 60.5119988322258 + }, + "roundtrip": { + "p50": 66.11199676990509, + "p90": 76.28799974918365, + "p95": 88.51200342178345, + "p99": 104.25599664449692 + }, + "isolatedSum": { + "p50": 89.1840010881424, + "p90": 100.70399940013885, + "p95": 117.95200034976006, + "p99": 139.3279954791069 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 49.27999898791313, + "p90": 53.247999399900436, + "p95": 60.63999980688095, + "p99": 73.27999919652939 + }, + "combine": { + "p50": 
40.28800129890442, + "p90": 42.847998440265656, + "p95": 47.39199951291084, + "p99": 59.39200147986412 + }, + "roundtrip": { + "p50": 66.65600091218948, + "p90": 77.56800204515457, + "p95": 85.15200018882751, + "p99": 104.76800054311752 + }, + "isolatedSum": { + "p50": 89.56800028681755, + "p90": 96.09599784016609, + "p95": 108.0319993197918, + "p99": 132.6720006763935 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 49.695998430252075, + "p90": 55.48800155520439, + "p95": 62.30400130152702, + "p99": 79.26400005817413 + }, + "combine": { + "p50": 41.6640006005764, + "p90": 46.62400111556053, + "p95": 51.93600058555603, + "p99": 60.15999987721443 + }, + "roundtrip": { + "p50": 68.38399916887283, + "p90": 79.68000322580338, + "p95": 86.56000345945358, + "p99": 103.13600301742554 + }, + "isolatedSum": { + "p50": 91.35999903082848, + "p90": 102.11200267076492, + "p95": 114.24000188708305, + "p99": 139.42399993538857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 56.03199824690819, + "p90": 58.59199911355972, + "p95": 68.44799965620041, + "p99": 89.47200328111649 + }, + "combine": { + "p50": 41.82400181889534, + "p90": 48.00000041723251, + "p95": 50.04800111055374, + "p99": 56.89600110054016 + }, + "roundtrip": { + "p50": 74.49600100517273, + "p90": 86.87999844551086, + "p95": 96.92800045013428, + "p99": 109.47199910879135 + }, + "isolatedSum": { + "p50": 97.85600006580353, + "p90": 106.59199953079224, + "p95": 118.49600076675415, + "p99": 
146.36800438165665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 57.50399827957153, + "p90": 64.89600241184235, + "p95": 73.85600358247757, + "p99": 86.62399649620056 + }, + "combine": { + "p50": 50.04800111055374, + "p90": 58.17599967122078, + "p95": 63.840001821517944, + "p99": 69.92000341415405 + }, + "roundtrip": { + "p50": 84.95999872684479, + "p90": 95.23200243711472, + "p95": 104.80000078678131, + "p99": 112.89600282907486 + }, + "isolatedSum": { + "p50": 107.55199939012527, + "p90": 123.07200208306313, + "p95": 137.6960054039955, + "p99": 156.54399991035461 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 71.10399752855301, + "p90": 79.32800054550171, + "p95": 96.00000083446503, + "p99": 108.8000014424324 + }, + "combine": { + "p50": 65.24799764156342, + "p90": 69.60000097751617, + "p95": 75.55200159549713, + "p99": 81.53600245714188 + }, + "roundtrip": { + "p50": 109.47199910879135, + "p90": 115.9679964184761, + "p95": 122.36800044775009, + "p99": 131.84000551700592 + }, + "isolatedSum": { + "p50": 136.35199517011642, + "p90": 148.92800152301788, + "p95": 171.55200242996216, + "p99": 190.33600389957428 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.18400174379349, + "p90": 
98.59199821949005, + "p95": 107.16799646615982, + "p99": 116.57600104808807 + }, + "combine": { + "p50": 94.36800330877304, + "p90": 100.80000013113022, + "p95": 104.44799810647964, + "p99": 112.64000087976456 + }, + "roundtrip": { + "p50": 166.81599617004395, + "p90": 174.9120056629181, + "p95": 184.38400328159332, + "p99": 193.92000138759613 + }, + "isolatedSum": { + "p50": 187.55200505256653, + "p90": 199.39199835062027, + "p95": 211.61599457263947, + "p99": 229.21600192785263 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b0dfcbf4", + "identity": "h200|uccl|n-a|7168|8|256|bf16|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_0840ecc4", + "comparisonKey": "422800698cd519d1", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:41:34.452308+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + 
"conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 49.44000020623207, + "p90": 55.93600124120712, + "p95": 65.18399715423584, + "p99": 79.00799810886383 + }, + "combine": { + "p50": 40.09599983692169, + "p90": 48.448000103235245, + "p95": 54.655998945236206, + "p99": 65.40799885988235 + }, + "roundtrip": { + "p50": 65.8240020275116, + "p90": 82.59200304746628, + "p95": 97.56799787282944, + "p99": 106.01600259542465 + }, + "isolatedSum": { + "p50": 89.53600004315376, + "p90": 104.38400134444237, + "p95": 119.83999609947205, + "p99": 144.41599696874619 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 49.44000020623207, + "p90": 59.13599953055382, + "p95": 71.26399874687195, + "p99": 81.05599880218506 + }, + "combine": { + "p50": 40.41599854826927, + "p90": 48.8319993019104, + "p95": 56.41600117087364, + "p99": 61.88800185918808 + }, + "roundtrip": { + "p50": 66.72000139951706, + "p90": 82.2720006108284, + "p95": 94.01600062847137, + "p99": 
106.33599758148193 + }, + "isolatedSum": { + "p50": 89.85599875450134, + "p90": 107.96799883246422, + "p95": 127.67999991774559, + "p99": 142.94400066137314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 49.695998430252075, + "p90": 57.24800005555153, + "p95": 66.78400188684464, + "p99": 79.23199981451035 + }, + "combine": { + "p50": 40.41599854826927, + "p90": 45.21600157022476, + "p95": 51.96800082921982, + "p99": 69.95200365781784 + }, + "roundtrip": { + "p50": 66.84800237417221, + "p90": 80.38400113582611, + "p95": 93.79199892282486, + "p99": 108.96000266075134 + }, + "isolatedSum": { + "p50": 90.11199697852135, + "p90": 102.46400162577629, + "p95": 118.75200271606445, + "p99": 149.1840034723282 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 50.75199902057648, + "p90": 61.664000153541565, + "p95": 71.96799665689468, + "p99": 82.07999914884567 + }, + "combine": { + "p50": 41.760001331567764, + "p90": 49.79199916124344, + "p95": 59.7120001912117, + "p99": 69.15199756622314 + }, + "roundtrip": { + "p50": 68.09599697589874, + "p90": 81.4720019698143, + "p95": 93.75999867916107, + "p99": 112.15999722480774 + }, + "isolatedSum": { + "p50": 92.51200035214424, + "p90": 111.455999314785, + "p95": 131.67999684810638, + "p99": 151.23199671506882 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 55.615998804569244, + "p90": 59.07199904322624, + "p95": 69.98399645090103, + "p99": 80.28800040483475 + }, + "combine": { + "p50": 42.11200028657913, + "p90": 49.247998744249344, + "p95": 57.37600103020668, + "p99": 63.80800157785416 + }, + "roundtrip": { + "p50": 74.01599735021591, + "p90": 81.40800148248672, + "p95": 97.9200005531311, + "p99": 112.03200370073318 + }, + "isolatedSum": { + "p50": 97.72799909114838, + "p90": 108.31999778747559, + "p95": 127.35999748110771, + "p99": 144.0960019826889 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 57.69599974155426, + "p90": 63.680000603199005, + "p95": 72.38399982452393, + "p99": 84.6719965338707 + }, + "combine": { + "p50": 49.984000623226166, + "p90": 56.384000927209854, + "p95": 59.7120001912117, + "p99": 69.85600292682648 + }, + "roundtrip": { + "p50": 87.26400136947632, + "p90": 96.00000083446503, + "p95": 106.97600245475769, + "p99": 120.86399644613266 + }, + "isolatedSum": { + "p50": 107.68000036478043, + "p90": 120.06400153040886, + "p95": 132.09600001573563, + "p99": 154.52799946069717 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 69.2799985408783, + "p90": 77.11999863386154, + "p95": 89.50400352478027, + "p99": 104.22399640083313 + }, + "combine": { + "p50": 65.72800129652023, + "p90": 68.92800331115723, + "p95": 78.23999971151352, + "p99": 85.63199639320374 + }, + "roundtrip": { + "p50": 
111.42399907112122, + "p90": 117.91999638080597, + "p95": 122.94399738311768, + "p99": 136.06399297714233 + }, + "isolatedSum": { + "p50": 135.00799983739853, + "p90": 146.04800194501877, + "p95": 167.7440032362938, + "p99": 189.85599279403687 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.56799721717834, + "p90": 101.88800096511841, + "p95": 112.03200370073318, + "p99": 121.85599654912949 + }, + "combine": { + "p50": 94.30400282144547, + "p90": 101.72799974679947, + "p95": 106.01600259542465, + "p99": 114.78400230407715 + }, + "roundtrip": { + "p50": 166.24000668525696, + "p90": 175.32800137996674, + "p95": 182.27200210094452, + "p99": 194.33599710464478 + }, + "isolatedSum": { + "p50": 187.8720000386238, + "p90": 203.61600071191788, + "p95": 218.04800629615784, + "p99": 236.63999885320663 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c5f1d40b", + "identity": "h200|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "h200_5966376c", + "comparisonKey": "c90e0c741fa73b6e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:35.143073+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + 
"transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 305.7279884815216, + "p90": 351.1039912700653, + "p95": 363.3919954299927, + "p99": 424.8639941215515 + }, + "combine": { + "p50": 63.680000603199005, + "p90": 82.49600231647491, + "p95": 92.92799979448318, + "p99": 102.36799716949463 + }, + "roundtrip": { + "p50": 367.0080006122589, + "p90": 434.08000469207764, + "p95": 449.47201013565063, + "p99": 490.04799127578735 + }, + "isolatedSum": { + "p50": 369.4079890847206, + "p90": 433.5999935865402, + "p95": 456.31999522447586, + "p99": 527.2319912910461 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
172032, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 296.9599962234497, + "p90": 353.59999537467957, + "p95": 363.68000507354736, + "p99": 411.0400080680847 + }, + "combine": { + "p50": 65.31199812889099, + "p90": 88.03199976682663, + "p95": 95.58399766683578, + "p99": 107.32799768447876 + }, + "roundtrip": { + "p50": 349.3439853191376, + "p90": 408.83201360702515, + "p95": 426.9759953022003, + "p99": 445.43999433517456 + }, + "isolatedSum": { + "p50": 362.2719943523407, + "p90": 441.6319951415062, + "p95": 459.26400274038315, + "p99": 518.3680057525635 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 352256, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 305.6960105895996, + "p90": 355.48800230026245, + "p95": 396.9919979572296, + "p99": 525.8560180664062 + }, + "combine": { + "p50": 66.6240006685257, + "p90": 94.94400024414062, + "p95": 103.35999727249146, + "p99": 114.68800157308578 + }, + "roundtrip": { + "p50": 357.4399948120117, + "p90": 419.96800899505615, + "p95": 431.3279986381531, + "p99": 463.00798654556274 + }, + "isolatedSum": { + "p50": 372.3200112581253, + "p90": 450.4320025444031, + "p95": 500.35199522972107, + "p99": 640.544019639492 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 692224, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 311.3279938697815, + "p90": 359.48801040649414, + "p95": 370.7199990749359, + "p99": 400.736004114151 + }, + "combine": { + "p50": 
64.00000303983688, + "p90": 80.92799782752991, + "p95": 92.6079973578453, + "p99": 98.27200323343277 + }, + "roundtrip": { + "p50": 351.3599932193756, + "p90": 407.8400135040283, + "p95": 430.91198801994324, + "p99": 599.6800065040588 + }, + "isolatedSum": { + "p50": 375.3279969096184, + "p90": 440.41600823402405, + "p95": 463.3279964327812, + "p99": 499.00800734758377 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1372160, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 309.28000807762146, + "p90": 351.3599932193756, + "p95": 360.25598645210266, + "p99": 385.2800130844116 + }, + "combine": { + "p50": 66.43199920654297, + "p90": 88.16000074148178, + "p95": 96.83199971914291, + "p99": 113.24799805879593 + }, + "roundtrip": { + "p50": 377.1840035915375, + "p90": 447.1360146999359, + "p95": 463.5840058326721, + "p99": 519.2000269889832 + }, + "isolatedSum": { + "p50": 375.71200728416443, + "p90": 439.5199939608574, + "p95": 457.0879861712456, + "p99": 498.52801114320755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2732032, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 292.9919958114624, + "p90": 340.7039940357208, + "p95": 348.4160006046295, + "p99": 372.8640079498291 + }, + "combine": { + "p50": 69.40799951553345, + "p90": 88.83199840784073, + "p95": 98.43199700117111, + "p99": 106.04800283908844 + }, + "roundtrip": { + "p50": 363.48798871040344, + "p90": 420.9280014038086, + "p95": 438.1760060787201, + "p99": 460.25601029396057 + }, + "isolatedSum": { + "p50": 362.39999532699585, + "p90": 429.53599244356155, + "p95": 446.84799760580063, + "p99": 
478.91201078891754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5562368, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 312.7039968967438, + "p90": 356.6400110721588, + "p95": 373.76001477241516, + "p99": 405.4720103740692 + }, + "combine": { + "p50": 79.23199981451035, + "p90": 100.19200295209885, + "p95": 107.84000158309937, + "p99": 115.23199826478958 + }, + "roundtrip": { + "p50": 366.62399768829346, + "p90": 429.7280013561249, + "p95": 449.44000244140625, + "p99": 494.56000328063965 + }, + "isolatedSum": { + "p50": 391.9359967112541, + "p90": 456.83201402425766, + "p95": 481.6000163555145, + "p99": 520.7040086388588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11096064, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 300.9920120239258, + "p90": 351.6159951686859, + "p95": 370.2720105648041, + "p99": 465.472012758255 + }, + "combine": { + "p50": 87.23200112581253, + "p90": 109.11999642848969, + "p95": 114.656001329422, + "p99": 122.97599762678146 + }, + "roundtrip": { + "p50": 381.76000118255615, + "p90": 444.70399618148804, + "p95": 460.86400747299194, + "p99": 563.0720257759094 + }, + "isolatedSum": { + "p50": 388.2240131497383, + "p90": 460.7359915971756, + "p95": 484.9280118942261, + "p99": 588.4480103850365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b43d553c", + "identity": 
"h200|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "h200_5966376c", + "comparisonKey": "583f921446825733", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:27.766761+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + 
"tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 273.6319899559021, + "p90": 325.0240087509155, + "p95": 332.3200047016144, + "p99": 355.3279936313629 + }, + "combine": { + "p50": 64.00000303983688, + "p90": 74.91199672222137, + "p95": 89.12000060081482, + "p99": 96.83199971914291 + }, + "roundtrip": { + "p50": 331.712007522583, + "p90": 387.9680037498474, + "p95": 397.15200662612915, + "p99": 428.0639886856079 + }, + "isolatedSum": { + "p50": 337.631992995739, + "p90": 399.9360054731369, + "p95": 421.4400053024292, + "p99": 452.15999335050583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 215040, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 307.9040050506592, + "p90": 365.59998989105225, + "p95": 377.85598635673523, + "p99": 401.0559916496277 + }, + "combine": { + "p50": 64.7360011935234, + "p90": 83.8719978928566, + "p95": 91.48799628019333, + "p99": 100.8640006184578 + }, + "roundtrip": { + "p50": 333.44000577926636, + "p90": 390.6880021095276, + "p95": 402.24000811576843, + "p99": 433.3440065383911 + }, + "isolatedSum": { + "p50": 372.6400062441826, + "p90": 449.47198778390884, + "p95": 469.34398263692856, + "p99": 501.9199922680855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 440320, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 283.6480140686035, + "p90": 348.83201122283936, + "p95": 366.8479919433594, + "p99": 444.7999894618988 + }, + "combine": { + "p50": 66.23999774456024, + "p90": 75.58400183916092, + "p95": 88.83199840784073, + "p99": 100.25600343942642 + }, + "roundtrip": { + "p50": 334.9440097808838, + "p90": 396.5759873390198, + "p95": 
406.20800852775574, + "p99": 422.4959909915924 + }, + "isolatedSum": { + "p50": 349.88801181316376, + "p90": 424.4160130620003, + "p95": 455.6799903512001, + "p99": 545.0559929013252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 870400, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 300.4480004310608, + "p90": 362.91199922561646, + "p95": 378.495991230011, + "p99": 401.91999077796936 + }, + "combine": { + "p50": 67.61600077152252, + "p90": 78.87999713420868, + "p95": 92.57599711418152, + "p99": 98.81599992513657 + }, + "roundtrip": { + "p50": 334.6560001373291, + "p90": 388.7360095977783, + "p95": 399.4239866733551, + "p99": 431.71200156211853 + }, + "isolatedSum": { + "p50": 368.0640012025833, + "p90": 441.79199635982513, + "p95": 471.0719883441925, + "p99": 500.7359907031059 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1735680, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 293.5360074043274, + "p90": 359.96800661087036, + "p95": 372.96000123023987, + "p99": 398.144006729126 + }, + "combine": { + "p50": 68.2239979505539, + "p90": 78.14399898052216, + "p95": 91.20000153779984, + "p99": 99.7759997844696 + }, + "roundtrip": { + "p50": 336.7680013179779, + "p90": 394.1760063171387, + "p95": 407.29600191116333, + "p99": 446.1440145969391 + }, + "isolatedSum": { + "p50": 361.7600053548813, + "p90": 438.1120055913925, + "p95": 464.1600027680397, + "p99": 497.9200065135956 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3456000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 4, + "correct": true, 
+ "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 285.12001037597656, + "p90": 335.32801270484924, + "p95": 350.5600094795227, + "p99": 403.4560024738312 + }, + "combine": { + "p50": 73.31199944019318, + "p90": 84.48000252246857, + "p95": 95.20000219345093, + "p99": 103.45599800348282 + }, + "roundtrip": { + "p50": 346.3680148124695, + "p90": 400.2240002155304, + "p95": 421.1840033531189, + "p99": 466.7840003967285 + }, + "isolatedSum": { + "p50": 358.43200981616974, + "p90": 419.8080152273178, + "p95": 445.76001167297363, + "p99": 506.912000477314 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6988800, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 302.8160035610199, + "p90": 377.6639997959137, + "p95": 391.2639915943146, + "p99": 423.3280122280121 + }, + "combine": { + "p50": 82.56000280380249, + "p90": 96.57599776983261, + "p95": 103.67999970912933, + "p99": 113.34399878978729 + }, + "roundtrip": { + "p50": 373.63201379776, + "p90": 430.62400817871094, + "p95": 455.00800013542175, + "p99": 544.0000295639038 + }, + "isolatedSum": { + "p50": 385.3760063648224, + "p90": 474.2399975657463, + "p95": 494.9439913034439, + "p99": 536.6720110177994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13987840, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 282.81599283218384, + "p90": 337.2479975223541, + "p95": 344.543993473053, + "p99": 364.8959994316101 + }, + "combine": { + "p50": 95.77599912881851, + "p90": 107.80800133943558, + "p95": 121.60000205039978, + "p99": 139.29599523544312 + }, 
+ "roundtrip": { + "p50": 374.65599179267883, + "p90": 442.68798828125, + "p95": 484.5440089702606, + "p99": 639.680027961731 + }, + "isolatedSum": { + "p50": 378.59199196100235, + "p90": 445.0559988617897, + "p95": 466.14399552345276, + "p99": 504.1919946670532 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2cb5f587", + "identity": "h200|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_5966376c", + "comparisonKey": "28581c73512ba955", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:22.161928+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + 
"traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 273.3120024204254, + "p90": 304.8959970474243, + "p95": 320.92800736427307, + "p99": 379.64800000190735 + }, + "combine": { + "p50": 69.88800317049026, + "p90": 75.77600330114365, + "p95": 78.65600287914276, + "p99": 87.0399996638298 + }, + "roundtrip": { + "p50": 325.72799921035767, + "p90": 350.3040075302124, + "p95": 360.03199219703674, + "p99": 388.44799995422363 + }, + "isolatedSum": { + "p50": 343.2000055909157, + "p90": 380.67200034856796, + "p95": 399.58401024341583, + "p99": 466.68799966573715 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 270336, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 268.3520019054413, + "p90": 290.94401001930237, + "p95": 302.91199684143066, + "p99": 363.68000507354736 + }, + "combine": { + "p50": 70.81600278615952, + "p90": 80.86399734020233, + "p95": 96.00000083446503, + "p99": 113.21599781513214 + }, + "roundtrip": { + "p50": 332.89599418640137, + "p90": 359.360009431839, + "p95": 368.4479892253876, + "p99": 410.46398878097534 + }, + "isolatedSum": { + "p50": 339.1680046916008, + "p90": 371.8080073595047, + "p95": 398.9119976758957, + "p99": 476.8960028886795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 528384, 
+ "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 272.15999364852905, + "p90": 313.56799602508545, + "p95": 331.7759931087494, + "p99": 355.55198788642883 + }, + "combine": { + "p50": 71.42399996519089, + "p90": 78.11199873685837, + "p95": 85.9839990735054, + "p99": 97.28000313043594 + }, + "roundtrip": { + "p50": 350.8799970149994, + "p90": 386.81599497795105, + "p95": 403.4560024738312, + "p99": 484.1279983520508 + }, + "isolatedSum": { + "p50": 343.58399361371994, + "p90": 391.6799947619438, + "p95": 417.7599921822548, + "p99": 452.8319910168648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1062912, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 279.6480059623718, + "p90": 307.6159954071045, + "p95": 316.70400500297546, + "p99": 332.28799700737 + }, + "combine": { + "p50": 70.88000327348709, + "p90": 77.15199887752533, + "p95": 79.96799796819687, + "p99": 84.95999872684479 + }, + "roundtrip": { + "p50": 332.28799700737, + "p90": 358.43199491500854, + "p95": 366.1760091781616, + "p99": 389.0880048274994 + }, + "isolatedSum": { + "p50": 350.5280092358589, + "p90": 384.7679942846298, + "p95": 396.67200297117233, + "p99": 417.2479957342148 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131968, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 268.0320143699646, + "p90": 307.16800689697266, + "p95": 323.2319951057434, + "p99": 349.5680093765259 + }, + "combine": { + "p50": 
72.92799651622772, + "p90": 79.00799810886383, + "p95": 84.57600325345993, + "p99": 99.93600100278854 + }, + "roundtrip": { + "p50": 331.2639892101288, + "p90": 366.65600538253784, + "p95": 378.9440095424652, + "p99": 406.68800473213196 + }, + "isolatedSum": { + "p50": 340.9600108861923, + "p90": 386.1760050058365, + "p95": 407.80799835920334, + "p99": 449.5040103793144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4251648, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 271.4880108833313, + "p90": 297.760009765625, + "p95": 309.08799171447754, + "p99": 342.303991317749 + }, + "combine": { + "p50": 79.45600152015686, + "p90": 85.1840004324913, + "p95": 87.87199854850769, + "p99": 97.69599884748459 + }, + "roundtrip": { + "p50": 346.8480110168457, + "p90": 375.8080005645752, + "p95": 383.83999466896057, + "p99": 412.06398606300354 + }, + "isolatedSum": { + "p50": 350.94401240348816, + "p90": 382.9440101981163, + "p95": 396.95999026298523, + "p99": 439.9999901652336 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8454144, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 274.0800082683563, + "p90": 323.2319951057434, + "p95": 338.23999762535095, + "p99": 354.0799915790558 + }, + "combine": { + "p50": 87.61599659919739, + "p90": 94.4959968328476, + "p95": 100.67199915647507, + "p99": 116.70400202274323 + }, + "roundtrip": { + "p50": 348.25599193573, + "p90": 390.3360068798065, + "p95": 402.27198600769043, + "p99": 436.5440011024475 + }, + "isolatedSum": { + "p50": 361.6960048675537, + "p90": 417.727991938591, + "p95": 438.911996781826, + "p99": 470.783993601799 + }, 
+ "roundtripMeasured": true, + "dispatchLogicalBytes": 16711680, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 278.9120078086853, + "p90": 322.59199023246765, + "p95": 335.2000117301941, + "p99": 361.82400584220886 + }, + "combine": { + "p50": 101.59999877214432, + "p90": 108.22399705648422, + "p95": 116.28799885511398, + "p99": 134.0479999780655 + }, + "roundtrip": { + "p50": 363.647997379303, + "p90": 410.0480079650879, + "p95": 430.11200428009033, + "p99": 465.472012758255 + }, + "isolatedSum": { + "p50": 380.5120065808296, + "p90": 430.8159872889519, + "p95": 451.4880105853081, + "p99": 495.87200582027435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-20265ece", + "identity": "h200|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_ab5f65fc", + "comparisonKey": "03d9361eb7e094df", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:12.905603+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, 
+ "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 117.79200285673141, + "p90": 162.33600676059723, + "p95": 168.83200407028198, + "p99": 188.28800320625305 + }, + "combine": { + "p50": 71.80800288915634, + "p90": 98.65599870681763, + "p95": 103.74400019645691, + "p99": 180.35200238227844 + }, + "roundtrip": { + "p50": 220.32000124454498, + "p90": 295.80798745155334, + "p95": 312.1280074119568, + "p99": 336.2880051136017 + }, + "isolatedSum": { + "p50": 189.60000574588776, + "p90": 260.99200546741486, + "p95": 272.5760042667389, + "p99": 368.6400055885315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 
121.0239976644516, + "p90": 168.70400309562683, + "p95": 179.1680008172989, + "p99": 196.25599682331085 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 89.79199826717377, + "p95": 101.3759970664978, + "p99": 106.91200196743011 + }, + "roundtrip": { + "p50": 218.23999285697937, + "p90": 282.24000334739685, + "p95": 291.6800081729889, + "p99": 320.1279938220978 + }, + "isolatedSum": { + "p50": 193.4399977326393, + "p90": 258.4960013628006, + "p95": 280.5439978837967, + "p99": 303.16799879074097 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 117.8240031003952, + "p90": 158.49600732326508, + "p95": 168.06399822235107, + "p99": 179.71199750900269 + }, + "combine": { + "p50": 72.80000299215317, + "p90": 84.83199775218964, + "p95": 97.6639986038208, + "p99": 102.08000242710114 + }, + "roundtrip": { + "p50": 222.04799950122833, + "p90": 279.6480059623718, + "p95": 291.7119860649109, + "p99": 326.6560137271881 + }, + "isolatedSum": { + "p50": 190.62400609254837, + "p90": 243.3280050754547, + "p95": 265.7279968261719, + "p99": 281.7919999361038 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 129.63199615478516, + "p90": 176.60799622535706, + "p95": 189.66400623321533, + "p99": 211.74399554729462 + }, + "combine": { + "p50": 74.5600014925003, + "p90": 92.19200164079666, + "p95": 103.26399654150009, + "p99": 111.64800077676773 + }, + "roundtrip": { + "p50": 233.88800024986267, + "p90": 289.8879945278168, + "p95": 303.74398827552795, + "p99": 362.527996301651 + 
}, + "isolatedSum": { + "p50": 204.19199764728546, + "p90": 268.7999978661537, + "p95": 292.9280027747154, + "p99": 323.39199632406235 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 116.86400324106216, + "p90": 159.13599729537964, + "p95": 165.18400609493256, + "p99": 187.00799345970154 + }, + "combine": { + "p50": 75.29599964618683, + "p90": 91.5519967675209, + "p95": 102.88000106811523, + "p99": 108.2879975438118 + }, + "roundtrip": { + "p50": 228.96000742912292, + "p90": 277.3759961128235, + "p95": 288.9919877052307, + "p99": 313.8880133628845 + }, + "isolatedSum": { + "p50": 192.160002887249, + "p90": 250.68799406290054, + "p95": 268.0640071630478, + "p99": 295.29599100351334 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 119.9679970741272, + "p90": 164.95999693870544, + "p95": 175.10400712490082, + "p99": 202.33599841594696 + }, + "combine": { + "p50": 80.89599758386612, + "p90": 97.43999689817429, + "p95": 108.15999656915665, + "p99": 112.96000331640244 + }, + "roundtrip": { + "p50": 249.56800043582916, + "p90": 316.8320059776306, + "p95": 336.5119993686676, + "p99": 403.6799967288971 + }, + "isolatedSum": { + "p50": 200.86399465799332, + "p90": 262.39999383687973, + "p95": 283.26400369405746, + "p99": 315.2960017323494 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 122.17599898576736, + "p90": 162.49600052833557, + "p95": 169.50400173664093, + "p99": 186.27199530601501 + }, + "combine": { + "p50": 90.62399715185165, + "p90": 111.58400028944016, + "p95": 118.94399672746658, + "p99": 121.88799679279327 + }, + "roundtrip": { + "p50": 237.7920001745224, + "p90": 294.8479950428009, + "p95": 303.20000648498535, + "p99": 323.3279883861542 + }, + "isolatedSum": { + "p50": 212.79999613761902, + "p90": 274.0800008177757, + "p95": 288.4479984641075, + "p99": 308.1599920988083 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 131.58400356769562, + "p90": 167.4879938364029, + "p95": 177.05599963665009, + "p99": 196.70400023460388 + }, + "combine": { + "p50": 111.07199639081955, + "p90": 134.0160071849823, + "p95": 141.08799397945404, + "p99": 147.5840061903 + }, + "roundtrip": { + "p50": 260.127991437912, + "p90": 315.71200489997864, + "p95": 324.73599910736084, + "p99": 341.2800133228302 + }, + "isolatedSum": { + "p50": 242.65599995851517, + "p90": 301.5040010213852, + "p95": 318.1439936161041, + "p99": 344.28800642490387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ab36a1f6", + "identity": "h200|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_5966376c", + "comparisonKey": "79bc7184f7560eb3", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:10.243839+00:00", + "status": "valid", + 
"publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 280.5440127849579, + "p90": 325.72799921035767, + "p95": 346.1439907550812, + "p99": 447.58400321006775 + }, + "combine": { + "p50": 70.81600278615952, + "p90": 85.4400023818016, + "p95": 97.59999811649323, + "p99": 
103.96800190210342 + }, + "roundtrip": { + "p50": 325.24800300598145, + "p90": 373.1519877910614, + "p95": 385.6639862060547, + "p99": 416.48000478744507 + }, + "isolatedSum": { + "p50": 351.3600155711174, + "p90": 411.16800159215927, + "p95": 443.7439888715744, + "p99": 551.5520051121712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 271.64798974990845, + "p90": 323.96799325942993, + "p95": 351.0720133781433, + "p99": 481.471985578537 + }, + "combine": { + "p50": 71.3919997215271, + "p90": 87.23200112581253, + "p95": 96.76799923181534, + "p99": 102.49599814414978 + }, + "roundtrip": { + "p50": 338.52800726890564, + "p90": 398.46399426460266, + "p95": 417.4079895019531, + "p99": 449.69600439071655 + }, + "isolatedSum": { + "p50": 343.03998947143555, + "p90": 411.19999438524246, + "p95": 447.84001260995865, + "p99": 583.9679837226868 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 280.64000606536865, + "p90": 334.23998951911926, + "p95": 347.135990858078, + "p99": 389.0880048274994 + }, + "combine": { + "p50": 75.58400183916092, + "p90": 91.42400324344635, + "p95": 99.2640033364296, + "p99": 110.33599823713303 + }, + "roundtrip": { + "p50": 335.4560136795044, + "p90": 385.15201210975647, + "p95": 396.95999026298523, + "p99": 451.3919949531555 + }, + "isolatedSum": { + "p50": 356.22400790452957, + "p90": 425.6639927625656, + "p95": 446.3999941945076, + "p99": 499.4240030646324 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + 
"combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 278.59199047088623, + "p90": 332.4800133705139, + "p95": 344.92799639701843, + "p99": 422.1760034561157 + }, + "combine": { + "p50": 73.40800017118454, + "p90": 90.55999666452408, + "p95": 101.75999999046326, + "p99": 108.73600095510483 + }, + "roundtrip": { + "p50": 329.72800731658936, + "p90": 377.6960074901581, + "p95": 393.3440148830414, + "p99": 434.143990278244 + }, + "isolatedSum": { + "p50": 351.99999064207077, + "p90": 423.040010035038, + "p95": 446.6879963874817, + "p99": 530.9120044112206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 289.5680069923401, + "p90": 438.27199935913086, + "p95": 467.20001101493835, + "p99": 526.528000831604 + }, + "combine": { + "p50": 75.74400305747986, + "p90": 92.32000261545181, + "p95": 100.60799866914749, + "p99": 105.59999942779541 + }, + "roundtrip": { + "p50": 329.120010137558, + "p90": 373.02398681640625, + "p95": 384.0959966182709, + "p99": 430.1440119743347 + }, + "isolatedSum": { + "p50": 365.31201004981995, + "p90": 530.5920019745827, + "p95": 567.8080096840858, + "p99": 632.1280002593994 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 283.488005399704, + "p90": 339.32799100875854, + "p95": 354.0480136871338, + "p99": 379.5520067214966 + }, + "combine": { + "p50": 
83.36000144481659, + "p90": 102.84800082445145, + "p95": 109.43999886512756, + "p99": 118.04799735546112 + }, + "roundtrip": { + "p50": 352.54400968551636, + "p90": 404.03199195861816, + "p95": 412.3519957065582, + "p99": 497.6319968700409 + }, + "isolatedSum": { + "p50": 366.84800684452057, + "p90": 442.17599183321, + "p95": 463.48801255226135, + "p99": 497.6000040769577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 269.0240144729614, + "p90": 315.13598561286926, + "p95": 322.4320113658905, + "p99": 342.9439961910248 + }, + "combine": { + "p50": 91.2960022687912, + "p90": 109.79200154542923, + "p95": 115.1999980211258, + "p99": 130.8159977197647 + }, + "roundtrip": { + "p50": 351.6800105571747, + "p90": 401.08799934387207, + "p95": 413.05598616600037, + "p99": 470.0799882411957 + }, + "isolatedSum": { + "p50": 360.3200167417526, + "p90": 424.9279871582985, + "p95": 437.6320093870163, + "p99": 473.7599939107895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 284.4479978084564, + "p90": 328.3199965953827, + "p95": 340.38400650024414, + "p99": 386.81599497795105 + }, + "combine": { + "p50": 111.58400028944016, + "p90": 132.25600123405457, + "p95": 137.88799941539764, + "p99": 150.01599490642548 + }, + "roundtrip": { + "p50": 376.5439987182617, + "p90": 426.59199237823486, + "p95": 443.7440037727356, + "p99": 523.5520005226135 + }, + "isolatedSum": { + "p50": 396.0319980978966, + "p90": 460.57599782943726, + "p95": 478.2720059156418, + "p99": 
536.8319898843765 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-28ceb9db", + "identity": "h200|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "h200_5966376c", + "comparisonKey": "e3fd15f03a231a26", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:33.663560+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 263.808012008667, + "p90": 327.07199454307556, + "p95": 337.8559947013855, + "p99": 368.99200081825256 + }, + "combine": { + "p50": 69.7920024394989, + "p90": 81.79199695587158, + "p95": 94.94400024414062, + "p99": 101.27999633550644 + }, + "roundtrip": { + "p50": 325.6640136241913, + "p90": 388.41599225997925, + "p95": 397.8239893913269, + "p99": 429.79198694229126 + }, + "isolatedSum": { + "p50": 333.6000144481659, + "p90": 408.86399149894714, + "p95": 432.7999949455261, + "p99": 470.271997153759 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 301056, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 292.03200340270996, + "p90": 352.60799527168274, + "p95": 366.2720024585724, + "p99": 397.63200283050537 + }, + "combine": { + "p50": 72.41600006818771, + "p90": 94.65599805116653, + "p95": 101.50399804115295, + "p99": 111.29599809646606 + }, + "roundtrip": { + "p50": 342.72000193595886, + "p90": 401.88801288604736, + "p95": 414.94399309158325, + "p99": 441.7920112609863 + }, + "isolatedSum": { + "p50": 364.4480034708977, + "p90": 447.2639933228493, + "p95": 467.77600049972534, + "p99": 508.92800092697144 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 609280, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 282.20799565315247, + "p90": 335.80800890922546, + "p95": 352.9599905014038, + "p99": 408.7679982185364 + }, + "combine": { + "p50": 75.29599964618683, + "p90": 102.46399790048599, + "p95": 108.41599851846695, + "p99": 119.13599818944931 + }, + "roundtrip": { + "p50": 345.6000089645386, + "p90": 429.79198694229126, + "p95": 508.8000297546387, + "p99": 561.1839890480042 + }, + "isolatedSum": { + "p50": 357.5039952993393, + "p90": 438.27200680971146, + "p95": 461.37598901987076, + "p99": 527.9039964079857 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1204224, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 261.6319954395294, + "p90": 321.28000259399414, + "p95": 332.63999223709106, + "p99": 352.1920144557953 + }, + "combine": { + "p50": 73.11999797821045, + "p90": 94.46399658918381, + "p95": 101.31199657917023, + "p99": 119.32799965143204 + }, + "roundtrip": { + "p50": 357.85600543022156, + "p90": 439.35999274253845, + "p95": 455.00800013542175, + "p99": 541.2799715995789 + }, + "isolatedSum": { + "p50": 334.75199341773987, + "p90": 415.74399918317795, + "p95": 433.9519888162613, + "p99": 471.5200141072273 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2415616, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 269.9199914932251, + "p90": 317.82400608062744, + "p95": 337.44001388549805, + "p99": 399.616003036499 + }, + "combine": { + "p50": 75.13599842786789, + "p90": 82.8159973025322, + "p95": 92.44800359010696, + "p99": 100.47999769449234 + }, + "roundtrip": { + "p50": 322.87999987602234, + "p90": 369.6640133857727, + 
"p95": 377.75999307632446, + "p99": 406.8480134010315 + }, + "isolatedSum": { + "p50": 345.055989921093, + "p90": 400.64000338315964, + "p95": 429.888017475605, + "p99": 500.09600073099136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4924416, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 261.4400088787079, + "p90": 328.99200916290283, + "p95": 393.0560052394867, + "p99": 504.06402349472046 + }, + "combine": { + "p50": 82.94399827718735, + "p90": 107.51999914646149, + "p95": 112.0000034570694, + "p99": 122.17599898576736 + }, + "roundtrip": { + "p50": 338.55998516082764, + "p90": 403.1359851360321, + "p95": 441.8559968471527, + "p99": 549.4400262832642 + }, + "isolatedSum": { + "p50": 344.38400715589523, + "p90": 436.5120083093643, + "p95": 505.0560086965561, + "p99": 626.2400224804878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 282.55999088287354, + "p90": 350.75199604034424, + "p95": 371.71199917793274, + "p99": 419.20000314712524 + }, + "combine": { + "p50": 92.0960009098053, + "p90": 105.3759977221489, + "p95": 119.61600184440613, + "p99": 126.39999389648438 + }, + "roundtrip": { + "p50": 355.77601194381714, + "p90": 424.54400658607483, + "p95": 441.0240054130554, + "p99": 570.9120035171509 + }, + "isolatedSum": { + "p50": 374.65599179267883, + "p90": 456.12799376249313, + "p95": 491.32800102233887, + "p99": 545.5999970436096 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19418112, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 273.98398518562317, + "p90": 329.5679986476898, + "p95": 343.6799943447113, + "p99": 376.73598527908325 + }, + "combine": { + "p50": 109.8880022764206, + "p90": 125.63200294971466, + "p95": 135.51999628543854, + "p99": 147.45600521564484 + }, + "roundtrip": { + "p50": 372.25601077079773, + "p90": 425.6959855556488, + "p95": 441.18401408195496, + "p99": 511.2000107765198 + }, + "isolatedSum": { + "p50": 383.87198746204376, + "p90": 455.2000015974045, + "p95": 479.19999063014984, + "p99": 524.1919904947281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ac67d642", + "identity": "h200|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_980d3360", + "comparisonKey": "a02ffd92501030bb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:39:10.829149+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": 
"normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 94.65599805116653, + "p90": 139.16799426078796, + "p95": 148.80000054836273, + "p99": 183.77600610256195 + }, + "combine": { + "p50": 72.28799909353256, + "p90": 92.44800359010696, + "p95": 97.37599641084671, + "p99": 104.96000200510025 + }, + "roundtrip": { + "p50": 213.919997215271, + "p90": 282.368004322052, + "p95": 300.927996635437, + "p99": 358.7839901447296 + }, + "isolatedSum": { + "p50": 166.9439971446991, + "p90": 231.61599785089493, + "p95": 246.17599695920944, + "p99": 288.7360081076622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 97.37599641084671, + "p90": 137.28000223636627, + "p95": 149.21599626541138, + "p99": 163.83999586105347 + }, + "combine": { + "p50": 
71.19999825954437, + "p90": 89.82399851083755, + "p95": 96.63999825716019, + "p99": 102.04800218343735 + }, + "roundtrip": { + "p50": 193.40799748897552, + "p90": 246.07999622821808, + "p95": 257.56800174713135, + "p99": 278.9439857006073 + }, + "isolatedSum": { + "p50": 168.57599467039108, + "p90": 227.10400074720383, + "p95": 245.85599452257156, + "p99": 265.8879980444908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 94.11200135946274, + "p90": 140.6719982624054, + "p95": 148.8640010356903, + "p99": 168.47999393939972 + }, + "combine": { + "p50": 72.48000055551529, + "p90": 88.03199976682663, + "p95": 96.3200032711029, + "p99": 101.95200145244598 + }, + "roundtrip": { + "p50": 199.20000433921814, + "p90": 252.6400089263916, + "p95": 269.6959972381592, + "p99": 311.1039996147156 + }, + "isolatedSum": { + "p50": 166.59200191497803, + "p90": 228.70399802923203, + "p95": 245.1840043067932, + "p99": 270.4319953918457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 94.04800087213516, + "p90": 136.03200018405914, + "p95": 141.31200313568115, + "p99": 157.6319932937622 + }, + "combine": { + "p50": 73.27999919652939, + "p90": 96.92800045013428, + "p95": 101.6319990158081, + "p99": 110.01600325107574 + }, + "roundtrip": { + "p50": 195.3279972076416, + "p90": 249.79199469089508, + "p95": 261.59998774528503, + "p99": 280.19198775291443 + }, + "isolatedSum": { + "p50": 167.32800006866455, + "p90": 232.96000063419342, + "p95": 242.94400215148926, + "p99": 
267.64799654483795 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 105.56799918413162, + "p90": 143.77599954605103, + "p95": 156.0640037059784, + "p99": 182.97599256038666 + }, + "combine": { + "p50": 77.66400277614594, + "p90": 96.70399874448776, + "p95": 104.80000078678131, + "p99": 111.48799955844879 + }, + "roundtrip": { + "p50": 209.21599864959717, + "p90": 288.1920039653778, + "p95": 297.21599817276, + "p99": 342.6240086555481 + }, + "isolatedSum": { + "p50": 183.23200196027756, + "p90": 240.4799982905388, + "p95": 260.8640044927597, + "p99": 294.46399211883545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 104.70400005578995, + "p90": 152.92799472808838, + "p95": 161.21600568294525, + "p99": 181.21600151062012 + }, + "combine": { + "p50": 84.41600203514099, + "p90": 107.04000294208527, + "p95": 115.55200070142746, + "p99": 128.7039965391159 + }, + "roundtrip": { + "p50": 205.53599298000336, + "p90": 269.82399821281433, + "p95": 282.6879918575287, + "p99": 314.1759932041168 + }, + "isolatedSum": { + "p50": 189.12000209093094, + "p90": 259.96799767017365, + "p95": 276.7680063843727, + "p99": 309.919998049736 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 99.16800260543823, + "p90": 
127.45599448680878, + "p95": 136.73600554466248, + "p99": 148.99200201034546 + }, + "combine": { + "p50": 91.20000153779984, + "p90": 107.39199817180634, + "p95": 116.44800007343292, + "p99": 121.24799937009811 + }, + "roundtrip": { + "p50": 231.07199370861053, + "p90": 281.792014837265, + "p95": 308.9919984340668, + "p99": 375.2959966659546 + }, + "isolatedSum": { + "p50": 190.36800414323807, + "p90": 234.8479926586151, + "p95": 253.1840056180954, + "p99": 270.2400013804436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 107.16799646615982, + "p90": 139.77600634098053, + "p95": 148.95999431610107, + "p99": 177.279993891716 + }, + "combine": { + "p50": 108.73600095510483, + "p90": 129.2800009250641, + "p95": 134.91199910640717, + "p99": 147.90399372577667 + }, + "roundtrip": { + "p50": 237.37600445747375, + "p90": 292.86399483680725, + "p95": 307.776004076004, + "p99": 351.99999809265137 + }, + "isolatedSum": { + "p50": 215.90399742126465, + "p90": 269.0560072660446, + "p95": 283.87199342250824, + "p99": 325.1839876174927 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4a41a025", + "identity": "h200|uccl|n-a|7168|8|256|fp8|ll|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_d12126a5", + "comparisonKey": "3d368eea7061625e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:42:20.344642+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": 
"decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 45.53600028157234, + "p90": 57.5999990105629, + "p95": 70.3359991312027, + "p99": 86.65599673986435 + }, + "combine": { + "p50": 55.26399984955788, + "p90": 68.35199892520905, + "p95": 73.08799773454666, + "p99": 83.8719978928566 + }, + "roundtrip": { + "p50": 1877.7920007705688, + "p90": 1891.808032989502, + "p95": 1901.7599821090698, + "p99": 
1919.0080165863037 + }, + "isolatedSum": { + "p50": 100.80000013113022, + "p90": 125.95199793577194, + "p95": 143.42399686574936, + "p99": 170.52799463272095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 45.343998819589615, + "p90": 59.007998555898666, + "p95": 73.95199686288834, + "p99": 92.44800359010696 + }, + "combine": { + "p50": 58.720000088214874, + "p90": 74.5600014925003, + "p95": 80.57600259780884, + "p99": 109.76000130176544 + }, + "roundtrip": { + "p50": 1882.3039531707764, + "p90": 1903.1039476394653, + "p95": 1912.992000579834, + "p99": 1932.7360391616821 + }, + "isolatedSum": { + "p50": 104.06399890780449, + "p90": 133.56800004839897, + "p95": 154.52799946069717, + "p99": 202.2080048918724 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 44.79999840259552, + "p90": 57.69599974155426, + "p95": 69.37599927186966, + "p99": 87.64799684286118 + }, + "combine": { + "p50": 53.31199988722801, + "p90": 73.91999661922455, + "p95": 84.48000252246857, + "p99": 118.367999792099 + }, + "roundtrip": { + "p50": 1879.3280124664307, + "p90": 1898.7840414047241, + "p95": 1905.5999517440796, + "p99": 1940.3200149536133 + }, + "isolatedSum": { + "p50": 98.11199828982353, + "p90": 131.6159963607788, + "p95": 153.85600179433823, + "p99": 206.01599663496017 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 45.152001082897186, + "p90": 58.62399935722351, + "p95": 73.79200309515, + "p99": 86.71999722719193 + }, + "combine": { + "p50": 55.71199953556061, + "p90": 66.68800115585327, + "p95": 72.83200323581696, + "p99": 95.96800059080124 + }, + "roundtrip": { + "p50": 1883.1360340118408, + "p90": 1910.0799560546875, + "p95": 1929.6319484710693, + "p99": 1969.3119525909424 + }, + "isolatedSum": { + "p50": 100.8640006184578, + "p90": 125.31200051307678, + "p95": 146.62400633096695, + "p99": 182.68799781799316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 46.62400111556053, + "p90": 54.207999259233475, + "p95": 63.93600255250931, + "p99": 79.52000200748444 + }, + "combine": { + "p50": 56.48000165820122, + "p90": 70.56000083684921, + "p95": 81.11999928951263, + "p99": 109.63200032711029 + }, + "roundtrip": { + "p50": 1883.039951324463, + "p90": 1896.3840007781982, + "p95": 1907.1999788284302, + "p99": 1956.5759897232056 + }, + "isolatedSum": { + "p50": 103.10400277376175, + "p90": 124.76800009608269, + "p95": 145.05600184202194, + "p99": 189.15200233459473 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 49.0880012512207, + "p90": 61.28000095486641, + "p95": 76.99199765920639, + "p99": 86.87999844551086 + }, + "combine": { + "p50": 65.43999910354614, + "p90": 81.02399855852127, + "p95": 85.56800335645676, + "p99": 122.84799665212631 + }, + "roundtrip": { + "p50": 
1894.976019859314, + "p90": 1913.3440256118774, + "p95": 1918.6559915542603, + "p99": 1946.8799829483032 + }, + "isolatedSum": { + "p50": 114.52800035476685, + "p90": 142.30399951338768, + "p95": 162.56000101566315, + "p99": 209.72799509763718 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 55.58399856090546, + "p90": 61.40799820423126, + "p95": 68.54400038719177, + "p99": 82.33600109815598 + }, + "combine": { + "p50": 72.67200201749802, + "p90": 84.79999750852585, + "p95": 93.31200271844864, + "p99": 114.78400230407715 + }, + "roundtrip": { + "p50": 1910.3679656982422, + "p90": 1929.1839599609375, + "p95": 1941.59996509552, + "p99": 1973.9840030670166 + }, + "isolatedSum": { + "p50": 128.25600057840347, + "p90": 146.2079957127571, + "p95": 161.8560031056404, + "p99": 197.12000340223312 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.50400024652481, + "p90": 80.60800284147263, + "p95": 93.34400296211243, + "p99": 102.59199887514114 + }, + "combine": { + "p50": 103.26399654150009, + "p90": 110.11199653148651, + "p95": 112.76800185441971, + "p99": 130.11200726032257 + }, + "roundtrip": { + "p50": 1971.135973930359, + "p90": 1981.8559885025024, + "p95": 1988.095998764038, + "p99": 2032.2880744934082 + }, + "isolatedSum": { + "p50": 172.7679967880249, + "p90": 190.71999937295914, + "p95": 206.11200481653214, + "p99": 232.70400613546371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + 
"fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0fe8efb5", + "identity": "h200|uccl|n-a|7168|8|256|fp8|ll|runtime-visible-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "h200_0855e66d", + "comparisonKey": "e87232512952cd6c", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:43:06.447922+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "mode": "ll", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8 LL", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 132, + "resourceClass": "fixed-kernel", + "conformanceClass": "not-applicable", + "fixedKernel": true, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 45.1200008392334, + "p90": 55.615998804569244, + "p95": 66.11199676990509, + "p99": 79.0719985961914 + }, + "combine": { + "p50": 57.88800120353699, + "p90": 86.68799698352814, + "p95": 97.02400118112564, + "p99": 111.1999973654747 + }, + "roundtrip": { + "p50": 1876.8320083618164, + "p90": 1893.7599658966064, + "p95": 1907.3280096054077, + "p99": 1954.367995262146 + }, + "isolatedSum": { + "p50": 103.00800204277039, + "p90": 142.30399578809738, + "p95": 163.13599795103073, + "p99": 190.2719959616661 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 45.27999833226204, + "p90": 59.10399928689003, + "p95": 75.9039968252182, + "p99": 87.3280018568039 + }, + "combine": { + "p50": 55.64799904823303, + "p90": 72.15999811887741, + "p95": 81.02399855852127, + "p99": 117.44000017642975 + }, + "roundtrip": { + "p50": 1880.3839683532715, + "p90": 1900.0320434570312, + "p95": 1905.5999517440796, + "p99": 1923.6160516738892 + }, + "isolatedSum": { + "p50": 100.92799738049507, + "p90": 131.26399740576744, + "p95": 156.92799538373947, + "p99": 204.76800203323364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 46.30399867892265, + "p90": 57.151999324560165, + "p95": 70.8480030298233, + "p99": 85.88799834251404 + }, + "combine": { + 
"p50": 56.96000158786774, + "p90": 74.72000271081924, + "p95": 83.16799998283386, + "p99": 118.49600076675415 + }, + "roundtrip": { + "p50": 1878.0800104141235, + "p90": 1912.8639698028564, + "p95": 1923.8719940185547, + "p99": 1986.5920543670654 + }, + "isolatedSum": { + "p50": 103.26400026679039, + "p90": 131.8720020353794, + "p95": 154.01600301265717, + "p99": 204.3839991092682 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 46.14400118589401, + "p90": 61.5679994225502, + "p95": 76.99199765920639, + "p99": 84.99199897050858 + }, + "combine": { + "p50": 55.64799904823303, + "p90": 74.81600344181061, + "p95": 85.95199882984161, + "p99": 96.57599776983261 + }, + "roundtrip": { + "p50": 1882.0159435272217, + "p90": 1909.9199771881104, + "p95": 1929.6640157699585, + "p99": 1957.1839570999146 + }, + "isolatedSum": { + "p50": 101.79200023412704, + "p90": 136.3840028643608, + "p95": 162.943996489048, + "p99": 181.5679967403412 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 48.54400083422661, + "p90": 64.7680014371872, + "p95": 79.68000322580338, + "p99": 89.59999680519104 + }, + "combine": { + "p50": 60.19200012087822, + "p90": 74.75200295448303, + "p95": 80.03199845552444, + "p99": 101.6639992594719 + }, + "roundtrip": { + "p50": 1885.5040073394775, + "p90": 1905.344009399414, + "p95": 1916.8959856033325, + "p99": 1988.6399507522583 + }, + "isolatedSum": { + "p50": 108.73600095510483, + "p90": 139.52000439167023, + "p95": 159.71200168132782, + "p99": 
191.26399606466293 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 49.6320016682148, + "p90": 62.591999769210815, + "p95": 75.26399940252304, + "p99": 85.02399921417236 + }, + "combine": { + "p50": 63.80800157785416, + "p90": 74.49600100517273, + "p95": 79.68000322580338, + "p99": 88.51200342178345 + }, + "roundtrip": { + "p50": 1894.752025604248, + "p90": 1926.751971244812, + "p95": 1957.8880071640015, + "p99": 2017.8558826446533 + }, + "isolatedSum": { + "p50": 113.44000324606895, + "p90": 137.08800077438354, + "p95": 154.94400262832642, + "p99": 173.5360026359558 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 57.53599852323532, + "p90": 78.27199995517731, + "p95": 86.62399649620056, + "p99": 100.0640019774437 + }, + "combine": { + "p50": 77.7600035071373, + "p90": 96.73599898815155, + "p95": 99.71199929714203, + "p99": 110.62400043010712 + }, + "roundtrip": { + "p50": 1914.0479564666748, + "p90": 1934.0159893035889, + "p95": 1939.5840167999268, + "p99": 1976.6080379486084 + }, + "isolatedSum": { + "p50": 135.29600203037262, + "p90": 175.00799894332886, + "p95": 186.3359957933426, + "p99": 210.6880024075508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 69.023996591568, + "p90": 
79.42400127649307, + "p95": 93.34400296211243, + "p99": 101.95200145244598 + }, + "combine": { + "p50": 103.61599922180176, + "p90": 112.2559979557991, + "p95": 116.95999652147293, + "p99": 157.72800147533417 + }, + "roundtrip": { + "p50": 1972.3520278930664, + "p90": 1980.8319807052612, + "p95": 1993.9199686050415, + "p99": 2171.3919639587402 + }, + "isolatedSum": { + "p50": 172.63999581336975, + "p90": 191.67999923229218, + "p95": 210.30399948358536, + "p99": 259.68000292778015 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1557a7f1", + "identity": "h200|uccl|n-a|4096|8|128|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_cd151ec9", + "comparisonKey": "5c3f304e78b13fd7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:47:00.655589+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.83200562000275, + "p90": 145.24799585342407, + "p95": 148.92800152301788, + "p99": 154.4959992170334 + }, + "combine": { + "p50": 95.45599669218063, + "p90": 101.72799974679947, + "p95": 105.59999942779541, + "p99": 112.41599917411804 + }, + "roundtrip": { + "p50": 199.5519995689392, + "p90": 212.16000616550446, + "p95": 216.06400609016418, + "p99": 224.73600506782532 + }, + "isolatedSum": { + "p50": 228.28800231218338, + "p90": 246.97599560022354, + "p95": 254.5280009508133, + "p99": 266.9119983911514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 153.76000106334686, + "p90": 167.71200299263, + "p95": 176.4799952507019, + "p99": 191.80800020694733 + }, + "combine": { + "p50": 119.93599683046341, + "p90": 127.07200646400452, + "p95": 132.192000746727, + "p99": 148.12800288200378 + }, + "roundtrip": { + "p50": 237.88799345493317, + "p90": 254.59200143814087, + "p95": 
261.4080011844635, + "p99": 278.0480086803436 + }, + "isolatedSum": { + "p50": 273.6959978938103, + "p90": 294.7840094566345, + "p95": 308.6719959974289, + "p99": 339.9360030889511 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 189.91999328136444, + "p90": 203.39199900627136, + "p95": 210.68799495697021, + "p99": 228.64000499248505 + }, + "combine": { + "p50": 168.16000640392303, + "p90": 176.79999768733978, + "p95": 183.67999792099, + "p99": 199.3280053138733 + }, + "roundtrip": { + "p50": 312.3520016670227, + "p90": 331.13598823547363, + "p95": 336.95998787879944, + "p99": 345.6000089645386 + }, + "isolatedSum": { + "p50": 358.0799996852875, + "p90": 380.19199669361115, + "p95": 394.3679928779602, + "p99": 427.96801030635834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 255.61600923538208, + "p90": 268.3199942111969, + "p95": 275.6800055503845, + "p99": 291.1359965801239 + }, + "combine": { + "p50": 265.21599292755127, + "p90": 274.3679881095886, + "p95": 278.4320116043091, + "p99": 292.06401109695435 + }, + "roundtrip": { + "p50": 485.02400517463684, + "p90": 497.79200553894043, + "p95": 503.07202339172363, + "p99": 521.6000080108643 + }, + "isolatedSum": { + "p50": 520.8320021629333, + "p90": 542.6879823207855, + "p95": 554.1120171546936, + "p99": 583.2000076770782 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 
5558, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 388.60800862312317, + "p90": 397.95199036598206, + "p95": 403.6479890346527, + "p99": 416.51201248168945 + }, + "combine": { + "p50": 437.18400597572327, + "p90": 445.5679953098297, + "p95": 448.5760033130646, + "p99": 459.1679871082306 + }, + "roundtrip": { + "p50": 790.0800108909607, + "p90": 814.3360018730164, + "p95": 828.7680149078369, + "p99": 912.5440120697021 + }, + "isolatedSum": { + "p50": 825.7920145988464, + "p90": 843.5199856758118, + "p95": 852.2239923477173, + "p99": 875.67999958992 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 661.6320013999939, + "p90": 673.3120083808899, + "p95": 678.9759993553162, + "p99": 690.4320120811462 + }, + "combine": { + "p50": 781.7280292510986, + "p90": 791.7439937591553, + "p95": 797.6639866828918, + "p99": 912.2560024261475 + }, + "roundtrip": { + "p50": 1400.3839492797852, + "p90": 1412.384033203125, + "p95": 1417.6959991455078, + "p99": 1425.536036491394 + }, + "isolatedSum": { + "p50": 1443.3600306510925, + "p90": 1465.0560021400452, + "p95": 1476.639986038208, + "p99": 1602.6880145072937 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e52ed7f", + "identity": "h200|uccl|n-a|5120|8|160|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_cd151ec9", + "comparisonKey": "8ead5abb6e45922f", + 
"schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:55.078368+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 142.2719955444336, + "p90": 175.74399709701538, + "p95": 182.23999440670013, + "p99": 192.73599982261658 + }, + 
"combine": { + "p50": 104.99200224876404, + "p90": 122.36800044775009, + "p95": 131.3599944114685, + "p99": 139.20000195503235 + }, + "roundtrip": { + "p50": 207.2640061378479, + "p90": 245.85600197315216, + "p95": 251.42401456832886, + "p99": 264.0959918498993 + }, + "isolatedSum": { + "p50": 247.26399779319763, + "p90": 298.1119975447655, + "p95": 313.59998881816864, + "p99": 331.9360017776489 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 165.24800658226013, + "p90": 193.27999651432037, + "p95": 202.27199792861938, + "p99": 226.20800137519836 + }, + "combine": { + "p50": 133.40799510478973, + "p90": 146.91199362277985, + "p95": 155.20000457763672, + "p99": 165.43999314308167 + }, + "roundtrip": { + "p50": 256.6080093383789, + "p90": 284.7679853439331, + "p95": 289.63199257850647, + "p99": 296.7360019683838 + }, + "isolatedSum": { + "p50": 298.65600168704987, + "p90": 340.1919901371002, + "p95": 357.4720025062561, + "p99": 391.64799451828003 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 202.36800611019135, + "p90": 224.8000055551529, + "p95": 235.10399460792542, + "p99": 251.26400589942932 + }, + "combine": { + "p50": 196.3520050048828, + "p90": 213.4079933166504, + "p95": 221.79199755191803, + "p99": 239.16800320148468 + }, + "roundtrip": { + "p50": 355.55198788642883, + "p90": 385.0240111351013, + "p95": 395.55200934410095, + "p99": 420.991986989975 + }, + "isolatedSum": { + "p50": 398.72001111507416, + "p90": 
438.2079988718033, + "p95": 456.89599215984344, + "p99": 490.432009100914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 286.3999903202057, + "p90": 316.32000207901, + "p95": 324.0000009536743, + "p99": 342.1120047569275 + }, + "combine": { + "p50": 299.3600070476532, + "p90": 316.3839876651764, + "p95": 321.3439881801605, + "p99": 333.407998085022 + }, + "roundtrip": { + "p50": 546.5919971466064, + "p90": 567.296028137207, + "p95": 573.9200115203857, + "p99": 590.2079939842224 + }, + "isolatedSum": { + "p50": 585.7599973678589, + "p90": 632.7039897441864, + "p95": 645.3439891338348, + "p99": 675.5200028419495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 452.7679979801178, + "p90": 472.54401445388794, + "p95": 479.45600748062134, + "p99": 494.7200119495392 + }, + "combine": { + "p50": 503.167986869812, + "p90": 514.0159726142883, + "p95": 517.3760056495667, + "p99": 528.6399722099304 + }, + "roundtrip": { + "p50": 914.143979549408, + "p90": 932.2239756584167, + "p95": 942.0480132102966, + "p99": 954.7520279884338 + }, + "isolatedSum": { + "p50": 955.9359848499298, + "p90": 986.5599870681763, + "p95": 996.832013130188, + "p99": 1023.3599841594696 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
4096, + "globalTokens": 32768, + "dispatch": { + "p50": 785.2479815483093, + "p90": 811.9360208511353, + "p95": 836.7999792098999, + "p99": 865.2480244636536 + }, + "combine": { + "p50": 906.8480134010315, + "p90": 927.5519847869873, + "p95": 931.7439794540405, + "p99": 939.9679899215698 + }, + "roundtrip": { + "p50": 1636.1279487609863, + "p90": 1660.032033920288, + "p95": 1669.8240041732788, + "p99": 1686.6240501403809 + }, + "isolatedSum": { + "p50": 1692.0959949493408, + "p90": 1739.4880056381226, + "p95": 1768.5439586639404, + "p99": 1805.2160143852234 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-52f57621", + "identity": "h200|uccl|n-a|6144|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_cd151ec9", + "comparisonKey": "529050fb6477fbec", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:50:51.952201+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 
0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 148.41599762439728, + "p90": 172.4800020456314, + "p95": 182.17599391937256, + "p99": 198.30399751663208 + }, + "combine": { + "p50": 109.6000000834465, + "p90": 124.7360035777092, + "p95": 131.00799918174744, + "p99": 135.51999628543854 + }, + "roundtrip": { + "p50": 219.7120040655136, + "p90": 252.79998779296875, + "p95": 260.0640058517456, + "p99": 275.90399980545044 + }, + "isolatedSum": { + "p50": 258.0159977078438, + "p90": 297.2160056233406, + "p95": 313.18399310112, + "p99": 333.8239938020706 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 171.424001455307, + "p90": 200.51200687885284, + "p95": 206.91199600696564, + "p99": 232.83199965953827 + }, + "combine": { + "p50": 143.23200285434723, + "p90": 162.6880019903183, + "p95": 168.96000504493713, + "p99": 176.4799952507019 
+ }, + "roundtrip": { + "p50": 274.81600642204285, + "p90": 300.1919984817505, + "p95": 306.5600097179413, + "p99": 315.42399525642395 + }, + "isolatedSum": { + "p50": 314.65600430965424, + "p90": 363.20000886917114, + "p95": 375.87200105190277, + "p99": 409.3119949102402 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 223.10400009155273, + "p90": 250.07998943328857, + "p95": 255.295991897583, + "p99": 271.07200026512146 + }, + "combine": { + "p50": 214.88000452518463, + "p90": 232.35200345516205, + "p95": 237.7600073814392, + "p99": 247.42400646209717 + }, + "roundtrip": { + "p50": 395.7119882106781, + "p90": 420.25598883628845, + "p95": 430.400013923645, + "p99": 444.89601254463196 + }, + "isolatedSum": { + "p50": 437.98400461673737, + "p90": 482.4319928884506, + "p95": 493.0559992790222, + "p99": 518.4960067272186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 313.4079873561859, + "p90": 335.4879915714264, + "p95": 339.58399295806885, + "p99": 349.5999872684479 + }, + "combine": { + "p50": 332.38399028778076, + "p90": 344.7040021419525, + "p95": 349.5999872684479, + "p99": 357.9519987106323 + }, + "roundtrip": { + "p50": 615.1360273361206, + "p90": 644.3840265274048, + "p95": 656.3839912414551, + "p99": 687.7440214157104 + }, + "isolatedSum": { + "p50": 645.7919776439667, + "p90": 680.1919937133789, + "p95": 689.1839802265167, + "p99": 707.5519859790802 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 511.48802042007446, + "p90": 527.4559855461121, + "p95": 533.6959958076477, + "p99": 554.4639825820923 + }, + "combine": { + "p50": 565.4720067977905, + "p90": 576.9280195236206, + "p95": 581.3440084457397, + "p99": 590.9439921379089 + }, + "roundtrip": { + "p50": 1035.5520248413086, + "p90": 1052.8639554977417, + "p95": 1067.8080320358276, + "p99": 1141.152024269104 + }, + "isolatedSum": { + "p50": 1076.960027217865, + "p90": 1104.3840050697327, + "p95": 1115.0400042533875, + "p99": 1145.4079747200012 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 893.7919735908508, + "p90": 913.6959910392761, + "p95": 920.0639724731445, + "p99": 945.6319808959961 + }, + "combine": { + "p50": 1021.9839811325073, + "p90": 1034.6879959106445, + "p95": 1040.4479503631592, + "p99": 1056.22398853302 + }, + "roundtrip": { + "p50": 1872.480034828186, + "p90": 1895.3919410705566, + "p95": 1902.6880264282227, + "p99": 1925.6319999694824 + }, + "isolatedSum": { + "p50": 1915.7759547233582, + "p90": 1948.3839869499207, + "p95": 1960.5119228363037, + "p99": 2001.855969429016 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6e1a607c", + "identity": 
"h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_05be9f49", + "comparisonKey": "4f6cbb2ad4892beb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:35:53.552990+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + 
"rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.27200496196747, + "p90": 169.76000368595123, + "p95": 181.5039962530136, + "p99": 192.28799641132355 + }, + "combine": { + "p50": 118.84800344705582, + "p90": 132.9279989004135, + "p95": 141.9840008020401, + "p99": 150.2400040626526 + }, + "roundtrip": { + "p50": 237.47199773788452, + "p90": 262.719988822937, + "p95": 272.7679908275604, + "p99": 292.57598519325256 + }, + "isolatedSum": { + "p50": 273.1200084090233, + "p90": 302.68800258636475, + "p95": 323.4879970550537, + "p99": 342.52800047397614 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 183.48799645900726, + "p90": 195.99999487400055, + "p95": 201.1519968509674, + "p99": 216.15999937057495 + }, + "combine": { + "p50": 158.49600732326508, + "p90": 165.79200327396393, + "p95": 170.3999936580658, + "p99": 177.76000499725342 + }, + "roundtrip": { + "p50": 301.7599880695343, + "p90": 314.5279884338379, + "p95": 320.8320140838623, + "p99": 333.1199884414673 + }, + "isolatedSum": { + "p50": 341.98400378227234, + "p90": 361.7919981479645, + "p95": 371.5519905090332, + "p99": 393.92000436782837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 240.57599902153015, + "p90": 264.0320062637329, + "p95": 275.87199211120605, + "p99": 298.97600412368774 + }, + "combine": { + "p50": 236.64000630378723, + "p90": 247.16800451278687, + "p95": 252.25600600242615, + "p99": 263.839989900589 + }, + 
"roundtrip": { + "p50": 439.90400433540344, + "p90": 457.95199275016785, + "p95": 464.28799629211426, + "p99": 479.3280065059662 + }, + "isolatedSum": { + "p50": 477.2160053253174, + "p90": 511.2000107765198, + "p95": 528.1279981136322, + "p99": 562.8159940242767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 347.3919928073883, + "p90": 364.9280071258545, + "p95": 372.6080060005188, + "p99": 382.88000226020813 + }, + "combine": { + "p50": 369.56799030303955, + "p90": 377.21601128578186, + "p95": 380.92800974845886, + "p99": 393.0560052394867 + }, + "roundtrip": { + "p50": 680.0640225410461, + "p90": 691.1360025405884, + "p95": 695.1360106468201, + "p99": 707.4880003929138 + }, + "isolatedSum": { + "p50": 716.9599831104279, + "p90": 742.1440184116364, + "p95": 753.5360157489777, + "p99": 775.9360074996948 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 580.5119872093201, + "p90": 596.8319773674011, + "p95": 602.944016456604, + "p99": 621.6959953308105 + }, + "combine": { + "p50": 632.8960061073303, + "p90": 641.5039896965027, + "p95": 644.864022731781, + "p99": 652.3839831352234 + }, + "roundtrip": { + "p50": 1174.399971961975, + "p90": 1189.7599697113037, + "p95": 1196.7359781265259, + "p99": 1207.9360485076904 + }, + "isolatedSum": { + "p50": 1213.4079933166504, + "p90": 1238.3359670639038, + "p95": 1247.808039188385, + "p99": 1274.079978466034 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + 
"combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1030.1120281219482, + "p90": 1050.0799417495728, + "p95": 1057.1520328521729, + "p99": 1078.0160427093506 + }, + "combine": { + "p50": 1140.4800415039062, + "p90": 1152.6720523834229, + "p95": 1158.1759452819824, + "p99": 1172.8320121765137 + }, + "roundtrip": { + "p50": 2123.4560012817383, + "p90": 2143.615961074829, + "p95": 2148.7040519714355, + "p99": 2165.8239364624023 + }, + "isolatedSum": { + "p50": 2170.5920696258545, + "p90": 2202.7519941329956, + "p95": 2215.3279781341553, + "p99": 2250.8480548858643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2ec51908", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_cd151ec9", + "comparisonKey": "448b58686d0fa1c5", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:37:46.526968+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + 
"eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 158.720001578331, + "p90": 241.7919933795929, + "p95": 246.46399915218353, + "p99": 257.53599405288696 + }, + "combine": { + "p50": 121.69600278139114, + "p90": 160.70400178432465, + "p95": 164.5440012216568, + "p99": 177.05599963665009 + }, + "roundtrip": { + "p50": 235.87200045585632, + "p90": 353.7600040435791, + "p95": 373.56799840927124, + "p99": 404.38398718833923 + }, + "isolatedSum": { + "p50": 280.41600435972214, + "p90": 402.49599516391754, + "p95": 411.00800037384033, + "p99": 434.59199368953705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 
183.26400220394135, + "p90": 207.23199844360352, + "p95": 215.10399878025055, + "p99": 225.63199698925018 + }, + "combine": { + "p50": 160.0320041179657, + "p90": 174.68799650669098, + "p95": 180.92800676822662, + "p99": 187.6160055398941 + }, + "roundtrip": { + "p50": 303.8400113582611, + "p90": 329.53599095344543, + "p95": 334.20801162719727, + "p99": 342.3359990119934 + }, + "isolatedSum": { + "p50": 343.29600632190704, + "p90": 381.9199949502945, + "p95": 396.0320055484772, + "p99": 413.2480025291443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 240.89600145816803, + "p90": 268.5759961605072, + "p95": 279.87200021743774, + "p99": 294.94398832321167 + }, + "combine": { + "p50": 238.0799949169159, + "p90": 256.22400641441345, + "p95": 262.2720003128052, + "p99": 276.95998549461365 + }, + "roundtrip": { + "p50": 441.8239891529083, + "p90": 472.1600115299225, + "p95": 480.4159998893738, + "p99": 490.911990404129 + }, + "isolatedSum": { + "p50": 478.9759963750839, + "p90": 524.8000025749207, + "p95": 542.1440005302429, + "p99": 571.9039738178253 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 350.3359854221344, + "p90": 371.071994304657, + "p95": 377.375990152359, + "p99": 388.70400190353394 + }, + "combine": { + "p50": 371.616005897522, + "p90": 384.5439851284027, + "p95": 391.5199935436249, + "p99": 400.191992521286 + }, + "roundtrip": { + "p50": 684.2880249023438, + "p90": 700.2559900283813, + "p95": 706.8799734115601, 
+ "p99": 723.039984703064 + }, + "isolatedSum": { + "p50": 721.9519913196564, + "p90": 755.6159794330597, + "p95": 768.8959836959839, + "p99": 788.89599442482 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 584.2880010604858, + "p90": 595.8399772644043, + "p95": 601.0239720344543, + "p99": 612.0960116386414 + }, + "combine": { + "p50": 633.6640119552612, + "p90": 648.8000154495239, + "p95": 697.5359916687012, + "p99": 721.2799787521362 + }, + "roundtrip": { + "p50": 1181.8560361862183, + "p90": 1210.4640007019043, + "p95": 1222.5600481033325, + "p99": 1256.2559843063354 + }, + "isolatedSum": { + "p50": 1217.952013015747, + "p90": 1244.6399927139282, + "p95": 1298.5599637031555, + "p99": 1333.3759903907776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1028.6719799041748, + "p90": 1050.6880283355713, + "p95": 1059.9360466003418, + "p99": 1077.7599811553955 + }, + "combine": { + "p50": 1148.8640308380127, + "p90": 1172.8320121765137, + "p95": 1192.3840045928955, + "p99": 1237.4720573425293 + }, + "roundtrip": { + "p50": 2130.784034729004, + "p90": 2157.183885574341, + "p95": 2165.4720306396484, + "p99": 2185.6000423431396 + }, + "isolatedSum": { + "p50": 2177.5360107421875, + "p90": 2223.520040512085, + "p95": 2252.3200511932373, + "p99": 2315.232038497925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 
21781, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-65ca459b", + "identity": "h200|uccl|n-a|7168|8|384|bf16|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_cd151ec9", + "comparisonKey": "4ff8c3c496ef100b", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:45:07.617712+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 155.7759940624237, + "p90": 177.37600207328796, + "p95": 188.1600022315979, + "p99": 203.90400290489197 + }, + "combine": { + "p50": 119.80800330638885, + "p90": 139.77600634098053, + "p95": 147.77599275112152, + "p99": 164.63999450206757 + }, + "roundtrip": { + "p50": 241.69600009918213, + "p90": 277.24799513816833, + "p95": 284.15998816490173, + "p99": 300.06399750709534 + }, + "isolatedSum": { + "p50": 275.58399736881256, + "p90": 317.1520084142685, + "p95": 335.9359949827194, + "p99": 368.54399740695953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.95199286937714, + "p90": 210.81599593162537, + "p95": 220.0320065021515, + "p99": 232.03200101852417 + }, + "combine": { + "p50": 161.56800091266632, + "p90": 179.48800325393677, + "p95": 188.51199746131897, + "p99": 197.63199985027313 + }, + "roundtrip": { + "p50": 307.0079982280731, + "p90": 339.9679958820343, + "p95": 345.5039858818054, + "p99": 356.06399178504944 + }, + "isolatedSum": { + "p50": 347.51999378204346, + "p90": 390.30399918556213, + "p95": 408.54400396347046, + "p99": 429.6640008687973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 249.1839975118637, + "p90": 274.6880054473877, + "p95": 
284.15998816490173, + "p99": 303.3599853515625 + }, + "combine": { + "p50": 240.63999950885773, + "p90": 261.7279887199402, + "p95": 266.52801036834717, + "p99": 282.4319899082184 + }, + "roundtrip": { + "p50": 442.68798828125, + "p90": 472.6719856262207, + "p95": 479.5520007610321, + "p99": 494.52799558639526 + }, + "isolatedSum": { + "p50": 489.82399702072144, + "p90": 536.4159941673279, + "p95": 550.6879985332489, + "p99": 585.7919752597809 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 350.17600655555725, + "p90": 374.4960129261017, + "p95": 382.01600313186646, + "p99": 398.144006729126 + }, + "combine": { + "p50": 369.6320056915283, + "p90": 385.21599769592285, + "p95": 390.6559944152832, + "p99": 401.88801288604736 + }, + "roundtrip": { + "p50": 687.8719925880432, + "p90": 715.391993522644, + "p95": 739.4239902496338, + "p99": 774.6239900588989 + }, + "isolatedSum": { + "p50": 719.8080122470856, + "p90": 759.7120106220245, + "p95": 772.6719975471497, + "p99": 800.0320196151733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 573.8880038261414, + "p90": 597.5679755210876, + "p95": 605.6640148162842, + "p99": 619.5840239524841 + }, + "combine": { + "p50": 630.9760212898254, + "p90": 647.1359729766846, + "p95": 653.2800197601318, + "p99": 665.66401720047 + }, + "roundtrip": { + "p50": 1153.056025505066, + "p90": 1174.5599508285522, + "p95": 1184.2559576034546, + "p99": 1204.0319442749023 + }, + "isolatedSum": { + 
"p50": 1204.8640251159668, + "p90": 1244.7039484977722, + "p95": 1258.944034576416, + "p99": 1285.248041152954 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1043.3919429779053, + "p90": 1069.0560340881348, + "p95": 1080.448031425476, + "p99": 1095.744013786316 + }, + "combine": { + "p50": 1122.1760511398315, + "p90": 1137.8240585327148, + "p95": 1145.2480554580688, + "p99": 1164.5439863204956 + }, + "roundtrip": { + "p50": 2103.9679050445557, + "p90": 2128.096103668213, + "p95": 2135.3600025177, + "p99": 2157.3119163513184 + }, + "isolatedSum": { + "p50": 2165.567994117737, + "p90": 2206.8800926208496, + "p95": 2225.696086883545, + "p99": 2260.2880001068115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-add00a67", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||0a3064a2af0dd39", + "colorKey": "h200_5814bbeb", + "comparisonKey": "8b8e0acbc2bf993a", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:51:50.850258+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · balanced", + 
"model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0a3064a2af0dd39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 168.86399686336517, + "p90": 188.86399269104004, + "p95": 197.82400131225586, + "p99": 208.3519995212555 + }, + "combine": { + "p50": 136.03200018405914, + "p90": 147.23199605941772, + "p95": 155.29599785804749, + "p99": 161.5999937057495 + }, + "roundtrip": { + "p50": 267.16798543930054, + "p90": 280.0639867782593, + "p95": 288.38399052619934, + "p99": 297.0240116119385 + }, + "isolatedSum": { + "p50": 304.8959970474243, + "p90": 336.09598875045776, + "p95": 353.11999917030334, + "p99": 369.951993227005 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + 
"recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 219.00799870491028, + "p90": 234.27200317382812, + "p95": 241.2479966878891, + "p99": 254.36800718307495 + }, + "combine": { + "p50": 191.3599967956543, + "p90": 199.52000677585602, + "p95": 203.71200144290924, + "p99": 214.23999965190887 + }, + "roundtrip": { + "p50": 364.4160032272339, + "p90": 385.79198718070984, + "p95": 396.7680037021637, + "p99": 420.0960099697113 + }, + "isolatedSum": { + "p50": 410.3679955005646, + "p90": 433.79200994968414, + "p95": 444.95999813079834, + "p99": 468.6080068349838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 312.22400069236755, + "p90": 328.031986951828, + "p95": 333.5680067539215, + "p99": 345.7280099391937 + }, + "combine": { + "p50": 283.1999957561493, + "p90": 293.503999710083, + "p95": 299.26401376724243, + "p99": 312.8640055656433 + }, + "roundtrip": { + "p50": 558.8160157203674, + "p90": 577.2799849510193, + "p95": 589.3440246582031, + "p99": 607.1040034294128 + }, + "isolatedSum": { + "p50": 595.4239964485168, + "p90": 621.535986661911, + "p95": 632.8320205211639, + "p99": 658.592015504837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 496.96001410484314, + "p90": 508.67199897766113, + "p95": 515.7759785652161, + "p99": 528.3520221710205 + }, + "combine": { + "p50": 469.760000705719, + "p90": 480.0960123538971, + 
"p95": 487.5839948654175, + "p99": 497.0879852771759 + }, + "roundtrip": { + "p50": 926.8800020217896, + "p90": 946.3679790496826, + "p95": 959.5839977264404, + "p99": 997.1839785575867 + }, + "isolatedSum": { + "p50": 966.7200148105621, + "p90": 988.7680113315582, + "p95": 1003.3599734306335, + "p99": 1025.4400074481964 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 8, + "recvTokensMax": 8192, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 865.4720187187195, + "p90": 882.5920224189758, + "p95": 892.8959965705872, + "p99": 948.8639831542969 + }, + "combine": { + "p50": 842.2080278396606, + "p90": 855.4880023002625, + "p95": 861.8559837341309, + "p99": 891.6800022125244 + }, + "roundtrip": { + "p50": 1660.7680320739746, + "p90": 1683.7120056152344, + "p95": 1695.3599452972412, + "p99": 1744.7999715805054 + }, + "isolatedSum": { + "p50": 1707.6800465583801, + "p90": 1738.0800247192383, + "p95": 1754.751980304718, + "p99": 1840.5439853668213 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1879048192, + "combineLogicalBytes": 1879048192, + "fanoutMean": 8, + "recvTokensMax": 16384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1596.7680215835571, + "p90": 1617.6320314407349, + "p95": 1636.064052581787, + "p99": 1657.7919721603394 + }, + "combine": { + "p50": 1554.6879768371582, + "p90": 1565.6960010528564, + "p95": 1571.4240074157715, + "p99": 1610.975980758667 + }, + "roundtrip": { + "p50": 3108.351945877075, + "p90": 3128.1919479370117, + "p95": 3139.7759914398193, + "p99": 3174.4320392608643 + }, + "isolatedSum": { + "p50": 3151.4559984207153, + "p90": 3183.3280324935913, + "p95": 3207.4880599975586, + "p99": 3268.7679529190063 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 3758096384, + "combineLogicalBytes": 3758096384, + "fanoutMean": 8, + "recvTokensMax": 32768, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d5445b43", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||7aa44c7b86748b9", + "colorKey": "h200_5dd7099a", + "comparisonKey": "8c54954686465af0", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:52:38.962747+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "7aa44c7b86748b9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 129.88799810409546, + "p90": 238.0480021238327, + "p95": 244.83199417591095, + "p99": 253.9840042591095 + }, + "combine": { + "p50": 85.85599809885025, + "p90": 134.783998131752, + "p95": 140.99200069904327, + "p99": 152.319997549057 + }, + "roundtrip": { + "p50": 194.2719966173172, + "p90": 344.1280126571655, + "p95": 355.29598593711853, + "p99": 379.39199805259705 + }, + "isolatedSum": { + "p50": 215.7439962029457, + "p90": 372.8320002555847, + "p95": 385.8239948749542, + "p99": 406.3040018081665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 148.60799908638, + "p90": 179.80800569057465, + "p95": 191.39200448989868, + "p99": 237.92000114917755 + }, + "combine": { + "p50": 129.5360028743744, + "p90": 142.68800616264343, + "p95": 151.36000514030457, + "p99": 163.96799683570862 + }, + "roundtrip": { + "p50": 239.84000086784363, + "p90": 274.2080092430115, + "p95": 282.46399760246277, + "p99": 295.77600955963135 + }, + "isolatedSum": { + "p50": 278.1440019607544, + "p90": 322.4960118532181, + "p95": 342.75200963020325, + "p99": 401.88799798488617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
2048, + "globalTokens": 16384, + "dispatch": { + "p50": 225.53600370883942, + "p90": 254.14401292800903, + "p95": 261.9200050830841, + "p99": 272.0319926738739 + }, + "combine": { + "p50": 297.2480058670044, + "p90": 311.39200925827026, + "p95": 318.56000423431396, + "p99": 326.1440098285675 + }, + "roundtrip": { + "p50": 483.68000984191895, + "p90": 493.47200989723206, + "p95": 495.4560101032257, + "p99": 500.15997886657715 + }, + "isolatedSum": { + "p50": 522.7840095758438, + "p90": 565.5360221862793, + "p95": 580.4800093173981, + "p99": 598.1760025024414 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 1, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e7f353a6", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|balanced+eplb|8|prefill|normal|none|none|0|tuned||df54a9510825f71", + "colorKey": "h200_e170f653", + "comparisonKey": "f18153f5e15a7446", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:57:31.676036+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · balanced+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "balanced", + "routingLabel": "balanced+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + 
"requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "df54a9510825f71", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1, + "eplbImbalanceAfter": 1, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.06399977207184, + "p90": 157.98400342464447, + "p95": 166.36799275875092, + "p99": 180.95999956130981 + }, + "combine": { + "p50": 97.28000313043594, + "p90": 107.51999914646149, + "p95": 116.67200177907944, + "p99": 131.52000308036804 + }, + "roundtrip": { + "p50": 201.08799636363983, + "p90": 233.11999440193176, + "p95": 244.51200664043427, + "p99": 260.76799631118774 + }, + "isolatedSum": { + "p50": 229.34400290250778, + "p90": 265.50400257110596, + "p95": 283.03999453783035, + "p99": 312.48000264167786 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 2, + "recvTokensMax": 384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 160.73599457740784, + "p90": 182.8799992799759, + "p95": 191.71200692653656, + "p99": 229.18400168418884 + }, + "combine": { + "p50": 121.2799996137619, + "p90": 133.91999900341034, + "p95": 
141.95199310779572, + "p99": 156.3519984483719 + }, + "roundtrip": { + "p50": 237.72799968719482, + "p90": 256.25601410865784, + "p95": 266.81599020957947, + "p99": 281.823992729187 + }, + "isolatedSum": { + "p50": 282.01599419116974, + "p90": 316.79999828338623, + "p95": 333.6640000343323, + "p99": 385.53600013256073 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 2, + "recvTokensMax": 768, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 185.5359971523285, + "p90": 201.75999402999878, + "p95": 211.96800470352173, + "p99": 231.6800057888031 + }, + "combine": { + "p50": 156.95999562740326, + "p90": 169.5680022239685, + "p95": 177.05599963665009, + "p99": 185.44000387191772 + }, + "roundtrip": { + "p50": 304.1599988937378, + "p90": 327.84000039100647, + "p95": 335.2319896221161, + "p99": 348.03199768066406 + }, + "isolatedSum": { + "p50": 342.49599277973175, + "p90": 371.3279962539673, + "p95": 389.0240043401718, + "p99": 417.1200096607208 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 2, + "recvTokensMax": 1536, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 240.4160052537918, + "p90": 257.05599784851074, + "p95": 262.81601190567017, + "p99": 275.07200837135315 + }, + "combine": { + "p50": 232.9919934272766, + "p90": 246.75199389457703, + "p95": 252.28801369667053, + "p99": 261.7279887199402 + }, + "roundtrip": { + "p50": 441.6320025920868, + "p90": 461.3119959831238, + "p95": 469.184011220932, + "p99": 488.319993019104 + }, + "isolatedSum": { + "p50": 473.4079986810684, + "p90": 503.80799174308777, + "p95": 515.1040256023407, + "p99": 536.7999970912933 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 2, + "recvTokensMax": 3072, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 364.51199650764465, + "p90": 385.8560025691986, + "p95": 394.6239948272705, + "p99": 409.4400107860565 + }, + "combine": { + "p50": 378.2080113887787, + "p90": 394.9440121650696, + "p95": 400.5120098590851, + "p99": 410.2720022201538 + }, + "roundtrip": { + "p50": 702.3680210113525, + "p90": 716.7999744415283, + "p95": 724.5759963989258, + "p99": 744.5120215415955 + }, + "isolatedSum": { + "p50": 742.7200078964233, + "p90": 780.8000147342682, + "p95": 795.1360046863556, + "p99": 819.7120130062103 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 2, + "recvTokensMax": 6144, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 613.8880252838135, + "p90": 628.2879710197449, + "p95": 634.9760293960571, + "p99": 681.2160015106201 + }, + "combine": { + "p50": 650.4319906234741, + "p90": 660.5759859085083, + "p95": 666.0799980163574, + "p99": 711.5520238876343 + }, + "roundtrip": { + "p50": 1222.3680019378662, + "p90": 1244.320034980774, + "p95": 1258.0480575561523, + "p99": 1286.5920066833496 + }, + "isolatedSum": { + "p50": 1264.3200159072876, + "p90": 1288.8639569282532, + "p95": 1301.0560274124146, + "p99": 1392.7680253982544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 939524096, + "combineLogicalBytes": 939524096, + "fanoutMean": 2, + "recvTokensMax": 12288, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8ccec087", + "identity": 
"h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|prefill|normal|none|none|0|tuned||bfbb64a166e9f1c", + "colorKey": "h200_06b1dacd", + "comparisonKey": "02abadf906d372f8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:55:38.852239+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bfbb64a166e9f1c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": 
"3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 163.16799819469452, + "p90": 175.74399709701538, + "p95": 181.5039962530136, + "p99": 204.57600057125092 + }, + "combine": { + "p50": 131.67999684810638, + "p90": 139.26400244235992, + "p95": 145.05599439144135, + "p99": 154.94400262832642 + }, + "roundtrip": { + "p50": 260.09601354599, + "p90": 271.7759907245636, + "p95": 279.04000878334045, + "p99": 394.1119909286499 + }, + "isolatedSum": { + "p50": 294.8479950428009, + "p90": 315.0079995393753, + "p95": 326.55999064445496, + "p99": 359.52000319957733 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 201.50400698184967, + "p90": 212.16000616550446, + "p95": 216.86400473117828, + "p99": 226.23999416828156 + }, + "combine": { + "p50": 180.7360053062439, + "p90": 186.81600689888, + "p95": 191.3599967956543, + "p99": 205.4080069065094 + }, + "roundtrip": { + "p50": 342.6879942417145, + "p90": 360.22400856018066, + "p95": 370.2720105648041, + "p99": 411.5839898586273 + }, + "isolatedSum": { + "p50": 382.24001228809357, + "p90": 398.97601306438446, + "p95": 408.2240015268326, + "p99": 431.64800107479095 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156090368, + "combineLogicalBytes": 156090368, + "fanoutMean": 5.31640625, + "recvTokensMax": 2048, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 274.3360102176666, + "p90": 284.7039997577667, + "p95": 289.216011762619, + "p99": 301.6960024833679 + }, + "combine": { + "p50": 279.231995344162, + "p90": 286.20800375938416, + "p95": 289.08801078796387, + 
"p99": 297.4399924278259 + }, + "roundtrip": { + "p50": 515.8079862594604, + "p90": 526.8160104751587, + "p95": 530.0160050392151, + "p99": 534.496009349823 + }, + "isolatedSum": { + "p50": 553.5680055618286, + "p90": 570.9120035171509, + "p95": 578.3040225505829, + "p99": 599.1359949111938 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311091200, + "combineLogicalBytes": 311091200, + "fanoutMean": 5.2978515625, + "recvTokensMax": 4096, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 423.9040017127991, + "p90": 435.232013463974, + "p95": 439.9999976158142, + "p99": 449.95200634002686 + }, + "combine": { + "p50": 462.0159864425659, + "p90": 472.28801250457764, + "p95": 477.63198614120483, + "p99": 503.87197732925415 + }, + "roundtrip": { + "p50": 851.8400192260742, + "p90": 864.2560243606567, + "p95": 870.0799942016602, + "p99": 895.359992980957 + }, + "isolatedSum": { + "p50": 885.919988155365, + "p90": 907.5200259685516, + "p95": 917.631983757019, + "p99": 953.823983669281 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620648448, + "combineLogicalBytes": 620648448, + "fanoutMean": 5.2847900390625, + "recvTokensMax": 8192, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 750.495970249176, + "p90": 766.3360238075256, + "p95": 774.399995803833, + "p99": 796.0000038146973 + }, + "combine": { + "p50": 820.5440044403076, + "p90": 831.3919901847839, + "p95": 839.680016040802, + "p99": 890.5280232429504 + }, + "roundtrip": { + "p50": 1526.047945022583, + "p90": 1539.3600463867188, + "p95": 1544.8640584945679, + "p99": 1576.1280059814453 + }, + "isolatedSum": { + "p50": 1571.0399746894836, + "p90": 1597.7280139923096, + "p95": 1614.080011844635, + "p99": 1686.5280270576477 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1241511936, + "combineLogicalBytes": 1241511936, + "fanoutMean": 5.28570556640625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1405.6639671325684, + "p90": 1420.7680225372314, + "p95": 1426.2080192565918, + "p99": 1438.688039779663 + }, + "combine": { + "p50": 1513.759970664978, + "p90": 1557.695984840393, + "p95": 1563.5839700698853, + "p99": 1574.560046195984 + }, + "roundtrip": { + "p50": 2874.016046524048, + "p90": 2889.087915420532, + "p95": 2895.1680660247803, + "p99": 2941.567897796631 + }, + "isolatedSum": { + "p50": 2919.4239377975464, + "p90": 2978.4640073776245, + "p95": 2989.791989326477, + "p99": 3013.248085975647 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2484242432, + "combineLogicalBytes": 2484242432, + "fanoutMean": 5.288299560546875, + "recvTokensMax": 32768, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-25d1b1c6", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|hotspot-single+eplb|8|prefill|normal|none|none|0|tuned||29ae5ace13636f8", + "colorKey": "h200_0d9e9091", + "comparisonKey": "5975f9609e664563", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:04:15.835333+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · hotspot-single+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": 
"hotspot-single", + "routingLabel": "hotspot-single+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "29ae5ace13636f8", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.8466796875, + "eplbImbalanceAfter": 1.0002700343276514, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.22399747371674, + "p90": 182.94399976730347, + "p95": 194.36800479888916, + "p99": 262.2720003128052 + }, + "combine": { + "p50": 118.49600076675415, + "p90": 133.91999900341034, + "p95": 142.2719955444336, + "p99": 150.2400040626526 + }, + "roundtrip": { + "p50": 236.57600581645966, + "p90": 257.88798928260803, + "p95": 267.8399980068207, + "p99": 279.87200021743774 + }, + "isolatedSum": { + "p50": 274.7199982404709, + "p90": 316.8639987707138, + "p95": 336.64000034332275, + "p99": 412.51200437545776 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77701120, + "combineLogicalBytes": 77701120, + "fanoutMean": 5.29296875, + "recvTokensMax": 697, + "stragglerRank": 4, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 189.18399512767792, + "p90": 216.73600375652313, + "p95": 224.03199970722198, + "p99": 235.87200045585632 + }, + "combine": { + "p50": 161.24799847602844, + "p90": 183.3599954843521, + "p95": 188.92799317836761, + "p99": 198.11199605464935 + }, + "roundtrip": { + "p50": 301.56800150871277, + "p90": 335.4560136795044, + "p95": 341.5359854698181, + "p99": 381.3120126724243 + }, + "isolatedSum": { + "p50": 350.43199360370636, + "p90": 400.09599924087524, + "p95": 412.9599928855896, + "p99": 433.9839965105057 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155187200, + "combineLogicalBytes": 155187200, + "fanoutMean": 5.28564453125, + "recvTokensMax": 1372, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 238.49600553512573, + "p90": 261.75999641418457, + "p95": 269.79199051856995, + "p99": 281.792014837265 + }, + "combine": { + "p50": 236.9920015335083, + "p90": 246.8159943819046, + "p95": 252.19199061393738, + "p99": 262.59198784828186 + }, + "roundtrip": { + "p50": 437.6640021800995, + "p90": 464.06400203704834, + "p95": 471.8399941921234, + "p99": 495.7759976387024 + }, + "isolatedSum": { + "p50": 475.48800706863403, + "p90": 508.5759907960892, + "p95": 521.9839811325073, + "p99": 544.3840026855469 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311162880, + "combineLogicalBytes": 311162880, + "fanoutMean": 5.299072265625, + "recvTokensMax": 2761, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 352.7039885520935, + "p90": 375.16799569129944, + "p95": 381.50399923324585, + "p99": 395.1680064201355 + }, + "combine": { + "p50": 378.04800271987915, + "p90": 396.3199853897095, + "p95": 401.5359878540039, + 
"p99": 415.74400663375854 + }, + "roundtrip": { + "p50": 692.3519968986511, + "p90": 712.0000123977661, + "p95": 716.9920206069946, + "p99": 736.191987991333 + }, + "isolatedSum": { + "p50": 730.7519912719727, + "p90": 771.4879810810089, + "p95": 783.0399870872498, + "p99": 810.912013053894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619974656, + "combineLogicalBytes": 619974656, + "fanoutMean": 5.279052734375, + "recvTokensMax": 5481, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 578.3680081367493, + "p90": 600.8960008621216, + "p95": 609.4719767570496, + "p99": 627.0400285720825 + }, + "combine": { + "p50": 635.5519890785217, + "p90": 644.7679996490479, + "p95": 648.576021194458, + "p99": 658.1439971923828 + }, + "roundtrip": { + "p50": 1164.7039651870728, + "p90": 1183.359980583191, + "p95": 1192.4159526824951, + "p99": 1213.6640548706055 + }, + "isolatedSum": { + "p50": 1213.919997215271, + "p90": 1245.6640005111694, + "p95": 1258.0479979515076, + "p99": 1285.1840257644653 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240020992, + "combineLogicalBytes": 1240020992, + "fanoutMean": 5.27935791015625, + "recvTokensMax": 10883, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1038.7200117111206, + "p90": 1059.0720176696777, + "p95": 1065.9199953079224, + "p99": 1087.5840187072754 + }, + "combine": { + "p50": 1128.4159421920776, + "p90": 1140.3839588165283, + "p95": 1144.1919803619385, + "p99": 1154.7839641571045 + }, + "roundtrip": { + "p50": 2112.287998199463, + "p90": 2131.8399906158447, + "p95": 2138.9119625091553, + "p99": 2177.248001098633 + }, + "isolatedSum": { + "p50": 2167.1359539031982, + "p90": 2199.455976486206, + "p95": 2210.111975669861, + "p99": 2242.36798286438 + }, + "roundtripMeasured": 
true, + "dispatchLogicalBytes": 2480414720, + "combineLogicalBytes": 2480414720, + "fanoutMean": 5.2801513671875, + "recvTokensMax": 21702, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-0e0d0537", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|uniform+eplb|8|prefill|normal|none|none|0|tuned||2225dbbdab9bf2d", + "colorKey": "h200_ff073305", + "comparisonKey": "da3803583bf7270d", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:56:38.055916+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · uniform+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "uniform", + "routingLabel": "uniform+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2225dbbdab9bf2d", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 1.006072998046875, + "eplbImbalanceAfter": 1.0000152587890625, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.78399395942688, + "p90": 165.40800034999847, + "p95": 168.19199919700623, + "p99": 177.40799486637115 + }, + "combine": { + "p50": 118.59200149774551, + "p90": 125.34399330615997, + "p95": 130.11200726032257, + "p99": 136.51199638843536 + }, + "roundtrip": { + "p50": 237.5040054321289, + "p90": 248.73599410057068, + "p95": 251.39200687408447, + "p99": 265.9839987754822 + }, + "isolatedSum": { + "p50": 273.3759954571724, + "p90": 290.75199365615845, + "p95": 298.3040064573288, + "p99": 313.9199912548065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77041664, + "combineLogicalBytes": 77041664, + "fanoutMean": 5.248046875, + "recvTokensMax": 686, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.92000007629395, + "p90": 207.71199464797974, + "p95": 226.33600234985352, + "p99": 248.09600412845612 + }, + "combine": { + "p50": 159.42400693893433, + "p90": 167.52000153064728, + "p95": 175.23199319839478, + "p99": 188.73600661754608 + }, + "roundtrip": { + "p50": 305.05600571632385, + "p90": 322.9120075702667, + "p95": 333.3759903907776, + "p99": 348.54400157928467 + }, + "isolatedSum": { + "p50": 345.34400701522827, + "p90": 375.231996178627, + "p95": 401.5679955482483, + "p99": 436.8320107460022 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154542080, + "combineLogicalBytes": 154542080, + "fanoutMean": 5.263671875, + "recvTokensMax": 1365, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.77600038051605, + "p90": 250.20799040794373, + "p95": 254.14401292800903, + "p99": 262.4959945678711 + }, + "combine": { + "p50": 234.65600609779358, + "p90": 241.56799912452698, + "p95": 246.46399915218353, + "p99": 265.9519910812378 + }, + "roundtrip": { + "p50": 436.2559914588928, + "p90": 476.6719937324524, + "p95": 503.90398502349854, + "p99": 533.9840054512024 + }, + "isolatedSum": { + "p50": 474.43200647830963, + "p90": 491.7759895324707, + "p95": 500.60801208019257, + "p99": 528.4479856491089 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310589440, + "combineLogicalBytes": 310589440, + "fanoutMean": 5.289306640625, + "recvTokensMax": 2746, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 348.6720025539398, + "p90": 362.08000779151917, + "p95": 366.5280044078827, + "p99": 378.59201431274414 + }, + "combine": { + "p50": 369.08799409866333, + "p90": 378.59201431274414, + "p95": 381.44001364707947, + "p99": 389.72800970077515 + }, + "roundtrip": { + "p50": 680.7039976119995, + "p90": 693.0879950523376, + "p95": 699.7759938240051, + "p99": 725.5039811134338 + }, + "isolatedSum": { + "p50": 717.7599966526031, + "p90": 740.6720221042633, + "p95": 747.9680180549622, + "p99": 768.3200240135193 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619171840, + "combineLogicalBytes": 619171840, + "fanoutMean": 5.272216796875, + "recvTokensMax": 5467, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 577.6960253715515, + "p90": 589.568018913269, + "p95": 592.9279923439026, + "p99": 605.9200167655945 + }, + "combine": { + "p50": 624.8639822006226, + "p90": 634.4000101089478, + "p95": 639.4879817962646, + "p99": 656.1599969863892 + 
}, + "roundtrip": { + "p50": 1159.6479415893555, + "p90": 1171.5519428253174, + "p95": 1175.0400066375732, + "p99": 1187.8080368041992 + }, + "isolatedSum": { + "p50": 1202.560007572174, + "p90": 1223.9680290222168, + "p95": 1232.4159741401672, + "p99": 1262.0800137519836 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1238945792, + "combineLogicalBytes": 1238945792, + "fanoutMean": 5.2747802734375, + "recvTokensMax": 10913, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1041.0879850387573, + "p90": 1061.568021774292, + "p95": 1067.4560070037842, + "p99": 1095.9680080413818 + }, + "combine": { + "p50": 1126.6560554504395, + "p90": 1139.8719549179077, + "p95": 1145.5039978027344, + "p99": 1198.1120109558105 + }, + "roundtrip": { + "p50": 2111.743927001953, + "p90": 2131.103992462158, + "p95": 2138.9760971069336, + "p99": 2159.9040031433105 + }, + "isolatedSum": { + "p50": 2167.744040489197, + "p90": 2201.4399766921997, + "p95": 2212.9600048065186, + "p99": 2294.0800189971924 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481747968, + "combineLogicalBytes": 2481747968, + "fanoutMean": 5.282989501953125, + "recvTokensMax": 21789, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8bdbd438", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_d0051910", + "comparisonKey": "22dbf4f34e690133", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:53:41.262023+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": 
"layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 158.91200304031372, + "p90": 175.48799514770508, + "p95": 180.83199858665466, + "p99": 191.103994846344 + }, + "combine": { + "p50": 124.64000284671783, + "p90": 137.53600418567657, + "p95": 148.47999811172485, + "p99": 155.83999454975128 + }, + "roundtrip": { + "p50": 249.56800043582916, + "p90": 273.9520072937012, + "p95": 284.0000092983246, + "p99": 296.86400294303894 + }, + "isolatedSum": { + "p50": 283.55200588703156, + "p90": 313.02399933338165, + "p95": 329.3119966983795, + 
"p99": 346.9439893960953 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 192.25600361824036, + "p90": 211.67999505996704, + "p95": 217.40800142288208, + "p99": 231.1359941959381 + }, + "combine": { + "p50": 173.24799299240112, + "p90": 190.20800292491913, + "p95": 197.40800559520721, + "p99": 205.79199492931366 + }, + "roundtrip": { + "p50": 326.6879916191101, + "p90": 346.8480110168457, + "p95": 354.8479974269867, + "p99": 367.8399920463562 + }, + "isolatedSum": { + "p50": 365.5039966106415, + "p90": 401.88799798488617, + "p95": 414.8160070180893, + "p99": 436.92798912525177 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 268.99200677871704, + "p90": 292.9919958114624, + "p95": 302.592009305954, + "p99": 316.6719973087311 + }, + "combine": { + "p50": 275.90399980545044, + "p90": 289.72798585891724, + "p95": 296.1919903755188, + "p99": 313.50401043891907 + }, + "roundtrip": { + "p50": 510.3039741516113, + "p90": 533.5999727249146, + "p95": 541.8559908866882, + "p99": 555.2319884300232 + }, + "isolatedSum": { + "p50": 544.8960065841675, + "p90": 582.7199816703796, + "p95": 598.7839996814728, + "p99": 630.1760077476501 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + 
"p50": 417.05599427223206, + "p90": 439.9679899215698, + "p95": 449.6000111103058, + "p99": 493.4079945087433 + }, + "combine": { + "p50": 456.0000002384186, + "p90": 471.19998931884766, + "p95": 476.3199985027313, + "p99": 486.8159890174866 + }, + "roundtrip": { + "p50": 833.0559730529785, + "p90": 850.7840037345886, + "p95": 860.4480028152466, + "p99": 885.5360150337219 + }, + "isolatedSum": { + "p50": 873.0559945106506, + "p90": 911.1679792404175, + "p95": 925.9200096130371, + "p99": 980.2239835262299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 708.0960273742676, + "p90": 724.2879867553711, + "p95": 730.3360104560852, + "p99": 751.4560222625732 + }, + "combine": { + "p50": 810.7200264930725, + "p90": 825.3120183944702, + "p95": 831.6159844398499, + "p99": 851.2639999389648 + }, + "roundtrip": { + "p50": 1471.0079431533813, + "p90": 1498.1759786605835, + "p95": 1505.952000617981, + "p99": 1521.440029144287 + }, + "isolatedSum": { + "p50": 1518.81605386734, + "p90": 1549.6000051498413, + "p95": 1561.951994895935, + "p99": 1602.720022201538 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1301.632046699524, + "p90": 1315.6160116195679, + "p95": 1320.2240467071533, + "p99": 1337.8880023956299 + }, + "combine": { + "p50": 1504.5440196990967, + "p90": 1521.183967590332, + "p95": 1526.9759893417358, + "p99": 1542.0479774475098 + }, + "roundtrip": { + "p50": 2765.216112136841, + "p90": 2788.831949234009, + "p95": 
2803.584098815918, + "p99": 2825.792074203491 + }, + "isolatedSum": { + "p50": 2806.1760663986206, + "p90": 2836.7999792099, + "p95": 2847.200036048889, + "p99": 2879.9359798431396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4de9d6c2", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|prefill|normal|none|none|0|tuned||bbcd1d9d8d1e4fe", + "colorKey": "h200_0e782e5a", + "comparisonKey": "ea3008d75dafaabb", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:54:39.740111+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bbcd1d9d8d1e4fe", 
+ "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 156.51200711727142, + "p90": 168.47999393939972, + "p95": 173.6000031232834, + "p99": 179.83999848365784 + }, + "combine": { + "p50": 121.08799815177917, + "p90": 126.8479973077774, + "p95": 129.60000336170197, + "p99": 137.1839940547943 + }, + "roundtrip": { + "p50": 241.34400486946106, + "p90": 252.54398584365845, + "p95": 259.8719894886017, + "p99": 309.88800525665283 + }, + "isolatedSum": { + "p50": 277.6000052690506, + "p90": 295.3279912471771, + "p95": 303.20000648498535, + "p99": 317.02399253845215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 192.60799884796143, + "p90": 200.32000541687012, + "p95": 204.44799959659576, + "p99": 210.87999641895294 + }, + "combine": { + "p50": 160.288006067276, + "p90": 166.78400337696075, + "p95": 170.20800709724426, + "p99": 178.97599935531616 + }, + "roundtrip": { + "p50": 316.1279857158661, + "p90": 326.27201080322266, + "p95": 328.7999927997589, + "p99": 337.8239870071411 + }, + "isolatedSum": { + "p50": 352.8960049152374, + "p90": 367.1040087938309, + "p95": 374.65600669384, + "p99": 389.8559957742691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 45688832, + 
"combineLogicalBytes": 45688832, + "fanoutMean": 1.55615234375, + "recvTokensMax": 2048, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 263.839989900589, + "p90": 273.18400144577026, + "p95": 277.8240144252777, + "p99": 301.503986120224 + }, + "combine": { + "p50": 261.6960108280182, + "p90": 267.7440047264099, + "p95": 270.33600211143494, + "p99": 282.3359966278076 + }, + "roundtrip": { + "p50": 487.8399968147278, + "p90": 499.9679923057556, + "p95": 506.9119930267334, + "p99": 544.5119738578796 + }, + "isolatedSum": { + "p50": 525.5360007286072, + "p90": 540.9280061721802, + "p95": 548.1600165367126, + "p99": 583.8399827480316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 91521024, + "combineLogicalBytes": 91521024, + "fanoutMean": 1.55859375, + "recvTokensMax": 4096, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 407.3599874973297, + "p90": 420.28799653053284, + "p95": 425.50399899482727, + "p99": 437.9839897155762 + }, + "combine": { + "p50": 438.01599740982056, + "p90": 446.52798771858215, + "p95": 450.0479996204376, + "p99": 454.17600870132446 + }, + "roundtrip": { + "p50": 806.4320087432861, + "p90": 816.3840174674988, + "p95": 819.9679851531982, + "p99": 830.6559920310974 + }, + "isolatedSum": { + "p50": 845.3759849071503, + "p90": 866.815984249115, + "p95": 875.5519986152649, + "p99": 892.1599984169006 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 183916544, + "combineLogicalBytes": 183916544, + "fanoutMean": 1.5660400390625, + "recvTokensMax": 8192, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 698.527991771698, + "p90": 713.1519913673401, + "p95": 718.9440131187439, + "p99": 
761.9199752807617 + }, + "combine": { + "p50": 790.4959917068481, + "p90": 801.0560274124146, + "p95": 805.184006690979, + "p99": 842.5920009613037 + }, + "roundtrip": { + "p50": 1441.696047782898, + "p90": 1457.4719667434692, + "p95": 1464.128017425537, + "p99": 1492.8319454193115 + }, + "isolatedSum": { + "p50": 1489.0239834785461, + "p90": 1514.2080187797546, + "p95": 1524.128019809723, + "p99": 1604.5119762420654 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 368062464, + "combineLogicalBytes": 368062464, + "fanoutMean": 1.5670166015625, + "recvTokensMax": 16384, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1276.1919498443604, + "p90": 1292.896032333374, + "p95": 1300.5119562149048, + "p99": 1322.3999738693237 + }, + "combine": { + "p50": 1456.8320512771606, + "p90": 1472.383975982666, + "p95": 1477.4399995803833, + "p99": 1497.920036315918 + }, + "roundtrip": { + "p50": 2686.7520809173584, + "p90": 2703.360080718994, + "p95": 2710.3679180145264, + "p99": 2735.487937927246 + }, + "isolatedSum": { + "p50": 2733.024001121521, + "p90": 2765.28000831604, + "p95": 2777.951955795288, + "p99": 2820.3200101852417 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 734720000, + "combineLogicalBytes": 734720000, + "fanoutMean": 1.56402587890625, + "recvTokensMax": 32768, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-ebe9e4fd", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-heavy+eplb|8|prefill|normal|none|none|0|tuned||46855e7fa6754eb", + "colorKey": "h200_dd7af994", + "comparisonKey": "a3a9742e03c69104", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:03:18.292779+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + 
"mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-heavy+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "46855e7fa6754eb", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 7.38995361328125, + "eplbImbalanceAfter": 1.0000210716610862, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 154.91199493408203, + "p90": 170.1119989156723, + "p95": 180.00000715255737, + "p99": 187.80800700187683 + }, + "combine": { + "p50": 119.00799721479416, + "p90": 132.47999548912048, + "p95": 141.59999787807465, + "p99": 160.12799739837646 + }, + "roundtrip": { + "p50": 
236.15999519824982, + "p90": 256.51198625564575, + "p95": 269.1839933395386, + "p99": 297.60000109672546 + }, + "isolatedSum": { + "p50": 273.9199921488762, + "p90": 302.5919944047928, + "p95": 321.600005030632, + "p99": 347.9360044002533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 79206400, + "combineLogicalBytes": 79206400, + "fanoutMean": 5.3955078125, + "recvTokensMax": 713, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 184.25600230693817, + "p90": 207.35999941825867, + "p95": 216.25599265098572, + "p99": 230.04800081253052 + }, + "combine": { + "p50": 158.62399339675903, + "p90": 172.5119948387146, + "p95": 179.51999604701996, + "p99": 188.35200369358063 + }, + "roundtrip": { + "p50": 305.9839904308319, + "p90": 335.35999059677124, + "p95": 340.7999873161316, + "p99": 344.9600040912628 + }, + "isolatedSum": { + "p50": 342.8799957036972, + "p90": 379.87199425697327, + "p95": 395.7759886980057, + "p99": 418.40000450611115 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 159330304, + "combineLogicalBytes": 159330304, + "fanoutMean": 5.4267578125, + "recvTokensMax": 1436, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 243.58400702476501, + "p90": 265.6320035457611, + "p95": 273.18400144577026, + "p99": 286.97600960731506 + }, + "combine": { + "p50": 238.5600060224533, + "p90": 248.19199740886688, + "p95": 254.40001487731934, + "p99": 272.7999985218048 + }, + "roundtrip": { + "p50": 445.1200067996979, + "p90": 470.5600142478943, + "p95": 479.99998927116394, + "p99": 509.7919702529907 + }, + "isolatedSum": { + "p50": 482.1440130472183, + "p90": 513.824000954628, + "p95": 527.5840163230896, + "p99": 559.7760081291199 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 319535104, + "combineLogicalBytes": 
319535104, + "fanoutMean": 5.441650390625, + "recvTokensMax": 2897, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 358.62401127815247, + "p90": 382.7199935913086, + "p95": 390.78399538993835, + "p99": 423.007994890213 + }, + "combine": { + "p50": 377.21601128578186, + "p90": 391.5199935436249, + "p95": 398.97599816322327, + "p99": 405.34400939941406 + }, + "roundtrip": { + "p50": 696.51198387146, + "p90": 715.1359915733337, + "p95": 722.3359942436218, + "p99": 736.4159822463989 + }, + "isolatedSum": { + "p50": 735.8400225639343, + "p90": 774.2399871349335, + "p95": 789.7599935531616, + "p99": 828.3520042896271 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 638410752, + "combineLogicalBytes": 638410752, + "fanoutMean": 5.43603515625, + "recvTokensMax": 5815, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 590.0160074234009, + "p90": 609.6000075340271, + "p95": 619.9359893798828, + "p99": 647.8400230407715 + }, + "combine": { + "p50": 640.3200030326843, + "p90": 651.3599753379822, + "p95": 657.2160124778748, + "p99": 693.5039758682251 + }, + "roundtrip": { + "p50": 1181.5999746322632, + "p90": 1200.5120515823364, + "p95": 1213.2480144500732, + "p99": 1233.3760261535645 + }, + "isolatedSum": { + "p50": 1230.3360104560852, + "p90": 1260.9599828720093, + "p95": 1277.1520018577576, + "p99": 1341.3439989089966 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1275144192, + "combineLogicalBytes": 1275144192, + "fanoutMean": 5.42889404296875, + "recvTokensMax": 11606, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1061.311960220337, + "p90": 1086.5919589996338, + "p95": 1099.2319583892822, + "p99": 
1132.5440406799316 + }, + "combine": { + "p50": 1163.2959842681885, + "p90": 1179.3919801712036, + "p95": 1187.615990638733, + "p99": 1205.407977104187 + }, + "roundtrip": { + "p50": 2168.895959854126, + "p90": 2190.8481121063232, + "p95": 2198.2719898223877, + "p99": 2263.2319927215576 + }, + "isolatedSum": { + "p50": 2224.6079444885254, + "p90": 2265.9839391708374, + "p95": 2286.847949028015, + "p99": 2337.9520177841187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2546374656, + "combineLogicalBytes": 2546374656, + "fanoutMean": 5.420562744140625, + "recvTokensMax": 23170, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8f0c3f7e", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-mild|8|prefill|normal|none|none|0|tuned||cf93f8f6b52e428", + "colorKey": "h200_eb9b77cd", + "comparisonKey": "437b7604a746bc90", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:59:51.809360+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-mild", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-mild", + "routingLabel": "zipf-mild", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cf93f8f6b52e428", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 159.5200002193451, + "p90": 175.00799894332886, + "p95": 186.46399676799774, + "p99": 205.88800311088562 + }, + "combine": { + "p50": 125.59999525547028, + "p90": 139.13600146770477, + "p95": 141.27999544143677, + "p99": 149.02399480342865 + }, + "roundtrip": { + "p50": 251.16801261901855, + "p90": 265.1199996471405, + "p95": 273.0560004711151, + "p99": 280.67201375961304 + }, + "isolatedSum": { + "p50": 285.11999547481537, + "p90": 314.14400041103363, + "p95": 327.7439922094345, + "p99": 354.91199791431427 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 70160384, + "combineLogicalBytes": 70160384, + "fanoutMean": 4.779296875, + "recvTokensMax": 987, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 198.36799800395966, + "p90": 215.00800549983978, + "p95": 223.51999580860138, + "p99": 235.29599606990814 + }, + "combine": { + "p50": 174.75199699401855, + "p90": 188.80000710487366, + "p95": 195.23200392723083, + "p99": 200.8959949016571 + }, + "roundtrip": { + "p50": 330.49601316452026, + "p90": 348.28799962997437, + 
"p95": 355.3920090198517, + "p99": 401.95199847221375 + }, + "isolatedSum": { + "p50": 373.1199949979782, + "p90": 403.80801260471344, + "p95": 418.7519997358322, + "p99": 436.19199097156525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 140879872, + "combineLogicalBytes": 140879872, + "fanoutMean": 4.79833984375, + "recvTokensMax": 1972, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 268.3199942111969, + "p90": 294.97599601745605, + "p95": 304.1599988937378, + "p99": 316.6719973087311 + }, + "combine": { + "p50": 275.6800055503845, + "p90": 287.9360020160675, + "p95": 294.17601227760315, + "p99": 304.1599988937378 + }, + "roundtrip": { + "p50": 506.46400451660156, + "p90": 534.1759920120239, + "p95": 850.3040075302124, + "p99": 922.6239919662476 + }, + "isolatedSum": { + "p50": 543.9999997615814, + "p90": 582.9119980335236, + "p95": 598.3360111713409, + "p99": 620.8319962024689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 282333184, + "combineLogicalBytes": 282333184, + "fanoutMean": 4.80810546875, + "recvTokensMax": 3936, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 412.9599928855896, + "p90": 435.39199233055115, + "p95": 444.19199228286743, + "p99": 458.46399664878845 + }, + "combine": { + "p50": 451.6479969024658, + "p90": 461.216002702713, + "p95": 465.40799736976624, + "p99": 479.0399968624115 + }, + "roundtrip": { + "p50": 820.2880024909973, + "p90": 831.7440152168274, + "p95": 837.3439908027649, + "p99": 876.0319948196411 + }, + "isolatedSum": { + "p50": 864.6079897880554, + "p90": 896.6079950332642, + "p95": 909.5999896526337, + "p99": 937.5039935112 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 566716416, + "combineLogicalBytes": 566716416, + "fanoutMean": 4.8255615234375, + 
"recvTokensMax": 7855, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 712.3839855194092, + "p90": 729.9839854240417, + "p95": 739.7119998931885, + "p99": 766.2079930305481 + }, + "combine": { + "p50": 789.0239953994751, + "p90": 799.3280291557312, + "p95": 802.8799891471863, + "p99": 818.2719945907593 + }, + "roundtrip": { + "p50": 1451.9679546356201, + "p90": 1473.7279415130615, + "p95": 1483.1360578536987, + "p99": 1554.6239614486694 + }, + "isolatedSum": { + "p50": 1501.4079809188843, + "p90": 1529.312014579773, + "p95": 1542.5919890403748, + "p99": 1584.4799876213074 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1132285952, + "combineLogicalBytes": 1132285952, + "fanoutMean": 4.8206787109375, + "recvTokensMax": 15694, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1313.8560056686401, + "p90": 1335.487961769104, + "p95": 1344.864010810852, + "p99": 1369.7279691696167 + }, + "combine": { + "p50": 1464.735984802246, + "p90": 1477.8879880905151, + "p95": 1483.7119579315186, + "p99": 1513.759970664978 + }, + "roundtrip": { + "p50": 2725.3758907318115, + "p90": 2742.464065551758, + "p95": 2750.52809715271, + "p99": 2784.320116043091 + }, + "isolatedSum": { + "p50": 2778.5919904708862, + "p90": 2813.375949859619, + "p95": 2828.5759687423706, + "p99": 2883.4879398345947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2267840512, + "combineLogicalBytes": 2267840512, + "fanoutMean": 4.82763671875, + "recvTokensMax": 31357, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6a2e3296", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-mild+eplb|8|prefill|normal|none|none|0|tuned||27ddc85ded0add9", + "colorKey": "h200_a11c2791", + 
"comparisonKey": "10f82d315affaa0e", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:00:23.082110+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-mild+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-mild", + "routingLabel": "zipf-mild+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "27ddc85ded0add9", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 2.545684814453125, + "eplbImbalanceAfter": 1.0001495361328125, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 
153.08800339698792, + "p90": 160.288006067276, + "p95": 164.73600268363953, + "p99": 175.61599612236023 + }, + "combine": { + "p50": 118.17599833011627, + "p90": 124.03199821710587, + "p95": 126.08000636100769, + "p99": 133.69600474834442 + }, + "roundtrip": { + "p50": 235.07200181484222, + "p90": 241.72799289226532, + "p95": 246.5600073337555, + "p99": 254.2400062084198 + }, + "isolatedSum": { + "p50": 271.2640017271042, + "p90": 284.32000428438187, + "p95": 290.8160090446472, + "p99": 309.31200087070465 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78159872, + "combineLogicalBytes": 78159872, + "fanoutMean": 5.32421875, + "recvTokensMax": 702, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 184.9920004606247, + "p90": 196.76800072193146, + "p95": 201.1840045452118, + "p99": 240.22400379180908 + }, + "combine": { + "p50": 158.52800011634827, + "p90": 165.95199704170227, + "p95": 168.67199540138245, + "p99": 173.47200214862823 + }, + "roundtrip": { + "p50": 303.9360046386719, + "p90": 314.14398550987244, + "p95": 318.7839984893799, + "p99": 328.0960023403168 + }, + "isolatedSum": { + "p50": 343.52000057697296, + "p90": 362.7199977636337, + "p95": 369.85599994659424, + "p99": 413.6960059404373 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156563456, + "combineLogicalBytes": 156563456, + "fanoutMean": 5.33251953125, + "recvTokensMax": 1393, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 240.447998046875, + "p90": 250.91201066970825, + "p95": 256.19199872016907, + "p99": 264.2560005187988 + }, + "combine": { + "p50": 237.63200640678406, + "p90": 244.32000517845154, + "p95": 247.3279982805252, + "p99": 251.67998671531677 + }, + "roundtrip": { + "p50": 439.9360120296478, + "p90": 455.9040069580078, + "p95": 
463.29599618911743, + "p99": 502.6559829711914 + }, + "isolatedSum": { + "p50": 478.08000445365906, + "p90": 495.2320158481598, + "p95": 503.5199970006943, + "p99": 515.9359872341156 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312410112, + "combineLogicalBytes": 312410112, + "fanoutMean": 5.3203125, + "recvTokensMax": 2773, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 349.1840064525604, + "p90": 358.91199111938477, + "p95": 361.7599904537201, + "p99": 370.1440095901489 + }, + "combine": { + "p50": 369.53601241111755, + "p90": 376.67199969291687, + "p95": 379.8080086708069, + "p99": 385.6959939002991 + }, + "roundtrip": { + "p50": 682.8799843788147, + "p90": 695.5839991569519, + "p95": 700.3200054168701, + "p99": 710.8799815177917 + }, + "isolatedSum": { + "p50": 718.720018863678, + "p90": 735.5839908123016, + "p95": 741.567999124527, + "p99": 755.840003490448 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 622712832, + "combineLogicalBytes": 622712832, + "fanoutMean": 5.3023681640625, + "recvTokensMax": 5498, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 570.0799822807312, + "p90": 580.3840160369873, + "p95": 584.3200087547302, + "p99": 592.9920077323914 + }, + "combine": { + "p50": 627.6800036430359, + "p90": 636.0960006713867, + "p95": 639.0079855918884, + "p99": 649.2800116539001 + }, + "roundtrip": { + "p50": 1154.271960258484, + "p90": 1165.0240421295166, + "p95": 1169.1839694976807, + "p99": 1187.872052192688 + }, + "isolatedSum": { + "p50": 1197.759985923767, + "p90": 1216.480016708374, + "p95": 1223.3279943466187, + "p99": 1242.2720193862915 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1245038592, + "combineLogicalBytes": 1245038592, + "fanoutMean": 5.30072021484375, + "recvTokensMax": 
10955, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1042.0479774475098, + "p90": 1062.0160102844238, + "p95": 1067.7119493484497, + "p99": 1081.055998802185 + }, + "combine": { + "p50": 1139.2320394515991, + "p90": 1151.520013809204, + "p95": 1154.3999910354614, + "p99": 1169.7280406951904 + }, + "roundtrip": { + "p50": 2128.8321018218994, + "p90": 2151.16810798645, + "p95": 2159.872055053711, + "p99": 2172.5120544433594 + }, + "isolatedSum": { + "p50": 2181.280016899109, + "p90": 2213.536024093628, + "p95": 2222.111940383911, + "p99": 2250.7840394973755 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2489460736, + "combineLogicalBytes": 2489460736, + "fanoutMean": 5.299407958984375, + "recvTokensMax": 21864, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-50bf89d4", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-moderate|8|prefill|normal|none|none|0|tuned||b5217e990b95f86", + "colorKey": "h200_cdce4762", + "comparisonKey": "a74af82f695f49a9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:01:47.004952+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-moderate", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b5217e990b95f86", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 160.8320027589798, + "p90": 182.23999440670013, + "p95": 190.5599981546402, + "p99": 205.53599298000336 + }, + "combine": { + "p50": 124.9919980764389, + "p90": 136.57599687576294, + "p95": 149.56800639629364, + "p99": 156.38400614261627 + }, + "roundtrip": { + "p50": 249.28000569343567, + "p90": 272.41599559783936, + "p95": 278.49599719047546, + "p99": 288.5439991950989 + }, + "isolatedSum": { + "p50": 285.8240008354187, + "p90": 318.8159912824631, + "p95": 340.12800455093384, + "p99": 361.91999912261963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 193.40799748897552, + "p90": 205.59999346733093, + "p95": 
213.34399282932281, + "p99": 224.73600506782532 + }, + "combine": { + "p50": 173.34400117397308, + "p90": 185.63200533390045, + "p95": 196.6399997472763, + "p99": 205.21600544452667 + }, + "roundtrip": { + "p50": 329.6000063419342, + "p90": 349.4719862937927, + "p95": 356.54398798942566, + "p99": 386.52798533439636 + }, + "isolatedSum": { + "p50": 366.7519986629486, + "p90": 391.2319988012314, + "p95": 409.9839925765991, + "p99": 429.952010512352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 100509696, + "combineLogicalBytes": 100509696, + "fanoutMean": 3.42333984375, + "recvTokensMax": 2046, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 268.15998554229736, + "p90": 281.95199370384216, + "p95": 288.2240116596222, + "p99": 298.0799973011017 + }, + "combine": { + "p50": 274.52799677848816, + "p90": 288.9919877052307, + "p95": 295.6799864768982, + "p99": 303.5840094089508 + }, + "roundtrip": { + "p50": 509.11998748779297, + "p90": 527.3600220680237, + "p95": 534.3359708786011, + "p99": 572.704017162323 + }, + "isolatedSum": { + "p50": 542.6879823207855, + "p90": 570.9439814090729, + "p95": 583.9039981365204, + "p99": 601.6640067100525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 201678848, + "combineLogicalBytes": 201678848, + "fanoutMean": 3.4345703125, + "recvTokensMax": 4094, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 409.9839925765991, + "p90": 424.67200756073, + "p95": 431.36000633239746, + "p99": 444.0639913082123 + }, + "combine": { + "p50": 454.68801259994507, + "p90": 469.92000937461853, + "p95": 476.4159917831421, + "p99": 489.0559911727905 + }, + "roundtrip": { + "p50": 829.695999622345, + "p90": 848.1600284576416, + "p95": 857.4399948120117, + "p99": 877.4080276489258 + }, + "isolatedSum": { + 
"p50": 864.6720051765442, + "p90": 894.5920169353485, + "p95": 907.7759981155396, + "p99": 933.1199824810028 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 405035008, + "combineLogicalBytes": 405035008, + "fanoutMean": 3.4488525390625, + "recvTokensMax": 8189, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 707.7440023422241, + "p90": 753.3760070800781, + "p95": 776.3199806213379, + "p99": 811.2639784812927 + }, + "combine": { + "p50": 807.200014591217, + "p90": 820.2559947967529, + "p95": 823.7119913101196, + "p99": 839.3279910087585 + }, + "roundtrip": { + "p50": 1468.35196018219, + "p90": 1496.5120553970337, + "p95": 1502.6240348815918, + "p99": 1514.5920515060425 + }, + "isolatedSum": { + "p50": 1514.9440169334412, + "p90": 1573.632001876831, + "p95": 1600.0319719314575, + "p99": 1650.5919694900513 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 808822784, + "combineLogicalBytes": 808822784, + "fanoutMean": 3.44354248046875, + "recvTokensMax": 16380, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1299.2960214614868, + "p90": 1314.7200345993042, + "p95": 1319.3600177764893, + "p99": 1344.1599607467651 + }, + "combine": { + "p50": 1497.1519708633423, + "p90": 1510.3039741516113, + "p95": 1515.2000188827515, + "p99": 1526.2080430984497 + }, + "roundtrip": { + "p50": 2760.1280212402344, + "p90": 2787.1360778808594, + "p95": 2799.583911895752, + "p99": 2823.2319355010986 + }, + "isolatedSum": { + "p50": 2796.447992324829, + "p90": 2825.0240087509155, + "p95": 2834.5600366592407, + "p99": 2870.368003845215 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1619795968, + "combineLogicalBytes": 1619795968, + "fanoutMean": 3.4481201171875, + "recvTokensMax": 32761, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d912bad6", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf-moderate+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_2c30082c", + "comparisonKey": "f29baddd5644c629", + "schemaVersion": 3, + "generatedAt": "2026-07-02T11:02:18.738283+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf-moderate+eplb", + "model": "DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf-moderate", + "routingLabel": "zipf-moderate+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 158.49600732326508, + "p90": 170.71999609470367, + "p95": 178.5919964313507, + "p99": 247.48800694942474 + }, + "combine": { + "p50": 119.10399794578552, + "p90": 124.64000284671783, + "p95": 127.80800461769104, + "p99": 134.14399325847626 + }, + "roundtrip": { + "p50": 236.51200532913208, + "p90": 248.6400008201599, + "p95": 253.9840042591095, + "p99": 313.50401043891907 + }, + "isolatedSum": { + "p50": 277.6000052690506, + "p90": 295.3599989414215, + "p95": 306.40000104904175, + "p99": 381.632000207901 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + "fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 185.47199666500092, + "p90": 198.68800044059753, + "p95": 207.0399969816208, + "p99": 574.783980846405 + }, + "combine": { + "p50": 160.8320027589798, + "p90": 168.7999963760376, + "p95": 173.567995429039, + "p99": 197.1839964389801 + }, + "roundtrip": { + "p50": 307.6480031013489, + "p90": 322.9759931564331, + "p95": 333.3440124988556, + "p99": 378.59201431274414 + }, + "isolatedSum": { + "p50": 346.3039994239807, + "p90": 367.48799681663513, + "p95": 380.6079924106598, + "p99": 771.9679772853851 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 241.5039986371994, + "p90": 253.6959946155548, + "p95": 258.04799795150757, + "p99": 
266.88000559806824 + }, + "combine": { + "p50": 235.55199801921844, + "p90": 243.52000653743744, + "p95": 247.1040040254593, + "p99": 267.4880027770996 + }, + "roundtrip": { + "p50": 435.232013463974, + "p90": 447.36000895500183, + "p95": 454.0160000324249, + "p99": 517.1840190887451 + }, + "isolatedSum": { + "p50": 477.05599665641785, + "p90": 497.21600115299225, + "p95": 505.15200197696686, + "p99": 534.3680083751678 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 351.4240086078644, + "p90": 363.45601081848145, + "p95": 367.96799302101135, + "p99": 392.9600119590759 + }, + "combine": { + "p50": 370.62400579452515, + "p90": 380.41600584983826, + "p95": 383.7440013885498, + "p99": 417.05599427223206 + }, + "roundtrip": { + "p50": 681.7920207977295, + "p90": 692.4160122871399, + "p95": 697.7919936180115, + "p99": 756.9599747657776 + }, + "isolatedSum": { + "p50": 722.0480144023895, + "p90": 743.8720166683197, + "p95": 751.7119944095612, + "p99": 810.016006231308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 571.615993976593, + "p90": 585.536003112793, + "p95": 589.4719958305359, + "p99": 656.5120220184326 + }, + "combine": { + "p50": 636.5119814872742, + "p90": 649.1519808769226, + "p95": 658.3999991416931, + "p99": 700.1919746398926 + }, + "roundtrip": { + "p50": 1163.1040573120117, + "p90": 1177.791953086853, + "p95": 1187.77596950531, + "p99": 1238.3359670639038 + }, + "isolatedSum": { + "p50": 1208.1279754638672, 
+ "p90": 1234.6879839897156, + "p95": 1247.871994972229, + "p99": 1356.7039966583252 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1045.6000566482544, + "p90": 1071.7439651489258, + "p95": 1082.2720527648926, + "p99": 1174.9440431594849 + }, + "combine": { + "p50": 1137.1840238571167, + "p90": 1148.9280462265015, + "p95": 1153.92005443573, + "p99": 1209.8239660263062 + }, + "roundtrip": { + "p50": 2131.9680213928223, + "p90": 2149.9838829040527, + "p95": 2161.2799167633057, + "p99": 2265.023946762085 + }, + "isolatedSum": { + "p50": 2182.784080505371, + "p90": 2220.6720113754272, + "p95": 2236.1921072006226, + "p99": 2384.768009185791 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3ab7c0d8", + "identity": "h200|uccl|n-a|7168|8|288|bf16|normal|layout-and-dispatch-v1|zipf+eplb|8|prefill|normal|none|none|0|tuned||2b57a75d27f5b39", + "colorKey": "h200_9ffc4a1e", + "comparisonKey": "6e4c3dbed39a56fd", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:58:28.639052+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 · zipf+eplb", + "model": 
"DeepSeek-V3 (EPLB physical)", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 288, + "routing": "zipf", + "routingLabel": "zipf+eplb", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": true, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2b57a75d27f5b39", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": 4.895263671875, + "eplbImbalanceAfter": 1.0000902811686199, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 155.4879993200302, + "p90": 181.11999332904816, + "p95": 187.32799589633942, + "p99": 199.10399615764618 + }, + "combine": { + "p50": 118.1119978427887, + "p90": 135.903999209404, + "p95": 142.43200421333313, + "p99": 154.27200496196747 + }, + "roundtrip": { + "p50": 239.04000222682953, + "p90": 269.27998661994934, + "p95": 279.2640030384064, + "p99": 295.3279912471771 + }, + "isolatedSum": { + "p50": 273.5999971628189, + "p90": 317.02399253845215, + "p95": 329.76000010967255, + "p99": 353.37600111961365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77385728, + "combineLogicalBytes": 77385728, + 
"fanoutMean": 5.271484375, + "recvTokensMax": 691, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 183.58400464057922, + "p90": 207.10399746894836, + "p95": 214.75200355052948, + "p99": 237.2799962759018 + }, + "combine": { + "p50": 160.09600460529327, + "p90": 172.38399386405945, + "p95": 180.03199994564056, + "p99": 190.0160014629364 + }, + "roundtrip": { + "p50": 302.8480112552643, + "p90": 328.2560110092163, + "p95": 334.6560001373291, + "p99": 346.015989780426 + }, + "isolatedSum": { + "p50": 343.6800092458725, + "p90": 379.4879913330078, + "p95": 394.78400349617004, + "p99": 427.2959977388382 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155172864, + "combineLogicalBytes": 155172864, + "fanoutMean": 5.28515625, + "recvTokensMax": 1378, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 239.6160066127777, + "p90": 261.9839906692505, + "p95": 273.3120024204254, + "p99": 288.63999247550964 + }, + "combine": { + "p50": 237.2480034828186, + "p90": 253.1520128250122, + "p95": 261.6640031337738, + "p99": 273.8879919052124 + }, + "roundtrip": { + "p50": 435.90399622917175, + "p90": 461.3119959831238, + "p95": 471.3920056819916, + "p99": 535.1679921150208 + }, + "isolatedSum": { + "p50": 476.8640100955963, + "p90": 515.1360034942627, + "p95": 534.9760055541992, + "p99": 562.527984380722 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 310546432, + "combineLogicalBytes": 310546432, + "fanoutMean": 5.28857421875, + "recvTokensMax": 2745, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 352.06401348114014, + "p90": 456.0000002384186, + "p95": 461.95200085639954, + "p99": 468.7039852142334 + }, + "combine": { + "p50": 
371.10400199890137, + "p90": 405.60001134872437, + "p95": 412.1919870376587, + "p99": 424.703985452652 + }, + "roundtrip": { + "p50": 682.4640035629272, + "p90": 700.7359862327576, + "p95": 710.5280160903931, + "p99": 727.8720140457153 + }, + "isolatedSum": { + "p50": 723.1680154800415, + "p90": 861.600011587143, + "p95": 874.1439878940582, + "p99": 893.4079706668854 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 620619776, + "combineLogicalBytes": 620619776, + "fanoutMean": 5.2845458984375, + "recvTokensMax": 5526, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 572.9600191116333, + "p90": 597.0240235328674, + "p95": 609.9839806556702, + "p99": 625.9520053863525 + }, + "combine": { + "p50": 638.047993183136, + "p90": 650.2079963684082, + "p95": 656.9280028343201, + "p99": 683.4880113601685 + }, + "roundtrip": { + "p50": 1162.7520322799683, + "p90": 1183.0079555511475, + "p95": 1192.2240257263184, + "p99": 1214.9120569229126 + }, + "isolatedSum": { + "p50": 1211.0080122947693, + "p90": 1247.2320199012756, + "p95": 1266.9119834899902, + "p99": 1309.440016746521 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239175168, + "combineLogicalBytes": 1239175168, + "fanoutMean": 5.2757568359375, + "recvTokensMax": 11165, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1047.8719472885132, + "p90": 1075.4239559173584, + "p95": 1086.303949356079, + "p99": 1118.4320449829102 + }, + "combine": { + "p50": 1139.583945274353, + "p90": 1157.1520566940308, + "p95": 1166.815996170044, + "p99": 1181.3119649887085 + }, + "roundtrip": { + "p50": 2138.7839317321777, + "p90": 2164.0639305114746, + "p95": 2174.623966217041, + "p99": 2201.184034347534 + }, + "isolatedSum": { + "p50": 2187.455892562866, + "p90": 2232.576012611389, + "p95": 
2253.119945526123, + "p99": 2299.7440099716187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2481604608, + "combineLogicalBytes": 2481604608, + "fanoutMean": 5.282684326171875, + "recvTokensMax": 22165, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fdba77dc", + "identity": "h200|uccl|n-a|7168|8|256|bf16|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_04cde55d", + "comparisonKey": "b0a4dd154e680b3f", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:36:49.860115+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · bf16 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": 
null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 132.1599930524826, + "p90": 143.5839980840683, + "p95": 149.85600113868713, + "p99": 160.99199652671814 + }, + "combine": { + "p50": 118.8800036907196, + "p90": 129.82399761676788, + "p95": 138.68799805641174, + "p99": 148.19200336933136 + }, + "roundtrip": { + "p50": 214.1759991645813, + "p90": 230.75200617313385, + "p95": 237.44000494480133, + "p99": 250.8159875869751 + }, + "isolatedSum": { + "p50": 251.0399967432022, + "p90": 273.4079957008362, + "p95": 288.5439991950989, + "p99": 309.1839998960495 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 165.69599509239197, + "p90": 182.49599635601044, + "p95": 196.44799828529358, + "p99": 216.22399985790253 + }, + "combine": { + "p50": 161.0880047082901, + "p90": 172.2559928894043, + "p95": 188.63999843597412, + "p99": 205.53599298000336 + }, + "roundtrip": { + "p50": 284.7999930381775, + "p90": 301.7280101776123, + "p95": 320.3519880771637, + "p99": 344.1919982433319 + }, + "isolatedSum": { + "p50": 326.78399980068207, + "p90": 354.75198924541473, + "p95": 385.0879967212677, + "p99": 421.7599928379059 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 217.40800142288208, + "p90": 230.335995554924, + "p95": 236.51200532913208, + "p99": 246.8159943819046 + }, + "combine": { + "p50": 239.58399891853333, + "p90": 247.3600059747696, + "p95": 252.60800123214722, + "p99": 272.96000719070435 + }, + "roundtrip": { + "p50": 418.5279905796051, + "p90": 435.2959990501404, + "p95": 447.87201285362244, + "p99": 470.7840085029602 + }, + "isolatedSum": { + "p50": 456.9920003414154, + "p90": 477.6960015296936, + "p95": 489.1200065612793, + "p99": 519.776001572609 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 329.0880024433136, + "p90": 340.31999111175537, + "p95": 346.97601199150085, + "p99": 360.32000184059143 + }, + "combine": { + "p50": 369.82399225234985, + "p90": 381.47199153900146, + "p95": 389.15199041366577, + "p99": 401.15201473236084 + }, + "roundtrip": { + "p50": 659.6800088882446, + "p90": 672.4799871444702, + "p95": 681.0879707336426, + "p99": 725.4080176353455 + }, + "isolatedSum": { + "p50": 698.9119946956635, + "p90": 721.7919826507568, + "p95": 736.1280024051666, + "p99": 761.4720165729523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 559.8080158233643, + "p90": 574.944019317627, + "p95": 581.3120007514954, + "p99": 615.4239773750305 + }, + "combine": { + "p50": 632.9600214958191, + "p90": 643.8720226287842, + "p95": 647.2640037536621, + "p99": 
654.8159718513489 + }, + "roundtrip": { + "p50": 1147.487998008728, + "p90": 1164.9919748306274, + "p95": 1176.2559413909912, + "p99": 1216.256022453308 + }, + "isolatedSum": { + "p50": 1192.7680373191833, + "p90": 1218.8160419464111, + "p95": 1228.5760045051575, + "p99": 1270.2399492263794 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1000.1599788665771, + "p90": 1025.8879661560059, + "p95": 1044.2880392074585, + "p99": 1081.3119411468506 + }, + "combine": { + "p50": 1137.5679969787598, + "p90": 1150.9120464324951, + "p95": 1155.392050743103, + "p99": 1176.8640279769897 + }, + "roundtrip": { + "p50": 2087.712049484253, + "p90": 2109.2159748077393, + "p95": 2116.544008255005, + "p99": 2145.3120708465576 + }, + "isolatedSum": { + "p50": 2137.727975845337, + "p90": 2176.800012588501, + "p95": 2199.6800899505615, + "p99": 2258.1759691238403 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-920eb570", + "identity": "h200|uccl|n-a|4096|8|128|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "h200_5966376c", + "comparisonKey": "a5ba95dbcb8778ef", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:46:06.805993+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + 
"measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 268.41598749160767, + "p90": 297.5040078163147, + "p95": 313.27998638153076, + "p99": 389.60000872612 + }, + "combine": { + "p50": 87.3280018568039, + "p90": 93.05600076913834, + "p95": 96.8639999628067, + "p99": 102.52799838781357 + }, + "roundtrip": { + "p50": 366.3040101528168, + "p90": 423.3599901199341, + "p95": 455.32798767089844, + "p99": 508.512020111084 + }, + "isolatedSum": { + "p50": 355.74398934841156, + "p90": 390.56000858545303, + "p95": 410.14398634433746, + 
"p99": 492.12800711393356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22282240, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 294.17601227760315, + "p90": 348.4480082988739, + "p95": 403.23200821876526, + "p99": 549.2479801177979 + }, + "combine": { + "p50": 112.5119999051094, + "p90": 125.59999525547028, + "p95": 129.7920048236847, + "p99": 140.6400054693222 + }, + "roundtrip": { + "p50": 383.55201482772827, + "p90": 419.1359877586365, + "p95": 465.2479887008667, + "p99": 510.591983795166 + }, + "isolatedSum": { + "p50": 406.68801218271255, + "p90": 474.0480035543442, + "p95": 533.02401304245, + "p99": 689.88798558712 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44863488, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 370.4639971256256, + "p90": 393.18400621414185, + "p95": 408.1279933452606, + "p99": 466.8479859828949 + }, + "combine": { + "p50": 164.35199975967407, + "p90": 183.45600366592407, + "p95": 187.68000602722168, + "p99": 196.22400403022766 + }, + "roundtrip": { + "p50": 503.80802154541016, + "p90": 539.6479964256287, + "p95": 570.5919861793518, + "p99": 621.7600107192993 + }, + "isolatedSum": { + "p50": 534.8159968852997, + "p90": 576.6400098800659, + "p95": 595.8079993724823, + "p99": 663.0719900131226 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89751552, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 
514.1440033912659, + "p90": 547.327995300293, + "p95": 575.0719904899597, + "p99": 645.8240151405334 + }, + "combine": { + "p50": 262.33598589897156, + "p90": 270.33600211143494, + "p95": 273.6639976501465, + "p99": 283.87200832366943 + }, + "roundtrip": { + "p50": 742.2720193862915, + "p90": 784.9599719047546, + "p95": 814.4639730453491, + "p99": 848.3200073242188 + }, + "isolatedSum": { + "p50": 776.4799892902374, + "p90": 817.6639974117279, + "p95": 848.7359881401062, + "p99": 929.6960234642029 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179511296, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 5558, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 782.8159928321838, + "p90": 812.7679824829102, + "p95": 832.2880268096924, + "p99": 879.1679739952087 + }, + "combine": { + "p50": 430.4960072040558, + "p90": 439.2319917678833, + "p95": 443.3920085430145, + "p99": 450.23998618125916 + }, + "roundtrip": { + "p50": 1183.4880113601685, + "p90": 1214.3360376358032, + "p95": 1235.6480360031128, + "p99": 1284.7360372543335 + }, + "isolatedSum": { + "p50": 1213.3120000362396, + "p90": 1251.9999742507935, + "p95": 1275.680035352707, + "p99": 1329.407960176468 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 358055936, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 10982, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1422.49596118927, + "p90": 1437.0559453964233, + "p95": 1447.6799964904785, + "p99": 1470.2080488204956 + }, + "combine": { + "p50": 778.7200212478638, + "p90": 789.0880107879639, + "p95": 794.048011302948, + "p99": 803.48801612854 + }, + "roundtrip": { + "p50": 2185.408115386963, + "p90": 2203.1679153442383, + "p95": 
2212.4478816986084, + "p99": 2238.52801322937 + }, + "isolatedSum": { + "p50": 2201.215982437134, + "p90": 2226.143956184387, + "p95": 2241.7280077934265, + "p99": 2273.6960649490356 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716197888, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 21939, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-993e71b4", + "identity": "h200|uccl|n-a|5120|8|160|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "h200_5966376c", + "comparisonKey": "42dd8041bbf56bb9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:48:00.653469+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + 
"workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 301.88798904418945, + "p90": 383.0080032348633, + "p95": 402.72000432014465, + "p99": 562.720000743866 + }, + "combine": { + "p50": 96.99200093746185, + "p90": 125.56800246238708, + "p95": 141.53599739074707, + "p99": 185.5040043592453 + }, + "roundtrip": { + "p50": 362.08000779151917, + "p90": 447.87201285362244, + "p95": 503.35997343063354, + "p99": 567.0080184936523 + }, + "isolatedSum": { + "p50": 398.8799899816513, + "p90": 508.57600569725037, + "p95": 544.2560017108917, + "p99": 748.2240051031113 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27837440, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 310.3039860725403, + "p90": 364.22398686408997, + "p95": 379.2319893836975, + "p99": 420.0960099697113 + }, + "combine": { + "p50": 126.36800110340118, + "p90": 142.5279974937439, + "p95": 147.90399372577667, + "p99": 164.67200219631195 + }, + "roundtrip": { + "p50": 417.248010635376, + "p90": 462.0479941368103, + "p95": 471.96799516677856, + "p99": 514.9440169334412 + }, + "isolatedSum": { + "p50": 436.67198717594147, + "p90": 506.75198435783386, + "p95": 527.1359831094742, + "p99": 584.7680121660233 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55552000, + "combineLogicalBytes": 
111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 383.4240138530731, + "p90": 433.47200751304626, + "p95": 444.7999894618988, + "p99": 514.5279765129089 + }, + "combine": { + "p50": 185.59999763965607, + "p90": 196.19199633598328, + "p95": 200.99200308322906, + "p99": 209.6640020608902 + }, + "roundtrip": { + "p50": 562.1439814567566, + "p90": 596.3199734687805, + "p95": 608.0319881439209, + "p99": 656.2560200691223 + }, + "isolatedSum": { + "p50": 569.0240114927292, + "p90": 629.6640038490295, + "p95": 645.7919925451279, + "p99": 724.1919785737991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111549440, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 560.7360005378723, + "p90": 617.8240180015564, + "p95": 638.975977897644, + "p99": 735.7760071754456 + }, + "combine": { + "p50": 290.0800108909607, + "p90": 305.37599325180054, + "p95": 311.8079900741577, + "p99": 321.50399684906006 + }, + "roundtrip": { + "p50": 832.863986492157, + "p90": 869.8880076408386, + "p95": 880.3840279579163, + "p99": 905.56800365448 + }, + "isolatedSum": { + "p50": 850.816011428833, + "p90": 923.2000112533569, + "p95": 950.7839679718018, + "p99": 1057.2800040245056 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223365120, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 5518, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 902.7199745178223, + "p90": 930.7839870452881, + "p95": 952.7680277824402, + "p99": 1165.120005607605 + }, + 
"combine": { + "p50": 497.95201420783997, + "p90": 514.2080187797546, + "p95": 521.6320157051086, + "p99": 548.7359762191772 + }, + "roundtrip": { + "p50": 1371.7119693756104, + "p90": 1428.0320405960083, + "p95": 1465.0559425354004, + "p99": 1553.1840324401855 + }, + "isolatedSum": { + "p50": 1400.6719887256622, + "p90": 1444.9920058250427, + "p95": 1474.4000434875488, + "p99": 1713.8559818267822 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446817280, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 11032, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1666.815996170044, + "p90": 1687.1360540390015, + "p95": 1696.1599588394165, + "p99": 1721.6639518737793 + }, + "combine": { + "p50": 896.992027759552, + "p90": 909.824013710022, + "p95": 912.447988986969, + "p99": 929.1840195655823 + }, + "roundtrip": { + "p50": 2526.0159969329834, + "p90": 2546.6558933258057, + "p95": 2555.327892303467, + "p99": 2597.9840755462646 + }, + "isolatedSum": { + "p50": 2563.808023929596, + "p90": 2596.9600677490234, + "p95": 2608.6079478263855, + "p99": 2650.8479714393616 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893132800, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 21895, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fe0702c4", + "identity": "h200|uccl|n-a|6144|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_5966376c", + "comparisonKey": "b35ef2fc79d18ce7", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:49:56.092556+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 269.3119943141937, + "p90": 319.2960023880005, + "p95": 331.58400654792786, + "p99": 356.57599568367004 + }, + "combine": { + "p50": 101.08800232410431, + "p90": 113.15199732780457, + "p95": 122.84799665212631, + "p99": 131.1040073633194 + }, + "roundtrip": { + "p50": 371.42398953437805, + "p90": 435.7439875602722, + "p95": 484.0640127658844, + "p99": 612.3200058937073 + }, + 
"isolatedSum": { + "p50": 370.39999663829803, + "p90": 432.44799971580505, + "p95": 454.43200320005417, + "p99": 487.68000304698944 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33288192, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 328.7999927997589, + "p90": 370.4319894313812, + "p95": 382.6240003108978, + "p99": 433.0559968948364 + }, + "combine": { + "p50": 138.46400380134583, + "p90": 152.0639955997467, + "p95": 161.05599701404572, + "p99": 169.27999258041382 + }, + "roundtrip": { + "p50": 433.8560104370117, + "p90": 483.7439954280853, + "p95": 498.04800748825073, + "p99": 534.496009349823 + }, + "isolatedSum": { + "p50": 467.26399660110474, + "p90": 522.4959850311279, + "p95": 543.6799973249435, + "p99": 602.3359894752502 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66809856, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 427.90400981903076, + "p90": 472.54401445388794, + "p95": 485.0879907608032, + "p99": 531.5520167350769 + }, + "combine": { + "p50": 207.96799659729004, + "p90": 228.7680059671402, + "p95": 234.1119945049286, + "p99": 248.09600412845612 + }, + "roundtrip": { + "p50": 617.1200275421143, + "p90": 650.6239771842957, + "p95": 662.4000072479248, + "p99": 687.8399848937988 + }, + "isolatedSum": { + "p50": 635.8720064163208, + "p90": 701.3120204210281, + "p95": 719.1999852657318, + "p99": 779.648020863533 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133828608, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 6, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 612.8000020980835, + "p90": 649.0560173988342, + "p95": 663.3279919624329, + "p99": 698.7199783325195 + }, + "combine": { + "p50": 326.07999444007874, + "p90": 335.00799536705017, + "p95": 338.78400921821594, + "p99": 348.32000732421875 + }, + "roundtrip": { + "p50": 941.9519901275635, + "p90": 1067.4560070037842, + "p95": 1076.2239694595337, + "p99": 1107.6159477233887 + }, + "isolatedSum": { + "p50": 938.8799965381622, + "p90": 984.0640127658844, + "p95": 1002.1120011806488, + "p99": 1047.0399856567383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267190272, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1037.8880500793457, + "p90": 1125.4080533981323, + "p95": 1181.280016899109, + "p99": 1230.463981628418 + }, + "combine": { + "p50": 562.1439814567566, + "p90": 582.144021987915, + "p95": 591.4559960365295, + "p99": 610.7839941978455 + }, + "roundtrip": { + "p50": 1577.023983001709, + "p90": 1689.9199485778809, + "p95": 1720.51203250885, + "p99": 1763.10396194458 + }, + "isolatedSum": { + "p50": 1600.0320315361023, + "p90": 1707.5520753860474, + "p95": 1772.7360129356384, + "p99": 1841.2479758262634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 533059584, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1930.8799505233765, + "p90": 1952.1280527114868, + "p95": 1967.2640562057495, + "p99": 2039.9680137634277 + }, + "combine": { + "p50": 1016.92795753479, + "p90": 1035.0079536437988, + "p95": 1043.936014175415, + 
"p99": 1101.631999015808 + }, + "roundtrip": { + "p50": 2996.1280822753906, + "p90": 3024.5120525360107, + "p95": 3031.167984008789, + "p99": 3102.368116378784 + }, + "isolatedSum": { + "p50": 2947.8079080581665, + "p90": 2987.1360063552856, + "p95": 3011.2000703811646, + "p99": 3141.600012779236 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1065861120, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-bed60cc3", + "identity": "h200|uccl|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_ab5f65fc", + "comparisonKey": "e0dd384a6136f028", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:38:45.814068+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + 
"scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 130.14400005340576, + "p90": 166.6560024023056, + "p95": 176.1920005083084, + "p99": 196.6720074415207 + }, + "combine": { + "p50": 111.90400272607803, + "p90": 134.62400436401367, + "p95": 139.20000195503235, + "p99": 148.41599762439728 + }, + "roundtrip": { + "p50": 269.9199914932251, + "p90": 323.8399922847748, + "p95": 337.15200424194336, + "p99": 371.39201164245605 + }, + "isolatedSum": { + "p50": 242.0480027794838, + "p90": 301.2800067663193, + "p95": 315.39200246334076, + "p99": 345.08800506591797 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 150.14399588108063, + "p90": 185.05600094795227, + "p95": 191.45600497722626, + "p99": 209.24800634384155 + }, + "combine": { + "p50": 152.73599326610565, + "p90": 169.95200514793396, + "p95": 178.46399545669556, + "p99": 189.95200097560883 + }, + "roundtrip": { + "p50": 357.85600543022156, + "p90": 392.0319974422455, + "p95": 401.08799934387207, + "p99": 426.2720048427582 + }, + "isolatedSum": { + "p50": 302.8799891471863, + "p90": 355.00800609588623, + "p95": 369.9200004339218, + "p99": 
399.2000073194504 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 182.01600015163422, + "p90": 210.33599972724915, + "p95": 218.30399334430695, + "p99": 227.23199427127838 + }, + "combine": { + "p50": 232.67200589179993, + "p90": 247.77600169181824, + "p95": 253.60000133514404, + "p99": 262.81601190567017 + }, + "roundtrip": { + "p50": 550.495982170105, + "p90": 581.3760161399841, + "p95": 591.808021068573, + "p99": 626.8159747123718 + }, + "isolatedSum": { + "p50": 414.68800604343414, + "p90": 458.1120014190674, + "p95": 471.903994679451, + "p99": 490.04800617694855 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 249.95200335979462, + "p90": 279.58399057388306, + "p95": 285.75998544692993, + "p99": 298.43199253082275 + }, + "combine": { + "p50": 365.59998989105225, + "p90": 377.53599882125854, + "p95": 386.6559863090515, + "p99": 414.11200165748596 + }, + "roundtrip": { + "p50": 884.4799995422363, + "p90": 905.5039882659912, + "p95": 909.4719886779785, + "p99": 921.0879802703857 + }, + "isolatedSum": { + "p50": 615.5519932508469, + "p90": 657.1199893951416, + "p95": 672.4159717559814, + "p99": 712.5439941883087 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + 
"p50": 387.36000657081604, + "p90": 415.4239892959595, + "p95": 425.50399899482727, + "p99": 440.8000111579895 + }, + "combine": { + "p50": 631.5199732780457, + "p90": 641.1200165748596, + "p95": 644.9919939041138, + "p99": 656.1599969863892 + }, + "roundtrip": { + "p50": 1559.4240427017212, + "p90": 1587.4559879302979, + "p95": 1602.07998752594, + "p99": 1649.2799520492554 + }, + "isolatedSum": { + "p50": 1018.8799798488617, + "p90": 1056.544005870819, + "p95": 1070.495992898941, + "p99": 1096.9600081443787 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 645.0240015983582, + "p90": 663.0719900131226, + "p95": 669.6320176124573, + "p99": 693.0239796638489 + }, + "combine": { + "p50": 1137.3440027236938, + "p90": 1151.74400806427, + "p95": 1156.607985496521, + "p99": 1169.5040464401245 + }, + "roundtrip": { + "p50": 2897.8559970855713, + "p90": 2920.2558994293213, + "p95": 2926.9120693206787, + "p99": 2953.183889389038 + }, + "isolatedSum": { + "p50": 1782.368004322052, + "p90": 1814.8159980773926, + "p95": 1826.2400031089783, + "p99": 1862.5280261039734 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-b8d71edb", + "identity": "h200|uccl|n-a|7168|8|256|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_5966376c", + "comparisonKey": "3b5772d536d0a9c8", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:40:46.206130+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": 
"h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 269.53598856925964, + "p90": 310.4639947414398, + "p95": 325.6640136241913, + "p99": 351.23199224472046 + }, + "combine": { + "p50": 111.77600175142288, + "p90": 124.57600235939026, + "p95": 138.2399946451187, + "p99": 174.94399845600128 + }, + "roundtrip": { + "p50": 
387.03998923301697, + "p90": 433.6639940738678, + "p95": 442.1440064907074, + "p99": 465.63199162483215 + }, + "isolatedSum": { + "p50": 381.3119903206825, + "p90": 435.0399971008301, + "p95": 463.90400826931, + "p99": 526.1759907007217 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 347.0720052719116, + "p90": 371.071994304657, + "p95": 384.0000033378601, + "p99": 454.0160000324249 + }, + "combine": { + "p50": 151.74399316310883, + "p90": 157.3760062456131, + "p95": 158.81599485874176, + "p99": 168.2880073785782 + }, + "roundtrip": { + "p50": 468.25599670410156, + "p90": 491.7759895324707, + "p95": 500.0320076942444, + "p99": 561.3120198249817 + }, + "isolatedSum": { + "p50": 498.81599843502045, + "p90": 528.4480005502701, + "p95": 542.8159981966019, + "p99": 622.3040074110031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 440.8639967441559, + "p90": 459.03998613357544, + "p95": 468.80000829696655, + "p99": 491.61601066589355 + }, + "combine": { + "p50": 231.1359941959381, + "p90": 238.01599442958832, + "p95": 241.69600009918213, + "p99": 250.40000677108765 + }, + "roundtrip": { + "p50": 649.0240097045898, + "p90": 674.1759777069092, + "p95": 684.9279999732971, + "p99": 762.4639868736267 + }, + "isolatedSum": { + "p50": 671.999990940094, + "p90": 697.0559805631638, + "p95": 710.4960083961487, + "p99": 742.0160174369812 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + 
"fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 657.4079990386963, + "p90": 681.2480092048645, + "p95": 688.86399269104, + "p99": 720.4800248146057 + }, + "combine": { + "p50": 366.8479919433594, + "p90": 375.39198994636536, + "p95": 378.7840008735657, + "p99": 392.89599657058716 + }, + "roundtrip": { + "p50": 988.7040257453918, + "p90": 1004.3840408325195, + "p95": 1012.2560262680054, + "p99": 1038.815975189209 + }, + "isolatedSum": { + "p50": 1024.2559909820557, + "p90": 1056.6399991512299, + "p95": 1067.6479935646057, + "p99": 1113.3760213851929 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1150.1120328903198, + "p90": 1162.335991859436, + "p95": 1169.7920560836792, + "p99": 1204.1599750518799 + }, + "combine": { + "p50": 630.4640173912048, + "p90": 638.4959816932678, + "p95": 642.848014831543, + "p99": 653.4720063209534 + }, + "roundtrip": { + "p50": 1744.1920042037964, + "p90": 1760.8319520950317, + "p95": 1770.0480222702026, + "p99": 1872.9599714279175 + }, + "isolatedSum": { + "p50": 1780.5760502815247, + "p90": 1800.8319735527039, + "p95": 1812.6400709152222, + "p99": 1857.6319813728333 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2148.1919288635254, + "p90": 2161.9200706481934, + "p95": 2167.9680347442627, + "p99": 2217.087984085083 + }, + 
"combine": { + "p50": 1139.1359567642212, + "p90": 1151.6159772872925, + "p95": 1155.8719873428345, + "p99": 1175.1680374145508 + }, + "roundtrip": { + "p50": 3264.0960216522217, + "p90": 3286.5281105041504, + "p95": 3299.3600368499756, + "p99": 3342.911958694458 + }, + "isolatedSum": { + "p50": 3287.3278856277466, + "p90": 3313.536047935486, + "p95": 3323.840022087097, + "p99": 3392.256021499634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3d975a58", + "identity": "h200|uccl|n-a|7168|8|384|fp8|normal|runtime-visible-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "h200_5966376c", + "comparisonKey": "a726cc63fb45bda9", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:44:09.520433+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "runtime-visible-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + 
"paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 293.7279939651489, + "p90": 347.4879860877991, + "p95": 358.88001322746277, + "p99": 399.04001355171204 + }, + "combine": { + "p50": 109.8880022764206, + "p90": 128.51199507713318, + "p95": 136.09600067138672, + "p99": 162.33600676059723 + }, + "roundtrip": { + "p50": 389.3440067768097, + "p90": 439.4879937171936, + "p95": 450.3679871559143, + "p99": 477.88798809051514 + }, + "isolatedSum": { + "p50": 403.6159962415695, + "p90": 475.99998116493225, + "p95": 494.9760138988495, + "p99": 561.3760203123093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38757376, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 349.7599959373474, + "p90": 407.1359932422638, + "p95": 429.05598878860474, + "p99": 530.9439897537231 + }, + "combine": { + "p50": 153.76000106334686, + "p90": 175.90400576591492, + "p95": 181.85600638389587, + "p99": 188.83199989795685 + }, + "roundtrip": { + "p50": 486.36800050735474, + "p90": 521.6000080108643, + "p95": 538.3039712905884, + "p99": 645.6000208854675 + }, + 
"isolatedSum": { + "p50": 503.5199970006943, + "p90": 583.0399990081787, + "p95": 610.9119951725006, + "p99": 719.77598965168 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77285376, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 464.9600088596344, + "p90": 512.7679705619812, + "p95": 530.1759839057922, + "p99": 625.5999803543091 + }, + "combine": { + "p50": 231.455996632576, + "p90": 254.94399666786194, + "p95": 264.8960053920746, + "p99": 278.0480086803436 + }, + "roundtrip": { + "p50": 673.0239987373352, + "p90": 708.3520293235779, + "p95": 718.6880111694336, + "p99": 746.0799813270569 + }, + "isolatedSum": { + "p50": 696.4160054922104, + "p90": 767.7119672298431, + "p95": 795.0719892978668, + "p99": 903.6479890346527 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154886144, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 681.3759803771973, + "p90": 708.4800004959106, + "p95": 718.8479900360107, + "p99": 817.0239925384521 + }, + "combine": { + "p50": 365.1520013809204, + "p90": 376.41599774360657, + "p95": 381.79200887680054, + "p99": 390.20800590515137 + }, + "roundtrip": { + "p50": 1020.7359790802002, + "p90": 1054.1759729385376, + "p95": 1076.3520002365112, + "p99": 1212.7360105514526 + }, + "isolatedSum": { + "p50": 1046.5279817581177, + "p90": 1084.8959982395172, + "p95": 1100.6399989128113, + "p99": 1207.2319984436035 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309750784, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 5469, + "stragglerRank": 5, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1153.056025505066, + "p90": 1234.1760396957397, + "p95": 1263.8399600982666, + "p99": 1308.4800243377686 + }, + "combine": { + "p50": 633.8880062103271, + "p90": 658.9120030403137, + "p95": 666.208028793335, + "p99": 681.4720034599304 + }, + "roundtrip": { + "p50": 1746.880054473877, + "p90": 1850.7200479507446, + "p95": 1886.1440420150757, + "p99": 1929.0560483932495 + }, + "isolatedSum": { + "p50": 1786.944031715393, + "p90": 1893.0880427360535, + "p95": 1930.0479888916016, + "p99": 1989.952027797699 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619687936, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 10883, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 2155.263900756836, + "p90": 2177.40797996521, + "p95": 2187.0720386505127, + "p99": 2276.1919498443604 + }, + "combine": { + "p50": 1125.216007232666, + "p90": 1141.4719820022583, + "p95": 1148.7679481506348, + "p99": 1169.600009918213 + }, + "roundtrip": { + "p50": 3248.8319873809814, + "p90": 3276.1919498443604, + "p95": 3288.0640029907227, + "p99": 3345.088005065918 + }, + "isolatedSum": { + "p50": 3280.479907989502, + "p90": 3318.8799619674683, + "p95": 3335.8399868011475, + "p99": 3445.7919597625732 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239834624, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 21730, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-2228ed37", + "identity": "h200|uccl|n-a|7168|8|256|fp8|normal|cached-layout-comm-only-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "h200_980d3360", + "comparisonKey": "0101b0c648787985", + "schemaVersion": 3, + 
"generatedAt": "2026-07-02T10:39:42.698447+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "h200-dgxc-slurm_2", + "sku": "h200", + "backend": "uccl", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "cached-layout-comm-only-v1", + "topologyClass": "h200-nvlink-island", + "transport": "nvlink", + "worldSize": 8, + "epSize": 8, + "label": "H200 EP8 · uccl · fp8 [cl]", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.1515, + "configuredUnits": 20, + "deviceUnits": 132, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 105.66399991512299, + "p90": 114.49600011110306, + "p95": 117.21599847078323, + "p99": 122.27199971675873 + }, + "combine": { + 
"p50": 109.98400300741196, + "p90": 116.83200299739838, + "p95": 121.0239976644516, + "p99": 129.60000336170197 + }, + "roundtrip": { + "p50": 242.43199825286865, + "p90": 263.68001103401184, + "p95": 274.01599287986755, + "p99": 322.6560056209564 + }, + "isolatedSum": { + "p50": 215.64800292253494, + "p90": 231.32800310850143, + "p95": 238.23999613523483, + "p99": 251.8720030784607 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 124.51200187206268, + "p90": 133.44000279903412, + "p95": 137.5039964914322, + "p99": 144.80000734329224 + }, + "combine": { + "p50": 152.16000378131866, + "p90": 157.69599378108978, + "p95": 159.7760021686554, + "p99": 165.21599888801575 + }, + "roundtrip": { + "p50": 333.21601152420044, + "p90": 342.0160114765167, + "p95": 345.8240032196045, + "p99": 358.36800932884216 + }, + "isolatedSum": { + "p50": 276.67200565338135, + "p90": 291.1359965801239, + "p95": 297.2799986600876, + "p99": 310.016006231308 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 163.32800686359406, + "p90": 174.81599748134613, + "p95": 180.4479956626892, + "p99": 211.84000372886658 + }, + "combine": { + "p50": 230.97600042819977, + "p90": 238.17600309848785, + "p95": 241.82400107383728, + "p99": 250.2399981021881 + }, + "roundtrip": { + "p50": 520.2879905700684, + "p90": 531.8080186843872, + "p95": 536.7040038108826, + "p99": 590.3679728507996 + }, + "isolatedSum": { + "p50": 394.3040072917938, + "p90": 412.992000579834, + "p95": 
422.2719967365265, + "p99": 462.0800018310547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 226.97600722312927, + "p90": 236.89599335193634, + "p95": 241.63199961185455, + "p99": 249.05599653720856 + }, + "combine": { + "p50": 363.103985786438, + "p90": 370.1759874820709, + "p95": 372.73600697517395, + "p99": 378.9440095424652 + }, + "roundtrip": { + "p50": 860.8959913253784, + "p90": 870.2080249786377, + "p95": 873.63201379776, + "p99": 905.6640267372131 + }, + "isolatedSum": { + "p50": 590.0799930095673, + "p90": 607.0719808340073, + "p95": 614.3680065870285, + "p99": 628.0000060796738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 311721984, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 5505, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 354.94399070739746, + "p90": 363.9039993286133, + "p95": 366.8479919433594, + "p99": 373.21600317955017 + }, + "combine": { + "p50": 629.4080018997192, + "p90": 637.7919912338257, + "p95": 640.3200030326843, + "p99": 647.4239826202393 + }, + "roundtrip": { + "p50": 1532.9920053482056, + "p90": 1544.2559719085693, + "p95": 1548.9599704742432, + "p99": 1569.375991821289 + }, + "isolatedSum": { + "p50": 984.3519926071167, + "p90": 1001.695990562439, + "p95": 1007.1679949760437, + "p99": 1020.6399857997894 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 621902848, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 10952, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + 
"globalTokens": 32768, + "dispatch": { + "p50": 608.7679862976074, + "p90": 621.3439702987671, + "p95": 624.5440244674683, + "p99": 641.3760185241699 + }, + "combine": { + "p50": 1136.8319988250732, + "p90": 1148.352026939392, + "p95": 1152.8960466384888, + "p99": 1162.3040437698364 + }, + "roundtrip": { + "p50": 2869.215965270996, + "p90": 2885.3440284729004, + "p95": 2891.5200233459473, + "p99": 2907.8400135040283 + }, + "isolatedSum": { + "p50": 1745.5999851226807, + "p90": 1769.6959972381592, + "p95": 1777.440071105957, + "p99": 1803.6800622940063 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243504640, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 21781, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-72387ee2", + "identity": "mi325x|mori|n-a|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "fa6d0e82f9b35cbc", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:59:52.602029+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + 
"configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.5979967713356, + "p90": 74.6380016207695, + "p95": 78.47800105810165, + "p99": 91.07799828052521 + }, + "combine": { + "p50": 46.119000762701035, + "p90": 50.7580004632473, + "p95": 63.55799734592438, + "p99": 106.67800158262253 + }, + "roundtrip": { + "p50": 128.75699996948242, + "p90": 140.63699543476105, + "p95": 152.27599442005157, + "p99": 176.9549995660782 + }, + "isolatedSum": { + "p50": 115.71699753403664, + "p90": 125.3960020840168, + "p95": 142.03599840402603, + "p99": 197.75599986314774 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.43800097703934, + "p90": 72.55800068378448, + "p95": 74.99799877405167, + "p99": 90.11700004339218 + }, + "combine": { + "p50": 45.91900110244751, + "p90": 50.27899891138077, + "p95": 63.03899735212326, + "p99": 
71.91800326108932 + }, + "roundtrip": { + "p50": 128.39700281620026, + "p90": 139.55600559711456, + "p95": 147.63599634170532, + "p99": 164.1560047864914 + }, + "isolatedSum": { + "p50": 114.35700207948685, + "p90": 122.83699959516525, + "p95": 138.03699612617493, + "p99": 162.0350033044815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 68.99800151586533, + "p90": 74.15799796581268, + "p95": 77.87799835205078, + "p99": 94.39700096845627 + }, + "combine": { + "p50": 45.43900117278099, + "p90": 49.75900053977966, + "p95": 58.91900137066841, + "p99": 69.51799988746643 + }, + "roundtrip": { + "p50": 127.4770051240921, + "p90": 136.23599708080292, + "p95": 144.4769948720932, + "p99": 157.43599832057953 + }, + "isolatedSum": { + "p50": 114.43700268864632, + "p90": 123.91699850559235, + "p95": 136.7969997227192, + "p99": 163.9150008559227 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.79800015687943, + "p90": 74.1180032491684, + "p95": 75.47800242900848, + "p99": 89.95799720287323 + }, + "combine": { + "p50": 45.27899995446205, + "p90": 49.39800128340721, + "p95": 57.91899934411049, + "p99": 68.99800151586533 + }, + "roundtrip": { + "p50": 128.59700620174408, + "p90": 138.75700533390045, + "p95": 144.87600326538086, + "p99": 154.8759937286377 + }, + "isolatedSum": { + "p50": 115.07700011134148, + "p90": 123.51600453257561, + "p95": 133.39700177311897, + "p99": 158.95599871873856 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + 
"combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 70.99899649620056, + "p90": 75.83799958229065, + "p95": 79.63799685239792, + "p99": 95.3570008277893 + }, + "combine": { + "p50": 45.83900049328804, + "p90": 49.917999655008316, + "p95": 58.91900137066841, + "p99": 70.59799879789352 + }, + "roundtrip": { + "p50": 131.31700456142426, + "p90": 141.19599759578705, + "p95": 147.79600501060486, + "p99": 157.59600698947906 + }, + "isolatedSum": { + "p50": 116.8379969894886, + "p90": 125.75599923729897, + "p95": 138.55699822306633, + "p99": 165.95499962568283 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 73.5979974269867, + "p90": 78.19800078868866, + "p95": 80.51799982786179, + "p99": 93.87800097465515 + }, + "combine": { + "p50": 47.839000821113586, + "p90": 51.95799842476845, + "p95": 57.43800103664398, + "p99": 69.63899731636047 + }, + "roundtrip": { + "p50": 134.83700156211853, + "p90": 143.996000289917, + "p95": 149.91599321365356, + "p99": 161.31600737571716 + }, + "isolatedSum": { + "p50": 121.43699824810028, + "p90": 130.1559992134571, + "p95": 137.95600086450577, + "p99": 163.51699829101562 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.23799884319305, + "p90": 91.15800261497498, + "p95": 92.55799651145935, + "p99": 97.79699891805649 + }, + 
"combine": { + "p50": 54.83800172805786, + "p90": 59.238001704216, + "p95": 70.39900124073029, + "p99": 78.91800254583359 + }, + "roundtrip": { + "p50": 151.99600160121918, + "p90": 160.03599762916565, + "p95": 165.79599678516388, + "p99": 172.99599945545197 + }, + "isolatedSum": { + "p50": 142.07600057125092, + "p90": 150.39600431919098, + "p95": 162.95699775218964, + "p99": 176.71500146389008 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.63799947500229, + "p90": 97.87800163030624, + "p95": 99.59699958562851, + "p99": 102.35799849033356 + }, + "combine": { + "p50": 67.47899949550629, + "p90": 70.9180012345314, + "p95": 75.51799714565277, + "p99": 88.95699679851532 + }, + "roundtrip": { + "p50": 174.35599863529205, + "p90": 180.3950071334839, + "p95": 182.91600048542023, + "p99": 188.11599910259247 + }, + "isolatedSum": { + "p50": 163.11699897050858, + "p90": 168.79600286483765, + "p95": 175.11499673128128, + "p99": 191.31499528884888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-4691b617", + "identity": "mi325x|mori|n-a|5120|8|160|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "6173949d0ef3eb39", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:00:35.776469+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + 
"suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.75799864530563, + "p90": 79.47800308465958, + "p95": 83.47699791193008, + "p99": 99.27800297737122 + }, + "combine": { + "p50": 44.23899948596954, + "p90": 49.279000610113144, + "p95": 65.91799855232239, + "p99": 136.19700074195862 + }, + "roundtrip": { + "p50": 139.99700546264648, + "p90": 152.07700431346893, + "p95": 160.51599383354187, + "p99": 
184.67499315738678 + }, + "isolatedSum": { + "p50": 117.99699813127518, + "p90": 128.75700369477272, + "p95": 149.39499646425247, + "p99": 235.47500371932983 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 73.91799986362457, + "p90": 78.95900309085846, + "p95": 81.67800307273865, + "p99": 94.99700367450714 + }, + "combine": { + "p50": 44.11900043487549, + "p90": 48.27899858355522, + "p95": 56.83799833059311, + "p99": 68.75800341367722 + }, + "roundtrip": { + "p50": 140.71600139141083, + "p90": 152.35599875450134, + "p95": 157.43599832057953, + "p99": 170.5559939146042 + }, + "isolatedSum": { + "p50": 118.03700029850006, + "p90": 127.23800167441368, + "p95": 138.51600140333176, + "p99": 163.75500708818436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 73.95900040864944, + "p90": 79.23799753189087, + "p95": 83.59800279140472, + "p99": 95.27699649333954 + }, + "combine": { + "p50": 44.43899914622307, + "p90": 48.87799918651581, + "p95": 56.919001042842865, + "p99": 68.7590017914772 + }, + "roundtrip": { + "p50": 140.11700451374054, + "p90": 149.7959941625595, + "p95": 153.8359969854355, + "p99": 167.63600707054138 + }, + "isolatedSum": { + "p50": 118.39799955487251, + "p90": 128.11599671840668, + "p95": 140.5170038342476, + "p99": 164.03599828481674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + 
"trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 74.51800256967545, + "p90": 79.51799780130386, + "p95": 83.31800252199173, + "p99": 94.43800151348114 + }, + "combine": { + "p50": 44.39900070428848, + "p90": 48.47799986600876, + "p95": 56.359000504016876, + "p99": 69.67899948358536 + }, + "roundtrip": { + "p50": 139.79700207710266, + "p90": 150.51600337028503, + "p95": 156.1560034751892, + "p99": 168.31600666046143 + }, + "isolatedSum": { + "p50": 118.91700327396393, + "p90": 127.99599766731262, + "p95": 139.6770030260086, + "p99": 164.1170009970665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.67799633741379, + "p90": 79.47800308465958, + "p95": 81.47799968719482, + "p99": 93.31800043582916 + }, + "combine": { + "p50": 45.99900171160698, + "p90": 49.95900020003319, + "p95": 60.75900048017502, + "p99": 70.71799784898758 + }, + "roundtrip": { + "p50": 140.15600085258484, + "p90": 149.7959941625595, + "p95": 154.91600334644318, + "p99": 166.35599732398987 + }, + "isolatedSum": { + "p50": 120.67699804902077, + "p90": 129.43700328469276, + "p95": 142.23700016736984, + "p99": 164.03599828481674 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 77.67900079488754, + "p90": 82.19800144433975, + "p95": 83.51799845695496, + "p99": 98.2770025730133 + }, + "combine": { + "p50": 48.879001289606094, + "p90": 52.75899916887283, + "p95": 59.31900069117546, + "p99": 72.95899838209152 + }, + "roundtrip": { + "p50": 
144.7959989309311, + "p90": 154.47600185871124, + "p95": 161.95599734783173, + "p99": 171.47600650787354 + }, + "isolatedSum": { + "p50": 126.55800208449364, + "p90": 134.95700061321259, + "p95": 142.83699914813042, + "p99": 171.23600095510483 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 179, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 84.477998316288, + "p90": 88.63800019025803, + "p95": 90.71700274944305, + "p99": 106.35799914598465 + }, + "combine": { + "p50": 54.79900166392326, + "p90": 58.278001844882965, + "p95": 62.477998435497284, + "p99": 77.87799835205078 + }, + "roundtrip": { + "p50": 156.79599344730377, + "p90": 165.59599339962006, + "p95": 173.23599755764008, + "p99": 184.91500616073608 + }, + "isolatedSum": { + "p50": 139.27699998021126, + "p90": 146.916002035141, + "p95": 153.19500118494034, + "p99": 184.23599749803543 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 93.43799948692322, + "p90": 96.87799960374832, + "p95": 99.03799742460251, + "p99": 103.39699685573578 + }, + "combine": { + "p50": 66.87799841165543, + "p90": 71.07800245285034, + "p95": 73.8380029797554, + "p99": 90.43800085783005 + }, + "roundtrip": { + "p50": 179.35599386692047, + "p90": 188.15499544143677, + "p95": 191.43599271774292, + "p99": 197.79500365257263 + }, + "isolatedSum": { + "p50": 160.31599789857864, + "p90": 167.95600205659866, + "p95": 172.8760004043579, + "p99": 193.83499771356583 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + 
"fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3e17be5e", + "identity": "mi325x|mori|n-a|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "51bc482d8e97d52e", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:01:18.650251+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": 
"SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.83799958229065, + "p90": 81.91800117492676, + "p95": 84.9979966878891, + "p99": 101.39700025320053 + }, + "combine": { + "p50": 45.35900056362152, + "p90": 51.03899911046028, + "p95": 64.19800221920013, + "p99": 137.63700425624847 + }, + "roundtrip": { + "p50": 142.79699325561523, + "p90": 152.11600065231323, + "p95": 162.555992603302, + "p99": 187.75500357151031 + }, + "isolatedSum": { + "p50": 121.19700014591217, + "p90": 132.95700028538704, + "p95": 149.19599890708923, + "p99": 239.034004509449 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 77.87799835205078, + "p90": 83.47799628973007, + "p95": 85.95799654722214, + "p99": 100.71799904108047 + }, + "combine": { + "p50": 45.639000833034515, + "p90": 50.11900141835213, + "p95": 53.918998688459396, + "p99": 69.91799920797348 + }, + "roundtrip": { + "p50": 144.7560042142868, + "p90": 153.95599603652954, + "p95": 157.91699290275574, + "p99": 171.5960055589676 + }, + "isolatedSum": { + "p50": 123.5169991850853, + "p90": 133.5969977080822, + "p95": 139.87699523568153, + "p99": 170.63599824905396 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 77.31799781322479, + "p90": 82.67799764871597, + 
"p95": 85.35800129175186, + "p99": 100.9180024266243 + }, + "combine": { + "p50": 45.27899995446205, + "p90": 49.279000610113144, + "p95": 58.958999812603, + "p99": 68.91799718141556 + }, + "roundtrip": { + "p50": 144.15700733661652, + "p90": 151.99600160121918, + "p95": 154.63599562644958, + "p99": 171.1149960756302 + }, + "isolatedSum": { + "p50": 122.59699776768684, + "p90": 131.95699825882912, + "p95": 144.31700110435486, + "p99": 169.83599960803986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 78.19800078868866, + "p90": 84.35799926519394, + "p95": 87.75699883699417, + "p99": 101.03800147771835 + }, + "combine": { + "p50": 45.75899988412857, + "p90": 50.03900080919266, + "p95": 62.99799680709839, + "p99": 71.1980015039444 + }, + "roundtrip": { + "p50": 144.67599987983704, + "p90": 152.196004986763, + "p95": 157.59600698947906, + "p99": 171.8360036611557 + }, + "isolatedSum": { + "p50": 123.95700067281723, + "p90": 134.3970000743866, + "p95": 150.75499564409256, + "p99": 172.23600298166275 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 79.35799658298492, + "p90": 84.59799736738205, + "p95": 87.957002222538, + "p99": 102.47699916362762 + }, + "combine": { + "p50": 46.838998794555664, + "p90": 51.15899816155434, + "p95": 58.758001774549484, + "p99": 69.27800178527832 + }, + "roundtrip": { + "p50": 147.6760059595108, + "p90": 156.7160040140152, + "p95": 183.95599722862244, + "p99": 284.03300046920776 + }, + "isolatedSum": { + "p50": 
126.19699537754059, + "p90": 135.7569955289364, + "p95": 146.71500399708748, + "p99": 171.75500094890594 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 83.23799818754196, + "p90": 92.35800057649612, + "p95": 96.43799811601639, + "p99": 105.55700212717056 + }, + "combine": { + "p50": 51.038000732660294, + "p90": 55.47900125384331, + "p95": 64.87800180912018, + "p99": 73.79800081253052 + }, + "roundtrip": { + "p50": 151.79599821567535, + "p90": 160.03599762916565, + "p95": 163.6359989643097, + "p99": 175.43600499629974 + }, + "isolatedSum": { + "p50": 134.27599892020226, + "p90": 147.83700183033943, + "p95": 161.31599992513657, + "p99": 179.35500293970108 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 91.71800315380096, + "p90": 96.19800001382828, + "p95": 98.43800216913223, + "p99": 111.39699816703796 + }, + "combine": { + "p50": 56.598000228405, + "p90": 60.27799844741821, + "p95": 64.43800032138824, + "p99": 79.19800281524658 + }, + "roundtrip": { + "p50": 165.9960001707077, + "p90": 172.3960041999817, + "p95": 176.55600607395172, + "p99": 186.83500587940216 + }, + "isolatedSum": { + "p50": 148.31600338220596, + "p90": 156.4759984612465, + "p95": 162.87600249052048, + "p99": 190.59500098228455 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 106.59699887037277, + "p90": 110.31699925661087, + "p95": 111.63700371980667, + "p99": 115.03700166940689 + }, + "combine": { + "p50": 72.67799973487854, + "p90": 76.35799795389175, + "p95": 79.39799875020981, + "p99": 94.75799649953842 + }, + "roundtrip": { + "p50": 199.7150033712387, + "p90": 205.27499914169312, + "p95": 207.1550041437149, + "p99": 210.23499965667725 + }, + "isolatedSum": { + "p50": 179.2749986052513, + "p90": 186.67499721050262, + "p95": 191.03500247001648, + "p99": 209.7949981689453 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-7bb188a9", + "identity": "mi325x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "332588d0ce1bbd0f", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:57:43.928818+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 70.71799784898758, + "p90": 78.47800105810165, + "p95": 83.79799872636795, + "p99": 94.19699758291245 + }, + "combine": { + "p50": 45.239001512527466, + "p90": 51.11899971961975, + "p95": 67.11799651384354, + "p99": 272.7530002593994 + }, + "roundtrip": { + "p50": 130.55700063705444, + "p90": 143.07700097560883, + "p95": 152.07600593566895, + "p99": 179.91599440574646 + }, + "isolatedSum": { + "p50": 115.95699936151505, + "p90": 129.5970007777214, + "p95": 150.9159952402115, + "p99": 366.94999784231186 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.31900233030319, + "p90": 73.91899824142456, + "p95": 80.19799739122391, + "p99": 90.11700004339218 + }, + "combine": { + "p50": 45.27899995446205, + "p90": 49.598999321460724, + 
"p95": 66.07899814844131, + "p99": 69.5979967713356 + }, + "roundtrip": { + "p50": 131.59699738025665, + "p90": 141.95699989795685, + "p95": 148.7559974193573, + "p99": 157.63600170612335 + }, + "isolatedSum": { + "p50": 114.59800228476524, + "p90": 123.51799756288528, + "p95": 146.27699553966522, + "p99": 159.71499681472778 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.1180025935173, + "p90": 74.3580013513565, + "p95": 77.47799903154373, + "p99": 95.03799676895142 + }, + "combine": { + "p50": 45.6789992749691, + "p90": 51.11899971961975, + "p95": 65.95800071954727, + "p99": 70.67800313234329 + }, + "roundtrip": { + "p50": 131.7960023880005, + "p90": 142.15600490570068, + "p95": 149.35599267482758, + "p99": 158.5959941148758 + }, + "isolatedSum": { + "p50": 115.7970018684864, + "p90": 125.47700107097626, + "p95": 143.435999751091, + "p99": 165.7159999012947 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.9180012345314, + "p90": 75.51799714565277, + "p95": 81.83799684047699, + "p99": 93.11699867248535 + }, + "combine": { + "p50": 46.79900035262108, + "p90": 52.07899957895279, + "p95": 65.51899760961533, + "p99": 70.07899880409241 + }, + "roundtrip": { + "p50": 136.91699504852295, + "p90": 194.6749985218048, + "p95": 242.19399690628052, + "p99": 356.03100061416626 + }, + "isolatedSum": { + "p50": 117.71700158715248, + "p90": 127.59699672460556, + "p95": 147.35699445009232, + "p99": 163.19599747657776 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.35799932479858, + "p90": 78.19800078868866, + "p95": 85.59799939393997, + "p99": 95.15800327062607 + }, + "combine": { + "p50": 47.958001494407654, + "p90": 52.8389997780323, + "p95": 66.47799909114838, + "p99": 72.83800095319748 + }, + "roundtrip": { + "p50": 137.15699315071106, + "p90": 147.87699282169342, + "p95": 152.756005525589, + "p99": 162.8360003232956 + }, + "isolatedSum": { + "p50": 121.31600081920624, + "p90": 131.03700056672096, + "p95": 152.07599848508835, + "p99": 167.99600422382355 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.19800281524658, + "p90": 83.31800252199173, + "p95": 85.59799939393997, + "p99": 99.6370017528534 + }, + "combine": { + "p50": 53.397998213768005, + "p90": 57.91899934411049, + "p95": 71.71899825334549, + "p99": 76.95800065994263 + }, + "roundtrip": { + "p50": 145.63600718975067, + "p90": 157.43599832057953, + "p95": 163.75499963760376, + "p99": 169.79500651359558 + }, + "isolatedSum": { + "p50": 132.5960010290146, + "p90": 141.23700186610222, + "p95": 157.31699764728546, + "p99": 176.59500241279602 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.43799948692322, + "p90": 96.99799865484238, + "p95": 99.11700338125229, + "p99": 
106.35700076818466 + }, + "combine": { + "p50": 59.5179982483387, + "p90": 63.398003578186035, + "p95": 78.43799889087677, + "p99": 82.83799886703491 + }, + "roundtrip": { + "p50": 166.67599976062775, + "p90": 175.51599442958832, + "p95": 178.91499400138855, + "p99": 188.8750046491623 + }, + "isolatedSum": { + "p50": 152.95599773526192, + "p90": 160.3960022330284, + "p95": 177.55500227212906, + "p99": 189.19499963521957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.35699665546417, + "p90": 118.55699867010117, + "p95": 120.39700150489807, + "p99": 123.91699850559235 + }, + "combine": { + "p50": 81.557996571064, + "p90": 86.11799776554108, + "p95": 97.5169986486435, + "p99": 104.95699942111969 + }, + "roundtrip": { + "p50": 209.95500683784485, + "p90": 216.91399812698364, + "p95": 219.87399458885193, + "p99": 226.8339991569519 + }, + "isolatedSum": { + "p50": 196.91499322652817, + "p90": 204.67499643564224, + "p95": 217.91400015354156, + "p99": 228.87399792671204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-f86856d4", + "identity": "mi325x|mori|n-a|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "231a2ac489970941", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:59:10.072283+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + 
"resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.79800081253052, + "p90": 79.6779990196228, + "p95": 84.7179964184761, + "p99": 96.1180031299591 + }, + "combine": { + "p50": 45.63799872994423, + "p90": 60.91799959540367, + "p95": 68.75800341367722, + "p99": 256.9139897823334 + }, + "roundtrip": { + "p50": 135.23699343204498, + "p90": 146.67700231075287, + "p95": 
158.71599316596985, + "p99": 186.35499477386475 + }, + "isolatedSum": { + "p50": 119.43599954247475, + "p90": 140.59599861502647, + "p95": 153.47599983215332, + "p99": 353.0319929122925 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.47799634933472, + "p90": 77.75799930095673, + "p95": 81.277996301651, + "p99": 94.67799961566925 + }, + "combine": { + "p50": 45.878998935222626, + "p90": 51.63799971342087, + "p95": 62.27799877524376, + "p99": 69.27900016307831 + }, + "roundtrip": { + "p50": 135.07699966430664, + "p90": 145.2759951353073, + "p95": 150.8370041847229, + "p99": 165.7160073518753 + }, + "isolatedSum": { + "p50": 118.35699528455734, + "p90": 129.3959990143776, + "p95": 143.55599507689476, + "p99": 163.95699977874756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.27800041437149, + "p90": 77.03900337219238, + "p95": 81.7980021238327, + "p99": 91.83699637651443 + }, + "combine": { + "p50": 45.719001442193985, + "p90": 49.518998712301254, + "p95": 61.91899999976158, + "p99": 68.99800151586533 + }, + "roundtrip": { + "p50": 134.27700102329254, + "p90": 142.6369994878769, + "p95": 147.1560001373291, + "p99": 160.11600196361542 + }, + "isolatedSum": { + "p50": 117.99700185656548, + "p90": 126.55800208449364, + "p95": 143.71700212359428, + "p99": 160.83499789237976 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.43800365924835, + "p90": 78.03799957036972, + "p95": 82.35800266265869, + "p99": 94.5580005645752 + }, + "combine": { + "p50": 46.59799858927727, + "p90": 51.038000732660294, + "p95": 62.19799816608429, + "p99": 69.39899921417236 + }, + "roundtrip": { + "p50": 135.1570039987564, + "p90": 143.6769962310791, + "p95": 149.276003241539, + "p99": 162.51599788665771 + }, + "isolatedSum": { + "p50": 120.03600224852562, + "p90": 129.07600030303, + "p95": 144.55600082874298, + "p99": 163.95699977874756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.1579999923706, + "p90": 80.55800199508667, + "p95": 83.917997777462, + "p99": 96.1180031299591 + }, + "combine": { + "p50": 48.0789989233017, + "p90": 52.358999848365784, + "p95": 63.398003578186035, + "p99": 70.71799784898758 + }, + "roundtrip": { + "p50": 137.99700140953064, + "p90": 147.87699282169342, + "p95": 153.2759964466095, + "p99": 166.2759929895401 + }, + "isolatedSum": { + "p50": 123.2369989156723, + "p90": 132.91700184345245, + "p95": 147.31600135564804, + "p99": 166.8360009789467 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.59799671173096, + "p90": 85.07700264453888, + "p95": 87.6379981637001, + "p99": 96.6780036687851 + }, + "combine": { + "p50": 55.15899881720543, + "p90": 59.758998453617096, + "p95": 66.07799977064133, + "p99": 77.19899713993073 + }, + "roundtrip": { + 
"p50": 148.67599308490753, + "p90": 158.91599655151367, + "p95": 162.59600222110748, + "p99": 177.19599604606628 + }, + "isolatedSum": { + "p50": 135.7569955289364, + "p90": 144.83600109815598, + "p95": 153.71599793434143, + "p99": 173.87700080871582 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.71799635887146, + "p90": 99.23700243234634, + "p95": 101.27700120210648, + "p99": 105.51699995994568 + }, + "combine": { + "p50": 63.03899735212326, + "p90": 67.03799962997437, + "p95": 71.7179998755455, + "p99": 85.43799817562103 + }, + "roundtrip": { + "p50": 170.95500230789185, + "p90": 179.2760044336319, + "p95": 182.99500644207, + "p99": 186.79499626159668 + }, + "isolatedSum": { + "p50": 158.75699371099472, + "p90": 166.2750020623207, + "p95": 172.99500107765198, + "p99": 190.9549981355667 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 116.71700328588486, + "p90": 119.87700313329697, + "p95": 121.07700109481812, + "p99": 124.19699877500534 + }, + "combine": { + "p50": 80.51799982786179, + "p90": 84.71699804067612, + "p95": 92.55699813365936, + "p99": 102.51700133085251 + }, + "roundtrip": { + "p50": 212.83499896526337, + "p90": 220.4750031232834, + "p95": 224.39399361610413, + "p99": 234.7940057516098 + }, + "isolatedSum": { + "p50": 197.23500311374664, + "p90": 204.59400117397308, + "p95": 213.63399922847748, + "p99": 226.71400010585785 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 
77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-c0e0c2c1", + "identity": "mi325x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "mi325x_eeb41dae", + "comparisonKey": "5fe56854473a77c9", + "schemaVersion": 4, + "generatedAt": "2026-07-02T20:35:56.382884+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi325x-amds_08", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "resource-constrained", + "conformanceClass": "minimum-functional", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28619828789", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28619828789", + "createdAt": "2026-07-02T20:34:29Z", + "sha": "53f94262a5d63cf28ad931ad261268444092f667" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.27800178527832, + "p90": 74.83799755573273, + "p95": 80.91799914836884, + "p99": 91.27800166606903 + }, + "combine": { + "p50": 45.27899995446205, + "p90": 49.83900114893913, + "p95": 67.35800206661224, + "p99": 198.31399619579315 + }, + "roundtrip": { + "p50": 130.4370015859604, + "p90": 141.8360024690628, + "p95": 150.3559947013855, + "p99": 190.03500044345856 + }, + "isolatedSum": { + "p50": 114.55700173974037, + "p90": 124.67699870467186, + "p95": 148.27600121498108, + "p99": 289.5919978618622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.67799907922745, + "p90": 72.71800190210342, + "p95": 74.1180032491684, + "p99": 87.79799938201904 + }, + "combine": { + "p50": 45.639000833034515, + "p90": 49.9190017580986, + "p95": 65.71800261735916, + "p99": 70.43799757957458 + }, + "roundtrip": { + "p50": 130.99700212478638, + "p90": 142.316997051239, + "p95": 148.1959968805313, + "p99": 158.15599262714386 + }, + "isolatedSum": { + "p50": 114.31699991226196, + "p90": 122.63700366020203, + "p95": 139.83600586652756, + "p99": 158.23599696159363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + 
"globalTokens": 32, + "dispatch": { + "p50": 69.19799745082855, + "p90": 73.35799932479858, + "p95": 75.31800121068954, + "p99": 89.99799937009811 + }, + "combine": { + "p50": 45.639000833034515, + "p90": 48.83899912238121, + "p95": 51.87800154089928, + "p99": 68.7590017914772 + }, + "roundtrip": { + "p50": 131.67600333690643, + "p90": 140.59600234031677, + "p95": 146.67600393295288, + "p99": 160.47599911689758 + }, + "isolatedSum": { + "p50": 114.83699828386307, + "p90": 122.1969984471798, + "p95": 127.19600275158882, + "p99": 158.75700116157532 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.07800042629242, + "p90": 74.15799796581268, + "p95": 76.47799700498581, + "p99": 89.75700289011002 + }, + "combine": { + "p50": 46.87900096178055, + "p90": 50.99799856543541, + "p95": 65.6379982829094, + "p99": 70.99799811840057 + }, + "roundtrip": { + "p50": 132.99700617790222, + "p90": 142.3559933900833, + "p95": 147.07599580287933, + "p99": 159.23599898815155 + }, + "isolatedSum": { + "p50": 116.95700138807297, + "p90": 125.15599653124809, + "p95": 142.1159952878952, + "p99": 160.7550010085106 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.19800353050232, + "p90": 77.15799659490585, + "p95": 82.19800144433975, + "p99": 95.79800069332123 + }, + "combine": { + "p50": 48.319000750780106, + "p90": 52.3190014064312, + "p95": 65.0390014052391, + "p99": 75.1579999923706 + }, + "roundtrip": { + "p50": 135.31599938869476, + "p90": 143.59599351882935, + "p95": 
148.11600744724274, + "p99": 161.31600737571716 + }, + "isolatedSum": { + "p50": 120.51700428128242, + "p90": 129.47699800133705, + "p95": 147.23700284957886, + "p99": 170.95600068569183 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.19800281524658, + "p90": 82.95799791812897, + "p95": 86.15799993276596, + "p99": 96.19800001382828 + }, + "combine": { + "p50": 53.47900092601776, + "p90": 57.3979988694191, + "p95": 65.27800112962723, + "p99": 76.43800228834152 + }, + "roundtrip": { + "p50": 145.07600665092468, + "p90": 154.71599996089935, + "p95": 160.55600345134735, + "p99": 168.83499920368195 + }, + "isolatedSum": { + "p50": 132.67700374126434, + "p90": 140.35599678754807, + "p95": 151.4360010623932, + "p99": 172.6360023021698 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.19800138473511, + "p90": 97.31800109148026, + "p95": 98.99699687957764, + "p99": 105.4769977927208 + }, + "combine": { + "p50": 60.75900048017502, + "p90": 65.0779977440834, + "p95": 68.03900003433228, + "p99": 83.23799818754196 + }, + "roundtrip": { + "p50": 169.59600150585175, + "p90": 178.31499874591827, + "p95": 181.39499425888062, + "p99": 187.27600574493408 + }, + "isolatedSum": { + "p50": 153.95700186491013, + "p90": 162.39599883556366, + "p95": 167.0359969139099, + "p99": 188.71499598026276 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 6, + 
"correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.35699665546417, + "p90": 118.9970001578331, + "p95": 120.23700028657913, + "p99": 122.83699959516525 + }, + "combine": { + "p50": 83.15800130367279, + "p90": 87.11700141429901, + "p95": 91.71800315380096, + "p99": 105.4769977927208 + }, + "roundtrip": { + "p50": 211.3949954509735, + "p90": 219.6750044822693, + "p95": 223.79399836063385, + "p99": 231.75400495529175 + }, + "isolatedSum": { + "p50": 198.51499795913696, + "p90": 206.1140015721321, + "p95": 211.9550034403801, + "p99": 228.31399738788605 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d0ca6ea4", + "identity": "mi325x|mori|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "mi325x_223b0322", + "comparisonKey": "c27fc8a33c340c34", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:58:26.848903+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + 
"resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.43800163269043, + "p90": 76.59800350666046, + "p95": 78.43799889087677, + "p99": 95.79800069332123 + }, + "combine": { + "p50": 46.358998864889145, + "p90": 50.83800107240677, + "p95": 53.598999977111816, + "p99": 71.27799838781357 + }, + "roundtrip": { + "p50": 137.67699897289276, + "p90": 145.5170065164566, + "p95": 150.0760018825531, + "p99": 167.67600178718567 + }, + "isolatedSum": { + "p50": 118.79700049757957, + "p90": 127.43600457906723, + "p95": 132.0369988679886, + "p99": 167.0759990811348 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.7979987859726, + "p90": 77.75799930095673, + "p95": 80.79800009727478, + "p99": 94.19800341129303 + }, + "combine": { + 
"p50": 47.03899845480919, + "p90": 51.39800161123276, + "p95": 53.877998143434525, + "p99": 72.87800312042236 + }, + "roundtrip": { + "p50": 137.95599341392517, + "p90": 144.2359983921051, + "p95": 148.55599403381348, + "p99": 167.95599460601807 + }, + "isolatedSum": { + "p50": 119.83699724078178, + "p90": 129.15600091218948, + "p95": 134.6759982407093, + "p99": 167.0760065317154 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.3179971575737, + "p90": 78.27799767255783, + "p95": 80.91799914836884, + "p99": 96.19700163602829 + }, + "combine": { + "p50": 46.39900103211403, + "p90": 50.79900100827217, + "p95": 53.677998483181, + "p99": 71.35900110006332 + }, + "roundtrip": { + "p50": 138.6760026216507, + "p90": 146.51599526405334, + "p95": 150.5959928035736, + "p99": 167.67500340938568 + }, + "isolatedSum": { + "p50": 119.71699818968773, + "p90": 129.07699868083, + "p95": 134.59599763154984, + "p99": 167.5560027360916 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.67799633741379, + "p90": 79.55799996852875, + "p95": 81.07800036668777, + "p99": 95.79800069332123 + }, + "combine": { + "p50": 45.99900171160698, + "p90": 49.71799999475479, + "p95": 51.67799815535545, + "p99": 74.99799877405167 + }, + "roundtrip": { + "p50": 140.7569944858551, + "p90": 147.1959948539734, + "p95": 151.27600729465485, + "p99": 166.43600165843964 + }, + "isolatedSum": { + "p50": 120.67699804902077, + "p90": 129.27599996328354, + "p95": 132.75599852204323, + "p99": 
170.7959994673729 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 79.51799780130386, + "p90": 83.35799723863602, + "p95": 85.43799817562103, + "p99": 100.75800120830536 + }, + "combine": { + "p50": 50.1989983022213, + "p90": 53.71899902820587, + "p95": 56.55800178647041, + "p99": 73.79800081253052 + }, + "roundtrip": { + "p50": 143.35699379444122, + "p90": 150.71700513362885, + "p95": 154.8359990119934, + "p99": 171.875 + }, + "isolatedSum": { + "p50": 129.71699610352516, + "p90": 137.0769962668419, + "p95": 141.99599996209145, + "p99": 174.55600202083588 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.03700178861618, + "p90": 96.83799743652344, + "p95": 98.51700067520142, + "p99": 105.35799711942673 + }, + "combine": { + "p50": 55.79899996519089, + "p90": 58.79800021648407, + "p95": 60.159001499414444, + "p99": 79.1580006480217 + }, + "roundtrip": { + "p50": 165.31500220298767, + "p90": 172.95600473880768, + "p95": 177.07599699497223, + "p99": 191.35500490665436 + }, + "isolatedSum": { + "p50": 148.83600175380707, + "p90": 155.6359976530075, + "p95": 158.67600217461586, + "p99": 184.51599776744843 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.31700193881989, + "p90": 
118.35700273513794, + "p95": 119.71700191497803, + "p99": 122.63599783182144 + }, + "combine": { + "p50": 71.9980001449585, + "p90": 74.99799877405167, + "p95": 76.2379989027977, + "p99": 94.15800124406815 + }, + "roundtrip": { + "p50": 204.2749971151352, + "p90": 211.435005068779, + "p95": 213.8739973306656, + "p99": 220.23500502109528 + }, + "isolatedSum": { + "p50": 187.31500208377838, + "p90": 193.3550015091896, + "p95": 195.95500081777573, + "p99": 216.7939990758896 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-d7ddc8ed", + "identity": "mi325x|mori|n-a|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||690e78a184bfe92", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "8657455618008a94", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:00:15.015169+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + 
"conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "690e78a184bfe92", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.99799674749374, + "p90": 89.95799720287323, + "p95": 92.63800084590912, + "p99": 100.71799904108047 + }, + "combine": { + "p50": 46.31900042295456, + "p90": 52.039001137018204, + "p95": 60.23800000548363, + "p99": 87.15800195932388 + }, + "roundtrip": { + "p50": 138.99700343608856, + "p90": 149.75599944591522, + "p95": 156.55599534511566, + "p99": 179.87599968910217 + }, + "isolatedSum": { + "p50": 120.3169971704483, + "p90": 141.99699833989143, + "p95": 152.87600085139275, + "p99": 187.87600100040436 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 6, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 72.39899784326553, + "p90": 77.35799998044968, + "p95": 79.39799875020981, + "p99": 96.1579978466034 + }, + "combine": { + "p50": 46.358998864889145, + "p90": 50.99799856543541, + "p95": 53.27900126576424, + "p99": 72.63799756765366 + }, + "roundtrip": { + "p50": 140.11600613594055, + "p90": 
148.59600365161896, + "p95": 153.5159945487976, + "p99": 172.43599891662598 + }, + "isolatedSum": { + "p50": 118.75799670815468, + "p90": 128.3559985458851, + "p95": 132.67700001597404, + "p99": 168.79599541425705 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 12, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.99899852275848, + "p90": 77.87799835205078, + "p95": 80.83800226449966, + "p99": 92.31799840927124 + }, + "combine": { + "p50": 45.83900049328804, + "p90": 50.11799931526184, + "p95": 54.239001125097275, + "p99": 72.39799946546555 + }, + "roundtrip": { + "p50": 139.7169977426529, + "p90": 148.157000541687, + "p95": 152.11600065231323, + "p99": 167.39599406719208 + }, + "isolatedSum": { + "p50": 117.83799901604652, + "p90": 127.99599766731262, + "p95": 135.07700338959694, + "p99": 164.7159978747368 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 26, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.67799973487854, + "p90": 78.03799957036972, + "p95": 79.87800240516663, + "p99": 93.15799921751022 + }, + "combine": { + "p50": 46.63899913430214, + "p90": 74.1180032491684, + "p95": 89.35700356960297, + "p99": 128.63700091838837 + }, + "roundtrip": { + "p50": 139.27599787712097, + "p90": 149.43699538707733, + "p95": 152.7159959077835, + "p99": 165.99500179290771 + }, + "isolatedSum": { + "p50": 119.31699886918068, + "p90": 152.15600281953812, + "p95": 169.2350059747696, + "p99": 221.7950001358986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 49, + 
"stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.79800081253052, + "p90": 79.27799969911575, + "p95": 82.35800266265869, + "p99": 93.99700164794922 + }, + "combine": { + "p50": 46.438999474048615, + "p90": 50.07899925112724, + "p95": 53.27799916267395, + "p99": 71.15799933671951 + }, + "roundtrip": { + "p50": 139.31700587272644, + "p90": 148.716002702713, + "p95": 152.31700241565704, + "p99": 164.63600099086761 + }, + "isolatedSum": { + "p50": 120.23700028657913, + "p90": 129.356998950243, + "p95": 135.63600182533264, + "p99": 165.15500098466873 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 94, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 75.51799714565277, + "p90": 80.27800172567368, + "p95": 82.47800171375275, + "p99": 94.55700218677521 + }, + "combine": { + "p50": 48.91800135374069, + "p90": 53.077999502420425, + "p95": 56.87899887561798, + "p99": 72.19800353050232 + }, + "roundtrip": { + "p50": 143.1169956922531, + "p90": 153.43600511550903, + "p95": 158.07600319385529, + "p99": 169.4359928369522 + }, + "isolatedSum": { + "p50": 124.43599849939346, + "p90": 133.3560012280941, + "p95": 139.35700058937073, + "p99": 166.75500571727753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 186, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 87.59800344705582, + "p90": 91.31799638271332, + "p95": 93.59800070524216, + "p99": 101.15800052881241 + }, + "combine": { + "p50": 54.19899895787239, + "p90": 58.31800028681755, + "p95": 
61.03900074958801, + "p99": 77.11800187826157 + }, + "roundtrip": { + "p50": 156.1570018529892, + "p90": 165.7560020685196, + "p95": 170.99599540233612, + "p99": 179.39600348472595 + }, + "isolatedSum": { + "p50": 141.7970024049282, + "p90": 149.63599666953087, + "p95": 154.63700145483017, + "p99": 178.27600240707397 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 358, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 95.71799635887146, + "p90": 100.71799904108047, + "p95": 101.79799795150757, + "p99": 104.43700104951859 + }, + "combine": { + "p50": 67.4780011177063, + "p90": 71.51799649000168, + "p95": 73.11899960041046, + "p99": 90.35799652338028 + }, + "roundtrip": { + "p50": 178.43599617481232, + "p90": 187.07500398159027, + "p95": 190.79500436782837, + "p99": 198.75499606132507 + }, + "isolatedSum": { + "p50": 163.19599747657776, + "p90": 172.23599553108215, + "p95": 174.91699755191803, + "p99": 194.79499757289886 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 699, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 114.59700018167496, + "p90": 117.95700341463089, + "p95": 119.07699704170227, + "p99": 121.95699661970139 + }, + "combine": { + "p50": 87.35799789428711, + "p90": 91.39800071716309, + "p95": 93.31800043582916, + "p99": 109.47699844837189 + }, + "roundtrip": { + "p50": 221.39400243759155, + "p90": 228.91399264335632, + "p95": 232.6740026473999, + "p99": 239.67400193214417 + }, + "isolatedSum": { + "p50": 201.95499807596207, + "p90": 209.35500413179398, + "p95": 212.39499747753143, + "p99": 231.43399506807327 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 1385, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 149.11699295043945, + "p90": 152.27599442005157, + "p95": 153.5159945487976, + "p99": 156.59600496292114 + }, + "combine": { + "p50": 131.07700645923615, + "p90": 135.5160027742386, + "p95": 137.47699558734894, + "p99": 153.35600078105927 + }, + "roundtrip": { + "p50": 301.07301473617554, + "p90": 307.43199586868286, + "p95": 310.232013463974, + "p99": 314.35200572013855 + }, + "isolatedSum": { + "p50": 280.1939994096756, + "p90": 287.79199719429016, + "p95": 290.99299013614655, + "p99": 309.9520057439804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 2772, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-fae16dc5", + "identity": "mi325x|mori|n-a|5120|8|160|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||b63fa31849a5bba", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "2e1d3be55be13114", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:00:57.633752+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b63fa31849a5bba", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.67800110578537, + "p90": 74.99799877405167, + "p95": 78.47800105810165, + "p99": 91.91799908876419 + }, + "combine": { + "p50": 45.639000833034515, + "p90": 50.35800114274025, + "p95": 62.87799775600433, + "p99": 267.9530084133148 + }, + "roundtrip": { + "p50": 132.2370022535324, + "p90": 144.67699825763702, + "p95": 150.5959928035736, + "p99": 187.5550001859665 + }, + "isolatedSum": { + "p50": 115.31700193881989, + "p90": 125.35599991679192, + "p95": 141.355998814106, + "p99": 359.871007502079 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 68.83899867534637, + "p90": 73.43800365924835, + "p95": 75.51900297403336, + "p99": 90.91799706220627 + }, + "combine": { + "p50": 45.517999678850174, + "p90": 49.47900027036667, + "p95": 54.51799929141998, + "p99": 71.1589977145195 + }, + "roundtrip": { + "p50": 131.91600143909454, + "p90": 143.51600408554077, + "p95": 148.9569991827011, + "p99": 156.07599914073944 + }, + "isolatedSum": { + "p50": 114.35699835419655, + "p90": 122.91700392961502, + "p95": 130.03700226545334, + "p99": 162.07699477672577 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 69.83800232410431, + "p90": 73.95800203084946, + "p95": 76.11899822950363, + "p99": 89.07800167798996 + }, + "combine": { + "p50": 45.798998326063156, + "p90": 49.99899864196777, + "p95": 54.239001125097275, + "p99": 70.23800164461136 + }, + "roundtrip": { + "p50": 132.39699602127075, + "p90": 142.03600585460663, + "p95": 147.3959982395172, + "p99": 158.19600224494934 + }, + "isolatedSum": { + "p50": 115.63700065016747, + "p90": 123.95700067281723, + "p95": 130.3579993546009, + "p99": 159.31600332260132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 25, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 69.91799920797348, + "p90": 74.43799823522568, + "p95": 76.63799822330475, + "p99": 89.99799937009811 + }, + "combine": { + "p50": 45.91900110244751, + "p90": 50.07899925112724, + "p95": 53.35799977183342, + "p99": 69.79800015687943 + }, + "roundtrip": { + "p50": 133.3560049533844, + "p90": 
142.39700138568878, + "p95": 149.7970074415207, + "p99": 162.2759997844696 + }, + "isolatedSum": { + "p50": 115.83700031042099, + "p90": 124.51699748635292, + "p95": 129.99599799513817, + "p99": 159.79599952697754 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 50, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 71.87800109386444, + "p90": 76.11799985170364, + "p95": 78.51800322532654, + "p99": 93.55700016021729 + }, + "combine": { + "p50": 46.79799824953079, + "p90": 50.23900046944618, + "p95": 52.63800173997879, + "p99": 69.5590004324913 + }, + "roundtrip": { + "p50": 135.27600467205048, + "p90": 144.63700354099274, + "p95": 150.15600621700287, + "p99": 163.83600234985352 + }, + "isolatedSum": { + "p50": 118.67599934339523, + "p90": 126.35700032114983, + "p95": 131.15600496530533, + "p99": 163.1160005927086 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 93, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 74.95799660682678, + "p90": 79.19900119304657, + "p95": 81.03799819946289, + "p99": 94.59800273180008 + }, + "combine": { + "p50": 50.51799863576889, + "p90": 53.679000586271286, + "p95": 55.67900091409683, + "p99": 73.47799837589264 + }, + "roundtrip": { + "p50": 139.7169977426529, + "p90": 151.3960063457489, + "p95": 156.67599439620972, + "p99": 170.03600299358368 + }, + "isolatedSum": { + "p50": 125.47599524259567, + "p90": 132.87800177931786, + "p95": 136.71699911355972, + "p99": 168.07600110769272 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + 
"recvTokensMax": 179, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 82.0780023932457, + "p90": 86.75800263881683, + "p95": 88.7179970741272, + "p99": 98.07799756526947 + }, + "combine": { + "p50": 55.597998201847076, + "p90": 58.71900171041489, + "p95": 61.07800081372261, + "p99": 77.91800051927567 + }, + "roundtrip": { + "p50": 150.91699361801147, + "p90": 160.11600196361542, + "p95": 165.7560020685196, + "p99": 180.27600646018982 + }, + "isolatedSum": { + "p50": 137.67600059509277, + "p90": 145.47700434923172, + "p95": 149.7959978878498, + "p99": 175.99599808454514 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 355, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 91.59799665212631, + "p90": 95.23800015449524, + "p95": 96.75800055265427, + "p99": 99.83699768781662 + }, + "combine": { + "p50": 66.75799936056137, + "p90": 70.3980028629303, + "p95": 72.63799756765366, + "p99": 89.47800099849701 + }, + "roundtrip": { + "p50": 174.91599917411804, + "p90": 183.75499546527863, + "p95": 187.31600046157837, + "p99": 194.4350004196167 + }, + "isolatedSum": { + "p50": 158.35599601268768, + "p90": 165.63600301742554, + "p95": 169.39599812030792, + "p99": 189.31499868631363 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 699, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 124.83599781990051, + "p90": 128.8370043039322, + "p95": 129.95700538158417, + "p99": 132.0360004901886 + }, + "combine": { + "p50": 91.19799733161926, + "p90": 
94.95700150728226, + "p95": 96.59799933433533, + "p99": 114.7570013999939 + }, + "roundtrip": { + "p50": 230.95400631427765, + "p90": 238.47399652004242, + "p95": 241.11400544643402, + "p99": 246.11300230026245 + }, + "isolatedSum": { + "p50": 216.03399515151978, + "p90": 223.79400581121445, + "p95": 226.5550047159195, + "p99": 246.7930018901825 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 1387, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 167.3160046339035, + "p90": 170.67599296569824, + "p95": 171.71600461006165, + "p99": 174.07600581645966 + }, + "combine": { + "p50": 145.43700218200684, + "p90": 148.63599836826324, + "p95": 150.3559947013855, + "p99": 168.9160019159317 + }, + "roundtrip": { + "p50": 326.31200551986694, + "p90": 333.71201157569885, + "p95": 337.79099583625793, + "p99": 344.5119857788086 + }, + "isolatedSum": { + "p50": 312.75300681591034, + "p90": 319.3119913339615, + "p95": 322.07199931144714, + "p99": 342.99200773239136 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 2762, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-34725c99", + "identity": "mi325x|mori|n-a|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||2768bdde6bf7e7f", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "94b8fb82574fc308", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:01:40.863263+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + 
"comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2768bdde6bf7e7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 75.67799836397171, + "p90": 81.83799684047699, + "p95": 86.19700372219086, + "p99": 101.79699957370758 + }, + "combine": { + "p50": 45.19899934530258, + "p90": 50.478000193834305, + "p95": 55.47900125384331, + "p99": 92.63800084590912 + }, + "roundtrip": { + "p50": 139.07699286937714, + "p90": 148.4770029783249, + "p95": 159.3959927558899, + "p99": 195.67500054836273 + }, + "isolatedSum": 
{ + "p50": 120.87699770927429, + "p90": 132.3159970343113, + "p95": 141.67600497603416, + "p99": 194.4350004196167 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 75.91799646615982, + "p90": 81.07800036668777, + "p95": 84.15800333023071, + "p99": 98.55800122022629 + }, + "combine": { + "p50": 44.99899968504906, + "p90": 50.158001482486725, + "p95": 54.31799963116646, + "p99": 73.0379968881607 + }, + "roundtrip": { + "p50": 140.95699787139893, + "p90": 148.39699864387512, + "p95": 153.39599549770355, + "p99": 170.23499310016632 + }, + "isolatedSum": { + "p50": 120.91699615120888, + "p90": 131.2360018491745, + "p95": 138.47600296139717, + "p99": 171.595998108387 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 76.39800012111664, + "p90": 80.91799914836884, + "p95": 83.51799845695496, + "p99": 95.0779989361763 + }, + "combine": { + "p50": 44.63899880647659, + "p90": 50.599001348018646, + "p95": 55.63899874687195, + "p99": 72.91799783706665 + }, + "roundtrip": { + "p50": 140.516996383667, + "p90": 147.9170024394989, + "p95": 150.9159952402115, + "p99": 170.43599486351013 + }, + "isolatedSum": { + "p50": 121.03699892759323, + "p90": 131.51700049638748, + "p95": 139.1569972038269, + "p99": 167.99599677324295 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 76.47799700498581, + "p90": 81.277996301651, + "p95": 84.51800048351288, + "p99": 96.11699730157852 + }, + "combine": { + "p50": 45.35900056362152, + "p90": 50.317998975515366, + "p95": 56.519001722335815, + "p99": 72.11799919605255 + }, + "roundtrip": { + "p50": 141.3159966468811, + "p90": 149.47600662708282, + "p95": 153.11600267887115, + "p99": 167.6349937915802 + }, + "isolatedSum": { + "p50": 121.83699756860733, + "p90": 131.59599527716637, + "p95": 141.0370022058487, + "p99": 168.23499649763107 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 78.47800105810165, + "p90": 83.27800035476685, + "p95": 86.0380008816719, + "p99": 98.23799878358841 + }, + "combine": { + "p50": 46.87900096178055, + "p90": 50.599001348018646, + "p95": 53.839001804590225, + "p99": 71.59800082445145 + }, + "roundtrip": { + "p50": 144.15700733661652, + "p90": 150.23599565029144, + "p95": 153.91600131988525, + "p99": 169.59500312805176 + }, + "isolatedSum": { + "p50": 125.3570020198822, + "p90": 133.8770017027855, + "p95": 139.87700268626213, + "p99": 169.83599960803986 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 80.87799698114395, + "p90": 85.35800129175186, + "p95": 87.75799721479416, + "p99": 100.23699700832367 + }, + "combine": { + "p50": 50.63899978995323, + "p90": 54.239001125097275, + "p95": 56.11899867653847, + "p99": 73.35799932479858 + }, + "roundtrip": { + "p50": 149.277001619339, + "p90": 156.07599914073944, + 
"p95": 161.39599680900574, + "p99": 174.63600635528564 + }, + "isolatedSum": { + "p50": 131.51699677109718, + "p90": 139.59700241684914, + "p95": 143.87699589133263, + "p99": 173.59499633312225 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 90.27799963951111, + "p90": 93.75700354576111, + "p95": 95.99699825048447, + "p99": 108.27700048685074 + }, + "combine": { + "p50": 56.63900077342987, + "p90": 60.51899865269661, + "p95": 62.99900263547897, + "p99": 80.11800050735474 + }, + "roundtrip": { + "p50": 164.59600627422333, + "p90": 170.63499987125397, + "p95": 174.35599863529205, + "p99": 188.8349950313568 + }, + "isolatedSum": { + "p50": 146.91700041294098, + "p90": 154.27600219845772, + "p95": 158.99600088596344, + "p99": 188.39500099420547 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 104.99700158834457, + "p90": 108.03800076246262, + "p95": 109.1570034623146, + "p99": 112.03700304031372 + }, + "combine": { + "p50": 71.79900258779526, + "p90": 75.19900053739548, + "p95": 77.35799998044968, + "p99": 95.3579992055893 + }, + "roundtrip": { + "p50": 198.1550008058548, + "p90": 203.39499413967133, + "p95": 205.39499819278717, + "p99": 209.47499573230743 + }, + "isolatedSum": { + "p50": 176.79600417613983, + "p90": 183.2370012998581, + "p95": 186.51500344276428, + "p99": 207.39500224590302 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + 
"stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 126.31699442863464, + "p90": 129.47699427604675, + "p95": 130.75600564479828, + "p99": 132.99700617790222 + }, + "combine": { + "p50": 108.15799981355667, + "p90": 111.79699748754501, + "p95": 113.83700370788574, + "p99": 131.59699738025665 + }, + "roundtrip": { + "p50": 257.03299045562744, + "p90": 262.1130049228668, + "p95": 263.63399624824524, + "p99": 266.83399081230164 + }, + "isolatedSum": { + "p50": 234.47499424219131, + "p90": 241.27399176359177, + "p95": 244.59300935268402, + "p99": 264.5940035581589 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 187.71600723266602, + "p90": 192.43499636650085, + "p95": 194.31500136852264, + "p99": 198.03500175476074 + }, + "combine": { + "p50": 173.83599281311035, + "p90": 177.27600038051605, + "p95": 179.5549988746643, + "p99": 195.67599892616272 + }, + "roundtrip": { + "p50": 380.71098923683167, + "p90": 386.83000206947327, + "p95": 389.1110122203827, + "p99": 397.1099853515625 + }, + "isolatedSum": { + "p50": 361.55200004577637, + "p90": 369.7109967470169, + "p95": 373.87000024318695, + "p99": 393.71100068092346 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a02349f0", + "identity": "mi325x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||2768bdde6bf7e7f", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": 
"774a3d46c852299d", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:58:05.954251+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2768bdde6bf7e7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 72.35799729824066, + "p90": 78.51800322532654, + 
"p95": 81.39800280332565, + "p99": 92.71799772977829 + }, + "combine": { + "p50": 44.71899941563606, + "p90": 48.719000071287155, + "p95": 57.43800103664398, + "p99": 81.35800063610077 + }, + "roundtrip": { + "p50": 136.43699884414673, + "p90": 147.23600447177887, + "p95": 157.35599398612976, + "p99": 182.91600048542023 + }, + "isolatedSum": { + "p50": 117.07699671387672, + "p90": 127.2370032966137, + "p95": 138.83600383996964, + "p99": 174.07599836587906 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 71.83899730443954, + "p90": 76.79799944162369, + "p95": 78.99799942970276, + "p99": 91.59799665212631 + }, + "combine": { + "p50": 44.71899941563606, + "p90": 48.399001359939575, + "p95": 52.91900038719177, + "p99": 68.39799880981445 + }, + "roundtrip": { + "p50": 136.63700222969055, + "p90": 147.07699418067932, + "p95": 152.4360030889511, + "p99": 163.99599611759186 + }, + "isolatedSum": { + "p50": 116.55799672007561, + "p90": 125.19700080156326, + "p95": 131.91699981689453, + "p99": 159.99599546194077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 72.95800000429153, + "p90": 77.79800146818161, + "p95": 79.59800213575363, + "p99": 91.3579985499382 + }, + "combine": { + "p50": 44.99899968504906, + "p90": 48.79799857735634, + "p95": 52.15800181031227, + "p99": 69.23799961805344 + }, + "roundtrip": { + "p50": 137.11699843406677, + "p90": 146.67600393295288, + "p95": 149.67599511146545, + "p99": 162.19599545001984 + }, + "isolatedSum": { + "p50": 117.95699968934059, + 
"p90": 126.59600004553795, + "p95": 131.7560039460659, + "p99": 160.59599816799164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 73.67800176143646, + "p90": 78.07900011539459, + "p95": 80.51799982786179, + "p99": 93.55700016021729 + }, + "combine": { + "p50": 45.159000903367996, + "p90": 49.518998712301254, + "p95": 52.07899957895279, + "p99": 68.63799691200256 + }, + "roundtrip": { + "p50": 137.8760039806366, + "p90": 146.55600488185883, + "p95": 151.636004447937, + "p99": 165.39600491523743 + }, + "isolatedSum": { + "p50": 118.83700266480446, + "p90": 127.59799882769585, + "p95": 132.59699940681458, + "p99": 162.19499707221985 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 75.11799782514572, + "p90": 79.83800023794174, + "p95": 82.55799859762192, + "p99": 95.0779989361763 + }, + "combine": { + "p50": 46.55899852514267, + "p90": 50.599001348018646, + "p95": 52.55899950861931, + "p99": 71.23900204896927 + }, + "roundtrip": { + "p50": 141.43599569797516, + "p90": 150.51600337028503, + "p95": 155.07599711418152, + "p99": 167.3559993505478 + }, + "isolatedSum": { + "p50": 121.67699635028839, + "p90": 130.4370015859604, + "p95": 135.11699810624123, + "p99": 166.31700098514557 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 
256, + "dispatch": { + "p50": 80.7579979300499, + "p90": 84.91799980401993, + "p95": 86.75800263881683, + "p99": 97.67799824476242 + }, + "combine": { + "p50": 53.71899902820587, + "p90": 56.99799954891205, + "p95": 59.599000960588455, + "p99": 77.0379975438118 + }, + "roundtrip": { + "p50": 149.75599944591522, + "p90": 159.51600670814514, + "p95": 161.63599491119385, + "p99": 173.9560067653656 + }, + "isolatedSum": { + "p50": 134.47699695825577, + "p90": 141.91599935293198, + "p95": 146.3570035994053, + "p99": 174.71599578857422 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 94.71800178289413, + "p90": 98.55800122022629, + "p95": 100.55699944496155, + "p99": 105.55700212717056 + }, + "combine": { + "p50": 60.55799871683121, + "p90": 64.51799720525742, + "p95": 65.99900126457214, + "p99": 83.83800089359283 + }, + "roundtrip": { + "p50": 171.75599932670593, + "p90": 179.95500564575195, + "p95": 183.35500359535217, + "p99": 192.8749978542328 + }, + "isolatedSum": { + "p50": 155.27600049972534, + "p90": 163.0759984254837, + "p95": 166.5560007095337, + "p99": 189.3950030207634 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 117.07700043916702, + "p90": 120.3169971704483, + "p95": 121.87699973583221, + "p99": 125.55700540542603 + }, + "combine": { + "p50": 79.59800213575363, + "p90": 83.07799696922302, + "p95": 85.11699736118317, + "p99": 102.83800214529037 + }, + "roundtrip": { + "p50": 212.27499842643738, + "p90": 220.7150012254715, + "p95": 
223.51500391960144, + "p99": 228.9540022611618 + }, + "isolatedSum": { + "p50": 196.67500257492065, + "p90": 203.39499413967133, + "p95": 206.99399709701538, + "p99": 228.3950075507164 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 149.63600039482117, + "p90": 152.7159959077835, + "p95": 153.75599265098572, + "p99": 156.9560021162033 + }, + "combine": { + "p50": 119.43700164556503, + "p90": 122.87700176239014, + "p95": 125.19699335098267, + "p99": 143.55599880218506 + }, + "roundtrip": { + "p50": 286.9119942188263, + "p90": 294.43299770355225, + "p95": 297.2320020198822, + "p99": 302.95300483703613 + }, + "isolatedSum": { + "p50": 269.0730020403862, + "p90": 275.59299767017365, + "p95": 278.9529860019684, + "p99": 300.51200091838837 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 207.1550041437149, + "p90": 210.15499532222748, + "p95": 211.07499301433563, + "p99": 212.4750018119812 + }, + "combine": { + "p50": 193.47499310970306, + "p90": 196.95599377155304, + "p95": 199.39500093460083, + "p99": 215.6739979982376 + }, + "roundtrip": { + "p50": 417.34999418258667, + "p90": 425.18898844718933, + "p95": 427.43000388145447, + "p99": 431.6290020942688 + }, + "isolatedSum": { + "p50": 400.62999725341797, + "p90": 407.1109890937805, + "p95": 410.46999394893646, + "p99": 428.1489998102188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + 
"recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1ab1fa0b", + "identity": "mi325x|mori|n-a|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||bd543a12adbc037", + "colorKey": "mi325x_e0d650cb", + "comparisonKey": "b36d713e83f788b5", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:59:31.927396+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "bd543a12adbc037", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": 
"28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 71.9589963555336, + "p90": 80.19799739122391, + "p95": 83.99800211191177, + "p99": 93.35800260305405 + }, + "combine": { + "p50": 46.23899981379509, + "p90": 52.47800052165985, + "p95": 57.83800035715103, + "p99": 86.27799898386002 + }, + "roundtrip": { + "p50": 135.35700738430023, + "p90": 146.47699892520905, + "p95": 151.7159938812256, + "p99": 188.2759928703308 + }, + "isolatedSum": { + "p50": 118.19799616932869, + "p90": 132.67599791288376, + "p95": 141.8360024690628, + "p99": 179.63600158691406 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 70.79800218343735, + "p90": 75.07800310850143, + "p95": 77.47799903154373, + "p99": 92.678003013134 + }, + "combine": { + "p50": 46.15899920463562, + "p90": 50.1989983022213, + "p95": 54.078999906778336, + "p99": 71.51799649000168 + }, + "roundtrip": { + "p50": 134.83700156211853, + "p90": 144.7959989309311, + "p95": 150.5569964647293, + "p99": 163.3560061454773 + }, + "isolatedSum": { + "p50": 116.95700138807297, + "p90": 125.27700141072273, + "p95": 131.55699893832207, + "p99": 164.19599950313568 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 71.47800177335739, + "p90": 75.83899796009064, + "p95": 77.59799808263779, + "p99": 92.27800369262695 + 
}, + "combine": { + "p50": 46.31900042295456, + "p90": 49.55900087952614, + "p95": 51.03899911046028, + "p99": 70.79800218343735 + }, + "roundtrip": { + "p50": 135.35700738430023, + "p90": 145.51599323749542, + "p95": 148.67599308490753, + "p99": 161.9960069656372 + }, + "isolatedSum": { + "p50": 117.79700219631195, + "p90": 125.39799883961678, + "p95": 128.63699719309807, + "p99": 163.0760058760643 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 26, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 72.27800041437149, + "p90": 76.39800012111664, + "p95": 78.99799942970276, + "p99": 91.91799908876419 + }, + "combine": { + "p50": 46.87900096178055, + "p90": 51.03899911046028, + "p95": 54.1980005800724, + "p99": 72.39799946546555 + }, + "roundtrip": { + "p50": 136.91699504852295, + "p90": 147.35600352287292, + "p95": 151.03599429130554, + "p99": 165.99500179290771 + }, + "isolatedSum": { + "p50": 119.15700137615204, + "p90": 127.43699923157692, + "p95": 133.19600000977516, + "p99": 164.31599855422974 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + "fanoutMean": 5.265625, + "recvTokensMax": 48, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 74.3589997291565, + "p90": 78.63900065422058, + "p95": 80.31799644231796, + "p99": 93.87800097465515 + }, + "combine": { + "p50": 48.558998852968216, + "p90": 52.43900045752525, + "p95": 55.55799975991249, + "p99": 73.63899797201157 + }, + "roundtrip": { + "p50": 142.15700328350067, + "p90": 151.9159972667694, + "p95": 156.87599778175354, + "p99": 173.07600378990173 + }, + "isolatedSum": { + "p50": 122.91799858212471, + "p90": 131.07800111174583, + "p95": 
135.87599620223045, + "p99": 167.51699894666672 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 91, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 81.11800253391266, + "p90": 85.71700006723404, + "p95": 88.03799748420715, + "p99": 98.59800338745117 + }, + "combine": { + "p50": 55.31900003552437, + "p90": 58.79899859428406, + "p95": 60.99899858236313, + "p99": 79.23799753189087 + }, + "roundtrip": { + "p50": 151.27600729465485, + "p90": 159.3559980392456, + "p95": 162.555992603302, + "p99": 180.75500428676605 + }, + "isolatedSum": { + "p50": 136.43700256943703, + "p90": 144.5159986615181, + "p95": 149.03699606657028, + "p99": 177.83600091934204 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 178, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 95.15800327062607, + "p90": 99.23700243234634, + "p95": 100.9569987654686, + "p99": 107.35800117254257 + }, + "combine": { + "p50": 62.55800276994705, + "p90": 66.3980022072792, + "p95": 67.83899664878845, + "p99": 85.31799912452698 + }, + "roundtrip": { + "p50": 175.11600255966187, + "p90": 181.6360056400299, + "p95": 184.35600399971008, + "p99": 189.51499462127686 + }, + "isolatedSum": { + "p50": 157.71600604057312, + "p90": 165.63500463962555, + "p95": 168.79599541425705, + "p99": 192.67600029706955 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 372, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + 
"p50": 117.55699664354324, + "p90": 120.47699838876724, + "p95": 122.15700000524521, + "p99": 128.95700335502625 + }, + "combine": { + "p50": 81.557996571064, + "p90": 84.95800197124481, + "p95": 86.67799830436707, + "p99": 104.83700037002563 + }, + "roundtrip": { + "p50": 214.6349996328354, + "p90": 222.23499417304993, + "p95": 224.4739979505539, + "p99": 227.83499956130981 + }, + "isolatedSum": { + "p50": 199.11499321460724, + "p90": 205.43500036001205, + "p95": 208.83499830961227, + "p99": 233.79400372505188 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 707, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 152.2359997034073, + "p90": 155.19599616527557, + "p95": 156.31599724292755, + "p99": 158.5559993982315 + }, + "combine": { + "p50": 122.27699905633926, + "p90": 126.35700404644012, + "p95": 129.3569952249527, + "p99": 145.3160047531128 + }, + "roundtrip": { + "p50": 292.7519977092743, + "p90": 300.3929853439331, + "p95": 302.95199155807495, + "p99": 308.912992477417 + }, + "isolatedSum": { + "p50": 274.51299875974655, + "p90": 281.5530002117157, + "p95": 285.67299246788025, + "p99": 303.8720041513443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 5.2646484375, + "recvTokensMax": 1391, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 208.71399343013763, + "p90": 211.79400384426117, + "p95": 213.11399340629578, + "p99": 215.59399366378784 + }, + "combine": { + "p50": 196.7950016260147, + "p90": 201.15500688552856, + "p95": 203.79500091075897, + "p99": 218.75399351119995 + }, + "roundtrip": { + "p50": 421.95001244544983, + "p90": 429.2300045490265, + "p95": 
431.8690001964569, + "p99": 436.82900071144104 + }, + "isolatedSum": { + "p50": 405.50899505615234, + "p90": 412.94901072978973, + "p95": 416.90899431705475, + "p99": 434.3479871749878 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 2754, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-6895d524", + "identity": "mi325x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|2768bdde6bf7e7f", + "colorKey": "mi325x_eeb41dae", + "comparisonKey": "b8ed4eb752c9b4e3", + "schemaVersion": 4, + "generatedAt": "2026-07-02T20:38:41.271614+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi325x-amds_03", + "sku": "mi325x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "resource-constrained", + "conformanceClass": "minimum-functional", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2768bdde6bf7e7f", + 
"workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28619974616", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28619974616", + "createdAt": "2026-07-02T20:37:05Z", + "sha": "53f94262a5d63cf28ad931ad261268444092f667" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 69.59900259971619, + "p90": 75.27799904346466, + "p95": 82.19800144433975, + "p99": 92.71799772977829 + }, + "combine": { + "p50": 44.99800130724907, + "p90": 49.038998782634735, + "p95": 52.278999239206314, + "p99": 80.4779976606369 + }, + "roundtrip": { + "p50": 131.03699684143066, + "p90": 144.07700300216675, + "p95": 153.7570059299469, + "p99": 182.71499872207642 + }, + "isolatedSum": { + "p50": 114.59700390696526, + "p90": 124.3169978260994, + "p95": 134.47700068354607, + "p99": 173.1959953904152 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.35799866914749, + "p90": 74.15899634361267, + "p95": 80.63799887895584, + "p99": 91.27800166606903 + }, + "combine": { + "p50": 45.47800123691559, + "p90": 49.518998712301254, + "p95": 51.559001207351685, + "p99": 70.75800001621246 + }, + "roundtrip": { + "p50": 132.0369988679886, + "p90": 142.75699853897095, + "p95": 153.87700498104095, + "p99": 164.83600437641144 + }, + "isolatedSum": { + "p50": 114.83599990606308, + "p90": 123.67799505591393, + "p95": 132.19700008630753, + "p99": 162.0360016822815 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 69.67800110578537, + "p90": 74.67799633741379, + "p95": 77.11800187826157, + "p99": 88.67800235748291 + }, + "combine": { + "p50": 45.6789992749691, + "p90": 48.87799918651581, + "p95": 51.11899971961975, + "p99": 69.75799798965454 + }, + "roundtrip": { + "p50": 131.87600672245026, + "p90": 142.43699610233307, + "p95": 149.87599849700928, + "p99": 160.55600345134735 + }, + "isolatedSum": { + "p50": 115.35700038075447, + "p90": 123.5559955239296, + "p95": 128.23700159788132, + "p99": 158.43600034713745 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.75800001621246, + "p90": 75.27799904346466, + "p95": 76.99800282716751, + "p99": 91.19799733161926 + }, + "combine": { + "p50": 46.71899974346161, + "p90": 50.55899918079376, + "p95": 52.79900133609772, + "p99": 70.55799663066864 + }, + "roundtrip": { + "p50": 133.31599533557892, + "p90": 142.75600016117096, + "p95": 149.5160013437271, + "p99": 159.95599329471588 + }, + "isolatedSum": { + "p50": 117.47699975967407, + "p90": 125.83699822425842, + "p95": 129.79700416326523, + "p99": 161.7559939622879 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 72.71800190210342, + "p90": 77.11800187826157, + "p95": 78.83799821138382, + "p99": 
93.03700178861618 + }, + "combine": { + "p50": 48.239000141620636, + "p90": 51.91899836063385, + "p95": 54.43799868226051, + "p99": 71.27799838781357 + }, + "roundtrip": { + "p50": 135.03700494766235, + "p90": 144.11599934101105, + "p95": 148.7569957971573, + "p99": 162.555992603302 + }, + "isolatedSum": { + "p50": 120.95700204372406, + "p90": 129.03700023889542, + "p95": 133.27599689364433, + "p99": 164.31500017642975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 78.43799889087677, + "p90": 82.31800049543381, + "p95": 83.79799872636795, + "p99": 96.79800271987915 + }, + "combine": { + "p50": 53.55900153517723, + "p90": 57.27899819612503, + "p95": 60.318998992443085, + "p99": 76.7190009355545 + }, + "roundtrip": { + "p50": 145.11699974536896, + "p90": 156.31599724292755, + "p95": 162.87599503993988, + "p99": 171.67599499225616 + }, + "isolatedSum": { + "p50": 131.997000426054, + "p90": 139.59699869155884, + "p95": 144.11699771881104, + "p99": 173.51700365543365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.83700042963028, + "p90": 97.59800136089325, + "p95": 99.11800175905228, + "p99": 102.27800160646439 + }, + "combine": { + "p50": 60.398999601602554, + "p90": 64.31800127029419, + "p95": 67.79800355434418, + "p99": 82.99800008535385 + }, + "roundtrip": { + "p50": 167.3949956893921, + "p90": 174.8349964618683, + "p95": 179.75500226020813, + "p99": 190.55600464344025 + }, + "isolatedSum": { + "p50": 154.23600003123283, + "p90": 
161.91600263118744, + "p95": 166.91600531339645, + "p99": 185.27600169181824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.03700166940689, + "p90": 118.63700300455093, + "p95": 119.63699758052826, + "p99": 121.117003262043 + }, + "combine": { + "p50": 81.43799751996994, + "p90": 85.037000477314, + "p95": 87.75699883699417, + "p99": 105.47800362110138 + }, + "roundtrip": { + "p50": 208.9949995279312, + "p90": 215.9940004348755, + "p95": 218.71499717235565, + "p99": 223.19500148296356 + }, + "isolatedSum": { + "p50": 196.47499918937683, + "p90": 203.67400348186493, + "p95": 207.39399641752243, + "p99": 226.59500688314438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 148.39699864387512, + "p90": 152.196004986763, + "p95": 153.2360017299652, + "p99": 154.79600429534912 + }, + "combine": { + "p50": 119.99700218439102, + "p90": 124.87699836492538, + "p95": 128.47599387168884, + "p99": 143.59700679779053 + }, + "roundtrip": { + "p50": 283.6729884147644, + "p90": 291.0729944705963, + "p95": 294.23201084136963, + "p99": 300.27300119400024 + }, + "isolatedSum": { + "p50": 268.39400082826614, + "p90": 277.0730033516884, + "p95": 281.71199560165405, + "p99": 298.39301109313965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 
512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.83500170707703, + "p90": 209.67499911785126, + "p95": 210.43500304222107, + "p99": 212.67400681972504 + }, + "combine": { + "p50": 194.75500285625458, + "p90": 199.07499849796295, + "p95": 201.99500024318695, + "p99": 218.67500245571136 + }, + "roundtrip": { + "p50": 416.06900095939636, + "p90": 423.30899834632874, + "p95": 425.83000659942627, + "p99": 431.0300052165985 + }, + "isolatedSum": { + "p50": 401.5900045633316, + "p90": 408.7499976158142, + "p95": 412.430003285408, + "p99": 431.3490092754364 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1dd885f", + "identity": "mi325x|mori|n-a|7168|8|256|fp8|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||2768bdde6bf7e7f", + "colorKey": "mi325x_223b0322", + "comparisonKey": "6e302a5f2b1d2512", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:58:49.059622+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · mori · fp8", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "fp8", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 
0.2632, + "configuredUnits": 80, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2768bdde6bf7e7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.14-rocm720-mi35x-mori-0701", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 69.63799893856049, + "p90": 73.3179971575737, + "p95": 76.11799985170364, + "p99": 89.91800248622894 + }, + "combine": { + "p50": 46.39900103211403, + "p90": 49.598999321460724, + "p95": 51.43899843096733, + "p99": 71.27799838781357 + }, + "roundtrip": { + "p50": 132.87700712680817, + "p90": 142.91700720787048, + "p95": 149.75599944591522, + "p99": 161.55600547790527 + }, + "isolatedSum": { + "p50": 116.03699997067451, + "p90": 122.91699647903442, + "p95": 127.55699828267097, + "p99": 161.1960008740425 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 70.43799757957458, + "p90": 74.43799823522568, + "p95": 77.5580033659935, + "p99": 90.51799774169922 + }, + "combine": { + "p50": 47.318000346422195, + "p90": 51.43899843096733, + "p95": 
54.03900146484375, + "p99": 71.23900204896927 + }, + "roundtrip": { + "p50": 133.0759972333908, + "p90": 141.39600098133087, + "p95": 146.15599811077118, + "p99": 159.79599952697754 + }, + "isolatedSum": { + "p50": 117.75599792599678, + "p90": 125.87699666619301, + "p95": 131.59700483083725, + "p99": 161.7569997906685 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1240064, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 70.79900056123734, + "p90": 75.39799809455872, + "p95": 77.35799998044968, + "p99": 90.23799747228622 + }, + "combine": { + "p50": 46.599000692367554, + "p90": 50.07899925112724, + "p95": 53.918998688459396, + "p99": 70.71900367736816 + }, + "roundtrip": { + "p50": 134.47700440883636, + "p90": 142.99699664115906, + "p95": 147.27599918842316, + "p99": 161.71599924564362 + }, + "isolatedSum": { + "p50": 117.39800125360489, + "p90": 125.47699734568596, + "p95": 131.27699866890907, + "p99": 160.9570011496544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487296, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 73.11800122261047, + "p90": 76.71800255775452, + "p95": 78.75800132751465, + "p99": 90.71800112724304 + }, + "combine": { + "p50": 46.358998864889145, + "p90": 50.47899857163429, + "p95": 53.11800166964531, + "p99": 70.15799731016159 + }, + "roundtrip": { + "p50": 136.95700466632843, + "p90": 145.87700366973877, + "p95": 152.03599631786346, + "p99": 166.31600260734558 + }, + "isolatedSum": { + "p50": 119.47700008749962, + "p90": 127.19700112938881, + "p95": 131.87600299715996, + "p99": 160.87599843740463 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 4960256, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 78.8780003786087, + "p90": 82.27799832820892, + "p95": 83.47799628973007, + "p99": 95.83699703216553 + }, + "combine": { + "p50": 49.917999655008316, + "p90": 53.51899936795235, + "p95": 55.07899820804596, + "p99": 73.8380029797554 + }, + "roundtrip": { + "p50": 144.7560042142868, + "p90": 154.556006193161, + "p95": 158.8360071182251, + "p99": 176.27499997615814 + }, + "isolatedSum": { + "p50": 128.79600003361702, + "p90": 135.79699769616127, + "p95": 138.55699449777603, + "p99": 169.67500001192093 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9863168, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 93.79799664020538, + "p90": 97.31700271368027, + "p95": 99.03699904680252, + "p99": 105.67700117826462 + }, + "combine": { + "p50": 56.59899860620499, + "p90": 59.599000960588455, + "p95": 61.91899999976158, + "p99": 80.27800172567368 + }, + "roundtrip": { + "p50": 164.71600532531738, + "p90": 174.47499930858612, + "p95": 178.1959980726242, + "p99": 183.11500549316406 + }, + "isolatedSum": { + "p50": 150.39699524641037, + "p90": 156.91600367426872, + "p95": 160.9559990465641, + "p99": 185.9550029039383 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 115.55799841880798, + "p90": 119.35699731111526, + "p95": 120.47699838876724, + "p99": 
122.47700244188309 + }, + "combine": { + "p50": 72.43900001049042, + "p90": 75.99800080060959, + "p95": 78.23800295591354, + "p99": 95.55800259113312 + }, + "roundtrip": { + "p50": 205.19499480724335, + "p90": 214.79499340057373, + "p95": 217.5550013780594, + "p99": 222.55399823188782 + }, + "isolatedSum": { + "p50": 187.9969984292984, + "p90": 195.35499811172485, + "p95": 198.7150013446808, + "p99": 218.0350050330162 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 148.95600080490112, + "p90": 152.39599347114563, + "p95": 153.43600511550903, + "p99": 156.1170071363449 + }, + "combine": { + "p50": 122.7170005440712, + "p90": 126.71700119972229, + "p95": 128.47700715065002, + "p99": 149.07699823379517 + }, + "roundtrip": { + "p50": 288.87200355529785, + "p90": 297.1929907798767, + "p95": 300.4330098628998, + "p99": 305.2319884300232 + }, + "isolatedSum": { + "p50": 271.6730013489723, + "p90": 279.1129946708679, + "p95": 281.91301226615906, + "p99": 305.1940053701401 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77944832, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 206.47500455379486, + "p90": 209.43500101566315, + "p95": 210.23499965667725, + "p99": 213.23400735855103 + }, + "combine": { + "p50": 204.83499765396118, + "p90": 209.51500535011292, + "p95": 211.51499450206757, + "p99": 227.15400159358978 + }, + "roundtrip": { + "p50": 425.94999074935913, + "p90": 433.66900086402893, + "p95": 436.50901317596436, + "p99": 440.7089948654175 + }, + "isolatedSum": { + "p50": 
411.31000220775604, + "p90": 418.95000636577606, + "p95": 421.7499941587448, + "p99": 440.3880089521408 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 156133376, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-e1593471", + "identity": "mi325x|nccl-ep|n-a|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||75530960a30b452", + "colorKey": "mi325x_56895d04", + "comparisonKey": "20a1064016015d1c", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:59:59.726865+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "75530960a30b452", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + 
"eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 259.7939968109131, + "p90": 273.5939919948578, + "p95": 286.0740125179291, + "p99": 4503.222942352295 + }, + "combine": { + "p50": 104.55799847841263, + "p90": 116.07799679040909, + "p95": 121.63800001144409, + "p99": 2898.9779949188232 + }, + "roundtrip": { + "p50": 350.0320017337799, + "p90": 369.7119951248169, + "p95": 399.4710147380829, + "p99": 4670.898914337158 + }, + "isolatedSum": { + "p50": 364.3519952893257, + "p90": 389.6719887852669, + "p95": 407.71201252937317, + "p99": 7402.200937271118 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 344064, + "combineLogicalBytes": 344064, + "fanoutMean": 5.25, + "recvTokensMax": 14, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 259.31400060653687, + "p90": 272.31401205062866, + "p95": 282.5540006160736, + "p99": 4306.547164916992 + }, + "combine": { + "p50": 103.95800322294235, + "p90": 114.39800262451172, + "p95": 119.03800070285797, + "p99": 4022.233009338379 + }, + "roundtrip": { + "p50": 350.3530025482178, + "p90": 366.2320077419281, + "p95": 373.9520013332367, + "p99": 4527.222156524658 + }, + "isolatedSum": { + "p50": 363.2720038294792, + "p90": 386.7120146751404, + "p95": 401.5920013189316, + "p99": 8328.780174255371 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 704512, + "combineLogicalBytes": 704512, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + 
"samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 265.8739984035492, + "p90": 280.27400374412537, + "p95": 290.2739942073822, + "p99": 4208.989143371582 + }, + "combine": { + "p50": 101.27700120210648, + "p90": 112.39799857139587, + "p95": 117.67800152301788, + "p99": 3920.754909515381 + }, + "roundtrip": { + "p50": 361.1530065536499, + "p90": 376.8709897994995, + "p95": 386.03100180625916, + "p99": 4463.344097137451 + }, + "isolatedSum": { + "p50": 367.15099960565567, + "p90": 392.67200231552124, + "p95": 407.9519957304001, + "p99": 8129.744052886963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1384448, + "combineLogicalBytes": 1384448, + "fanoutMean": 5.28125, + "recvTokensMax": 42, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 269.0350115299225, + "p90": 281.9939851760864, + "p95": 288.51398825645447, + "p99": 4128.750801086426 + }, + "combine": { + "p50": 100.75800120830536, + "p90": 112.55700141191483, + "p95": 120.19799649715424, + "p99": 2711.6611003875732 + }, + "roundtrip": { + "p50": 357.4329912662506, + "p90": 372.99200892448425, + "p95": 388.592004776001, + "p99": 4542.101860046387 + }, + "isolatedSum": { + "p50": 369.79301273822784, + "p90": 394.55098658800125, + "p95": 408.7119847536087, + "p99": 6840.411901473999 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2744320, + "combineLogicalBytes": 2744320, + "fanoutMean": 5.234375, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 269.39401030540466, + "p90": 281.87400102615356, + "p95": 290.1940047740936, + "p99": 4202.949047088623 + }, + "combine": { + "p50": 100.23800283670425, + "p90": 109.35799777507782, + "p95": 111.8369996547699, + "p99": 119.27799880504608 + }, + 
"roundtrip": { + "p50": 359.71200466156006, + "p90": 376.67199969291687, + "p95": 391.7919993400574, + "p99": 4565.02103805542 + }, + "isolatedSum": { + "p50": 369.6320131421089, + "p90": 391.2319988012314, + "p95": 402.0310044288635, + "p99": 4322.227045893669 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5464064, + "combineLogicalBytes": 5464064, + "fanoutMean": 5.2109375, + "recvTokensMax": 138, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 279.83400225639343, + "p90": 292.51301288604736, + "p95": 303.8730025291443, + "p99": 4124.990940093994 + }, + "combine": { + "p50": 101.83800011873245, + "p90": 110.71799695491791, + "p95": 113.3980005979538, + "p99": 288.79401087760925 + }, + "roundtrip": { + "p50": 364.872008562088, + "p90": 379.9920082092285, + "p95": 392.7119970321655, + "p99": 4391.506195068359 + }, + "isolatedSum": { + "p50": 381.6720023751259, + "p90": 403.23100984096527, + "p95": 417.2710031270981, + "p99": 4413.784950971603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11124736, + "combineLogicalBytes": 11124736, + "fanoutMean": 5.3046875, + "recvTokensMax": 282, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 288.9539897441864, + "p90": 311.5130066871643, + "p95": 316.3129985332489, + "p99": 3763.5180950164795 + }, + "combine": { + "p50": 118.59799921512604, + "p90": 127.11699306964874, + "p95": 138.95699381828308, + "p99": 3536.043882369995 + }, + "roundtrip": { + "p50": 374.4719922542572, + "p90": 395.3920006752014, + "p95": 411.79099678993225, + "p99": 4427.944183349609 + }, + "isolatedSum": { + "p50": 407.55198895931244, + "p90": 438.62999975681305, + "p95": 455.269992351532, + "p99": 7299.561977386475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22192128, + "combineLogicalBytes": 
22192128, + "fanoutMean": 5.291015625, + "recvTokensMax": 551, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 304.7940135002136, + "p90": 319.0329968929291, + "p95": 328.3129930496216, + "p99": 4250.708103179932 + }, + "combine": { + "p50": 148.87699484825134, + "p90": 156.91600739955902, + "p95": 176.71599984169006, + "p99": 3521.56400680542 + }, + "roundtrip": { + "p50": 427.5909960269928, + "p90": 440.99000096321106, + "p95": 456.18999004364014, + "p99": 4145.471096038818 + }, + "isolatedSum": { + "p50": 453.67100834846497, + "p90": 475.9490042924881, + "p95": 505.02899289131165, + "p99": 7772.272109985352 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 1054, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-de29115c", + "identity": "mi325x|nccl-ep|n-a|5120|8|160|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||be1b44a963bd4ef", + "colorKey": "mi325x_56895d04", + "comparisonKey": "8c4037d881b56b80", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:01:02.669684+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "shape 5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + 
"kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "be1b44a963bd4ef", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 262.23400235176086, + "p90": 274.75398778915405, + "p95": 286.75299882888794, + "p99": 4446.383953094482 + }, + "combine": { + "p50": 104.59700226783752, + "p90": 117.75700002908707, + "p95": 128.59700620174408, + "p99": 4082.1518898010254 + }, + "roundtrip": { + "p50": 354.35301065444946, + "p90": 372.5520074367523, + "p95": 389.1119956970215, + "p99": 4634.260177612305 + }, + "isolatedSum": { + "p50": 366.8310046195984, + "p90": 392.5109878182411, + "p95": 415.350005030632, + "p99": 8528.535842895508 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 430080, + "combineLogicalBytes": 430080, + "fanoutMean": 5.25, + "recvTokensMax": 16, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 262.4349892139435, + "p90": 277.19399333000183, + "p95": 284.2330038547516, + "p99": 
4458.583831787109 + }, + "combine": { + "p50": 103.63700240850449, + "p90": 113.3980005979538, + "p95": 117.47699975967407, + "p99": 4081.3112258911133 + }, + "roundtrip": { + "p50": 353.1930148601532, + "p90": 368.99200081825256, + "p95": 399.7519910335541, + "p99": 4748.01778793335 + }, + "isolatedSum": { + "p50": 366.07199162244797, + "p90": 390.5919939279556, + "p95": 401.71000361442566, + "p99": 8539.895057678223 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 880640, + "combineLogicalBytes": 880640, + "fanoutMean": 5.375, + "recvTokensMax": 22, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 267.9939866065979, + "p90": 282.75400400161743, + "p95": 408.7519943714142, + "p99": 4217.82922744751 + }, + "combine": { + "p50": 99.03799742460251, + "p90": 108.35699737071991, + "p95": 111.63800209760666, + "p99": 3044.5330142974854 + }, + "roundtrip": { + "p50": 358.5529923439026, + "p90": 381.27198815345764, + "p95": 438.34999203681946, + "p99": 4610.8598709106445 + }, + "isolatedSum": { + "p50": 367.0319840312004, + "p90": 391.11100137233734, + "p95": 520.3899964690208, + "p99": 7262.362241744995 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1740800, + "combineLogicalBytes": 1740800, + "fanoutMean": 5.3125, + "recvTokensMax": 41, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 269.11500096321106, + "p90": 282.43398666381836, + "p95": 302.033007144928, + "p99": 4213.788986206055 + }, + "combine": { + "p50": 101.47800296545029, + "p90": 111.07800155878067, + "p95": 116.47800356149673, + "p99": 3227.449893951416 + }, + "roundtrip": { + "p50": 359.47200655937195, + "p90": 377.1109879016876, + "p95": 399.55198764801025, + "p99": 4605.659961700439 + }, + "isolatedSum": { + "p50": 370.59300392866135, + "p90": 393.51198822259903, + 
"p95": 418.5110107064247, + "p99": 7441.238880157471 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3471360, + "combineLogicalBytes": 3471360, + "fanoutMean": 5.296875, + "recvTokensMax": 71, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 270.6339955329895, + "p90": 285.07301211357117, + "p95": 315.15398621559143, + "p99": 4139.389991760254 + }, + "combine": { + "p50": 101.27799957990646, + "p90": 110.27699708938599, + "p95": 113.27800154685974, + "p99": 126.83799862861633 + }, + "roundtrip": { + "p50": 360.9519898891449, + "p90": 378.07101011276245, + "p95": 392.47098565101624, + "p99": 4576.301097869873 + }, + "isolatedSum": { + "p50": 371.91199511289597, + "p90": 395.35000920295715, + "p95": 428.4319877624512, + "p99": 4266.22799038887 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6912000, + "combineLogicalBytes": 6912000, + "fanoutMean": 5.2734375, + "recvTokensMax": 137, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 283.6340069770813, + "p90": 298.5140085220337, + "p95": 335.03299951553345, + "p99": 4220.388889312744 + }, + "combine": { + "p50": 106.83800280094147, + "p90": 113.43800276517868, + "p95": 115.95799773931503, + "p99": 3525.4039764404297 + }, + "roundtrip": { + "p50": 367.23199486732483, + "p90": 381.51198625564575, + "p95": 395.9519863128662, + "p99": 4494.303226470947 + }, + "isolatedSum": { + "p50": 390.47200977802277, + "p90": 411.9520112872124, + "p95": 450.9909972548485, + "p99": 7745.792865753174 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 13977600, + "combineLogicalBytes": 13977600, + "fanoutMean": 5.33203125, + "recvTokensMax": 276, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + 
"dispatch": { + "p50": 294.67400908470154, + "p90": 314.393013715744, + "p95": 330.1529884338379, + "p99": 4102.712154388428 + }, + "combine": { + "p50": 128.11799347400665, + "p90": 137.35699653625488, + "p95": 143.1570053100586, + "p99": 3706.5200805664062 + }, + "roundtrip": { + "p50": 390.3520107269287, + "p90": 407.710999250412, + "p95": 889.54097032547, + "p99": 4345.225811004639 + }, + "isolatedSum": { + "p50": 422.7920025587082, + "p90": 451.7500102519989, + "p95": 473.3099937438965, + "p99": 7809.232234954834 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 27975680, + "combineLogicalBytes": 27975680, + "fanoutMean": 5.3359375, + "recvTokensMax": 528, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 311.7539882659912, + "p90": 326.03299617767334, + "p95": 369.6320056915283, + "p99": 4182.18994140625 + }, + "combine": { + "p50": 162.95699775218964, + "p90": 169.07599568367004, + "p95": 171.87699675559998, + "p99": 3179.6910762786865 + }, + "roundtrip": { + "p50": 452.11100578308105, + "p90": 469.15000677108765, + "p95": 506.62899017333984, + "p99": 4024.5518684387207 + }, + "isolatedSum": { + "p50": 474.71098601818085, + "p90": 495.1089918613434, + "p95": 541.5090024471283, + "p99": 7361.8810176849365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1073, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-849c64ed", + "identity": "mi325x|nccl-ep|n-a|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "mi325x_56895d04", + "comparisonKey": "d7c9dea917e6d272", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:02:08.289322+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + 
"runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 262.3949944972992, + "p90": 279.3940007686615, + "p95": 2776.060104370117, + "p99": 4195.189952850342 + }, + "combine": { + "p50": 104.79699820280075, + "p90": 115.63800275325775, + "p95": 121.99699878692627, + "p99": 3833.2769870758057 + }, + "roundtrip": { + "p50": 
352.2320091724396, + "p90": 372.5520074367523, + "p95": 402.71100401878357, + "p99": 4424.544811248779 + }, + "isolatedSum": { + "p50": 367.19199270009995, + "p90": 395.03200352191925, + "p95": 2898.0571031570435, + "p99": 8028.4669399261475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 540672, + "combineLogicalBytes": 540672, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 262.23400235176086, + "p90": 275.31400322914124, + "p95": 287.6740097999573, + "p99": 4093.5521125793457 + }, + "combine": { + "p50": 102.79799997806549, + "p90": 112.59700357913971, + "p95": 118.91700327396393, + "p99": 3987.0340824127197 + }, + "roundtrip": { + "p50": 355.71199655532837, + "p90": 375.39198994636536, + "p95": 401.5910029411316, + "p99": 4408.783912658691 + }, + "isolatedSum": { + "p50": 365.03200232982635, + "p90": 387.91100680828094, + "p95": 406.5910130739212, + "p99": 8080.586194992065 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1056768, + "combineLogicalBytes": 1056768, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 270.19399404525757, + "p90": 290.43400287628174, + "p95": 306.67299032211304, + "p99": 4100.5120277404785 + }, + "combine": { + "p50": 101.31700336933136, + "p90": 110.23800075054169, + "p95": 113.19799721240997, + "p99": 3702.120065689087 + }, + "roundtrip": { + "p50": 360.8720004558563, + "p90": 382.1110129356384, + "p95": 403.5109877586365, + "p99": 4524.742126464844 + }, + "isolatedSum": { + "p50": 371.5109974145889, + "p90": 400.6720036268234, + "p95": 419.870987534523, + "p99": 7802.632093429565 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2125824, + "combineLogicalBytes": 2125824, + "fanoutMean": 5.40625, + 
"recvTokensMax": 39, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 269.6340084075928, + "p90": 284.2339873313904, + "p95": 297.9139983654022, + "p99": 4091.3119316101074 + }, + "combine": { + "p50": 102.47799754142761, + "p90": 112.67799884080887, + "p95": 117.51700192689896, + "p99": 3618.6020374298096 + }, + "roundtrip": { + "p50": 357.3929965496063, + "p90": 376.4309883117676, + "p95": 418.3509945869446, + "p99": 4557.22188949585 + }, + "isolatedSum": { + "p50": 372.1120059490204, + "p90": 396.91198617219925, + "p95": 415.4310002923012, + "p99": 7709.913969039917 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4263936, + "combineLogicalBytes": 4263936, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 271.9539999961853, + "p90": 285.11399030685425, + "p95": 305.59301376342773, + "p99": 3889.4360065460205 + }, + "combine": { + "p50": 103.27699780464172, + "p90": 113.51799964904785, + "p95": 120.19699811935425, + "p99": 3365.528106689453 + }, + "roundtrip": { + "p50": 360.9920144081116, + "p90": 379.1919946670532, + "p95": 400.31200647354126, + "p99": 4413.744926452637 + }, + "isolatedSum": { + "p50": 375.230997800827, + "p90": 398.6319899559021, + "p95": 425.790011882782, + "p99": 7254.964113235474 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 8503296, + "combineLogicalBytes": 8503296, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 285.5539917945862, + "p90": 297.993004322052, + "p95": 307.873010635376, + "p99": 3726.3190746307373 + }, + "combine": { + "p50": 110.19700020551682, + "p90": 116.71800166368484, + "p95": 
119.87700313329697, + "p99": 3358.0880165100098 + }, + "roundtrip": { + "p50": 369.31198835372925, + "p90": 388.2319927215576, + "p95": 426.270991563797, + "p99": 4319.946765899658 + }, + "isolatedSum": { + "p50": 395.750992000103, + "p90": 414.71100598573685, + "p95": 427.75001376867294, + "p99": 7084.407091140747 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 16908288, + "combineLogicalBytes": 16908288, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 301.79300904273987, + "p90": 323.7130045890808, + "p95": 351.3120114803314, + "p99": 3535.5238914489746 + }, + "combine": { + "p50": 137.036994099617, + "p90": 147.6770043373108, + "p95": 179.39600348472595, + "p99": 3392.6069736480713 + }, + "roundtrip": { + "p50": 403.07098627090454, + "p90": 420.3509986400604, + "p95": 471.5900123119354, + "p99": 4034.9931716918945 + }, + "isolatedSum": { + "p50": 438.8300031423569, + "p90": 471.3900089263916, + "p95": 530.7080149650574, + "p99": 6928.130865097046 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 33423360, + "combineLogicalBytes": 33423360, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 324.7930109500885, + "p90": 340.2329981327057, + "p95": 377.83199548721313, + "p99": 3943.474054336548 + }, + "combine": { + "p50": 179.87599968910217, + "p90": 186.7160052061081, + "p95": 191.07599556446075, + "p99": 3134.3328952789307 + }, + "roundtrip": { + "p50": 484.78999733924866, + "p90": 502.34901905059814, + "p95": 2692.4219131469727, + "p99": 3662.6009941101074 + }, + "isolatedSum": { + "p50": 504.6690106391907, + "p90": 526.9490033388138, + "p95": 568.9079910516739, + "p99": 7077.8069496154785 + }, + "roundtripMeasured": true, + 
"dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-207a03fa", + "identity": "mi325x|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "mi325x_56895d04", + "comparisonKey": "0472f1d3813db8e3", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:57:40.575069+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + 
"repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 258.75499844551086, + "p90": 271.7140018939972, + "p95": 1028.1380414962769, + "p99": 4386.985778808594 + }, + "combine": { + "p50": 103.83699834346771, + "p90": 114.11800235509872, + "p95": 117.83699691295624, + "p99": 3893.6359882354736 + }, + "roundtrip": { + "p50": 349.7520089149475, + "p90": 368.19198727607727, + "p95": 403.4709930419922, + "p99": 4572.580814361572 + }, + "isolatedSum": { + "p50": 362.5919967889786, + "p90": 385.8320042490959, + "p95": 1145.975038409233, + "p99": 8280.621767044067 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 257.75399804115295, + "p90": 271.7140018939972, + "p95": 299.9129891395569, + "p99": 4230.348110198975 + }, + "combine": { + "p50": 102.31699794530869, + "p90": 112.27799952030182, + "p95": 121.8779981136322, + "p99": 3359.2870235443115 + }, + "roundtrip": { + "p50": 349.67300295829773, + "p90": 369.7519898414612, + "p95": 388.47100734710693, + "p99": 4485.702991485596 + }, + "isolatedSum": { + "p50": 360.07099598646164, + "p90": 383.992001414299, + "p95": 421.7909872531891, + "p99": 7589.635133743286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 265.51398634910583, + "p90": 
285.11399030685425, + "p95": 331.83300495147705, + "p99": 4000.314235687256 + }, + "combine": { + "p50": 100.79800337553024, + "p90": 110.83699762821198, + "p95": 117.75700002908707, + "p99": 3592.402935028076 + }, + "roundtrip": { + "p50": 355.75199127197266, + "p90": 376.3520121574402, + "p95": 391.431987285614, + "p99": 4524.621963500977 + }, + "isolatedSum": { + "p50": 366.3119897246361, + "p90": 395.9509879350662, + "p95": 449.5900049805641, + "p99": 7592.717170715332 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 265.67399501800537, + "p90": 278.67400646209717, + "p95": 293.55400800704956, + "p99": 4249.828815460205 + }, + "combine": { + "p50": 101.63699835538864, + "p90": 111.95699870586395, + "p95": 115.95699936151505, + "p99": 2707.142114639282 + }, + "roundtrip": { + "p50": 355.2730083465576, + "p90": 373.7109899520874, + "p95": 387.6720070838928, + "p99": 4520.0629234313965 + }, + "isolatedSum": { + "p50": 367.310993373394, + "p90": 390.6310051679611, + "p95": 409.5110073685646, + "p99": 6956.970930099487 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 267.83499121665955, + "p90": 281.3139855861664, + "p95": 288.55401277542114, + "p99": 2974.5359420776367 + }, + "combine": { + "p50": 99.91800040006638, + "p90": 109.1580018401146, + "p95": 114.39699679613113, + "p99": 3738.4400367736816 + }, + "roundtrip": { + "p50": 357.6729893684387, + "p90": 377.7120113372803, + "p95": 391.87198877334595, + "p99": 4517.622947692871 + }, + "isolatedSum": { 
+ "p50": 367.7529916167259, + "p90": 390.471987426281, + "p95": 402.9510095715523, + "p99": 6712.975978851318 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 285.2340042591095, + "p90": 299.1139888763428, + "p95": 312.19300627708435, + "p99": 3755.479097366333 + }, + "combine": { + "p50": 114.39699679613113, + "p90": 121.27699702978134, + "p95": 124.35699999332428, + "p99": 3275.0089168548584 + }, + "roundtrip": { + "p50": 371.0319995880127, + "p90": 386.31200790405273, + "p95": 401.39099955558777, + "p99": 4364.2258644104 + }, + "isolatedSum": { + "p50": 399.63100105524063, + "p90": 420.3909859061241, + "p95": 436.55000627040863, + "p99": 7030.488014221191 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 303.39398980140686, + "p90": 328.8320004940033, + "p95": 362.91199922561646, + "p99": 3622.560977935791 + }, + "combine": { + "p50": 141.2770003080368, + "p90": 149.43699538707733, + "p95": 153.35699915885925, + "p99": 203.67500185966492 + }, + "roundtrip": { + "p50": 413.4710133075714, + "p90": 434.06999111175537, + "p95": 474.5100140571594, + "p99": 4017.4331665039062 + }, + "isolatedSum": { + "p50": 444.67099010944366, + "p90": 478.2689958810806, + "p95": 516.2689983844757, + "p99": 3826.235979795456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + 
"tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 329.75301146507263, + "p90": 343.23298931121826, + "p95": 370.7520067691803, + "p99": 3857.635974884033 + }, + "combine": { + "p50": 195.59499621391296, + "p90": 202.99600064754486, + "p95": 207.27600157260895, + "p99": 2988.895893096924 + }, + "roundtrip": { + "p50": 512.669026851654, + "p90": 527.347981929779, + "p95": 560.3079795837402, + "p99": 3618.5619831085205 + }, + "isolatedSum": { + "p50": 525.3480076789856, + "p90": 546.2289899587631, + "p95": 578.0280083417892, + "p99": 6846.531867980957 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-246b8e9f", + "identity": "mi325x|nccl-ep|n-a|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||d6c49ae98878760", + "colorKey": "mi325x_56895d04", + "comparisonKey": "d03bb3fdeb2d3329", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:58:50.243521+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + 
"achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d6c49ae98878760", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 259.4740092754364, + "p90": 275.27400851249695, + "p95": 3587.4431133270264, + "p99": 4335.626125335693 + }, + "combine": { + "p50": 103.63700240850449, + "p90": 113.75799775123596, + "p95": 117.19699949026108, + "p99": 3889.5959854125977 + }, + "roundtrip": { + "p50": 349.2729961872101, + "p90": 368.51200461387634, + "p95": 415.710985660553, + "p99": 4612.260818481445 + }, + "isolatedSum": { + "p50": 363.1110116839409, + "p90": 389.0320062637329, + "p95": 3704.6401128172874, + "p99": 8225.222110748291 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 11, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 259.4340145587921, + "p90": 274.6340036392212, + "p95": 302.4730086326599, + "p99": 4181.230068206787 + }, + "combine": { + "p50": 102.99800336360931, + "p90": 112.39700019359589, + "p95": 117.87699908018112, + "p99": 4000.71382522583 + }, + 
"roundtrip": { + "p50": 350.271999835968, + "p90": 372.4310100078583, + "p95": 396.67099714279175, + "p99": 4505.503177642822 + }, + "isolatedSum": { + "p50": 362.4320179224014, + "p90": 387.0310038328171, + "p95": 420.35000771284103, + "p99": 8181.943893432617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1218560, + "combineLogicalBytes": 1218560, + "fanoutMean": 5.3125, + "recvTokensMax": 23, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 267.03399419784546, + "p90": 281.4739942550659, + "p95": 306.9530129432678, + "p99": 3955.0740718841553 + }, + "combine": { + "p50": 102.31799632310867, + "p90": 112.95700073242188, + "p95": 117.87699908018112, + "p99": 3740.9589290618896 + }, + "roundtrip": { + "p50": 356.79200291633606, + "p90": 379.39199805259705, + "p95": 418.39098930358887, + "p99": 4424.184799194336 + }, + "isolatedSum": { + "p50": 369.35199052095413, + "p90": 394.4309949874878, + "p95": 424.83001202344894, + "p99": 7696.033000946045 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2408448, + "combineLogicalBytes": 2408448, + "fanoutMean": 5.25, + "recvTokensMax": 36, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 267.03399419784546, + "p90": 280.7140052318573, + "p95": 302.513986825943, + "p99": 4065.432071685791 + }, + "combine": { + "p50": 100.6380021572113, + "p90": 110.4779988527298, + "p95": 114.79800194501877, + "p99": 3806.9169521331787 + }, + "roundtrip": { + "p50": 356.07200860977173, + "p90": 377.3120045661926, + "p95": 402.71100401878357, + "p99": 4507.022857666016 + }, + "isolatedSum": { + "p50": 367.67199635505676, + "p90": 391.1920040845871, + "p95": 417.31198877096176, + "p99": 7872.34902381897 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4831232, + "combineLogicalBytes": 4831232, + 
"fanoutMean": 5.265625, + "recvTokensMax": 76, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 269.75399255752563, + "p90": 282.6339900493622, + "p95": 299.39401149749756, + "p99": 4013.3538246154785 + }, + "combine": { + "p50": 101.55799984931946, + "p90": 111.07800155878067, + "p95": 114.717997610569, + "p99": 3599.4420051574707 + }, + "roundtrip": { + "p50": 356.4330041408539, + "p90": 377.3919939994812, + "p95": 395.9519863128662, + "p99": 4460.984230041504 + }, + "isolatedSum": { + "p50": 371.3119924068451, + "p90": 393.71199160814285, + "p95": 414.11200910806656, + "p99": 7612.795829772949 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9848832, + "combineLogicalBytes": 9848832, + "fanoutMean": 5.3671875, + "recvTokensMax": 139, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 286.2339913845062, + "p90": 298.7540066242218, + "p95": 310.59399247169495, + "p99": 4011.953830718994 + }, + "combine": { + "p50": 116.07799679040909, + "p90": 122.51800298690796, + "p95": 125.67800283432007, + "p99": 3591.8428897857666 + }, + "roundtrip": { + "p50": 372.95201420783997, + "p90": 393.0320143699646, + "p95": 428.6710023880005, + "p99": 4391.1452293396 + }, + "isolatedSum": { + "p50": 402.3119881749153, + "p90": 421.27200961112976, + "p95": 436.271995306015, + "p99": 7603.796720504761 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19496960, + "combineLogicalBytes": 19496960, + "fanoutMean": 5.3125, + "recvTokensMax": 271, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 303.5939931869507, + "p90": 327.55300402641296, + "p95": 346.1120128631592, + "p99": 3774.5189666748047 + }, + "combine": { + "p50": 143.03700625896454, + 
"p90": 152.55700051784515, + "p95": 171.8759983778, + "p99": 3468.1649208068848 + }, + "roundtrip": { + "p50": 414.39101099967957, + "p90": 434.7110092639923, + "p95": 479.0300130844116, + "p99": 4047.513008117676 + }, + "isolatedSum": { + "p50": 446.6309994459152, + "p90": 480.1100045442581, + "p95": 517.9880112409592, + "p99": 7242.683887481689 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38836224, + "combineLogicalBytes": 38836224, + "fanoutMean": 5.291015625, + "recvTokensMax": 555, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 329.9520015716553, + "p90": 343.8720107078552, + "p95": 377.4319887161255, + "p99": 3933.5150718688965 + }, + "combine": { + "p50": 196.07600569725037, + "p90": 202.83600687980652, + "p95": 208.27500522136688, + "p99": 3064.8140907287598 + }, + "roundtrip": { + "p50": 507.5889825820923, + "p90": 521.7880010604858, + "p95": 564.1880035400391, + "p99": 3661.4809036254883 + }, + "isolatedSum": { + "p50": 526.0280072689056, + "p90": 546.7080175876617, + "p95": 585.7069939374924, + "p99": 6998.329162597656 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 1078, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-8a9b9dcc", + "identity": "mi325x|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|ac583971f94b176", + "colorKey": "mi325x_c2e47d93", + "comparisonKey": "2607a616cf1e6c4f", + "schemaVersion": 4, + "generatedAt": "2026-07-02T16:41:26.741348+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_08", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": 
"resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28606340235", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606340235", + "createdAt": "2026-07-02T16:40:09Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 243.35399270057678, + "p90": 253.9530098438263, + "p95": 259.2340111732483, + "p99": 3079.3631076812744 + }, + "combine": { + "p50": 87.87800371646881, + "p90": 95.07700055837631, + "p95": 98.35699945688248, + "p99": 2559.7751140594482 + }, + "roundtrip": { + "p50": 327.551007270813, + "p90": 343.1519865989685, + "p95": 357.2309911251068, + "p99": 3109.882116317749 + }, + "isolatedSum": { + 
"p50": 331.2319964170456, + "p90": 349.0300104022026, + "p95": 357.59101063013077, + "p99": 5639.138221740723 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 14, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 244.31400001049042, + "p90": 255.7930052280426, + "p95": 262.23400235176086, + "p99": 2876.728057861328 + }, + "combine": { + "p50": 89.15799856185913, + "p90": 95.87699919939041, + "p95": 98.87800365686417, + "p99": 1676.3980388641357 + }, + "roundtrip": { + "p50": 326.231986284256, + "p90": 341.75199270248413, + "p95": 353.5110056400299, + "p99": 3597.429037094116 + }, + "isolatedSum": { + "p50": 333.47199857234955, + "p90": 351.670004427433, + "p95": 361.11200600862503, + "p99": 4553.126096725464 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 21, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 255.23298978805542, + "p90": 267.9139971733093, + "p95": 278.0730128288269, + "p99": 1763.2360458374023 + }, + "combine": { + "p50": 88.55699747800827, + "p90": 97.63800352811813, + "p95": 102.39800065755844, + "p99": 2732.8109741210938 + }, + "roundtrip": { + "p50": 335.7119858264923, + "p90": 352.9910147190094, + "p95": 362.47000098228455, + "p99": 3250.8790493011475 + }, + "isolatedSum": { + "p50": 343.7899872660637, + "p90": 365.55200070142746, + "p95": 380.47101348638535, + "p99": 4496.047019958496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 39, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + 
"globalTokens": 64, + "dispatch": { + "p50": 254.7140121459961, + "p90": 264.5930051803589, + "p95": 270.03300189971924, + "p99": 952.8160095214844 + }, + "combine": { + "p50": 88.4770005941391, + "p90": 94.75799649953842, + "p95": 97.31800109148026, + "p99": 527.6269912719727 + }, + "roundtrip": { + "p50": 334.03199911117554, + "p90": 347.7509915828705, + "p95": 359.19100046157837, + "p99": 3183.720111846924 + }, + "isolatedSum": { + "p50": 343.1910127401352, + "p90": 359.3510016798973, + "p95": 367.3510029911995, + "p99": 1480.443000793457 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 74, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 257.7129900455475, + "p90": 266.79301261901855, + "p95": 272.67301082611084, + "p99": 1558.7610006332397 + }, + "combine": { + "p50": 92.678003013134, + "p90": 98.0369970202446, + "p95": 100.87800025939941, + "p99": 2110.266923904419 + }, + "roundtrip": { + "p50": 336.87201142311096, + "p90": 351.07100009918213, + "p95": 356.1910092830658, + "p99": 2759.8109245300293 + }, + "isolatedSum": { + "p50": 350.3909930586815, + "p90": 364.83000963926315, + "p95": 373.55101108551025, + "p99": 3669.0279245376587 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 145, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 272.7130055427551, + "p90": 285.2329909801483, + "p95": 290.71301221847534, + "p99": 2568.7360763549805 + }, + "combine": { + "p50": 109.11700129508972, + "p90": 114.27800357341766, + "p95": 116.31699651479721, + "p99": 122.03700095415115 + }, + "roundtrip": { + "p50": 357.71098732948303, + "p90": 366.43099784851074, + 
"p95": 372.8710114955902, + "p99": 2840.848922729492 + }, + "isolatedSum": { + "p50": 381.83000683784485, + "p90": 399.510994553566, + "p95": 407.03000873327255, + "p99": 2690.7730773091316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 287, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 295.19200325012207, + "p90": 314.7920072078705, + "p95": 328.3909857273102, + "p99": 899.7380137443542 + }, + "combine": { + "p50": 137.95599341392517, + "p90": 145.19600570201874, + "p95": 150.6360024213791, + "p99": 2341.1810398101807 + }, + "roundtrip": { + "p50": 405.8699905872345, + "p90": 420.9100008010864, + "p95": 428.1899929046631, + "p99": 2947.406053543091 + }, + "isolatedSum": { + "p50": 433.14799666404724, + "p90": 459.9880129098892, + "p95": 479.02698814868927, + "p99": 3240.919053554535 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 564, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 321.1120069026947, + "p90": 330.7119905948639, + "p95": 336.0320031642914, + "p99": 394.06999945640564 + }, + "combine": { + "p50": 191.27599895000458, + "p90": 197.55500555038452, + "p95": 199.91500675678253, + "p99": 2186.505079269409 + }, + "roundtrip": { + "p50": 501.4280080795288, + "p90": 511.5870237350464, + "p95": 518.9070105552673, + "p99": 2707.5319290161133 + }, + "isolatedSum": { + "p50": 512.3880058526993, + "p90": 528.2669961452484, + "p95": 535.9470099210739, + "p99": 2580.575078725815 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + 
"stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-1fa6e4ae", + "identity": "mi325x|nccl-ep|n-a|4096|8|128|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||dc27c5e0894e569", + "colorKey": "mi325x_56895d04", + "comparisonKey": "60d9194abb3a6884", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:00:34.192223+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "Qwen3.5", + "shape": { + "hidden": 4096, + "topk": 8, + "experts": 128, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "dc27c5e0894e569", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 295.95300555229187, + "p90": 307.63399600982666, + "p95": 320.39299607276917, + "p99": 2105.915069580078 + }, + "combine": { + "p50": 145.5570012331009, + "p90": 153.11600267887115, + "p95": 157.11599588394165, + "p99": 3550.4040718078613 + }, + "roundtrip": { + "p50": 425.4310131072998, + "p90": 444.22999024391174, + "p95": 456.0700058937073, + "p99": 4015.7928466796875 + }, + "isolatedSum": { + "p50": 441.51000678539276, + "p90": 460.7499986886978, + "p95": 477.5089919567108, + "p99": 5656.319141387939 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 44564480, + "combineLogicalBytes": 44564480, + "fanoutMean": 5.3125, + "recvTokensMax": 1054, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 334.23298597335815, + "p90": 350.5930006504059, + "p95": 363.2720112800598, + "p99": 2441.3468837738037 + }, + "combine": { + "p50": 210.67599952220917, + "p90": 218.23500096797943, + "p95": 222.87599742412567, + "p99": 3068.013906478882 + }, + "roundtrip": { + "p50": 524.9490141868591, + "p90": 539.5089983940125, + "p95": 569.5480108261108, + "p99": 3405.527114868164 + }, + "isolatedSum": { + "p50": 544.9089854955673, + "p90": 568.8280016183853, + "p95": 586.1480087041855, + "p99": 5509.360790252686 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 89726976, + "combineLogicalBytes": 89726976, + "fanoutMean": 5.34814453125, + "recvTokensMax": 2081, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 408.0309867858887, + "p90": 425.4310131072998, + "p95": 442.9500102996826, + "p99": 
3157.9720973968506 + }, + "combine": { + "p50": 343.6720073223114, + "p90": 352.31199860572815, + "p95": 357.5519919395447, + "p99": 2136.713981628418 + }, + "roundtrip": { + "p50": 729.9839854240417, + "p90": 741.104006767273, + "p95": 774.7439742088318, + "p99": 3103.173017501831 + }, + "isolatedSum": { + "p50": 751.7029941082001, + "p90": 777.743011713028, + "p95": 800.5020022392273, + "p99": 5294.686079025269 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 179503104, + "combineLogicalBytes": 179503104, + "fanoutMean": 5.349609375, + "recvTokensMax": 4215, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 512.3479962348938, + "p90": 529.3890237808228, + "p95": 793.1830286979675, + "p99": 2808.819055557251 + }, + "combine": { + "p50": 605.1070094108582, + "p90": 623.6259937286377, + "p95": 1085.8969688415527, + "p99": 1563.2859468460083 + }, + "roundtrip": { + "p50": 1092.9369926452637, + "p90": 1111.6559505462646, + "p95": 1883.3190202713013, + "p99": 2650.7821083068848 + }, + "isolatedSum": { + "p50": 1117.455005645752, + "p90": 1153.0150175094604, + "p95": 1879.0799975395203, + "p99": 4372.105002403259 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 359022592, + "combineLogicalBytes": 359022592, + "fanoutMean": 5.349853515625, + "recvTokensMax": 8322, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 751.6239881515503, + "p90": 779.2230248451233, + "p95": 902.7400016784668, + "p99": 2398.228883743286 + }, + "combine": { + "p50": 1119.3759441375732, + "p90": 1143.7749862670898, + "p95": 1164.0549898147583, + "p99": 1323.7309455871582 + }, + "roundtrip": { + "p50": 1854.9599647521973, + "p90": 1948.3979940414429, + "p95": 2284.7909927368164, + "p99": 2760.6608867645264 + }, + "isolatedSum": { + "p50": 
1870.9999322891235, + "p90": 1922.9980111122131, + "p95": 2066.794991493225, + "p99": 3721.9598293304443 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 716111872, + "combineLogicalBytes": 716111872, + "fanoutMean": 5.33544921875, + "recvTokensMax": 16572, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1231.9740056991577, + "p90": 1261.4530324935913, + "p95": 1563.7660026550293, + "p99": 2115.433931350708 + }, + "combine": { + "p50": 2164.592981338501, + "p90": 2212.0730876922607, + "p95": 2225.951910018921, + "p99": 2238.7518882751465 + }, + "roundtrip": { + "p50": 3389.6470069885254, + "p90": 3438.405990600586, + "p95": 3461.124897003174, + "p99": 3689.199924468994 + }, + "isolatedSum": { + "p50": 3396.5669870376587, + "p90": 3473.526120185852, + "p95": 3789.71791267395, + "p99": 4354.1858196258545 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1432395776, + "combineLogicalBytes": 1432395776, + "fanoutMean": 5.336090087890625, + "recvTokensMax": 33121, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9eabb254", + "identity": "mi325x|nccl-ep|n-a|5120|8|160|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||0c022a63bbcbf42", + "colorKey": "mi325x_56895d04", + "comparisonKey": "3a36cf8eb0d31264", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:01:39.692524+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "shape 
5120/8/160", + "shape": { + "hidden": 5120, + "topk": 8, + "experts": 160, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "0c022a63bbcbf42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 306.03399872779846, + "p90": 318.1929886341095, + "p95": 325.67301392555237, + "p99": 2916.9371128082275 + }, + "combine": { + "p50": 158.8360071182251, + "p90": 167.276993393898, + "p95": 170.7960069179535, + "p99": 3324.1279125213623 + }, + "roundtrip": { + "p50": 450.34998655319214, + "p90": 469.7900116443634, + "p95": 485.7499897480011, + "p99": 3822.2780227661133 + }, + "isolatedSum": { + "p50": 464.87000584602356, + "p90": 485.4699820280075, + "p95": 496.46902084350586, + "p99": 6241.06502532959 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 55674880, + "combineLogicalBytes": 55674880, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1073, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 351.43300890922546, + "p90": 366.8319880962372, + "p95": 394.6720063686371, + "p99": 2947.096109390259 + }, + "combine": { + "p50": 243.63499879837036, + "p90": 251.83498859405518, + "p95": 258.1540048122406, + "p99": 2709.101915359497 + }, + "roundtrip": { + "p50": 575.3870010375977, + "p90": 589.8280143737793, + "p95": 648.7460136413574, + "p99": 3351.327896118164 + }, + "isolatedSum": { + "p50": 595.0680077075958, + "p90": 618.6669766902924, + "p95": 652.8260111808777, + "p99": 5656.198024749756 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 111104000, + "combineLogicalBytes": 111104000, + "fanoutMean": 5.2978515625, + "recvTokensMax": 2116, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 438.38998675346375, + "p90": 452.0699977874756, + "p95": 482.109010219574, + "p99": 3070.6939697265625 + }, + "combine": { + "p50": 398.47099781036377, + "p90": 406.3510000705719, + "p95": 1135.4960203170776, + "p99": 2312.6299381256104 + }, + "roundtrip": { + "p50": 811.9019865989685, + "p90": 833.3420157432556, + "p95": 2230.7519912719727, + "p99": 2837.538957595825 + }, + "isolatedSum": { + "p50": 836.8609845638275, + "p90": 858.4209978580475, + "p95": 1617.6050305366516, + "p99": 5383.323907852173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 223098880, + "combineLogicalBytes": 223098880, + "fanoutMean": 5.319091796875, + "recvTokensMax": 4142, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 567.7070021629333, + "p90": 583.666980266571, + "p95": 873.0610013008118, + "p99": 2642.622947692871 + }, + "combine": { + "p50": 727.5850176811218, + "p90": 751.5439987182617, + 
"p95": 1174.7349500656128, + "p99": 1742.1629428863525 + }, + "roundtrip": { + "p50": 1277.932047843933, + "p90": 1307.6119422912598, + "p95": 2334.789991378784, + "p99": 2655.543088912964 + }, + "isolatedSum": { + "p50": 1295.2920198440552, + "p90": 1335.2109789848328, + "p95": 2047.7959513664246, + "p99": 4384.785890579224 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 446730240, + "combineLogicalBytes": 446730240, + "fanoutMean": 5.325439453125, + "recvTokensMax": 8384, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 862.2609972953796, + "p90": 900.7009863853455, + "p95": 1736.6830110549927, + "p99": 2137.4340057373047 + }, + "combine": { + "p50": 1377.1699666976929, + "p90": 1414.1700267791748, + "p95": 1425.6889820098877, + "p99": 1464.4880294799805 + }, + "roundtrip": { + "p50": 2232.8319549560547, + "p90": 2315.9499168395996, + "p95": 2464.4269943237305, + "p99": 2964.855909347534 + }, + "isolatedSum": { + "p50": 2239.4309639930725, + "p90": 2314.8710131645203, + "p95": 3162.3719930648804, + "p99": 3601.922035217285 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 893634560, + "combineLogicalBytes": 893634560, + "fanoutMean": 5.32647705078125, + "recvTokensMax": 16552, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1468.0479764938354, + "p90": 1504.686951637268, + "p95": 1671.8440055847168, + "p99": 2144.5538997650146 + }, + "combine": { + "p50": 2703.902006149292, + "p90": 2758.181095123291, + "p95": 2767.940044403076, + "p99": 2788.379907608032 + }, + "roundtrip": { + "p50": 4168.230056762695, + "p90": 4216.908931732178, + "p95": 4235.148906707764, + "p99": 4358.026027679443 + }, + "isolatedSum": { + "p50": 4171.949982643127, + "p90": 4262.868046760559, + "p95": 4439.784049987793, + "p99": 
4932.933807373047 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1786265600, + "combineLogicalBytes": 1786265600, + "fanoutMean": 5.323486328125, + "recvTokensMax": 32960, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-906a5aaa", + "identity": "mi325x|nccl-ep|n-a|6144|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "mi325x_56895d04", + "comparisonKey": "475ba66d346c6d6b", + "schemaVersion": 4, + "generatedAt": "2026-07-03T00:02:47.482967+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "MiniMax-M3", + "shape": { + "hidden": 6144, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": 
"sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 311.95300817489624, + "p90": 322.4729895591736, + "p95": 327.3929953575134, + "p99": 2015.3169631958008 + }, + "combine": { + "p50": 176.3560026884079, + "p90": 186.79599463939667, + "p95": 190.35600125789642, + "p99": 3186.250925064087 + }, + "roundtrip": { + "p50": 471.78998589515686, + "p90": 488.54899406433105, + "p95": 514.7089958190918, + "p99": 3531.683921813965 + }, + "isolatedSum": { + "p50": 488.30901086330414, + "p90": 509.26898419857025, + "p95": 517.7489966154099, + "p99": 5201.567888259888 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 66576384, + "combineLogicalBytes": 66576384, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 362.91199922561646, + "p90": 376.75100564956665, + "p95": 385.3119909763336, + "p99": 2654.823064804077 + }, + "combine": { + "p50": 277.8739929199219, + "p90": 285.5539917945862, + "p95": 289.03400897979736, + "p99": 2687.582015991211 + }, + "roundtrip": { + "p50": 620.9459900856018, + "p90": 635.4259848594666, + "p95": 655.9860110282898, + "p99": 3148.7720012664795 + }, + "isolatedSum": { + "p50": 640.7859921455383, + "p90": 662.3049974441528, + "p95": 674.345999956131, + "p99": 5342.405080795288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 133619712, + "combineLogicalBytes": 133619712, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, 
+ { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 459.4700038433075, + "p90": 474.2699861526489, + "p95": 485.5090081691742, + "p99": 2921.0970401763916 + }, + "combine": { + "p50": 463.31000328063965, + "p90": 472.9900062084198, + "p95": 1069.6970224380493, + "p99": 2026.7159938812256 + }, + "roundtrip": { + "p50": 906.1400294303894, + "p90": 921.2999939918518, + "p95": 1333.4510326385498, + "p99": 2659.3430042266846 + }, + "isolatedSum": { + "p50": 922.7800071239471, + "p90": 947.2599923610687, + "p95": 1555.2060306072235, + "p99": 4947.813034057617 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 267657216, + "combineLogicalBytes": 267657216, + "fanoutMean": 5.31787109375, + "recvTokensMax": 4198, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 628.6259889602661, + "p90": 643.0259943008423, + "p95": 735.9049916267395, + "p99": 2416.1880016326904 + }, + "combine": { + "p50": 863.3009791374207, + "p90": 889.3010020256042, + "p95": 963.9790058135986, + "p99": 1625.2449750900269 + }, + "roundtrip": { + "p50": 1475.0089645385742, + "p90": 1514.5269632339478, + "p95": 2112.874984741211, + "p99": 2583.264112472534 + }, + "isolatedSum": { + "p50": 1491.9269680976868, + "p90": 1532.3269963264465, + "p95": 1699.8839974403381, + "p99": 4041.4329767227173 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 534380544, + "combineLogicalBytes": 534380544, + "fanoutMean": 5.30859375, + "recvTokensMax": 8294, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 987.1389865875244, + "p90": 1018.9379453659058, + "p95": 1438.6889934539795, + "p99": 2066.87593460083 + }, + "combine": { + "p50": 1635.0840330123901, + "p90": 1663.8840436935425, + "p95": 1675.7639646530151, + "p99": 1703.2829523086548 + }, + "roundtrip": 
{ + "p50": 2628.8230419158936, + "p90": 2678.982973098755, + "p95": 2735.661029815674, + "p99": 3158.332109451294 + }, + "isolatedSum": { + "p50": 2622.2230195999146, + "p90": 2682.8219890594482, + "p95": 3114.4529581069946, + "p99": 3770.158886909485 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1066119168, + "combineLogicalBytes": 1066119168, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1703.042984008789, + "p90": 1754.9619674682617, + "p95": 1874.7589588165283, + "p99": 2273.071050643921 + }, + "combine": { + "p50": 3205.7700157165527, + "p90": 3289.048910140991, + "p95": 3306.4088821411133, + "p99": 3332.2880268096924 + }, + "roundtrip": { + "p50": 4876.454830169678, + "p90": 4957.333087921143, + "p95": 4975.9721755981445, + "p99": 5002.051830291748 + }, + "isolatedSum": { + "p50": 4908.812999725342, + "p90": 5044.010877609253, + "p95": 5181.167840957642, + "p99": 5605.359077453613 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2131722240, + "combineLogicalBytes": 2131722240, + "fanoutMean": 5.294189453125, + "recvTokensMax": 32967, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-52c58e5f", + "identity": "mi325x|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||64d989e2e2a6b31", + "colorKey": "mi325x_56895d04", + "comparisonKey": "79fab06b04560d60", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:58:22.158580+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + 
"topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 324.63300228118896, + "p90": 339.4719958305359, + "p95": 349.11298751831055, + "p99": 2598.1040000915527 + }, + "combine": { + "p50": 195.15599310398102, + "p90": 202.31600105762482, + "p95": 206.19499683380127, + "p99": 2946.6168880462646 + }, + "roundtrip": { + "p50": 501.50901079177856, + "p90": 519.7889804840088, + "p95": 533.9879989624023, + "p99": 3449.6049880981445 + }, + "isolatedSum": { + "p50": 519.78899538517, + "p90": 541.7879968881607, + "p95": 555.3079843521118, + "p99": 5544.720888137817 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 382.31199979782104, + "p90": 396.7120051383972, + "p95": 403.83100509643555, + "p99": 2110.8338832855225 + }, + "combine": { + "p50": 313.23298811912537, + "p90": 323.39298725128174, + "p95": 519.3889737129211, + "p99": 2692.7809715270996 + }, + "roundtrip": { + "p50": 676.1050224304199, + "p90": 697.8650093078613, + "p95": 1098.7370014190674, + "p99": 2995.215892791748 + }, + "isolatedSum": { + "p50": 695.5449879169464, + "p90": 720.104992389679, + "p95": 923.2199788093567, + "p99": 4803.614854812622 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 491.71000719070435, + "p90": 509.6290111541748, + "p95": 516.3090229034424, + "p99": 2897.857904434204 + }, + "combine": { + "p50": 532.8289866447449, + "p90": 546.1080074310303, + "p95": 1198.173999786377, + "p99": 1704.1629552841187 + }, + "roundtrip": { + "p50": 1002.1779537200928, + "p90": 1017.0190334320068, + "p95": 1843.0800437927246, + "p99": 2692.3820972442627 + }, + "isolatedSum": { + "p50": 1024.5389938354492, + "p90": 1055.737018585205, + "p95": 1714.4830226898193, + "p99": 4602.020859718323 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 4198, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 684.5049858093262, + 
"p90": 697.9050040245056, + "p95": 707.6650261878967, + "p99": 2343.630075454712 + }, + "combine": { + "p50": 993.7790036201477, + "p90": 1019.3380117416382, + "p95": 1058.9770078659058, + "p99": 1613.4849786758423 + }, + "roundtrip": { + "p50": 1658.0840349197388, + "p90": 1702.7640342712402, + "p95": 2267.910957336426, + "p99": 2698.1019973754883 + }, + "isolatedSum": { + "p50": 1678.2839894294739, + "p90": 1717.2430157661438, + "p95": 1766.6420340538025, + "p99": 3957.115054130554 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8294, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1102.776050567627, + "p90": 1143.8950300216675, + "p95": 1483.7679862976074, + "p99": 2087.354898452759 + }, + "combine": { + "p50": 1918.6780452728271, + "p90": 1962.5580310821533, + "p95": 1971.5169668197632, + "p99": 1990.196943283081 + }, + "roundtrip": { + "p50": 3005.695104598999, + "p90": 3052.2139072418213, + "p95": 3104.012966156006, + "p99": 3424.5259761810303 + }, + "isolatedSum": { + "p50": 3021.454095840454, + "p90": 3106.453061103821, + "p95": 3455.2849531173706, + "p99": 4077.55184173584 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1923.3189821243286, + "p90": 1962.19801902771, + "p95": 2010.3559494018555, + "p99": 2251.271963119507 + }, + "combine": { + "p50": 3795.159101486206, + "p90": 3860.1160049438477, + "p95": 3874.4759559631348, + "p99": 3893.5561180114746 + }, + "roundtrip": { + "p50": 5721.715927124023, + "p90": 5764.795780181885, + "p95": 5779.435157775879, + 
"p99": 5860.874176025391 + }, + "isolatedSum": { + "p50": 5718.478083610535, + "p90": 5822.314023971558, + "p95": 5884.83190536499, + "p99": 6144.828081130981 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 32967, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-186b5564", + "identity": "mi325x|nccl-ep|n-a|7168|8|384|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||cd50548525dafdf", + "colorKey": "mi325x_56895d04", + "comparisonKey": "bdafcb730add84d3", + "schemaVersion": 4, + "generatedAt": "2026-07-02T23:59:31.850989+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_09", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16", + "model": "Kimi-K2", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 384, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "backend-tuned", + "conformanceClass": "best-known", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "cd50548525dafdf", + "workloadId": null, + "workloadSource": 
"seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28629100934", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28629100934", + "createdAt": "2026-07-02T23:56:03Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 323.1930136680603, + "p90": 337.8730118274689, + "p95": 353.67199778556824, + "p99": 1694.6829557418823 + }, + "combine": { + "p50": 193.99599730968475, + "p90": 202.47499644756317, + "p95": 953.499972820282, + "p99": 3163.372039794922 + }, + "roundtrip": { + "p50": 501.42902135849, + "p90": 519.5090174674988, + "p95": 537.8680229187012, + "p99": 3310.4488849639893 + }, + "isolatedSum": { + "p50": 517.189010977745, + "p90": 540.348008275032, + "p95": 1307.1719706058502, + "p99": 4858.054995536804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77514752, + "combineLogicalBytes": 77514752, + "fanoutMean": 5.2802734375, + "recvTokensMax": 1078, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 385.55198907852173, + "p90": 402.47100591659546, + "p95": 435.8310103416443, + "p99": 2810.5390071868896 + }, + "combine": { + "p50": 315.7530128955841, + "p90": 325.0330090522766, + "p95": 330.4330110549927, + "p99": 2119.0340518951416 + }, + "roundtrip": { + "p50": 679.265022277832, + "p90": 696.5450048446655, + "p95": 1672.7240085601807, + "p99": 2902.6970863342285 + }, + "isolatedSum": { + "p50": 701.3050019741058, + "p90": 727.5040149688721, + "p95": 766.264021396637, + "p99": 4929.573059082031 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 154570752, + "combineLogicalBytes": 154570752, + "fanoutMean": 
5.2646484375, + "recvTokensMax": 2149, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 491.6689991950989, + "p90": 513.7889981269836, + "p95": 1762.2419595718384, + "p99": 2654.0629863739014 + }, + "combine": { + "p50": 535.3479981422424, + "p90": 549.2280125617981, + "p95": 1106.1359643936157, + "p99": 1597.3650217056274 + }, + "roundtrip": { + "p50": 1007.9389810562134, + "p90": 1035.5780124664307, + "p95": 2160.1130962371826, + "p99": 2663.3429527282715 + }, + "isolatedSum": { + "p50": 1027.0169973373413, + "p90": 1063.0170106887817, + "p95": 2868.377923965454, + "p99": 4251.428008079529 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 309772288, + "combineLogicalBytes": 309772288, + "fanoutMean": 5.275390625, + "recvTokensMax": 4219, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 679.1449785232544, + "p90": 695.5850124359131, + "p95": 1463.8079404830933, + "p99": 2213.632106781006 + }, + "combine": { + "p50": 991.3780093193054, + "p90": 1025.0979661941528, + "p95": 1078.0969858169556, + "p99": 1405.8890342712402 + }, + "roundtrip": { + "p50": 1653.9239883422852, + "p90": 1793.7610149383545, + "p95": 2289.231061935425, + "p99": 2581.8240642547607 + }, + "isolatedSum": { + "p50": 1670.5229878425598, + "p90": 1720.682978630066, + "p95": 2541.904926300049, + "p99": 3619.521141052246 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 619501568, + "combineLogicalBytes": 619501568, + "fanoutMean": 5.2750244140625, + "recvTokensMax": 8292, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1089.2560482025146, + "p90": 1156.7349433898926, + "p95": 1658.7640047073364, + "p99": 2071.4359283447266 + }, + "combine": { + 
"p50": 1904.999017715454, + "p90": 1945.0780153274536, + "p95": 1956.1569690704346, + "p99": 1969.836950302124 + }, + "roundtrip": { + "p50": 2999.735116958618, + "p90": 3037.653923034668, + "p95": 3064.6939277648926, + "p99": 3377.4869441986084 + }, + "isolatedSum": { + "p50": 2994.2550659179688, + "p90": 3101.812958717346, + "p95": 3614.920973777771, + "p99": 4041.2728786468506 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1239375872, + "combineLogicalBytes": 1239375872, + "fanoutMean": 5.276611328125, + "recvTokensMax": 16554, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1913.957953453064, + "p90": 1987.4769449234009, + "p95": 2011.1169815063477, + "p99": 2220.752000808716 + }, + "combine": { + "p50": 3786.9980335235596, + "p90": 3863.1160259246826, + "p95": 3870.3160285949707, + "p99": 3885.6759071350098 + }, + "roundtrip": { + "p50": 5724.156856536865, + "p90": 5877.752780914307, + "p95": 5915.071964263916, + "p99": 5980.350971221924 + }, + "isolatedSum": { + "p50": 5700.9559869766235, + "p90": 5850.5929708480835, + "p95": 5881.433010101318, + "p99": 6106.427907943726 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2479669248, + "combineLogicalBytes": 2479669248, + "fanoutMean": 5.278564453125, + "recvTokensMax": 32983, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-a89f7c1d", + "identity": "mi325x|nccl-ep|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|normalized|0.18|64d989e2e2a6b31", + "colorKey": "mi325x_c2e47d93", + "comparisonKey": "010dffe1f43d5eb6", + "schemaVersion": 4, + "generatedAt": "2026-07-02T16:42:40.854435+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi325x-amds_07", + "sku": "mi325x", + "backend": "nccl-ep", + "phase": "prefill", + "mode": "normal", + 
"resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI325X EP8 · nccl-ep · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": null, + "configuredUnits": null, + "deviceUnits": 304, + "resourceClass": "resource-constrained", + "conformanceClass": "resource-conforming", + "fixedKernel": false, + "paretoEligible": true + }, + "placement": { + "kind": "packed", + "nodes": 1, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "64d989e2e2a6b31", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": null, + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28606340235", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606340235", + "createdAt": "2026-07-02T16:40:09Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 324.91299510002136, + "p90": 340.91299772262573, + "p95": 362.67200112342834, + "p99": 1435.3699684143066 + }, + "combine": { + "p50": 194.55599784851074, + "p90": 205.43600618839264, + "p95": 1497.6500272750854, + "p99": 2121.43611907959 + }, + "roundtrip": { + "p50": 499.8289942741394, + "p90": 519.4699764251709, + "p95": 548.3490228652954, + 
"p99": 2536.1080169677734 + }, + "isolatedSum": { + "p50": 519.4689929485321, + "p90": 546.3490039110184, + "p95": 1860.3220283985138, + "p99": 3556.8060874938965 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 1104, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 382.51200318336487, + "p90": 399.59099888801575, + "p95": 419.99098658561707, + "p99": 1884.4809532165527 + }, + "combine": { + "p50": 313.9530122280121, + "p90": 323.2730031013489, + "p95": 1136.976957321167, + "p99": 1752.243995666504 + }, + "roundtrip": { + "p50": 674.4670271873474, + "p90": 694.4260001182556, + "p95": 1675.686001777649, + "p99": 2255.0740242004395 + }, + "isolatedSum": { + "p50": 696.465015411377, + "p90": 722.8640019893646, + "p95": 1556.967943906784, + "p99": 3636.7249488830566 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 2147, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 492.46999621391296, + "p90": 509.79000329971313, + "p95": 540.4689908027649, + "p99": 2114.396095275879 + }, + "combine": { + "p50": 533.149003982544, + "p90": 547.668993473053, + "p95": 1004.4189691543579, + "p99": 1341.4520025253296 + }, + "roundtrip": { + "p50": 1003.6590099334717, + "p90": 1027.2589921951294, + "p95": 1819.1219568252563, + "p99": 2129.3160915374756 + }, + "isolatedSum": { + "p50": 1025.619000196457, + "p90": 1057.458996772766, + "p95": 1544.8879599571228, + "p99": 3455.8480978012085 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 4198, + 
"stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 1024, + "globalTokens": 8192, + "dispatch": { + "p50": 682.8659772872925, + "p90": 700.8659839630127, + "p95": 1009.5789432525635, + "p99": 1744.4839477539062 + }, + "combine": { + "p50": 987.0200157165527, + "p90": 1009.4989538192749, + "p95": 1033.3789587020874, + "p99": 1110.8169555664062 + }, + "roundtrip": { + "p50": 1656.0059785842896, + "p90": 1767.8439617156982, + "p95": 1962.3600244522095, + "p99": 2325.232982635498 + }, + "isolatedSum": { + "p50": 1669.8859930038452, + "p90": 1710.3649377822876, + "p95": 2042.9579019546509, + "p99": 2855.3009033203125 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 623443968, + "combineLogicalBytes": 623443968, + "fanoutMean": 5.30859375, + "recvTokensMax": 8294, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2048, + "globalTokens": 16384, + "dispatch": { + "p50": 1094.2169427871704, + "p90": 1140.6569480895996, + "p95": 1348.8119840621948, + "p99": 1690.1259422302246 + }, + "combine": { + "p50": 1915.4009819030762, + "p90": 1952.9600143432617, + "p95": 1964.3189907073975, + "p99": 1985.5190515518188 + }, + "roundtrip": { + "p50": 3004.179000854492, + "p90": 3053.8580417633057, + "p95": 3072.3769664764404, + "p99": 3263.9739513397217 + }, + "isolatedSum": { + "p50": 3009.6179246902466, + "p90": 3093.6169624328613, + "p95": 3313.1309747695923, + "p99": 3675.6449937820435 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1243805696, + "combineLogicalBytes": 1243805696, + "fanoutMean": 5.29547119140625, + "recvTokensMax": 16581, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4096, + "globalTokens": 32768, + "dispatch": { + "p50": 1927.8000593185425, + "p90": 1980.5999994277954, + "p95": 2006.3190460205078, + "p99": 2127.3560523986816 + }, + "combine": { + "p50": 3791.642904281616, + 
"p90": 3859.9610328674316, + "p95": 3870.0809478759766, + "p99": 3896.6801166534424 + }, + "roundtrip": { + "p50": 6085.635185241699, + "p90": 6199.913024902344, + "p95": 6219.711780548096, + "p99": 6246.592044830322 + }, + "isolatedSum": { + "p50": 5719.442963600159, + "p90": 5840.561032295227, + "p95": 5876.399993896484, + "p99": 6024.036169052124 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2487009280, + "combineLogicalBytes": 2487009280, + "fanoutMean": 5.294189453125, + "recvTokensMax": 32967, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-9fece180", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|tuned||ac583971f94b176", + "colorKey": "mi355x_96ee5dfa", + "comparisonKey": "fb2c41dcbf4fc3dd", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:18:58.340744+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + 
"kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ac583971f94b176", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": "2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 43.83999854326248, + "p90": 56.20000138878822, + "p95": 83.48000049591064, + "p99": 83.48000049591064 + }, + "combine": { + "p50": 16.64000004529953, + "p90": 17.400000244379044, + "p95": 19.039999693632126, + "p99": 19.039999693632126 + }, + "roundtrip": { + "p50": 56.161001324653625, + "p90": 56.71999976038933, + "p95": 60.88100001215935, + "p99": 60.88100001215935 + }, + "isolatedSum": { + "p50": 60.47999858856201, + "p90": 73.60000163316727, + "p95": 102.52000018954277, + "p99": 102.52000018954277 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 43.72100159525871, + "p90": 44.920001178979874, + "p95": 46.160001307725906, + "p99": 46.160001307725906 + }, + "combine": { + "p50": 16.520999372005463, + "p90": 17.480000853538513, + "p95": 18.160000443458557, + "p99": 18.160000443458557 + }, + "roundtrip": { + "p50": 58.35999920964241, + "p90": 61.159998178482056, + "p95": 62.240999191999435, + "p99": 62.240999191999435 + }, + "isolatedSum": { + "p50": 
60.242000967264175, + "p90": 62.40000203251839, + "p95": 64.32000175118446, + "p99": 64.32000175118446 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 43.12000051140785, + "p90": 43.71999949216843, + "p95": 45.56100070476532, + "p99": 45.56100070476532 + }, + "combine": { + "p50": 20.640000700950623, + "p90": 21.239999681711197, + "p95": 21.561000496149063, + "p99": 21.561000496149063 + }, + "roundtrip": { + "p50": 61.99999898672104, + "p90": 62.36099824309349, + "p95": 67.35999882221222, + "p99": 67.35999882221222 + }, + "isolatedSum": { + "p50": 63.760001212358475, + "p90": 64.95999917387962, + "p95": 67.12200120091438, + "p99": 67.12200120091438 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.59999841451645, + "p90": 43.03999990224838, + "p95": 44.199999421834946, + "p99": 44.199999421834946 + }, + "combine": { + "p50": 20.640000700950623, + "p90": 21.880000829696655, + "p95": 24.600999429821968, + "p99": 24.600999429821968 + }, + "roundtrip": { + "p50": 62.64100223779678, + "p90": 63.48100304603577, + "p95": 63.79999965429306, + "p99": 63.79999965429306 + }, + "isolatedSum": { + "p50": 63.23999911546707, + "p90": 64.92000073194504, + "p95": 68.80099885165691, + "p99": 68.80099885165691 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 43.07999834418297, + "p90": 46.59999907016754, + "p95": 46.84099927544594, + "p99": 46.84099927544594 + }, + "combine": { + "p50": 25.040000677108765, + "p90": 26.680000126361847, + "p95": 27.03999914228916, + "p99": 27.03999914228916 + }, + "roundtrip": { + "p50": 68.00000369548798, + "p90": 68.24000179767609, + "p95": 71.16000354290009, + "p99": 71.16000354290009 + }, + "isolatedSum": { + "p50": 68.11999902129173, + "p90": 73.27999919652939, + "p95": 73.8809984177351, + "p99": 73.8809984177351 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.759999960660934, + "p90": 46.51999846100807, + "p95": 50.84000155329704, + "p99": 50.84000155329704 + }, + "combine": { + "p50": 32.83999860286713, + "p90": 33.560000360012054, + "p95": 35.280000418424606, + "p99": 35.280000418424606 + }, + "roundtrip": { + "p50": 75.3600001335144, + "p90": 76.32099837064743, + "p95": 77.07999646663666, + "p99": 77.07999646663666 + }, + "isolatedSum": { + "p50": 77.59999856352806, + "p90": 80.07999882102013, + "p95": 86.12000197172165, + "p99": 86.12000197172165 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 45.32000049948692, + "p90": 46.04100063443184, + "p95": 47.15999960899353, + "p99": 47.15999960899353 + }, + "combine": { + "p50": 40.64000025391579, + "p90": 41.120998561382294, + "p95": 41.359998285770416, + "p99": 41.359998285770416 + }, + "roundtrip": { + "p50": 84.79999750852585, + "p90": 88.20100128650665, + "p95": 
89.07999843358994, + "p99": 89.07999843358994 + }, + "isolatedSum": { + "p50": 85.96000075340271, + "p90": 87.16199919581413, + "p95": 88.51999789476395, + "p99": 88.51999789476395 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 59.36000123620033, + "p90": 59.960998594760895, + "p95": 64.16100263595581, + "p99": 64.16100263595581 + }, + "combine": { + "p50": 52.20000073313713, + "p90": 52.68000066280365, + "p95": 55.59999868273735, + "p99": 55.59999868273735 + }, + "roundtrip": { + "p50": 111.04100197553635, + "p90": 113.64100128412247, + "p95": 117.24100261926651, + "p99": 117.24100261926651 + }, + "isolatedSum": { + "p50": 111.56000196933746, + "p90": 112.64099925756454, + "p95": 119.76100131869316, + "p99": 119.76100131869316 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-a63227a3", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|decode|normal|none|none|0|tuned||ffa946582edb500", + "colorKey": "mi355x_d3fc5952", + "comparisonKey": "4fe8017daaba5e41", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:19:54.149763+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X 
EP8 · mori · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "ffa946582edb500", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": "2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 42.75999963283539, + "p90": 59.56000089645386, + "p95": 73.27999919652939, + "p99": 73.27999919652939 + }, + "combine": { + "p50": 19.600000232458115, + "p90": 19.88000050187111, + "p95": 19.920000806450844, + "p99": 19.920000806450844 + }, + "roundtrip": { + "p50": 57.56000056862831, + "p90": 58.240000158548355, + "p95": 64.43999707698822, + "p99": 64.43999707698822 + }, + "isolatedSum": { + "p50": 62.3599998652935, + "p90": 79.44000139832497, + "p95": 93.20000000298023, + "p99": 93.20000000298023 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 
917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 44.79999840259552, + "p90": 47.76100069284439, + "p95": 53.040001541376114, + "p99": 53.040001541376114 + }, + "combine": { + "p50": 19.600000232458115, + "p90": 20.840000361204147, + "p95": 24.400999769568443, + "p99": 24.400999769568443 + }, + "roundtrip": { + "p50": 60.47999858856201, + "p90": 63.519999384880066, + "p95": 63.680000603199005, + "p99": 63.680000603199005 + }, + "isolatedSum": { + "p50": 64.39999863505363, + "p90": 68.60100105404854, + "p95": 77.44100131094456, + "p99": 77.44100131094456 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 46.00000008940697, + "p90": 47.80000075697899, + "p95": 51.24099925160408, + "p99": 51.24099925160408 + }, + "combine": { + "p50": 23.840000852942467, + "p90": 24.240000173449516, + "p95": 28.440000489354134, + "p99": 28.440000489354134 + }, + "roundtrip": { + "p50": 64.96000289916992, + "p90": 65.99999964237213, + "p95": 68.36099922657013, + "p99": 68.36099922657013 + }, + "isolatedSum": { + "p50": 69.84000094234943, + "p90": 72.0400009304285, + "p95": 79.68099974095821, + "p99": 79.68099974095821 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 44.599998742341995, + "p90": 46.36000096797943, + "p95": 47.28100076317787, + "p99": 47.28100076317787 + }, + "combine": { + "p50": 
25.31999908387661, + "p90": 25.599999353289604, + "p95": 26.440000161528587, + "p99": 26.440000161528587 + }, + "roundtrip": { + "p50": 67.20100343227386, + "p90": 68.40000301599503, + "p95": 69.80100274085999, + "p99": 69.80100274085999 + }, + "isolatedSum": { + "p50": 69.9199978262186, + "p90": 71.96000032126904, + "p95": 73.72100092470646, + "p99": 73.72100092470646 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 43.99999976158142, + "p90": 46.480000019073486, + "p95": 46.76000028848648, + "p99": 46.76000028848648 + }, + "combine": { + "p50": 32.00000151991844, + "p90": 32.48000144958496, + "p95": 78.8009986281395, + "p99": 78.8009986281395 + }, + "roundtrip": { + "p50": 74.44000244140625, + "p90": 76.20099931955338, + "p95": 77.2010013461113, + "p99": 77.2010013461113 + }, + "isolatedSum": { + "p50": 76.00000128149986, + "p90": 78.96000146865845, + "p95": 125.56099891662598, + "p99": 125.56099891662598 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 45.32099887728691, + "p90": 47.359999269247055, + "p95": 49.639999866485596, + "p99": 49.639999866485596 + }, + "combine": { + "p50": 39.03999924659729, + "p90": 40.52000120282173, + "p95": 43.320998549461365, + "p99": 43.320998549461365 + }, + "roundtrip": { + "p50": 81.64100348949432, + "p90": 82.7609971165657, + "p95": 88.24100345373154, + "p99": 88.24100345373154 + }, + "isolatedSum": { + "p50": 84.3609981238842, + "p90": 87.88000047206879, + "p95": 92.96099841594696, + "p99": 92.96099841594696 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 51.48100107908249, + "p90": 53.52099984884262, + "p95": 55.720001459121704, + "p99": 55.720001459121704 + }, + "combine": { + "p50": 52.72100120782852, + "p90": 54.00000140070915, + "p95": 54.80000004172325, + "p99": 54.80000004172325 + }, + "roundtrip": { + "p50": 103.28099876642227, + "p90": 109.52100157737732, + "p95": 110.84099858999252, + "p99": 110.84099858999252 + }, + "isolatedSum": { + "p50": 104.20200228691101, + "p90": 107.52100124955177, + "p95": 110.52000150084496, + "p99": 110.52000150084496 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 72.48099893331528, + "p90": 72.6810023188591, + "p95": 72.80000299215317, + "p99": 72.80000299215317 + }, + "combine": { + "p50": 70.63999772071838, + "p90": 72.08099961280823, + "p95": 72.9610025882721, + "p99": 72.9610025882721 + }, + "roundtrip": { + "p50": 145.1209932565689, + "p90": 146.80099487304688, + "p95": 147.40100502967834, + "p99": 147.40100502967834 + }, + "isolatedSum": { + "p50": 143.12099665403366, + "p90": 144.76200193166733, + "p95": 145.76100558042526, + "p99": 145.76100558042526 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-d2f4487b", + "identity": 
"mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|decode|normal|none|none|0|tuned||49cdde01ffb13b2", + "colorKey": "mi355x_fd9fb9ed", + "comparisonKey": "4a28fc5d6d7d2406", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:20:50.913191+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "49cdde01ffb13b2", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": "2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 38.839999586343765, + "p90": 53.12100052833557, + "p95": 90.2400016784668, + "p99": 90.2400016784668 + }, + "combine": { + "p50": 16.96000061929226, + "p90": 17.35999993979931, + "p95": 17.559999600052834, + "p99": 17.559999600052834 + }, + "roundtrip": { + "p50": 49.64099824428558, + "p90": 50.08000135421753, + "p95": 50.76099932193756, + "p99": 50.76099932193756 + }, + "isolatedSum": { + "p50": 55.800000205636024, + "p90": 70.48100046813488, + "p95": 107.80000127851963, + "p99": 107.80000127851963 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 39.319999516010284, + "p90": 39.76000100374222, + "p95": 41.56100004911423, + "p99": 41.56100004911423 + }, + "combine": { + "p50": 17.601000145077705, + "p90": 18.360000103712082, + "p95": 19.360000267624855, + "p99": 19.360000267624855 + }, + "roundtrip": { + "p50": 52.40099877119064, + "p90": 53.0799999833107, + "p95": 58.68000164628029, + "p99": 58.68000164628029 + }, + "isolatedSum": { + "p50": 56.92099966108799, + "p90": 58.1200011074543, + "p95": 60.92100031673908, + "p99": 60.92100031673908 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 36.27999871969223, + "p90": 37.6800000667572, + "p95": 37.6800000667572, + "p99": 37.6800000667572 + }, + "combine": { + "p50": 
15.881000086665154, + "p90": 16.16000011563301, + "p95": 16.920000314712524, + "p99": 16.920000314712524 + }, + "roundtrip": { + "p50": 45.6399992108345, + "p90": 45.88000103831291, + "p95": 46.160001307725906, + "p99": 46.160001307725906 + }, + "isolatedSum": { + "p50": 52.160998806357384, + "p90": 53.84000018239021, + "p95": 54.60000038146973, + "p99": 54.60000038146973 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 38.759998977184296, + "p90": 39.400000125169754, + "p95": 40.95999896526337, + "p99": 40.95999896526337 + }, + "combine": { + "p50": 16.55999943614006, + "p90": 18.44000071287155, + "p95": 19.07999999821186, + "p99": 19.07999999821186 + }, + "roundtrip": { + "p50": 52.35999822616577, + "p90": 54.84100058674812, + "p95": 61.20099872350693, + "p99": 61.20099872350693 + }, + "isolatedSum": { + "p50": 55.319998413324356, + "p90": 57.840000838041306, + "p95": 60.03999896347523, + "p99": 60.03999896347523 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 39.91999849677086, + "p90": 42.92000085115433, + "p95": 46.31999880075455, + "p99": 46.31999880075455 + }, + "combine": { + "p50": 18.161000683903694, + "p90": 19.480999559164047, + "p95": 24.52000044286251, + "p99": 24.52000044286251 + }, + "roundtrip": { + "p50": 54.120998829603195, + "p90": 55.28099834918976, + "p95": 57.5999990105629, + "p99": 57.5999990105629 + }, + "isolatedSum": { + "p50": 58.08099918067455, + "p90": 62.401000410318375, + "p95": 70.83999924361706, + "p99": 70.83999924361706 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 16, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 39.76000100374222, + "p90": 41.04100167751312, + "p95": 41.15999862551689, + "p99": 41.15999862551689 + }, + "combine": { + "p50": 18.640000373125076, + "p90": 19.360000267624855, + "p95": 20.080000162124634, + "p99": 20.080000162124634 + }, + "roundtrip": { + "p50": 56.23999983072281, + "p90": 56.7610003054142, + "p95": 56.88000097870827, + "p99": 56.88000097870827 + }, + "isolatedSum": { + "p50": 58.400001376867294, + "p90": 60.40100194513798, + "p95": 61.239998787641525, + "p99": 61.239998787641525 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 39.80100154876709, + "p90": 40.11999815702438, + "p95": 41.79999977350235, + "p99": 41.79999977350235 + }, + "combine": { + "p50": 21.75999991595745, + "p90": 22.040000185370445, + "p95": 22.081000730395317, + "p99": 22.081000730395317 + }, + "roundtrip": { + "p50": 59.119999408721924, + "p90": 60.04000082612038, + "p95": 61.159998178482056, + "p99": 61.159998178482056 + }, + "isolatedSum": { + "p50": 61.56100146472454, + "p90": 62.15999834239483, + "p95": 63.88100050389767, + "p99": 63.88100050389767 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 39.96000066399574, + "p90": 41.760001331567764, + "p95": 45.32000049948692, + "p99": 
45.32000049948692 + }, + "combine": { + "p50": 26.799999177455902, + "p90": 27.68000029027462, + "p95": 30.44000081717968, + "p99": 30.44000081717968 + }, + "roundtrip": { + "p50": 64.92099910974503, + "p90": 65.67999720573425, + "p95": 70.32100111246109, + "p99": 70.32100111246109 + }, + "isolatedSum": { + "p50": 66.75999984145164, + "p90": 69.44000162184238, + "p95": 75.7600013166666, + "p99": 75.7600013166666 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 6, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-56c9df2c", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|hotspot-single|8|decode|normal|none|none|0|tuned||b6caf944f6bb621", + "colorKey": "mi355x_dec46f00", + "comparisonKey": "42f818ff7c400f1b", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:23:04.460913+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · hotspot-single", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "hotspot-single", + "routingLabel": "hotspot-single", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": 
"backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "b6caf944f6bb621", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": "2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 42.479999363422394, + "p90": 60.35999953746796, + "p95": 90.43999761343002, + "p99": 90.43999761343002 + }, + "combine": { + "p50": 18.880000337958336, + "p90": 19.999999552965164, + "p95": 22.71999977529049, + "p99": 22.71999977529049 + }, + "roundtrip": { + "p50": 55.72099983692169, + "p90": 58.3610013127327, + "p95": 65.15999883413315, + "p99": 65.15999883413315 + }, + "isolatedSum": { + "p50": 61.35999970138073, + "p90": 80.35999909043312, + "p95": 113.15999738872051, + "p99": 113.15999738872051 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 602112, + "combineLogicalBytes": 602112, + "fanoutMean": 5.25, + "recvTokensMax": 8, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 44.08000037074089, + "p90": 44.95999962091446, + "p95": 51.281001418828964, + "p99": 51.281001418828964 + }, + "combine": { + "p50": 17.839999869465828, + "p90": 18.200000748038292, + "p95": 19.88000050187111, + "p99": 19.88000050187111 + }, + "roundtrip": { + "p50": 58.841001242399216, + "p90": 59.76000055670738, + 
"p95": 62.04099953174591, + "p99": 62.04099953174591 + }, + "isolatedSum": { + "p50": 61.92000024020672, + "p90": 63.16000036895275, + "p95": 71.16100192070007, + "p99": 71.16100192070007 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1247232, + "combineLogicalBytes": 1247232, + "fanoutMean": 5.4375, + "recvTokensMax": 16, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 43.880000710487366, + "p90": 44.56000030040741, + "p95": 48.64099994301796, + "p99": 48.64099994301796 + }, + "combine": { + "p50": 20.600000396370888, + "p90": 21.199999377131462, + "p95": 21.479999646544456, + "p99": 21.479999646544456 + }, + "roundtrip": { + "p50": 61.719998717308044, + "p90": 62.44099885225296, + "p95": 68.20099800825119, + "p99": 68.20099800825119 + }, + "isolatedSum": { + "p50": 64.48000110685825, + "p90": 65.75999967753887, + "p95": 70.12099958956242, + "p99": 70.12099958956242 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2451456, + "combineLogicalBytes": 2451456, + "fanoutMean": 5.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 43.64100098609924, + "p90": 44.961001724004745, + "p95": 45.00000178813934, + "p99": 45.00000178813934 + }, + "combine": { + "p50": 21.800000220537186, + "p90": 23.159999400377274, + "p95": 28.55999954044819, + "p99": 28.55999954044819 + }, + "roundtrip": { + "p50": 63.48100304603577, + "p90": 63.60100209712982, + "p95": 64.24099951982498, + "p99": 64.24099951982498 + }, + "isolatedSum": { + "p50": 65.44100120663643, + "p90": 68.12100112438202, + "p95": 73.56000132858753, + "p99": 73.56000132858753 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4859904, + "combineLogicalBytes": 4859904, + "fanoutMean": 5.296875, + "recvTokensMax": 64, + "stragglerRank": 3, + "correct": 
true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 44.76099833846092, + "p90": 45.40099948644638, + "p95": 48.39999973773956, + "p99": 48.39999973773956 + }, + "combine": { + "p50": 27.799999341368675, + "p90": 28.240999206900597, + "p95": 28.279999271035194, + "p99": 28.279999271035194 + }, + "roundtrip": { + "p50": 69.52100247144699, + "p90": 69.96099650859833, + "p95": 74.87999647855759, + "p99": 74.87999647855759 + }, + "isolatedSum": { + "p50": 72.5609976798296, + "p90": 73.64199869334698, + "p95": 76.67999900877476, + "p99": 76.67999900877476 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9748480, + "combineLogicalBytes": 9748480, + "fanoutMean": 5.3125, + "recvTokensMax": 128, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.759999960660934, + "p90": 46.31999880075455, + "p95": 46.480000019073486, + "p99": 46.480000019073486 + }, + "combine": { + "p50": 35.360999405384064, + "p90": 36.559998989105225, + "p95": 37.87999972701073, + "p99": 37.87999972701073 + }, + "roundtrip": { + "p50": 78.88100296258926, + "p90": 80.08100092411041, + "p95": 80.20099997520447, + "p99": 80.20099997520447 + }, + "isolatedSum": { + "p50": 80.120999366045, + "p90": 82.87999778985977, + "p95": 84.35999974608421, + "p99": 84.35999974608421 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19525632, + "combineLogicalBytes": 19525632, + "fanoutMean": 5.3203125, + "recvTokensMax": 256, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 49.320999532938, + "p90": 52.76099964976311, + "p95": 54.28000167012215, + "p99": 54.28000167012215 + }, + "combine": { + "p50": 46.480998396873474, + "p90": 47.040000557899475, + "p95": 47.95999825000763, + "p99": 47.95999825000763 + }, + 
"roundtrip": { + "p50": 93.84100139141083, + "p90": 94.4409966468811, + "p95": 96.40099853277206, + "p99": 96.40099853277206 + }, + "isolatedSum": { + "p50": 95.80199792981148, + "p90": 99.80100020766258, + "p95": 102.23999992012978, + "p99": 102.23999992012978 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38621184, + "combineLogicalBytes": 38621184, + "fanoutMean": 5.26171875, + "recvTokensMax": 512, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 68.84100288152695, + "p90": 69.36000287532806, + "p95": 70.39999961853027, + "p99": 70.39999961853027 + }, + "combine": { + "p50": 64.99999761581421, + "p90": 67.19999760389328, + "p95": 70.6000030040741, + "p99": 70.6000030040741 + }, + "roundtrip": { + "p50": 134.40200686454773, + "p90": 134.60099697113037, + "p95": 135.88100671768188, + "p99": 135.88100671768188 + }, + "isolatedSum": { + "p50": 133.84100049734116, + "p90": 136.56000047922134, + "p95": 141.00000262260437, + "p99": 141.00000262260437 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 78102528, + "combineLogicalBytes": 78102528, + "fanoutMean": 5.3203125, + "recvTokensMax": 1024, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-f1837794", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf|8|decode|normal|none|none|0|tuned||14ded8461f2636c", + "colorKey": "mi355x_c88e2d91", + "comparisonKey": "d2731ba6943a8849", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:21:48.029275+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": 
"mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · zipf", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf", + "routingLabel": "zipf", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "14ded8461f2636c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": "2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 41.760001331567764, + "p90": 63.519999384880066, + "p95": 85.87999641895294, + "p99": 85.87999641895294 + }, + "combine": { + "p50": 18.239999189972878, + "p90": 18.959999084472656, + "p95": 23.48100021481514, + "p99": 23.48100021481514 + }, + "roundtrip": { + "p50": 53.68100106716156, + "p90": 54.92100119590759, + "p95": 59.28000062704086, + "p99": 59.28000062704086 + }, + "isolatedSum": { + "p50": 60.00000052154064, + "p90": 82.47999846935272, + "p95": 109.36099663376808, + "p99": 
109.36099663376808 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 444416, + "combineLogicalBytes": 444416, + "fanoutMean": 3.875, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.88100078701973, + "p90": 43.35999861359596, + "p95": 45.52000015974045, + "p99": 45.52000015974045 + }, + "combine": { + "p50": 17.839999869465828, + "p90": 18.279999494552612, + "p95": 19.720999523997307, + "p99": 19.720999523997307 + }, + "roundtrip": { + "p50": 56.880999356508255, + "p90": 59.080999344587326, + "p95": 59.20099839568138, + "p99": 59.20099839568138 + }, + "isolatedSum": { + "p50": 60.72100065648556, + "p90": 61.639998108148575, + "p95": 65.24099968373775, + "p99": 65.24099968373775 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 845824, + "combineLogicalBytes": 845824, + "fanoutMean": 3.6875, + "recvTokensMax": 16, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 44.12100091576576, + "p90": 44.920001178979874, + "p95": 45.921001583337784, + "p99": 45.921001583337784 + }, + "combine": { + "p50": 20.160000771284103, + "p90": 21.84000052511692, + "p95": 22.520000115036964, + "p99": 22.520000115036964 + }, + "roundtrip": { + "p50": 60.28100103139877, + "p90": 61.840999871492386, + "p95": 62.36099824309349, + "p99": 62.36099824309349 + }, + "isolatedSum": { + "p50": 64.28100168704987, + "p90": 66.7600017040968, + "p95": 68.44100169837475, + "p99": 68.44100169837475 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1691648, + "combineLogicalBytes": 1691648, + "fanoutMean": 3.6875, + "recvTokensMax": 32, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 45.0810007750988, + "p90": 46.08000069856644, + "p95": 
47.961000353097916, + "p99": 47.961000353097916 + }, + "combine": { + "p50": 21.320000290870667, + "p90": 22.360000759363174, + "p95": 22.760000079870224, + "p99": 22.760000079870224 + }, + "roundtrip": { + "p50": 61.000000685453415, + "p90": 62.80100345611572, + "p95": 63.72100114822388, + "p99": 63.72100114822388 + }, + "isolatedSum": { + "p50": 66.40100106596947, + "p90": 68.44000145792961, + "p95": 70.72100043296814, + "p99": 70.72100043296814 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3354624, + "combineLogicalBytes": 3354624, + "fanoutMean": 3.65625, + "recvTokensMax": 64, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 44.72000151872635, + "p90": 46.00000008940697, + "p95": 47.76100069284439, + "p99": 47.76100069284439 + }, + "combine": { + "p50": 25.119999423623085, + "p90": 26.200000196695328, + "p95": 30.03999963402748, + "p99": 30.03999963402748 + }, + "roundtrip": { + "p50": 66.48000329732895, + "p90": 67.6409974694252, + "p95": 70.47999650239944, + "p99": 70.47999650239944 + }, + "isolatedSum": { + "p50": 69.84000094234943, + "p90": 72.2000002861023, + "p95": 77.80100032687187, + "p99": 77.80100032687187 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 6537216, + "combineLogicalBytes": 6537216, + "fanoutMean": 3.5625, + "recvTokensMax": 127, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.360000640153885, + "p90": 45.8809994161129, + "p95": 47.15999960899353, + "p99": 47.15999960899353 + }, + "combine": { + "p50": 32.919999212026596, + "p90": 36.3209992647171, + "p95": 39.20099884271622, + "p99": 39.20099884271622 + }, + "roundtrip": { + "p50": 75.24099946022034, + "p90": 78.04100215435028, + "p95": 78.72000336647034, + "p99": 78.72000336647034 + }, + "isolatedSum": { + "p50": 77.27999985218048, + "p90": 
82.20199868083, + "p95": 86.36099845170975, + "p99": 86.36099845170975 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 12859392, + "combineLogicalBytes": 12859392, + "fanoutMean": 3.50390625, + "recvTokensMax": 255, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 50.48099905252457, + "p90": 56.8000003695488, + "p95": 60.80099940299988, + "p99": 60.80099940299988 + }, + "combine": { + "p50": 44.16000097990036, + "p90": 44.47999969124794, + "p95": 44.759999960660934, + "p99": 44.759999960660934 + }, + "roundtrip": { + "p50": 90.2400016784668, + "p90": 90.52100032567978, + "p95": 94.28100287914276, + "p99": 94.28100287914276 + }, + "isolatedSum": { + "p50": 94.64100003242493, + "p90": 101.28000006079674, + "p95": 105.56099936366081, + "p99": 105.56099936366081 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 25145344, + "combineLogicalBytes": 25145344, + "fanoutMean": 3.42578125, + "recvTokensMax": 510, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 66.20100140571594, + "p90": 67.60100275278091, + "p95": 69.72099840641022, + "p99": 69.72099840641022 + }, + "combine": { + "p50": 61.879999935626984, + "p90": 63.24099749326706, + "p95": 65.76000154018402, + "p99": 65.76000154018402 + }, + "roundtrip": { + "p50": 127.80100107192993, + "p90": 129.92100417613983, + "p95": 131.28100335597992, + "p99": 131.28100335597992 + }, + "isolatedSum": { + "p50": 128.08100134134293, + "p90": 130.84200024604797, + "p95": 135.48099994659424, + "p99": 135.48099994659424 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 49946624, + "combineLogicalBytes": 49946624, + "fanoutMean": 3.40234375, + "recvTokensMax": 1022, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-07b57d9f", + 
"identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|zipf-heavy|8|decode|normal|none|none|0|tuned||22da8b58646609c", + "colorKey": "mi355x_5666a1ab", + "comparisonKey": "d01d002111fbefcd", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:22:26.110666+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · zipf-heavy", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "zipf-heavy", + "routingLabel": "zipf-heavy", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "22da8b58646609c", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": 
"2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 41.680000722408295, + "p90": 63.23999911546707, + "p95": 78.12099903821945, + "p99": 78.12099903821945 + }, + "combine": { + "p50": 17.839999869465828, + "p90": 18.479999154806137, + "p95": 18.60000006854534, + "p99": 18.60000006854534 + }, + "roundtrip": { + "p50": 52.15999856591225, + "p90": 56.23999983072281, + "p95": 57.121001183986664, + "p99": 57.121001183986664 + }, + "isolatedSum": { + "p50": 59.52000059187412, + "p90": 81.71999827027321, + "p95": 96.7209991067648, + "p99": 96.7209991067648 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 172032, + "combineLogicalBytes": 172032, + "fanoutMean": 1.5, + "recvTokensMax": 8, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 41.280001401901245, + "p90": 41.56000167131424, + "p95": 43.800998479127884, + "p99": 43.800998479127884 + }, + "combine": { + "p50": 18.279999494552612, + "p90": 18.519999459385872, + "p95": 19.55999992787838, + "p99": 19.55999992787838 + }, + "roundtrip": { + "p50": 53.32000181078911, + "p90": 53.8799986243248, + "p95": 54.999999701976776, + "p99": 54.999999701976776 + }, + "isolatedSum": { + "p50": 59.56000089645386, + "p90": 60.08000113070011, + "p95": 63.360998407006264, + "p99": 63.360998407006264 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 315392, + "combineLogicalBytes": 315392, + "fanoutMean": 1.375, + "recvTokensMax": 16, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 43.800998479127884, + "p90": 44.72000151872635, + "p95": 47.44099825620651, + "p99": 47.44099825620651 + }, + "combine": { + "p50": 19.920000806450844, + "p90": 22.040000185370445, + "p95": 22.5600004196167, + "p99": 
22.5600004196167 + }, + "roundtrip": { + "p50": 57.20100179314613, + "p90": 59.801001101732254, + "p95": 61.76000088453293, + "p99": 61.76000088453293 + }, + "isolatedSum": { + "p50": 63.72099928557873, + "p90": 66.7600017040968, + "p95": 70.00099867582321, + "p99": 70.00099867582321 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 616448, + "combineLogicalBytes": 616448, + "fanoutMean": 1.34375, + "recvTokensMax": 32, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 41.600000113248825, + "p90": 43.28000172972679, + "p95": 47.04099893569946, + "p99": 47.04099893569946 + }, + "combine": { + "p50": 20.281000062823296, + "p90": 21.040000021457672, + "p95": 22.23999984562397, + "p99": 22.23999984562397 + }, + "roundtrip": { + "p50": 59.36099961400032, + "p90": 62.6010000705719, + "p95": 65.20099937915802, + "p99": 65.20099937915802 + }, + "isolatedSum": { + "p50": 61.88100017607212, + "p90": 64.32000175118446, + "p95": 69.28099878132343, + "p99": 69.28099878132343 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1376256, + "combineLogicalBytes": 1376256, + "fanoutMean": 1.5, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 41.68099910020828, + "p90": 42.64099895954132, + "p95": 46.08000069856644, + "p99": 46.08000069856644 + }, + "combine": { + "p50": 25.439999997615814, + "p90": 27.319999411702156, + "p95": 31.3199982047081, + "p99": 31.3199982047081 + }, + "roundtrip": { + "p50": 63.120998442173004, + "p90": 65.32000005245209, + "p95": 65.88000059127808, + "p99": 65.88000059127808 + }, + "isolatedSum": { + "p50": 67.1209990978241, + "p90": 69.96099837124348, + "p95": 77.39999890327454, + "p99": 77.39999890327454 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2781184, + "combineLogicalBytes": 2781184, 
+ "fanoutMean": 1.515625, + "recvTokensMax": 128, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 42.80000180006027, + "p90": 43.55999827384949, + "p95": 43.71999949216843, + "p99": 43.71999949216843 + }, + "combine": { + "p50": 30.559999868273735, + "p90": 32.07999840378761, + "p95": 34.60000082850456, + "p99": 34.60000082850456 + }, + "roundtrip": { + "p50": 70.8409994840622, + "p90": 73.08000326156616, + "p95": 73.56099784374237, + "p99": 73.56099784374237 + }, + "isolatedSum": { + "p50": 73.36000166833401, + "p90": 75.6399966776371, + "p95": 78.32000032067299, + "p99": 78.32000032067299 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 5533696, + "combineLogicalBytes": 5533696, + "fanoutMean": 1.5078125, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 49.67999830842018, + "p90": 50.48000067472458, + "p95": 51.360998302698135, + "p99": 51.360998302698135 + }, + "combine": { + "p50": 41.24100133776665, + "p90": 41.6410006582737, + "p95": 43.480999767780304, + "p99": 43.480999767780304 + }, + "roundtrip": { + "p50": 85.08100360631943, + "p90": 87.88099884986877, + "p95": 89.20100331306458, + "p99": 89.20100331306458 + }, + "isolatedSum": { + "p50": 90.92099964618683, + "p90": 92.12100133299828, + "p95": 94.84199807047844, + "p99": 94.84199807047844 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 11210752, + "combineLogicalBytes": 11210752, + "fanoutMean": 1.52734375, + "recvTokensMax": 512, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 64.32099640369415, + "p90": 64.96000289916992, + "p95": 65.48000127077103, + "p99": 65.48000127077103 + }, + "combine": { + "p50": 61.441000550985336, + "p90": 
61.99999898672104, + "p95": 63.87999653816223, + "p99": 63.87999653816223 + }, + "roundtrip": { + "p50": 124.88099932670593, + "p90": 126.76100432872772, + "p95": 128.00100445747375, + "p99": 128.00100445747375 + }, + "isolatedSum": { + "p50": 125.76199695467949, + "p90": 126.96000188589096, + "p95": 129.35999780893326, + "p99": 129.35999780893326 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 22650880, + "combineLogicalBytes": 22650880, + "fanoutMean": 1.54296875, + "recvTokensMax": 1024, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-6fdaeedc", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|decode|normal|none|none|0|normalized|0.18|c774c8e4abb34da", + "colorKey": "mi355x_445f1429", + "comparisonKey": "4696cf21182b4839", + "schemaVersion": 4, + "generatedAt": "2026-07-02T20:57:36.572169+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi355x-amds_05", + "sku": "mi355x", + "backend": "mori", + "phase": "decode", + "mode": "normal", + "resourceMode": "normalized", + "suite": "resource-constrained", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 (norm)", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": 0.18, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "resource-constrained", + "conformanceClass": "minimum-functional", + "fixedKernel": false, + "paretoEligible": false + }, + 
"placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "c774c8e4abb34da", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28620413991", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28620413991", + "createdAt": "2026-07-02T20:44:56Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 40.32000154256821, + "p90": 43.07999834418297, + "p95": 44.520001858472824, + "p99": 47.55999892950058 + }, + "combine": { + "p50": 17.361000180244446, + "p90": 18.640000373125076, + "p95": 20.0399998575449, + "p99": 23.04000034928322 + }, + "roundtrip": { + "p50": 56.0000017285347, + "p90": 58.921001851558685, + "p95": 60.16099825501442, + "p99": 62.64100223779678 + }, + "isolatedSum": { + "p50": 57.68100172281265, + "p90": 61.719998717308044, + "p95": 64.56000171601772, + "p99": 70.5999992787838 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 42.32100024819374, + "p90": 45.20000144839287, + "p95": 46.51999846100807, + "p99": 49.600999802351 + }, + "combine": { + "p50": 16.279999166727066, + "p90": 18.319999799132347, + "p95": 19.360000267624855, + "p99": 21.920999512076378 + }, + "roundtrip": { + "p50": 58.43999981880188, + "p90": 61.601001769304276, + "p95": 62.519997358322144, + "p99": 65.36100059747696 + }, + "isolatedSum": { + 
"p50": 58.60099941492081, + "p90": 63.520001247525215, + "p95": 65.87999872863293, + "p99": 71.52199931442738 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 42.27999970316887, + "p90": 45.00000178813934, + "p95": 46.11999914050102, + "p99": 48.64099994301796 + }, + "combine": { + "p50": 18.92000064253807, + "p90": 20.919999107718468, + "p95": 22.08000048995018, + "p99": 24.119999259710312 + }, + "roundtrip": { + "p50": 61.64000183343887, + "p90": 64.76099789142609, + "p95": 65.80100208520889, + "p99": 68.12000274658203 + }, + "isolatedSum": { + "p50": 61.20000034570694, + "p90": 65.92000089585781, + "p95": 68.1999996304512, + "p99": 72.76099920272827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 42.24099963903427, + "p90": 45.04099860787392, + "p95": 46.720001846551895, + "p99": 49.36100170016289 + }, + "combine": { + "p50": 20.560000091791153, + "p90": 22.87999913096428, + "p95": 23.800000548362732, + "p99": 26.87999978661537 + }, + "roundtrip": { + "p50": 62.12100014090538, + "p90": 65.67999720573425, + "p95": 66.24100357294083, + "p99": 68.03999841213226 + }, + "isolatedSum": { + "p50": 62.800999730825424, + "p90": 67.9209977388382, + "p95": 70.52000239491463, + "p99": 76.24100148677826 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + "fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + }, + { + "tokensPerRank": 16, + 
"globalTokens": 128, + "dispatch": { + "p50": 42.479999363422394, + "p90": 45.40000110864639, + "p95": 46.560000628232956, + "p99": 48.840999603271484 + }, + "combine": { + "p50": 24.6799997985363, + "p90": 27.000999078154564, + "p95": 27.879999950528145, + "p99": 29.839999973773956 + }, + "roundtrip": { + "p50": 67.9209977388382, + "p90": 71.03999704122543, + "p95": 71.72100245952606, + "p99": 74.00000095367432 + }, + "isolatedSum": { + "p50": 67.1599991619587, + "p90": 72.40100018680096, + "p95": 74.4400005787611, + "p99": 78.68099957704544 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 600, + "trials": 3 + } + ] + }, + { + "id": "cx-3484c4a3", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|uniform|8|prefill|normal|none|none|0|tuned||2768bdde6bf7e7f", + "colorKey": "mi355x_96ee5dfa", + "comparisonKey": "b2be0e8e0049ee85", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:19:16.741519+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "uniform", + "routingLabel": "uniform", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + 
"deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "2768bdde6bf7e7f", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": "2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 73.04000109434128, + "p90": 79.6009972691536, + "p95": 80.20000159740448, + "p99": 80.20000159740448 + }, + "combine": { + "p50": 18.321000039577484, + "p90": 19.31999996304512, + "p95": 19.95999924838543, + "p99": 19.95999924838543 + }, + "roundtrip": { + "p50": 55.640000849962234, + "p90": 58.841001242399216, + "p95": 59.96000021696091, + "p99": 59.96000021696091 + }, + "isolatedSum": { + "p50": 91.36100113391876, + "p90": 98.92099723219872, + "p95": 100.16000084578991, + "p99": 100.16000084578991 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 630784, + "combineLogicalBytes": 630784, + "fanoutMean": 5.5, + "recvTokensMax": 7, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 43.68000105023384, + "p90": 48.28000068664551, + "p95": 49.64099824428558, + "p99": 49.64099824428558 + }, + "combine": { + "p50": 18.360000103712082, + "p90": 18.99999938905239, + "p95": 19.200000911951065, + "p99": 19.200000911951065 + }, + 
"roundtrip": { + "p50": 59.31999906897545, + "p90": 59.99999865889549, + "p95": 61.799999326467514, + "p99": 61.799999326467514 + }, + "isolatedSum": { + "p50": 62.04000115394592, + "p90": 67.2800000756979, + "p95": 68.84099915623665, + "p99": 68.84099915623665 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1232896, + "combineLogicalBytes": 1232896, + "fanoutMean": 5.375, + "recvTokensMax": 13, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 43.23999956250191, + "p90": 46.20100185275078, + "p95": 46.480000019073486, + "p99": 46.480000019073486 + }, + "combine": { + "p50": 20.0399998575449, + "p90": 21.12000063061714, + "p95": 22.321000695228577, + "p99": 22.321000695228577 + }, + "roundtrip": { + "p50": 62.36099824309349, + "p90": 65.08000195026398, + "p95": 66.40099734067917, + "p99": 66.40099734067917 + }, + "isolatedSum": { + "p50": 63.279999420046806, + "p90": 67.32100248336792, + "p95": 68.80100071430206, + "p99": 68.80100071430206 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 2480128, + "combineLogicalBytes": 2480128, + "fanoutMean": 5.40625, + "recvTokensMax": 29, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 44.39999908208847, + "p90": 46.72100022435188, + "p95": 46.84000089764595, + "p99": 46.84000089764595 + }, + "combine": { + "p50": 22.280000150203705, + "p90": 23.600000888109207, + "p95": 28.039999306201935, + "p99": 28.039999306201935 + }, + "roundtrip": { + "p50": 63.040003180503845, + "p90": 65.88000059127808, + "p95": 68.59999895095825, + "p99": 68.59999895095825 + }, + "isolatedSum": { + "p50": 66.67999923229218, + "p90": 70.32100111246109, + "p95": 74.88000020384789, + "p99": 74.88000020384789 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 4974592, + "combineLogicalBytes": 4974592, + 
"fanoutMean": 5.421875, + "recvTokensMax": 47, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 44.520001858472824, + "p90": 47.200001776218414, + "p95": 47.47999832034111, + "p99": 47.47999832034111 + }, + "combine": { + "p50": 25.0410009175539, + "p90": 29.160000383853912, + "p95": 30.36000020802021, + "p99": 30.36000020802021 + }, + "roundtrip": { + "p50": 68.20099800825119, + "p90": 70.3200027346611, + "p95": 74.8009979724884, + "p99": 74.8009979724884 + }, + "isolatedSum": { + "p50": 69.56100277602673, + "p90": 76.36000216007233, + "p95": 77.83999852836132, + "p99": 77.83999852836132 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 9920512, + "combineLogicalBytes": 9920512, + "fanoutMean": 5.40625, + "recvTokensMax": 92, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.52100023627281, + "p90": 45.159999281167984, + "p95": 45.48100009560585, + "p99": 45.48100009560585 + }, + "combine": { + "p50": 32.32099860906601, + "p90": 35.440001636743546, + "p95": 35.599999129772186, + "p99": 35.599999129772186 + }, + "roundtrip": { + "p50": 75.64099878072739, + "p90": 77.7600035071373, + "p95": 77.80099660158157, + "p99": 77.80099660158157 + }, + "isolatedSum": { + "p50": 76.84199884533882, + "p90": 80.60000091791153, + "p95": 81.08099922537804, + "p99": 81.08099922537804 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 19726336, + "combineLogicalBytes": 19726336, + "fanoutMean": 5.375, + "recvTokensMax": 182, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 46.92000150680542, + "p90": 48.31999912858009, + "p95": 48.32100123167038, + "p99": 48.32100123167038 + }, + "combine": { + "p50": 41.72100126743317, + "p90": 
41.79999977350235, + "p95": 42.36000031232834, + "p99": 42.36000031232834 + }, + "roundtrip": { + "p50": 83.96100252866745, + "p90": 84.63999629020691, + "p95": 87.8399983048439, + "p99": 87.8399983048439 + }, + "isolatedSum": { + "p50": 88.64100277423859, + "p90": 90.11999890208244, + "p95": 90.68100154399872, + "p99": 90.68100154399872 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 38993920, + "combineLogicalBytes": 38993920, + "fanoutMean": 5.3125, + "recvTokensMax": 367, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 59.640999883413315, + "p90": 61.04100123047829, + "p95": 62.83999979496002, + "p99": 62.83999979496002 + }, + "combine": { + "p50": 52.20000073313713, + "p90": 52.560001611709595, + "p95": 53.75999957323074, + "p99": 53.75999957323074 + }, + "roundtrip": { + "p50": 113.2809966802597, + "p90": 113.95999789237976, + "p95": 115.92099815607071, + "p99": 115.92099815607071 + }, + "isolatedSum": { + "p50": 111.84100061655045, + "p90": 113.60100284218788, + "p95": 116.59999936819077, + "p99": 116.59999936819077 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 77672448, + "combineLogicalBytes": 77672448, + "fanoutMean": 5.291015625, + "recvTokensMax": 723, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 87.24100142717361, + "p90": 88.48000317811966, + "p95": 89.12099897861481, + "p99": 89.12099897861481 + }, + "combine": { + "p50": 100.92099756002426, + "p90": 102.9210016131401, + "p95": 105.48099875450134, + "p99": 105.48099875450134 + }, + "roundtrip": { + "p50": 191.00099802017212, + "p90": 192.6019936800003, + "p95": 193.56100261211395, + "p99": 193.56100261211395 + }, + "isolatedSum": { + "p50": 188.16199898719788, + "p90": 191.40100479125977, + "p95": 194.60199773311615, + "p99": 194.60199773311615 + }, + 
"roundtripMeasured": true, + "dispatchLogicalBytes": 155889664, + "combineLogicalBytes": 155889664, + "fanoutMean": 5.3095703125, + "recvTokensMax": 1422, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 145.84100246429443, + "p90": 148.40100705623627, + "p95": 148.44100177288055, + "p99": 148.44100177288055 + }, + "combine": { + "p50": 111.48100346326828, + "p90": 117.56099760532379, + "p95": 121.04099988937378, + "p99": 121.04099988937378 + }, + "roundtrip": { + "p50": 269.322007894516, + "p90": 271.84200286865234, + "p95": 272.40198850631714, + "p99": 272.40198850631714 + }, + "isolatedSum": { + "p50": 257.3220059275627, + "p90": 265.96200466156006, + "p95": 269.48200166225433, + "p99": 269.48200166225433 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 312266752, + "combineLogicalBytes": 312266752, + "fanoutMean": 5.31787109375, + "recvTokensMax": 2779, + "stragglerRank": 5, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-79e88971", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced|8|prefill|normal|none|none|0|tuned||d9340f721d4da42", + "colorKey": "mi355x_d3fc5952", + "comparisonKey": "040b3ed6554d0fe7", + "schemaVersion": 4, + "generatedAt": "2026-07-02T12:20:12.625504+00:00", + "status": "valid", + "publicationStatus": "diagnostic", + "runner": "mi355x-amds_07", + "sku": "mi355x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · balanced", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced", + "routingLabel": "balanced", + 
"routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "d9340f721d4da42", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28587176302", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28587176302", + "createdAt": "2026-07-02T11:40:06Z", + "sha": "5668635d37ace0cd9f27c05d37b02782d1304696" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 78.56100052595139, + "p90": 80.48100024461746, + "p95": 84.1199979186058, + "p99": 84.1199979186058 + }, + "combine": { + "p50": 18.959999084472656, + "p90": 19.40000057220459, + "p95": 19.64000053703785, + "p99": 19.64000053703785 + }, + "roundtrip": { + "p50": 59.04100090265274, + "p90": 61.24100089073181, + "p95": 61.99999898672104, + "p99": 61.99999898672104 + }, + "isolatedSum": { + "p50": 97.52099961042404, + "p90": 99.88100081682205, + "p95": 103.75999845564365, + "p99": 103.75999845564365 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 8, + "recvTokensMax": 8, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + 
"tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 44.520001858472824, + "p90": 45.96000164747238, + "p95": 46.31999880075455, + "p99": 46.31999880075455 + }, + "combine": { + "p50": 18.479999154806137, + "p90": 18.640000373125076, + "p95": 18.880000337958336, + "p99": 18.880000337958336 + }, + "roundtrip": { + "p50": 60.120001435279846, + "p90": 61.840999871492386, + "p95": 64.08099830150604, + "p99": 64.08099830150604 + }, + "isolatedSum": { + "p50": 63.00000101327896, + "p90": 64.60000202059746, + "p95": 65.19999913871288, + "p99": 65.19999913871288 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 8, + "recvTokensMax": 16, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 44.119998812675476, + "p90": 44.520001858472824, + "p95": 45.88000103831291, + "p99": 45.88000103831291 + }, + "combine": { + "p50": 22.95999974012375, + "p90": 24.600999429821968, + "p95": 25.119999423623085, + "p99": 25.119999423623085 + }, + "roundtrip": { + "p50": 65.12100249528885, + "p90": 66.64100289344788, + "p95": 67.00000166893005, + "p99": 67.00000166893005 + }, + "isolatedSum": { + "p50": 67.07999855279922, + "p90": 69.12100128829479, + "p95": 71.000000461936, + "p99": 71.000000461936 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 8, + "recvTokensMax": 32, + "stragglerRank": 4, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 43.480001389980316, + "p90": 44.5609986782074, + "p95": 46.799998730421066, + "p99": 46.799998730421066 + }, + "combine": { + "p50": 25.599999353289604, + "p90": 28.641000390052795, + "p95": 29.400000348687172, + "p99": 29.400000348687172 + }, + "roundtrip": { + "p50": 66.56099855899811, + "p90": 67.76099652051926, + 
"p95": 67.96000152826309, + "p99": 67.96000152826309 + }, + "isolatedSum": { + "p50": 69.08000074326992, + "p90": 73.20199906826019, + "p95": 76.19999907910824, + "p99": 76.19999907910824 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 8, + "recvTokensMax": 64, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 44.319998472929, + "p90": 44.319998472929, + "p95": 46.99999839067459, + "p99": 46.99999839067459 + }, + "combine": { + "p50": 30.03999963402748, + "p90": 31.04100003838539, + "p95": 32.999999821186066, + "p99": 32.999999821186066 + }, + "roundtrip": { + "p50": 72.4409967660904, + "p90": 72.9610025882721, + "p95": 73.64100217819214, + "p99": 73.64100217819214 + }, + "isolatedSum": { + "p50": 74.35999810695648, + "p90": 75.36099851131439, + "p95": 79.99999821186066, + "p99": 79.99999821186066 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 8, + "recvTokensMax": 128, + "stragglerRank": 7, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 44.87999901175499, + "p90": 45.16100138425827, + "p95": 45.52000015974045, + "p99": 45.52000015974045 + }, + "combine": { + "p50": 37.76000067591667, + "p90": 38.55999931693077, + "p95": 38.95999863743782, + "p99": 38.95999863743782 + }, + "roundtrip": { + "p50": 81.79999887943268, + "p90": 85.00000089406967, + "p95": 85.7200026512146, + "p99": 85.7200026512146 + }, + "isolatedSum": { + "p50": 82.63999968767166, + "p90": 83.72100070118904, + "p95": 84.47999879717827, + "p99": 84.47999879717827 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 8, + "recvTokensMax": 256, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 
8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 53.72000113129616, + "p90": 54.71999943256378, + "p95": 55.36099895834923, + "p99": 55.36099895834923 + }, + "combine": { + "p50": 49.400001764297485, + "p90": 50.31999945640564, + "p95": 54.60000038146973, + "p99": 54.60000038146973 + }, + "roundtrip": { + "p50": 100.68099945783615, + "p90": 102.32099890708923, + "p95": 103.80099713802338, + "p99": 103.80099713802338 + }, + "isolatedSum": { + "p50": 103.12000289559364, + "p90": 105.03999888896942, + "p95": 109.96099933981895, + "p99": 109.96099933981895 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 8, + "recvTokensMax": 512, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 73.80100339651108, + "p90": 75.20099729299545, + "p95": 76.4010027050972, + "p99": 76.4010027050972 + }, + "combine": { + "p50": 69.56099718809128, + "p90": 72.24100083112717, + "p95": 73.04099947214127, + "p99": 73.04099947214127 + }, + "roundtrip": { + "p50": 143.5209959745407, + "p90": 146.28100395202637, + "p95": 147.84200489521027, + "p99": 147.84200489521027 + }, + "isolatedSum": { + "p50": 143.36200058460236, + "p90": 147.44199812412262, + "p95": 149.44200217723846, + "p99": 149.44200217723846 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 117440512, + "combineLogicalBytes": 117440512, + "fanoutMean": 8, + "recvTokensMax": 1024, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + "dispatch": { + "p50": 110.76000332832336, + "p90": 111.52099817991257, + "p95": 116.28100275993347, + "p99": 116.28100275993347 + }, + "combine": { + "p50": 130.88099658489227, + "p90": 132.4010044336319, + "p95": 132.88100063800812, + "p99": 132.88100063800812 + }, + "roundtrip": { + 
"p50": 249.6819943189621, + "p90": 251.64300203323364, + "p95": 257.48199224472046, + "p99": 257.48199224472046 + }, + "isolatedSum": { + "p50": 241.64099991321564, + "p90": 243.92200261354446, + "p95": 249.1620033979416, + "p99": 249.1620033979416 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 234881024, + "combineLogicalBytes": 234881024, + "fanoutMean": 8, + "recvTokensMax": 2048, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 191.64100289344788, + "p90": 192.24199652671814, + "p95": 194.08200681209564, + "p99": 194.08200681209564 + }, + "combine": { + "p50": 149.0810066461563, + "p90": 149.3619978427887, + "p95": 149.40199255943298, + "p99": 149.40199255943298 + }, + "roundtrip": { + "p50": 351.44299268722534, + "p90": 356.5230071544647, + "p95": 356.72399401664734, + "p99": 356.72399401664734 + }, + "isolatedSum": { + "p50": 340.7220095396042, + "p90": 341.60399436950684, + "p95": 343.4839993715286, + "p99": 343.4839993715286 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 469762048, + "combineLogicalBytes": 469762048, + "fanoutMean": 8, + "recvTokensMax": 4096, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + }, + { + "id": "cx-c1855427", + "identity": "mi355x|mori|n-a|7168|8|256|bf16|normal|layout-and-dispatch-v1|balanced-rank-local|8|prefill|normal|none|none|0|tuned||8d00f8e483037b6", + "colorKey": "mi355x_fd9fb9ed", + "comparisonKey": "fbef33bfe8d3cfa6", + "schemaVersion": 3, + "generatedAt": "2026-07-02T10:01:43.325656+00:00", + "status": "valid", + "publicationStatus": "comparable-experimental", + "runner": "mi355x-amds_06", + "sku": "mi355x", + "backend": "mori", + "phase": "prefill", + "mode": "normal", + "resourceMode": "tuned", + "suite": "backend-default", + "comparisonClass": "standardized", + "measurementContract": "layout-and-dispatch-v1", + "topologyClass": "mi355x-xgmi", + 
"transport": "xgmi", + "worldSize": 8, + "epSize": 8, + "label": "MI355X EP8 · mori · bf16 · balanced-rank-local", + "model": "DeepSeek-V3/V4", + "shape": { + "hidden": 7168, + "topk": 8, + "experts": 256, + "routing": "balanced-rank-local", + "routingLabel": "balanced-rank-local", + "routingStep": 0, + "unevenTokens": "none", + "eplbEnabled": false, + "dispatchDtype": "bf16", + "kernelGeneration": "n-a", + "activationProfile": "normal", + "combineQuantMode": "none" + }, + "resourceProfile": { + "requestedFraction": null, + "achievedFraction": 0.3125, + "configuredUnits": 80, + "deviceUnits": 256, + "resourceClass": "backend-tuned", + "conformanceClass": "backend-default", + "fixedKernel": false, + "paretoEligible": false + }, + "placement": { + "kind": "packed", + "nodes": 2, + "gpusPerNode": 8, + "scaleUpDomain": 8 + }, + "routingConsistent": true, + "traceSignature": "8d00f8e483037b6", + "workloadId": null, + "workloadSource": "seeded-runtime", + "eplbImbalanceBefore": null, + "eplbImbalanceAfter": null, + "backendVersion": "image:rocm/sgl-dev:sglang-0.5.9-rocm720-mi35x-mori-0227-2", + "imageDigest": "sha256:061fb71f838e82000a1768c159654d526c2f17ebe751c21e7fc48ca53c8ef975", + "repository": "SemiAnalysisAI/InferenceX", + "run": { + "id": "28577799750", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577799750", + "createdAt": "2026-07-02T08:53:54Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + }, + "rows": [ + { + "tokensPerRank": 1, + "globalTokens": 8, + "dispatch": { + "p50": 45.921001583337784, + "p90": 90.68000316619873, + "p95": 94.03999894857407, + "p99": 94.03999894857407 + }, + "combine": { + "p50": 15.399999916553497, + "p90": 17.75999926030636, + "p95": 18.640000373125076, + "p99": 18.640000373125076 + }, + "roundtrip": { + "p50": 49.75999891757965, + "p90": 51.920000463724136, + "p95": 55.240001529455185, + "p99": 55.240001529455185 + }, + "isolatedSum": { + "p50": 61.32100149989128, + "p90": 108.44000242650509, + "p95": 
112.67999932169914, + "p99": 112.67999932169914 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 114688, + "combineLogicalBytes": 114688, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 2, + "globalTokens": 16, + "dispatch": { + "p50": 39.88000005483627, + "p90": 40.800001472234726, + "p95": 41.80099815130234, + "p99": 41.80099815130234 + }, + "combine": { + "p50": 13.60000018030405, + "p90": 14.519999735057354, + "p95": 15.279999934136868, + "p99": 15.279999934136868 + }, + "roundtrip": { + "p50": 48.87999966740608, + "p90": 49.44099858403206, + "p95": 49.60000142455101, + "p99": 49.60000142455101 + }, + "isolatedSum": { + "p50": 53.480000235140324, + "p90": 55.32000120729208, + "p95": 57.080998085439205, + "p99": 57.080998085439205 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 229376, + "combineLogicalBytes": 229376, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 4, + "globalTokens": 32, + "dispatch": { + "p50": 33.879999071359634, + "p90": 34.71999987959862, + "p95": 35.48000007867813, + "p99": 35.48000007867813 + }, + "combine": { + "p50": 15.159999951720238, + "p90": 16.79999940097332, + "p95": 20.360000431537628, + "p99": 20.360000431537628 + }, + "roundtrip": { + "p50": 48.36000129580498, + "p90": 49.959998577833176, + "p95": 51.920000463724136, + "p99": 51.920000463724136 + }, + "isolatedSum": { + "p50": 49.03999902307987, + "p90": 51.51999928057194, + "p95": 55.84000051021576, + "p99": 55.84000051021576 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 458752, + "combineLogicalBytes": 458752, + "fanoutMean": 1, + "recvTokensMax": 4, + "stragglerRank": 3, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 8, + "globalTokens": 64, + "dispatch": { + "p50": 40.64000025391579, + "p90": 
42.80000180006027, + "p95": 45.52000015974045, + "p99": 45.52000015974045 + }, + "combine": { + "p50": 17.920000478625298, + "p90": 18.360000103712082, + "p95": 18.99999938905239, + "p99": 18.99999938905239 + }, + "roundtrip": { + "p50": 52.719999104738235, + "p90": 53.0799999833107, + "p95": 53.39999869465828, + "p99": 53.39999869465828 + }, + "isolatedSum": { + "p50": 58.560000732541084, + "p90": 61.160001903772354, + "p95": 64.51999954879284, + "p99": 64.51999954879284 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 917504, + "combineLogicalBytes": 917504, + "fanoutMean": 1, + "recvTokensMax": 8, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 16, + "globalTokens": 128, + "dispatch": { + "p50": 40.04000127315521, + "p90": 40.75999930500984, + "p95": 41.600000113248825, + "p99": 41.600000113248825 + }, + "combine": { + "p50": 18.279999494552612, + "p90": 19.40000057220459, + "p95": 19.440000876784325, + "p99": 19.440000876784325 + }, + "roundtrip": { + "p50": 55.44000118970871, + "p90": 56.120000779628754, + "p95": 60.839999467134476, + "p99": 60.839999467134476 + }, + "isolatedSum": { + "p50": 58.320000767707825, + "p90": 60.15999987721443, + "p95": 61.04000099003315, + "p99": 61.04000099003315 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 1835008, + "combineLogicalBytes": 1835008, + "fanoutMean": 1, + "recvTokensMax": 16, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 32, + "globalTokens": 256, + "dispatch": { + "p50": 40.11999815702438, + "p90": 42.59999841451645, + "p95": 44.759999960660934, + "p99": 44.759999960660934 + }, + "combine": { + "p50": 19.680000841617584, + "p90": 21.199999377131462, + "p95": 23.600000888109207, + "p99": 23.600000888109207 + }, + "roundtrip": { + "p50": 57.00000002980232, + "p90": 58.83999913930893, + "p95": 59.16000157594681, + "p99": 59.16000157594681 + }, + "isolatedSum": { + "p50": 
59.79999899864197, + "p90": 63.79999779164791, + "p95": 68.36000084877014, + "p99": 68.36000084877014 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 3670016, + "combineLogicalBytes": 3670016, + "fanoutMean": 1, + "recvTokensMax": 32, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 64, + "globalTokens": 512, + "dispatch": { + "p50": 40.52000120282173, + "p90": 42.67999902367592, + "p95": 47.40000143647194, + "p99": 47.40000143647194 + }, + "combine": { + "p50": 22.5600004196167, + "p90": 22.679999470710754, + "p95": 27.76000089943409, + "p99": 27.76000089943409 + }, + "roundtrip": { + "p50": 59.51999872922897, + "p90": 60.00100076198578, + "p95": 61.24100089073181, + "p99": 61.24100089073181 + }, + "isolatedSum": { + "p50": 63.08000162243843, + "p90": 65.35999849438667, + "p95": 75.16000233590603, + "p99": 75.16000233590603 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 7340032, + "combineLogicalBytes": 7340032, + "fanoutMean": 1, + "recvTokensMax": 64, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 128, + "globalTokens": 1024, + "dispatch": { + "p50": 39.80100154876709, + "p90": 41.23999923467636, + "p95": 42.080000042915344, + "p99": 42.080000042915344 + }, + "combine": { + "p50": 26.96000039577484, + "p90": 27.400000020861626, + "p95": 29.079999774694443, + "p99": 29.079999774694443 + }, + "roundtrip": { + "p50": 65.20000100135803, + "p90": 65.92000275850296, + "p95": 67.63999909162521, + "p99": 67.63999909162521 + }, + "isolatedSum": { + "p50": 66.76100194454193, + "p90": 68.63999925553799, + "p95": 71.15999981760979, + "p99": 71.15999981760979 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 14680064, + "combineLogicalBytes": 14680064, + "fanoutMean": 1, + "recvTokensMax": 128, + "stragglerRank": 0, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 256, + "globalTokens": 2048, + 
"dispatch": { + "p50": 55.27999997138977, + "p90": 55.48100173473358, + "p95": 55.76099827885628, + "p99": 55.76099827885628 + }, + "combine": { + "p50": 52.68000066280365, + "p90": 56.68000131845474, + "p95": 57.56000056862831, + "p99": 57.56000056862831 + }, + "roundtrip": { + "p50": 109.03999954462051, + "p90": 111.52099817991257, + "p95": 112.56100237369537, + "p99": 112.56100237369537 + }, + "isolatedSum": { + "p50": 107.96000063419342, + "p90": 112.16100305318832, + "p95": 113.32099884748459, + "p99": 113.32099884748459 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 29360128, + "combineLogicalBytes": 29360128, + "fanoutMean": 1, + "recvTokensMax": 256, + "stragglerRank": 1, + "correct": true, + "samplesPooled": 8, + "trials": 1 + }, + { + "tokensPerRank": 512, + "globalTokens": 4096, + "dispatch": { + "p50": 82.91999995708466, + "p90": 85.1999968290329, + "p95": 87.99999952316284, + "p99": 87.99999952316284 + }, + "combine": { + "p50": 63.60100209712982, + "p90": 65.95999747514725, + "p95": 67.59999692440033, + "p99": 67.59999692440033 + }, + "roundtrip": { + "p50": 150.28099715709686, + "p90": 150.9609967470169, + "p95": 152.91999280452728, + "p99": 152.91999280452728 + }, + "isolatedSum": { + "p50": 146.52100205421448, + "p90": 151.15999430418015, + "p95": 155.59999644756317, + "p99": 155.59999644756317 + }, + "roundtripMeasured": true, + "dispatchLogicalBytes": 58720256, + "combineLogicalBytes": 58720256, + "fanoutMean": 1, + "recvTokensMax": 512, + "stragglerRank": 2, + "correct": true, + "samplesPooled": 8, + "trials": 1 + } + ] + } + ], + "failures": [ + { + "id": "cxf-9c99e12a", + "identity": "h100|uccl|n-a||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T10:33:29Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "h100", + "backend": "uccl", + "phase": "decode", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": 
"28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + } + }, + { + "id": "cxf-da62b2cf", + "identity": "h100|flashinfer|n-a||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T10:33:29Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "h100", + "backend": "flashinfer", + "phase": "decode", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + } + }, + { + "id": "cxf-e91cbe11", + "identity": "h100|deepep-hybrid|n-a||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T10:33:29Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "h100", + "backend": "deepep-hybrid", + "phase": "decode", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + } + }, + { + "id": "cxf-16005203", + "identity": "h100|flashinfer|n-a||||unknown|normal|unknown|unknown||prefill|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T10:33:29Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "h100", + "backend": "flashinfer", + "phase": "prefill", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": "28583528865", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583528865", + "createdAt": "2026-07-02T10:33:29Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + } + }, + { + "id": 
"cxf-334ecace", + "identity": "h200|uccl|n-a||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T10:33:31Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "h200", + "backend": "uccl", + "phase": "decode", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + } + }, + { + "id": "cxf-25c20eef", + "identity": "h200|deepep-hybrid|n-a||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T10:33:31Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "h200", + "backend": "deepep-hybrid", + "phase": "decode", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": "28583530917", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28583530917", + "createdAt": "2026-07-02T10:33:31Z", + "sha": "3dbacd1edfcb4fcee1deb9e96cd0f321a6d7af13" + } + }, + { + "id": "cxf-f10582a7", + "identity": "mi325x|mori|n-a||||unknown|normal|unknown|unknown||decode|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T20:11:50Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "mi325x", + "backend": "mori", + "phase": "decode", + "config": "unknown/normal/unknown", + "reason": "timeout", + "returnCode": 124, + "run": { + "id": "28618583084", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28618583084", + "createdAt": "2026-07-02T20:11:50Z", + "sha": "db1a2037e4eb0af3c2cb8b5e7030e72a32400066" + } + }, + { + "id": "cxf-8a8f96b9", + "identity": "mi325x|mori|n-a||||unknown|normal|unknown|unknown||prefill|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T16:39:56Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "mi325x", + 
"backend": "mori", + "phase": "prefill", + "config": "unknown/normal/unknown", + "reason": "unknown", + "returnCode": 1, + "run": { + "id": "28606326624", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606326624", + "createdAt": "2026-07-02T16:39:56Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + } + }, + { + "id": "cxf-7b7830f2", + "identity": "mi355x|mori|n-a||||unknown|normal|unknown|unknown||prefill|normal|none|none|0|tuned||", + "generatedAt": "2026-07-02T08:53:54Z", + "publicationStatus": "failed", + "status": "failed", + "sku": "mi355x", + "backend": "mori", + "phase": "prefill", + "config": "unknown/normal/unknown", + "reason": "timeout", + "returnCode": 124, + "run": { + "id": "28577799750", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28577799750", + "createdAt": "2026-07-02T08:53:54Z", + "sha": "07c92830a783a0274599a4fdd292a0adda96f26b" + } + } + ], + "summaryCards": [ + { + "title": "Best backend · decode EP8", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "Best backend · prefill EP8", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "LL -> normal crossover", + "value": "T~128 tok/rank", + "sub": "GB300 EP4 fp8 · normal RT p50 wins above this" + }, + { + "title": "Resource-normalized winner", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "Backend-default winner", + "value": "no data", + "sub": "no official headline cell at this phase/EP" + }, + { + "title": "Most unstable config", + "value": "GB300 · deepep prefill", + "sub": "10.19x p99 under hotspot-single vs uniform", + "warning": true + }, + { + "title": "Invalid / diagnostic cases", + "value": "9", + "sub": "see Evidence failed table", + "warning": true, + "href": "#tab-evidence" + } + ], + "decision": { + "budgetsUs": [ + 100, + 250, + 500 + ], + "maxTokensUnderBudget": [], + "recommendations": [ + 
{ + "id": "cxr-f74e7f65", + "sku": "b200", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 91.4, + "config": "bf16/normal/layout-and-dispatch-v1/balanced+eplb/tuned", + "epSize": 8 + }, + { + "id": "cxr-2393aa25", + "sku": "b200", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 98.8, + "config": "bf16/normal/layout-and-dispatch-v1/balanced+eplb/tuned", + "epSize": 8 + }, + { + "id": "cxr-d2992d7c", + "sku": "b300", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 64.8, + "config": "bf16/normal/layout-and-dispatch-v1/zipf-heavy+eplb/tuned", + "epSize": 8 + }, + { + "id": "cxr-1c3060b2", + "sku": "b300", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 81.8, + "config": "bf16/normal/layout-and-dispatch-v1/balanced+eplb/tuned", + "epSize": 8 + }, + { + "id": "cxr-1445ce8d", + "sku": "gb300", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 59.2, + "config": "bf16/ll/layout-and-dispatch-v1/uniform/tuned", + "epSize": 4 + }, + { + "id": "cxr-f7274fdd", + "sku": "gb300", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 120.8, + "config": "bf16/normal/layout-and-dispatch-v1/balanced/tuned", + "epSize": 4 + }, + { + "id": "cxr-8fcf986c", + "sku": "h100", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 53.2, + "config": "fp8/ll/runtime-visible-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-466c0bc2", + "sku": "h100", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 103.6, + "config": "fp8/normal/cached-layout-comm-only-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-c2fe14a3", + "sku": "h200", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 52, + "config": "fp8/ll/runtime-visible-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-7e4f951f", + "sku": "h200", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 108.8, + "config": 
"fp8/normal/cached-layout-comm-only-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-b253cba7", + "sku": "mi325x", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 97.8, + "config": "bf16/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-a6329cf3", + "sku": "mi325x", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 122, + "config": "bf16/normal/layout-and-dispatch-v1/uniform/tuned", + "epSize": 8 + }, + { + "id": "cxr-2e17071c", + "sku": "mi355x", + "phase": "decode", + "atTokensPerRank": 64, + "lowestP99DispatchUs": 41.8, + "config": "bf16/normal/layout-and-dispatch-v1/balanced-rank-local/tuned", + "epSize": 8 + }, + { + "id": "cxr-d6cb22d2", + "sku": "mi355x", + "phase": "prefill", + "atTokensPerRank": 256, + "lowestP99DispatchUs": 55.8, + "config": "bf16/normal/layout-and-dispatch-v1/balanced-rank-local/tuned", + "epSize": 8 + } + ], + "llCrossover": [ + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + 
"stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 4, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": 
"p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "gb300", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": 
"measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 
"never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h100", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, 
+ "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "bf16", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": "never-in-range" + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + 
"stat": "p50", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + }, + { + "sku": "h200", + "ep": 8, + "dtype": "fp8", + "stat": "p99", + "basis": "measured-roundtrip", + "normal_faster_at_T": 128 + } + ], + "resourcePareto": [], + "topologyPenalty": [ + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 1, + "ep4_p50": 547.3, + "ep8_p50": 677.9, + "penalty_pct": 23.9 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 2, + "ep4_p50": 541.6, + "ep8_p50": 615.9, + "penalty_pct": 13.7 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 4, + "ep4_p50": 561, + "ep8_p50": 662.2, + "penalty_pct": 18.1 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 8, + "ep4_p50": 560, + "ep8_p50": 635.8, + "penalty_pct": 13.5 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 16, + "ep4_p50": 559.2, + "ep8_p50": 673.8, + "penalty_pct": 20.5 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 32, + "ep4_p50": 563.6, + "ep8_p50": 624.4, + "penalty_pct": 10.8 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 64, + "ep4_p50": 558.3, + "ep8_p50": 676.5, + "penalty_pct": 21.2 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "bf16", + "T": 128, + "ep4_p50": 558.2, + "ep8_p50": 683.7, + "penalty_pct": 22.5 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 1, + "ep4_p50": 112.5, + "ep8_p50": 146.5, + "penalty_pct": 30.2 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 2, + "ep4_p50": 111.8, + "ep8_p50": 146.2, + "penalty_pct": 30.8 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 4, + "ep4_p50": 113.2, + "ep8_p50": 147.9, + "penalty_pct": 30.6 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 8, + "ep4_p50": 111.9, + "ep8_p50": 147.1, + "penalty_pct": 31.4 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 
16, + "ep4_p50": 113.2, + "ep8_p50": 145.5, + "penalty_pct": 28.5 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 32, + "ep4_p50": 113.3, + "ep8_p50": 145.7, + "penalty_pct": 28.6 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 64, + "ep4_p50": 116.4, + "ep8_p50": 148.1, + "penalty_pct": 27.2 + }, + { + "sku": "gb300", + "phase": "decode", + "dtype": "fp8", + "T": 128, + "ep4_p50": 116.3, + "ep8_p50": 147.3, + "penalty_pct": 26.7 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "bf16", + "T": 128, + "ep4_p50": 558.5, + "ep8_p50": 757.2, + "penalty_pct": 35.6 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "bf16", + "T": 256, + "ep4_p50": 563.2, + "ep8_p50": 640.9, + "penalty_pct": 13.8 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "bf16", + "T": 512, + "ep4_p50": 610.4, + "ep8_p50": 667.6, + "penalty_pct": 9.4 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "bf16", + "T": 1024, + "ep4_p50": 752.1, + "ep8_p50": 804.2, + "penalty_pct": 6.9 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "bf16", + "T": 2048, + "ep4_p50": 925.6, + "ep8_p50": 981, + "penalty_pct": 6 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "bf16", + "T": 4096, + "ep4_p50": 1312.4, + "ep8_p50": 1361.2, + "penalty_pct": 3.7 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "fp8", + "T": 128, + "ep4_p50": 120.2, + "ep8_p50": 127, + "penalty_pct": 5.7 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "fp8", + "T": 256, + "ep4_p50": 127.2, + "ep8_p50": 140.8, + "penalty_pct": 10.7 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "fp8", + "T": 512, + "ep4_p50": 145.4, + "ep8_p50": 173, + "penalty_pct": 19 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "fp8", + "T": 1024, + "ep4_p50": 188.2, + "ep8_p50": 244.5, + "penalty_pct": 29.9 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "fp8", + "T": 2048, + "ep4_p50": 269.4, + 
"ep8_p50": 413.5, + "penalty_pct": 53.5 + }, + { + "sku": "gb300", + "phase": "prefill", + "dtype": "fp8", + "T": 4096, + "ep4_p50": 474, + "ep8_p50": 762.1, + "penalty_pct": 60.8 + } + ], + "skewPenalty": [ + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.753, + "p99_amplification": 0.767 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.675, + "p99_amplification": 0.723 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.61, + "p99_amplification": 0.785 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.593, + "p99_amplification": 0.666 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.603, + "p99_amplification": 0.618 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.761, + "p99_amplification": 0.781 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.699, + "p99_amplification": 0.671 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.765, + "p99_amplification": 0.546 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.749, + "p99_amplification": 0.743 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.675, + "p99_amplification": 0.737 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.665, + "p99_amplification": 0.796 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.588, + 
"p99_amplification": 0.622 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.596, + "p99_amplification": 0.647 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.7, + "p99_amplification": 0.721 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.697, + "p99_amplification": 0.719 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.762, + "p99_amplification": 0.593 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.727, + "p99_amplification": 0.714 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.622, + "p99_amplification": 0.64 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.602, + "p99_amplification": 0.636 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.617, + "p99_amplification": 0.765 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.612, + "p99_amplification": 0.677 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.767, + "p99_amplification": 0.794 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.76, + "p99_amplification": 0.692 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.779, + "p99_amplification": 0.559 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-mild", + "T": 1, + "p50_amplification": 0.707, + "p99_amplification": 0.563 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.621, + "p99_amplification": 0.589 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.595, + "p99_amplification": 1.051 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.592, + "p99_amplification": 0.567 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.604, + "p99_amplification": 0.636 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.757, + "p99_amplification": 0.663 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.757, + "p99_amplification": 0.952 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.806, + "p99_amplification": 0.59 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.717, + "p99_amplification": 0.723 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.623, + "p99_amplification": 0.692 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.64, + "p99_amplification": 0.797 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.604, + "p99_amplification": 0.713 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.612, + "p99_amplification": 0.642 + }, + { + "sku": "b200", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.719, + "p99_amplification": 0.743 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.774, + "p99_amplification": 0.699 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.745, + "p99_amplification": 0.547 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.705, + "p99_amplification": 0.581 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.656, + "p99_amplification": 0.659 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.595, + "p99_amplification": 0.566 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.588, + "p99_amplification": 0.559 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.598, + "p99_amplification": 0.57 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.74, + "p99_amplification": 0.664 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.704, + "p99_amplification": 0.651 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.758, + "p99_amplification": 0.518 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.71, + "p99_amplification": 0.594 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.666, + 
"p99_amplification": 0.614 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.619, + "p99_amplification": 0.584 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.611, + "p99_amplification": 0.574 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.628, + "p99_amplification": 0.609 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.761, + "p99_amplification": 0.669 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.761, + "p99_amplification": 0.668 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.78, + "p99_amplification": 0.532 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.709, + "p99_amplification": 0.627 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.629, + "p99_amplification": 0.668 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.599, + "p99_amplification": 0.617 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.593, + "p99_amplification": 0.616 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.61, + "p99_amplification": 0.597 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.767, + "p99_amplification": 0.718 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": 
"zipf+eplb", + "T": 64, + "p50_amplification": 0.766, + "p99_amplification": 0.664 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.78, + "p99_amplification": 0.559 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.746, + "p99_amplification": 0.746 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.714, + "p99_amplification": 0.811 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.657, + "p99_amplification": 0.759 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.693, + "p99_amplification": 0.825 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.668, + "p99_amplification": 0.773 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.773, + "p99_amplification": 0.78 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.76, + "p99_amplification": 0.83 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.793, + "p99_amplification": 0.599 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.722, + "p99_amplification": 0.591 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.685, + "p99_amplification": 0.672 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.645, + "p99_amplification": 0.649 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + 
"p50_amplification": 0.621, + "p99_amplification": 0.652 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.62, + "p99_amplification": 0.584 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.722, + "p99_amplification": 0.716 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.693, + "p99_amplification": 0.619 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.745, + "p99_amplification": 0.516 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.734, + "p99_amplification": 0.6 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.649, + "p99_amplification": 0.641 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.617, + "p99_amplification": 0.764 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.62, + "p99_amplification": 0.611 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.645, + "p99_amplification": 0.85 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.789, + "p99_amplification": 0.943 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.782, + "p99_amplification": 0.709 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.799, + "p99_amplification": 0.586 + }, + { + "sku": "b200", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.701, + "p99_amplification": 0.596 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.637, + "p99_amplification": 0.588 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.608, + "p99_amplification": 0.59 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.626, + "p99_amplification": 0.839 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.626, + "p99_amplification": 0.581 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.821, + "p99_amplification": 0.82 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.788, + "p99_amplification": 0.834 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.849, + "p99_amplification": 0.702 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.707, + "p99_amplification": 1.002 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.645, + "p99_amplification": 0.855 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.621, + "p99_amplification": 0.637 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.617, + "p99_amplification": 0.574 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.636, + "p99_amplification": 0.588 + }, + { + "sku": "b200", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.778, + "p99_amplification": 0.751 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.782, + "p99_amplification": 0.684 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.789, + "p99_amplification": 0.553 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.726, + "p99_amplification": 0.584 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.644, + "p99_amplification": 0.634 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.611, + "p99_amplification": 0.598 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.616, + "p99_amplification": 0.641 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.627, + "p99_amplification": 0.598 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.782, + "p99_amplification": 0.677 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.731, + "p99_amplification": 0.695 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.798, + "p99_amplification": 0.535 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.748, + "p99_amplification": 0.785 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 
0.644, + "p99_amplification": 0.674 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.615, + "p99_amplification": 0.658 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.619, + "p99_amplification": 0.655 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.644, + "p99_amplification": 0.656 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.781, + "p99_amplification": 0.788 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.791, + "p99_amplification": 0.747 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.8, + "p99_amplification": 0.591 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.732, + "p99_amplification": 0.586 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.646, + "p99_amplification": 0.602 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.617, + "p99_amplification": 0.586 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.621, + "p99_amplification": 0.576 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.654, + "p99_amplification": 0.714 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.782, + "p99_amplification": 0.708 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + 
"routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.791, + "p99_amplification": 0.674 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.793, + "p99_amplification": 0.555 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.769, + "p99_amplification": 0.691 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.78, + "p99_amplification": 0.534 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.89, + "p99_amplification": 0.819 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.968, + "p99_amplification": 0.93 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.075, + "p99_amplification": 1.033 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.144, + "p99_amplification": 1.126 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.774, + "p99_amplification": 0.651 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.739, + "p99_amplification": 0.485 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.791, + "p99_amplification": 0.729 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.882, + "p99_amplification": 0.853 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.969, + "p99_amplification": 0.966 + }, + { + "sku": "b200", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.019, + "p99_amplification": 1.016 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.805, + "p99_amplification": 0.694 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.82, + "p99_amplification": 0.535 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.858, + "p99_amplification": 0.801 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.906, + "p99_amplification": 0.884 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.966, + "p99_amplification": 0.937 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.989, + "p99_amplification": 0.97 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.829, + "p99_amplification": 0.78 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.846, + "p99_amplification": 0.601 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.928, + "p99_amplification": 0.931 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.014, + "p99_amplification": 1.001 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.113, + "p99_amplification": 1.077 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.176, + 
"p99_amplification": 1.163 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.791, + "p99_amplification": 0.718 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.83, + "p99_amplification": 0.626 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.849, + "p99_amplification": 0.808 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.909, + "p99_amplification": 0.905 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.953, + "p99_amplification": 0.936 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.97, + "p99_amplification": 0.965 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.81, + "p99_amplification": 0.797 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.794, + "p99_amplification": 0.55 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.885, + "p99_amplification": 0.831 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.974, + "p99_amplification": 0.933 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.077, + "p99_amplification": 1.032 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.144, + "p99_amplification": 1.126 + }, + { + "sku": "b200", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.804, + "p99_amplification": 0.719 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.816, + "p99_amplification": 0.551 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.851, + "p99_amplification": 0.806 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.887, + "p99_amplification": 0.877 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.961, + "p99_amplification": 0.923 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.973, + "p99_amplification": 0.955 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.801, + "p99_amplification": 0.719 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.813, + "p99_amplification": 0.585 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.851, + "p99_amplification": 0.843 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.884, + "p99_amplification": 0.87 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.958, + "p99_amplification": 0.917 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.973, + "p99_amplification": 0.955 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + 
"p50_amplification": 0.795, + "p99_amplification": 0.686 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.794, + "p99_amplification": 0.545 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.884, + "p99_amplification": 0.82 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.975, + "p99_amplification": 0.937 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.081, + "p99_amplification": 1.039 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.151, + "p99_amplification": 1.133 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.776, + "p99_amplification": 0.686 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.77, + "p99_amplification": 0.522 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.808, + "p99_amplification": 0.753 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.899, + "p99_amplification": 0.879 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.972, + "p99_amplification": 0.982 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.03, + "p99_amplification": 1.072 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.819, + "p99_amplification": 0.718 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.835, + "p99_amplification": 0.581 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.862, + "p99_amplification": 0.846 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.921, + "p99_amplification": 0.904 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.972, + "p99_amplification": 0.942 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.994, + "p99_amplification": 0.979 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.841, + "p99_amplification": 0.77 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.848, + "p99_amplification": 0.572 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.944, + "p99_amplification": 0.947 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.011, + "p99_amplification": 0.999 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.11, + "p99_amplification": 1.063 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.178, + "p99_amplification": 1.16 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.811, + "p99_amplification": 0.697 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.829, + "p99_amplification": 0.543 
+ }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.858, + "p99_amplification": 0.835 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.906, + "p99_amplification": 0.863 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.952, + "p99_amplification": 0.91 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.971, + "p99_amplification": 0.966 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.806, + "p99_amplification": 0.707 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.794, + "p99_amplification": 0.545 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.899, + "p99_amplification": 0.84 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.976, + "p99_amplification": 0.942 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.08, + "p99_amplification": 1.039 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.15, + "p99_amplification": 1.131 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.802, + "p99_amplification": 0.698 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.825, + "p99_amplification": 0.566 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.852, + "p99_amplification": 0.795 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.886, + "p99_amplification": 0.925 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.959, + "p99_amplification": 0.923 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.976, + "p99_amplification": 0.959 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.813, + "p99_amplification": 0.749 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.823, + "p99_amplification": 0.551 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.853, + "p99_amplification": 0.802 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.883, + "p99_amplification": 0.844 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.961, + "p99_amplification": 0.924 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.975, + "p99_amplification": 0.957 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.514, + "p99_amplification": 1.479 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.386, + "p99_amplification": 1.342 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.346, + "p99_amplification": 1.505 + }, + { + 
"sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.341, + "p99_amplification": 1.289 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.419, + "p99_amplification": 1.363 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 1.589, + "p99_amplification": 1.493 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.63, + "p99_amplification": 1.498 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.487, + "p99_amplification": 1.109 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.511, + "p99_amplification": 1.588 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.426, + "p99_amplification": 1.357 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.354, + "p99_amplification": 1.327 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.351, + "p99_amplification": 1.412 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.376, + "p99_amplification": 1.452 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.483, + "p99_amplification": 1.403 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 1.453, + "p99_amplification": 1.464 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.316, + "p99_amplification": 0.932 + }, + { + "sku": "b200", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 1.505, + "p99_amplification": 1.293 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.371, + "p99_amplification": 1.226 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 1.317, + "p99_amplification": 1.199 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 1.367, + "p99_amplification": 1.367 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 1.483, + "p99_amplification": 1.336 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.695, + "p99_amplification": 1.588 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.855, + "p99_amplification": 1.642 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.7, + "p99_amplification": 1.196 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.531, + "p99_amplification": 1.325 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 1.425, + "p99_amplification": 1.376 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 1.373, + "p99_amplification": 1.327 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 1.416, + "p99_amplification": 1.497 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 1.475, + 
"p99_amplification": 1.467 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 1.708, + "p99_amplification": 1.674 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.85, + "p99_amplification": 1.699 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.674, + "p99_amplification": 1.186 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.503, + "p99_amplification": 1.276 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.402, + "p99_amplification": 1.522 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.338, + "p99_amplification": 1.277 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.368, + "p99_amplification": 1.517 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.463, + "p99_amplification": 1.273 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.716, + "p99_amplification": 1.651 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.858, + "p99_amplification": 1.614 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.716, + "p99_amplification": 1.198 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 1.626, + "p99_amplification": 1.528 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-moderate", + "T": 2, + "p50_amplification": 1.422, + "p99_amplification": 1.302 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 1.375, + "p99_amplification": 1.441 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 1.43, + "p99_amplification": 1.505 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 1.435, + "p99_amplification": 1.316 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 1.616, + "p99_amplification": 1.535 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.755, + "p99_amplification": 2.143 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.502, + "p99_amplification": 1.014 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.538, + "p99_amplification": 1.563 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.429, + "p99_amplification": 1.543 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.364, + "p99_amplification": 1.354 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.384, + "p99_amplification": 1.44 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.494, + "p99_amplification": 1.4 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.727, + "p99_amplification": 
1.632 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1.873, + "p99_amplification": 1.709 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.741, + "p99_amplification": 1.213 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 1.487, + "p99_amplification": 1.334 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.395, + "p99_amplification": 1.342 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 1.33, + "p99_amplification": 1.523 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.356, + "p99_amplification": 1.423 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.485, + "p99_amplification": 1.497 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 1.704, + "p99_amplification": 1.612 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 1.858, + "p99_amplification": 1.643 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.711, + "p99_amplification": 1.194 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.535, + "p99_amplification": 1.291 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.22, + "p99_amplification": 0.936 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.005, + 
"p99_amplification": 0.983 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.965, + "p99_amplification": 0.986 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.914, + "p99_amplification": 0.9 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.862, + "p99_amplification": 0.86 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.332, + "p99_amplification": 1.321 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.076, + "p99_amplification": 0.769 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.002, + "p99_amplification": 1.065 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.945, + "p99_amplification": 0.947 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.903, + "p99_amplification": 0.866 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.854, + "p99_amplification": 0.845 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.76, + "p99_amplification": 1.54 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.387, + "p99_amplification": 0.982 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.065, + "p99_amplification": 1.061 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", 
+ "T": 1024, + "p50_amplification": 0.914, + "p99_amplification": 1.004 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.845, + "p99_amplification": 0.843 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.721, + "p99_amplification": 0.724 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.698, + "p99_amplification": 1.412 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.335, + "p99_amplification": 0.878 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.036, + "p99_amplification": 1.009 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.959, + "p99_amplification": 0.929 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.905, + "p99_amplification": 0.958 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.859, + "p99_amplification": 0.868 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.771, + "p99_amplification": 1.475 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.396, + "p99_amplification": 1.04 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.081, + "p99_amplification": 1.096 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.896, + "p99_amplification": 0.909 + }, + { + "sku": 
"b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.83, + "p99_amplification": 0.819 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.701, + "p99_amplification": 0.71 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.535, + "p99_amplification": 1.333 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.251, + "p99_amplification": 0.97 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.013, + "p99_amplification": 1.021 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.99, + "p99_amplification": 1.049 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.926, + "p99_amplification": 0.932 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.876, + "p99_amplification": 0.878 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.75, + "p99_amplification": 1.458 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.384, + "p99_amplification": 0.929 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.062, + "p99_amplification": 1.058 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.88, + "p99_amplification": 0.904 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.834, + "p99_amplification": 0.844 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.706, + "p99_amplification": 0.736 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.754, + "p99_amplification": 1.451 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.37, + "p99_amplification": 0.904 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 1.047, + "p99_amplification": 0.99 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.881, + "p99_amplification": 0.894 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.832, + "p99_amplification": 0.817 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.705, + "p99_amplification": 0.707 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.609, + "p99_amplification": 0.555 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.579, + "p99_amplification": 0.612 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.561, + "p99_amplification": 0.555 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.564, + "p99_amplification": 0.804 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.587, + "p99_amplification": 0.576 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + 
"routing": "zipf", + "T": 32, + "p50_amplification": 0.611, + "p99_amplification": 0.678 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.556, + "p99_amplification": 0.542 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.55, + "p99_amplification": 0.399 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.588, + "p99_amplification": 0.68 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.552, + "p99_amplification": 0.621 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.556, + "p99_amplification": 0.853 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.526, + "p99_amplification": 0.602 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.595, + "p99_amplification": 0.858 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.606, + "p99_amplification": 0.752 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.567, + "p99_amplification": 0.819 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.552, + "p99_amplification": 0.573 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.65, + "p99_amplification": 0.597 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.598, + "p99_amplification": 0.693 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.569, + "p99_amplification": 0.644 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.595, + "p99_amplification": 0.625 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.619, + "p99_amplification": 0.667 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.648, + "p99_amplification": 0.725 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.59, + "p99_amplification": 0.646 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.558, + "p99_amplification": 0.448 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.602, + "p99_amplification": 0.595 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.591, + "p99_amplification": 0.721 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.572, + "p99_amplification": 0.659 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.567, + "p99_amplification": 0.644 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.594, + "p99_amplification": 0.657 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.614, + "p99_amplification": 0.777 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.57, + "p99_amplification": 0.639 + }, + { + "sku": "b200", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.572, + "p99_amplification": 0.492 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.564, + "p99_amplification": 0.61 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.57, + "p99_amplification": 0.682 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.561, + "p99_amplification": 0.714 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.565, + "p99_amplification": 0.654 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.583, + "p99_amplification": 0.678 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.609, + "p99_amplification": 0.751 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.557, + "p99_amplification": 0.653 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.537, + "p99_amplification": 0.515 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.603, + "p99_amplification": 0.642 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.571, + "p99_amplification": 0.661 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.56, + "p99_amplification": 0.674 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.566, + 
"p99_amplification": 0.673 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.585, + "p99_amplification": 0.685 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.608, + "p99_amplification": 0.726 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.554, + "p99_amplification": 0.662 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.548, + "p99_amplification": 0.529 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.577, + "p99_amplification": 0.561 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.56, + "p99_amplification": 0.638 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.56, + "p99_amplification": 0.566 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.56, + "p99_amplification": 0.53 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.582, + "p99_amplification": 0.595 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.622, + "p99_amplification": 0.668 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.554, + "p99_amplification": 0.545 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.545, + "p99_amplification": 0.434 + }, + { + "sku": "b200", + "ep": 8, + 
"phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.572, + "p99_amplification": 0.517 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.601, + "p99_amplification": 0.678 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.571, + "p99_amplification": 0.637 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.574, + "p99_amplification": 0.611 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.592, + "p99_amplification": 0.567 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.622, + "p99_amplification": 0.772 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.56, + "p99_amplification": 0.54 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.546, + "p99_amplification": 0.539 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.573, + "p99_amplification": 0.532 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.612, + "p99_amplification": 0.409 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.764, + "p99_amplification": 0.697 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.035, + "p99_amplification": 0.974 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.269, + "p99_amplification": 1.199 + }, + { + "sku": "b200", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.353, + "p99_amplification": 1.319 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.559, + "p99_amplification": 0.552 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.605, + "p99_amplification": 0.422 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.764, + "p99_amplification": 0.693 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.973, + "p99_amplification": 0.914 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.125, + "p99_amplification": 1.062 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.2, + "p99_amplification": 1.171 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.562, + "p99_amplification": 0.576 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.55, + "p99_amplification": 0.399 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.636, + "p99_amplification": 0.589 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.756, + "p99_amplification": 0.715 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.854, + "p99_amplification": 0.82 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.893, + 
"p99_amplification": 0.876 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.572, + "p99_amplification": 0.548 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.614, + "p99_amplification": 0.404 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.76, + "p99_amplification": 0.702 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.943, + "p99_amplification": 0.88 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.17, + "p99_amplification": 1.107 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.294, + "p99_amplification": 1.264 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.546, + "p99_amplification": 0.48 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.539, + "p99_amplification": 0.391 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.624, + "p99_amplification": 0.58 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.738, + "p99_amplification": 0.705 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.826, + "p99_amplification": 0.787 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.858, + "p99_amplification": 0.843 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.558, + "p99_amplification": 0.577 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.609, + "p99_amplification": 0.405 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.759, + "p99_amplification": 0.684 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.034, + "p99_amplification": 0.973 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.27, + "p99_amplification": 1.2 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.354, + "p99_amplification": 1.322 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.554, + "p99_amplification": 0.529 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.54, + "p99_amplification": 0.369 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.625, + "p99_amplification": 0.58 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.736, + "p99_amplification": 0.7 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.826, + "p99_amplification": 0.788 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.862, + "p99_amplification": 0.848 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + 
"p50_amplification": 0.548, + "p99_amplification": 0.506 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.543, + "p99_amplification": 0.37 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.622, + "p99_amplification": 0.578 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.737, + "p99_amplification": 0.701 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.826, + "p99_amplification": 0.784 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.864, + "p99_amplification": 0.848 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 2.719, + "p99_amplification": 28.906 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 2.504, + "p99_amplification": 29.221 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 2.535, + "p99_amplification": 27.606 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 2.463, + "p99_amplification": 27.611 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 2.486, + "p99_amplification": 26.554 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 2.784, + "p99_amplification": 25.538 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 2.764, + "p99_amplification": 20.641 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 
2.804, + "p99_amplification": 15.583 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 2.834, + "p99_amplification": 29.074 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 2.568, + "p99_amplification": 29.808 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 2.588, + "p99_amplification": 27.841 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 2.491, + "p99_amplification": 27.019 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 2.574, + "p99_amplification": 27.579 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 2.982, + "p99_amplification": 27.696 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 3.035, + "p99_amplification": 21.224 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 3.278, + "p99_amplification": 15.383 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 2.751, + "p99_amplification": 30.025 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 2.562, + "p99_amplification": 29.154 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 2.51, + "p99_amplification": 27.616 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 2.515, + "p99_amplification": 26.813 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-heavy+eplb", + "T": 16, + "p50_amplification": 2.485, + "p99_amplification": 20.29 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 2.895, + "p99_amplification": 27.021 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 2.455, + "p99_amplification": 23.63 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 2.321, + "p99_amplification": 17.06 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 2.693, + "p99_amplification": 27.544 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 2.511, + "p99_amplification": 29.068 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 2.488, + "p99_amplification": 25.459 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 2.473, + "p99_amplification": 26.584 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 2.474, + "p99_amplification": 25.383 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 2.707, + "p99_amplification": 25.764 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 2.503, + "p99_amplification": 22.851 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.51, + "p99_amplification": 15.744 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 2.738, + "p99_amplification": 26.807 + }, + { + "sku": "b200", + "ep": 
8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 2.538, + "p99_amplification": 28.417 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 2.554, + "p99_amplification": 26.403 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 2.573, + "p99_amplification": 24.45 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 2.584, + "p99_amplification": 25.587 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 2.736, + "p99_amplification": 25.671 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 2.459, + "p99_amplification": 18.689 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 2.343, + "p99_amplification": 15.957 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 2.717, + "p99_amplification": 32.279 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 2.538, + "p99_amplification": 29.412 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 2.529, + "p99_amplification": 26.915 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 2.499, + "p99_amplification": 26.793 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 2.527, + "p99_amplification": 26.643 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 
2.837, + "p99_amplification": 26.969 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 2.734, + "p99_amplification": 21.911 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 2.829, + "p99_amplification": 15.19 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 2.629, + "p99_amplification": 30.309 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 2.444, + "p99_amplification": 30.245 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 2.406, + "p99_amplification": 26.416 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 2.43, + "p99_amplification": 26.418 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 2.419, + "p99_amplification": 26.892 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 2.616, + "p99_amplification": 26.865 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 2.368, + "p99_amplification": 23.829 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 2.269, + "p99_amplification": 16.607 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 2.667, + "p99_amplification": 28.885 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 2.511, + "p99_amplification": 28.392 + }, + { + "sku": "b200", + 
"ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 2.457, + "p99_amplification": 27.546 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 2.458, + "p99_amplification": 26.74 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 2.561, + "p99_amplification": 25.923 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 2.654, + "p99_amplification": 26.387 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 2.425, + "p99_amplification": 22.922 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 2.263, + "p99_amplification": 17.3 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 2.921, + "p99_amplification": 17.009 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 2.733, + "p99_amplification": 9.258 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 2.811, + "p99_amplification": 9.176 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 3.227, + "p99_amplification": 5.24 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 3.229, + "p99_amplification": 3.596 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 3.34, + "p99_amplification": 3.326 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 3.31, + "p99_amplification": 13.282 + }, + { + "sku": "b200", + "ep": 8, 
+ "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 3.342, + "p99_amplification": 7.75 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 3.782, + "p99_amplification": 8.125 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 4.428, + "p99_amplification": 4.836 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 4.399, + "p99_amplification": 4.422 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 4.608, + "p99_amplification": 4.517 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 2.358, + "p99_amplification": 16.788 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.99, + "p99_amplification": 9.736 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.765, + "p99_amplification": 12.313 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.575, + "p99_amplification": 7.365 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.316, + "p99_amplification": 4.831 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.122, + "p99_amplification": 2.142 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.569, + "p99_amplification": 12.816 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + 
"p50_amplification": 2.267, + "p99_amplification": 10.875 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 2.162, + "p99_amplification": 10.166 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 2.168, + "p99_amplification": 6.206 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 2.035, + "p99_amplification": 3.425 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 2.104, + "p99_amplification": 2.21 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 2.364, + "p99_amplification": 13.615 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.972, + "p99_amplification": 10.691 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.727, + "p99_amplification": 11.05 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.611, + "p99_amplification": 7.546 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.323, + "p99_amplification": 4.001 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.136, + "p99_amplification": 2.041 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 2.878, + "p99_amplification": 14.367 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 2.748, + "p99_amplification": 8.953 + }, + { + "sku": 
"b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 2.804, + "p99_amplification": 8.495 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 3.229, + "p99_amplification": 5.244 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 3.233, + "p99_amplification": 3.544 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 3.311, + "p99_amplification": 3.286 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 2.388, + "p99_amplification": 15.226 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 2.004, + "p99_amplification": 9.071 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.744, + "p99_amplification": 11.781 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.633, + "p99_amplification": 7.09 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.322, + "p99_amplification": 4.039 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.131, + "p99_amplification": 2.093 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 2.336, + "p99_amplification": 15.462 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.994, + "p99_amplification": 11.585 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf+eplb", + "T": 512, + "p50_amplification": 1.753, + "p99_amplification": 11.419 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.594, + "p99_amplification": 7.443 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.348, + "p99_amplification": 4.148 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.132, + "p99_amplification": 2.08 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.972, + "p99_amplification": 1.115 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.915, + "p99_amplification": 0.893 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.988, + "p99_amplification": 0.901 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.986, + "p99_amplification": 0.872 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.999, + "p99_amplification": 0.993 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.976, + "p99_amplification": 0.997 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.004, + "p99_amplification": 0.89 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.009, + "p99_amplification": 0.7 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.983, + "p99_amplification": 1.335 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 
2, + "p50_amplification": 0.906, + "p99_amplification": 0.964 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.971, + "p99_amplification": 1.002 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.976, + "p99_amplification": 0.969 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.99, + "p99_amplification": 0.982 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.018, + "p99_amplification": 1.145 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.963, + "p99_amplification": 0.926 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.036, + "p99_amplification": 0.94 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.984, + "p99_amplification": 1.038 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.946, + "p99_amplification": 0.951 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.993, + "p99_amplification": 0.988 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.99, + "p99_amplification": 1.027 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.996, + "p99_amplification": 1.027 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.982, + "p99_amplification": 1.027 + }, + { + "sku": "b200", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.985, + "p99_amplification": 0.981 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.989, + "p99_amplification": 0.809 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.025, + "p99_amplification": 1.002 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.971, + "p99_amplification": 0.922 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.995, + "p99_amplification": 0.98 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.983, + "p99_amplification": 0.9 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.999, + "p99_amplification": 1 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.989, + "p99_amplification": 0.989 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.983, + "p99_amplification": 0.876 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.047, + "p99_amplification": 0.763 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.06, + "p99_amplification": 1.015 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.995, + "p99_amplification": 1.313 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.984, + "p99_amplification": 0.892 + }, + { + "sku": "b200", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.988, + "p99_amplification": 0.901 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.003, + "p99_amplification": 1.092 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.991, + "p99_amplification": 0.921 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.98, + "p99_amplification": 0.932 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.973, + "p99_amplification": 0.716 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.975, + "p99_amplification": 0.969 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.942, + "p99_amplification": 0.997 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 1.004, + "p99_amplification": 1.109 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.988, + "p99_amplification": 1.079 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.988, + "p99_amplification": 0.988 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.979, + "p99_amplification": 1.016 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.017, + "p99_amplification": 1.002 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.02, + 
"p99_amplification": 0.773 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.987, + "p99_amplification": 0.939 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.999, + "p99_amplification": 1.296 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.005, + "p99_amplification": 0.985 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.009, + "p99_amplification": 0.975 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.012, + "p99_amplification": 1.042 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.993, + "p99_amplification": 1.01 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1, + "p99_amplification": 1.145 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.998, + "p99_amplification": 0.753 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.999, + "p99_amplification": 0.854 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.013, + "p99_amplification": 1.297 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.996, + "p99_amplification": 0.972 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.993, + "p99_amplification": 0.91 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + 
"routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.01, + "p99_amplification": 0.99 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.999, + "p99_amplification": 0.907 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 1.004, + "p99_amplification": 0.908 + }, + { + "sku": "b200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.01, + "p99_amplification": 0.691 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.054, + "p99_amplification": 0.96 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.014, + "p99_amplification": 0.721 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.01, + "p99_amplification": 0.989 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.081, + "p99_amplification": 1.074 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.159, + "p99_amplification": 1.145 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.236, + "p99_amplification": 1.223 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.026, + "p99_amplification": 0.982 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.957, + "p99_amplification": 0.638 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.947, + "p99_amplification": 0.969 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.003, + "p99_amplification": 1.031 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.06, + "p99_amplification": 1.055 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.121, + "p99_amplification": 1.119 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.056, + "p99_amplification": 1.199 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.971, + "p99_amplification": 0.698 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.976, + "p99_amplification": 0.956 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.007, + "p99_amplification": 1.001 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.024, + "p99_amplification": 1.006 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.022, + "p99_amplification": 1.015 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.079, + "p99_amplification": 1.026 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.027, + "p99_amplification": 0.672 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.06, + "p99_amplification": 1.059 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.103, + 
"p99_amplification": 1.059 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.171, + "p99_amplification": 1.145 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.263, + "p99_amplification": 1.257 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.086, + "p99_amplification": 1.048 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.971, + "p99_amplification": 0.693 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.993, + "p99_amplification": 1.044 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.004, + "p99_amplification": 1.025 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.016, + "p99_amplification": 1.161 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.007, + "p99_amplification": 1.05 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.057, + "p99_amplification": 0.954 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.009, + "p99_amplification": 0.705 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.014, + "p99_amplification": 0.982 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.084, + "p99_amplification": 1.067 + }, + { + "sku": "b200", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.164, + "p99_amplification": 1.138 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.239, + "p99_amplification": 1.232 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.059, + "p99_amplification": 1.051 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.988, + "p99_amplification": 0.734 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.997, + "p99_amplification": 1.027 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.003, + "p99_amplification": 1 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.019, + "p99_amplification": 1.015 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.004, + "p99_amplification": 0.994 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.016, + "p99_amplification": 1.03 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.994, + "p99_amplification": 0.834 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.984, + "p99_amplification": 1.026 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.007, + "p99_amplification": 1.031 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + 
"p50_amplification": 1.016, + "p99_amplification": 0.988 + }, + { + "sku": "b200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.006, + "p99_amplification": 0.997 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.677, + "p99_amplification": 1.063 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.712, + "p99_amplification": 1.117 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.733, + "p99_amplification": 0.835 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.734, + "p99_amplification": 1.135 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.741, + "p99_amplification": 1.599 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.762, + "p99_amplification": 1.301 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.759, + "p99_amplification": 1.228 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.815, + "p99_amplification": 1.163 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.672, + "p99_amplification": 0.866 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.702, + "p99_amplification": 1.314 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.718, + "p99_amplification": 0.842 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.716, + 
"p99_amplification": 0.937 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.741, + "p99_amplification": 0.818 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.718, + "p99_amplification": 0.817 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.764, + "p99_amplification": 0.883 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.748, + "p99_amplification": 0.803 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.682, + "p99_amplification": 0.723 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.715, + "p99_amplification": 0.743 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.739, + "p99_amplification": 0.783 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.737, + "p99_amplification": 0.927 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.762, + "p99_amplification": 1.215 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.698, + "p99_amplification": 0.934 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.799, + "p99_amplification": 0.939 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.761, + "p99_amplification": 0.863 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-mild", + "T": 1, + "p50_amplification": 0.672, + "p99_amplification": 0.765 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.71, + "p99_amplification": 0.738 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.735, + "p99_amplification": 0.775 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.737, + "p99_amplification": 0.873 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.757, + "p99_amplification": 0.844 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.701, + "p99_amplification": 0.839 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.853, + "p99_amplification": 1.006 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.838, + "p99_amplification": 0.895 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.686, + "p99_amplification": 1.101 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.724, + "p99_amplification": 1.759 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.731, + "p99_amplification": 0.8 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.737, + "p99_amplification": 1.601 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.755, + "p99_amplification": 1.229 + }, + { + "sku": "b300", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.704, + "p99_amplification": 1.43 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.846, + "p99_amplification": 1.401 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.764, + "p99_amplification": 0.878 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.674, + "p99_amplification": 1.17 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.711, + "p99_amplification": 1.098 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.733, + "p99_amplification": 0.759 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.734, + "p99_amplification": 1.113 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.741, + "p99_amplification": 0.89 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.764, + "p99_amplification": 0.88 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.761, + "p99_amplification": 0.941 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.829, + "p99_amplification": 1.223 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.687, + "p99_amplification": 1.032 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.724, + 
"p99_amplification": 1.258 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.744, + "p99_amplification": 0.965 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.741, + "p99_amplification": 1.278 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.766, + "p99_amplification": 0.927 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.705, + "p99_amplification": 1.297 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.856, + "p99_amplification": 0.925 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.765, + "p99_amplification": 0.863 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.689, + "p99_amplification": 1.158 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.724, + "p99_amplification": 1.249 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.744, + "p99_amplification": 0.747 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.746, + "p99_amplification": 0.878 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.761, + "p99_amplification": 0.828 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.707, + "p99_amplification": 0.784 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": 
"zipf+eplb", + "T": 64, + "p50_amplification": 0.858, + "p99_amplification": 0.891 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.768, + "p99_amplification": 0.87 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.674, + "p99_amplification": 1.278 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.711, + "p99_amplification": 1.282 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.714, + "p99_amplification": 0.976 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.732, + "p99_amplification": 1.939 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.745, + "p99_amplification": 1.428 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.757, + "p99_amplification": 1.22 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.771, + "p99_amplification": 1.436 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.831, + "p99_amplification": 1.228 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.746, + "p99_amplification": 0.842 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.705, + "p99_amplification": 0.895 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.706, + "p99_amplification": 0.732 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + 
"p50_amplification": 0.713, + "p99_amplification": 0.901 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.725, + "p99_amplification": 1.047 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.721, + "p99_amplification": 0.917 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.769, + "p99_amplification": 1.547 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.756, + "p99_amplification": 0.877 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.71, + "p99_amplification": 1.593 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.738, + "p99_amplification": 1.194 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.736, + "p99_amplification": 0.974 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.759, + "p99_amplification": 1.944 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.768, + "p99_amplification": 1.273 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.716, + "p99_amplification": 0.902 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.853, + "p99_amplification": 1.552 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.776, + "p99_amplification": 0.97 + }, + { + "sku": "b300", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.673, + "p99_amplification": 0.791 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.71, + "p99_amplification": 1.281 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.714, + "p99_amplification": 0.981 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.739, + "p99_amplification": 0.896 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.818, + "p99_amplification": 0.751 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.709, + "p99_amplification": 1.007 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.852, + "p99_amplification": 1.255 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.841, + "p99_amplification": 1.13 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.691, + "p99_amplification": 0.784 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.726, + "p99_amplification": 0.727 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.73, + "p99_amplification": 0.653 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.753, + "p99_amplification": 0.782 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.766, + "p99_amplification": 0.814 + }, + { + "sku": 
"b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.71, + "p99_amplification": 0.857 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.846, + "p99_amplification": 1.101 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.769, + "p99_amplification": 0.824 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.676, + "p99_amplification": 1.251 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.715, + "p99_amplification": 1.403 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.712, + "p99_amplification": 0.683 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.744, + "p99_amplification": 1.408 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.745, + "p99_amplification": 0.835 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.759, + "p99_amplification": 0.789 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.777, + "p99_amplification": 1.127 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.833, + "p99_amplification": 0.954 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.685, + "p99_amplification": 1.394 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + 
"p50_amplification": 0.721, + "p99_amplification": 1.481 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.74, + "p99_amplification": 1.318 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.752, + "p99_amplification": 1.769 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.76, + "p99_amplification": 1.256 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.702, + "p99_amplification": 0.734 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.861, + "p99_amplification": 1.363 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.771, + "p99_amplification": 1.038 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.689, + "p99_amplification": 0.849 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.726, + "p99_amplification": 1.289 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.724, + "p99_amplification": 0.703 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.747, + "p99_amplification": 0.875 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.76, + "p99_amplification": 0.837 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.709, + "p99_amplification": 0.78 + }, + { + "sku": "b300", + "ep": 8, + "phase": 
"decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.859, + "p99_amplification": 1.219 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.771, + "p99_amplification": 1.142 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.824, + "p99_amplification": 0.874 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.843, + "p99_amplification": 0.735 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.922, + "p99_amplification": 0.926 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.031, + "p99_amplification": 0.863 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.101, + "p99_amplification": 1.705 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.167, + "p99_amplification": 1.021 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.736, + "p99_amplification": 0.842 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.795, + "p99_amplification": 0.674 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.805, + "p99_amplification": 0.849 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.915, + "p99_amplification": 0.776 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.986, + "p99_amplification": 1.025 + }, + { + "sku": "b300", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.044, + "p99_amplification": 0.954 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.769, + "p99_amplification": 1.21 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.858, + "p99_amplification": 0.71 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.867, + "p99_amplification": 1.081 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.967, + "p99_amplification": 0.936 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.991, + "p99_amplification": 1.285 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.004, + "p99_amplification": 0.906 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.836, + "p99_amplification": 1.053 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.888, + "p99_amplification": 0.747 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.963, + "p99_amplification": 1.058 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.054, + "p99_amplification": 0.97 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.13, + "p99_amplification": 1.272 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.188, 
+ "p99_amplification": 1.073 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.763, + "p99_amplification": 1.027 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.837, + "p99_amplification": 0.848 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.867, + "p99_amplification": 1.057 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.968, + "p99_amplification": 0.795 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.969, + "p99_amplification": 1.024 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.986, + "p99_amplification": 0.921 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.831, + "p99_amplification": 1.42 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.857, + "p99_amplification": 0.983 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.915, + "p99_amplification": 1.021 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.033, + "p99_amplification": 0.915 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.1, + "p99_amplification": 1.292 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.166, + "p99_amplification": 1.032 + }, + { + "sku": "b300", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.764, + "p99_amplification": 1.216 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.862, + "p99_amplification": 0.694 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.866, + "p99_amplification": 1.004 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.953, + "p99_amplification": 0.858 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.973, + "p99_amplification": 1.063 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.982, + "p99_amplification": 1.001 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.765, + "p99_amplification": 1.233 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.863, + "p99_amplification": 1.029 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.865, + "p99_amplification": 1.115 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.955, + "p99_amplification": 0.926 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.978, + "p99_amplification": 1.225 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.981, + "p99_amplification": 0.943 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + 
"p50_amplification": 0.816, + "p99_amplification": 0.887 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.835, + "p99_amplification": 0.686 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.915, + "p99_amplification": 0.925 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.017, + "p99_amplification": 0.9 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.097, + "p99_amplification": 1.154 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.162, + "p99_amplification": 1.002 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.747, + "p99_amplification": 0.793 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.803, + "p99_amplification": 0.639 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.832, + "p99_amplification": 0.805 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.918, + "p99_amplification": 0.77 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.975, + "p99_amplification": 1.049 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.035, + "p99_amplification": 0.928 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.767, + "p99_amplification": 0.859 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.869, + "p99_amplification": 0.668 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.869, + "p99_amplification": 0.846 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.967, + "p99_amplification": 0.79 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.989, + "p99_amplification": 1.09 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.003, + "p99_amplification": 0.876 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.838, + "p99_amplification": 0.87 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.891, + "p99_amplification": 0.726 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.964, + "p99_amplification": 0.928 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.053, + "p99_amplification": 0.86 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.125, + "p99_amplification": 1.156 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.184, + "p99_amplification": 1.046 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.765, + "p99_amplification": 0.913 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.833, + "p99_amplification": 0.665 + 
}, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.867, + "p99_amplification": 0.822 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.964, + "p99_amplification": 0.809 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.974, + "p99_amplification": 1.12 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.988, + "p99_amplification": 0.87 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.827, + "p99_amplification": 1.058 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.841, + "p99_amplification": 0.733 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.915, + "p99_amplification": 1.015 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.017, + "p99_amplification": 0.894 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.096, + "p99_amplification": 1.215 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.162, + "p99_amplification": 1.006 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.767, + "p99_amplification": 1.127 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.84, + "p99_amplification": 0.817 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.87, + "p99_amplification": 0.981 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.977, + "p99_amplification": 0.864 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.99, + "p99_amplification": 1.04 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.982, + "p99_amplification": 0.874 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.766, + "p99_amplification": 0.815 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.845, + "p99_amplification": 0.682 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.867, + "p99_amplification": 0.844 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.952, + "p99_amplification": 0.795 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.976, + "p99_amplification": 0.999 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.981, + "p99_amplification": 0.871 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.388, + "p99_amplification": 1.911 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.485, + "p99_amplification": 1.593 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.498, + "p99_amplification": 1.375 + }, + { + "sku": 
"b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.533, + "p99_amplification": 2.38 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.652, + "p99_amplification": 1.411 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 1.486, + "p99_amplification": 1.508 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.683, + "p99_amplification": 1.711 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.43, + "p99_amplification": 1.803 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.379, + "p99_amplification": 2.156 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.43, + "p99_amplification": 1.281 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.462, + "p99_amplification": 1.35 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.471, + "p99_amplification": 1.864 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.527, + "p99_amplification": 2.088 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.33, + "p99_amplification": 1.295 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 1.425, + "p99_amplification": 2.26 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.198, + "p99_amplification": 1.19 + }, + { + "sku": "b300", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 1.375, + "p99_amplification": 1.437 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.484, + "p99_amplification": 1.556 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 1.502, + "p99_amplification": 1.375 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 1.573, + "p99_amplification": 1.553 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 1.727, + "p99_amplification": 2.618 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.669, + "p99_amplification": 1.93 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.952, + "p99_amplification": 1.934 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.665, + "p99_amplification": 3.038 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.384, + "p99_amplification": 1.528 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 1.482, + "p99_amplification": 1.513 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 1.496, + "p99_amplification": 1.339 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 1.555, + "p99_amplification": 1.599 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 1.674, + "p99_amplification": 
1.432 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 1.587, + "p99_amplification": 1.626 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.916, + "p99_amplification": 1.903 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.61, + "p99_amplification": 1.675 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.399, + "p99_amplification": 1.365 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.49, + "p99_amplification": 1.342 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.519, + "p99_amplification": 1.334 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.59, + "p99_amplification": 1.593 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.727, + "p99_amplification": 1.471 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.656, + "p99_amplification": 1.762 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 2.001, + "p99_amplification": 2.228 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.672, + "p99_amplification": 1.8 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 1.4, + "p99_amplification": 2.716 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + 
"p50_amplification": 1.481, + "p99_amplification": 1.512 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 1.504, + "p99_amplification": 1.324 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 1.544, + "p99_amplification": 1.537 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 1.658, + "p99_amplification": 1.559 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 1.49, + "p99_amplification": 1.499 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.69, + "p99_amplification": 1.712 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.432, + "p99_amplification": 1.435 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.388, + "p99_amplification": 1.509 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.492, + "p99_amplification": 1.707 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.504, + "p99_amplification": 1.317 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.582, + "p99_amplification": 2.391 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.716, + "p99_amplification": 2.481 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.616, + "p99_amplification": 1.76 + }, + { + "sku": 
"b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1.945, + "p99_amplification": 2.105 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.677, + "p99_amplification": 3.105 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 1.398, + "p99_amplification": 1.577 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.478, + "p99_amplification": 1.405 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 1.514, + "p99_amplification": 1.38 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.572, + "p99_amplification": 1.754 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.714, + "p99_amplification": 1.519 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 1.608, + "p99_amplification": 1.648 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 1.947, + "p99_amplification": 2.006 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.684, + "p99_amplification": 2.203 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.429, + "p99_amplification": 1.524 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.173, + "p99_amplification": 0.936 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.96, + "p99_amplification": 1.063 + }, + { + 
"sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.94, + "p99_amplification": 0.763 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.871, + "p99_amplification": 0.884 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.838, + "p99_amplification": 0.793 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.194, + "p99_amplification": 1.782 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.995, + "p99_amplification": 0.783 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.934, + "p99_amplification": 1.022 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.913, + "p99_amplification": 0.81 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.861, + "p99_amplification": 1.005 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.829, + "p99_amplification": 0.721 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.689, + "p99_amplification": 2.563 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.359, + "p99_amplification": 1.204 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.012, + "p99_amplification": 1.063 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + 
"p50_amplification": 0.859, + "p99_amplification": 0.723 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.791, + "p99_amplification": 0.985 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.694, + "p99_amplification": 0.611 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.603, + "p99_amplification": 2.077 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.302, + "p99_amplification": 1.073 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.967, + "p99_amplification": 0.969 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.924, + "p99_amplification": 0.769 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.863, + "p99_amplification": 0.879 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.835, + "p99_amplification": 0.711 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.669, + "p99_amplification": 1.789 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.344, + "p99_amplification": 1.1 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.005, + "p99_amplification": 1.008 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.851, + "p99_amplification": 0.703 + }, + { + "sku": "b300", + "ep": 
8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.783, + "p99_amplification": 1.002 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.68, + "p99_amplification": 0.658 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.432, + "p99_amplification": 1.574 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.169, + "p99_amplification": 0.948 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.957, + "p99_amplification": 1.041 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.94, + "p99_amplification": 0.849 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.869, + "p99_amplification": 1.063 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.838, + "p99_amplification": 0.747 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.665, + "p99_amplification": 1.969 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.337, + "p99_amplification": 1.131 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.995, + "p99_amplification": 1.265 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.847, + "p99_amplification": 0.809 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.782, + "p99_amplification": 1.095 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.68, + "p99_amplification": 0.624 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.654, + "p99_amplification": 1.711 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.326, + "p99_amplification": 1.046 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.991, + "p99_amplification": 0.975 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.844, + "p99_amplification": 0.691 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.78, + "p99_amplification": 0.829 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.679, + "p99_amplification": 0.595 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.555, + "p99_amplification": 0.634 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.601, + "p99_amplification": 0.532 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.609, + "p99_amplification": 0.605 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.623, + "p99_amplification": 0.664 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.643, + "p99_amplification": 0.599 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + 
"routing": "zipf", + "T": 32, + "p50_amplification": 0.538, + "p99_amplification": 0.611 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.555, + "p99_amplification": 0.567 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.6, + "p99_amplification": 0.575 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.522, + "p99_amplification": 0.532 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.564, + "p99_amplification": 0.544 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.564, + "p99_amplification": 0.544 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.579, + "p99_amplification": 0.589 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.617, + "p99_amplification": 0.532 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.521, + "p99_amplification": 0.585 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.543, + "p99_amplification": 0.641 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.583, + "p99_amplification": 0.63 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.563, + "p99_amplification": 0.636 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.604, + "p99_amplification": 0.552 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.634, + "p99_amplification": 0.624 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.634, + "p99_amplification": 0.615 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.654, + "p99_amplification": 0.604 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.555, + "p99_amplification": 0.613 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.569, + "p99_amplification": 0.548 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.56, + "p99_amplification": 0.562 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.56, + "p99_amplification": 0.58 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.617, + "p99_amplification": 0.694 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.636, + "p99_amplification": 0.632 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.635, + "p99_amplification": 0.71 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.648, + "p99_amplification": 0.672 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.547, + "p99_amplification": 0.674 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.565, + "p99_amplification": 0.605 + }, + { + "sku": "b300", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.598, + "p99_amplification": 0.639 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.542, + "p99_amplification": 0.57 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.606, + "p99_amplification": 0.655 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.638, + "p99_amplification": 0.588 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.642, + "p99_amplification": 0.79 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.662, + "p99_amplification": 0.655 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.552, + "p99_amplification": 0.639 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.569, + "p99_amplification": 0.69 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.558, + "p99_amplification": 0.583 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.551, + "p99_amplification": 0.671 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.6, + "p99_amplification": 0.676 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.623, + "p99_amplification": 0.513 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.633, + 
"p99_amplification": 0.638 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.644, + "p99_amplification": 0.611 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.541, + "p99_amplification": 0.549 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.553, + "p99_amplification": 0.566 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.593, + "p99_amplification": 0.575 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.564, + "p99_amplification": 0.609 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.608, + "p99_amplification": 0.576 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.636, + "p99_amplification": 0.521 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.64, + "p99_amplification": 0.705 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.648, + "p99_amplification": 0.661 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.549, + "p99_amplification": 0.614 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.565, + "p99_amplification": 0.599 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.554, + "p99_amplification": 0.529 + }, + { + "sku": "b300", + "ep": 8, + 
"phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.557, + "p99_amplification": 0.68 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.612, + "p99_amplification": 0.659 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.641, + "p99_amplification": 0.532 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.64, + "p99_amplification": 0.693 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.649, + "p99_amplification": 0.575 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.547, + "p99_amplification": 0.59 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.565, + "p99_amplification": 0.602 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.558, + "p99_amplification": 0.7 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.595, + "p99_amplification": 0.607 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.698, + "p99_amplification": 0.511 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.85, + "p99_amplification": 0.771 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.167, + "p99_amplification": 0.922 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.303, + "p99_amplification": 1.298 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", 
+ "routing": "zipf", + "T": 4096, + "p50_amplification": 1.376, + "p99_amplification": 1.171 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.575, + "p99_amplification": 0.703 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.675, + "p99_amplification": 0.501 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.846, + "p99_amplification": 0.776 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.058, + "p99_amplification": 0.831 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.154, + "p99_amplification": 1.134 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.22, + "p99_amplification": 1.041 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.56, + "p99_amplification": 0.595 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.614, + "p99_amplification": 0.474 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.698, + "p99_amplification": 0.644 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.816, + "p99_amplification": 0.673 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.874, + "p99_amplification": 0.889 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.908, + 
"p99_amplification": 0.778 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.585, + "p99_amplification": 0.647 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.69, + "p99_amplification": 0.575 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.839, + "p99_amplification": 0.76 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.051, + "p99_amplification": 0.848 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.226, + "p99_amplification": 1.215 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.327, + "p99_amplification": 1.188 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.555, + "p99_amplification": 0.615 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.61, + "p99_amplification": 0.471 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.682, + "p99_amplification": 0.691 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.796, + "p99_amplification": 0.651 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.844, + "p99_amplification": 1.12 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.869, + "p99_amplification": 0.865 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.594, + "p99_amplification": 0.622 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.697, + "p99_amplification": 0.511 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.851, + "p99_amplification": 0.788 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.171, + "p99_amplification": 1.314 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.304, + "p99_amplification": 1.305 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.378, + "p99_amplification": 1.209 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.556, + "p99_amplification": 0.596 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.605, + "p99_amplification": 0.492 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.682, + "p99_amplification": 0.642 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.792, + "p99_amplification": 0.663 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.845, + "p99_amplification": 1.087 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.874, + "p99_amplification": 0.899 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + 
"p50_amplification": 0.553, + "p99_amplification": 0.656 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.606, + "p99_amplification": 0.537 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.679, + "p99_amplification": 0.627 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.793, + "p99_amplification": 0.629 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.845, + "p99_amplification": 0.865 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.873, + "p99_amplification": 0.754 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 2.248, + "p99_amplification": 49.623 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 2.37, + "p99_amplification": 51.729 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 2.425, + "p99_amplification": 39.812 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 2.503, + "p99_amplification": 45.714 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 2.57, + "p99_amplification": 39.102 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 2.367, + "p99_amplification": 37.608 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 2.591, + "p99_amplification": 28.789 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 
2.685, + "p99_amplification": 26.287 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 2.289, + "p99_amplification": 56.144 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 2.382, + "p99_amplification": 57.163 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 2.479, + "p99_amplification": 50.937 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 2.604, + "p99_amplification": 56.119 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 2.579, + "p99_amplification": 43.683 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 2.529, + "p99_amplification": 37.791 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 2.903, + "p99_amplification": 32.497 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 3.054, + "p99_amplification": 32.063 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 2.282, + "p99_amplification": 49.052 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 2.398, + "p99_amplification": 47.375 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 2.471, + "p99_amplification": 41.397 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 2.478, + "p99_amplification": 44.467 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-heavy+eplb", + "T": 16, + "p50_amplification": 2.464, + "p99_amplification": 38.425 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 2.286, + "p99_amplification": 35.945 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 2.344, + "p99_amplification": 37.35 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 2.009, + "p99_amplification": 36.48 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 2.254, + "p99_amplification": 53.407 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 2.376, + "p99_amplification": 55.37 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 2.452, + "p99_amplification": 43.543 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 2.48, + "p99_amplification": 49.856 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 2.559, + "p99_amplification": 39.858 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 2.335, + "p99_amplification": 39.055 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 2.408, + "p99_amplification": 27.633 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.289, + "p99_amplification": 28.694 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 2.218, + "p99_amplification": 56.912 + }, + { + "sku": "b300", + "ep": 
8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 2.33, + "p99_amplification": 48.825 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 2.41, + "p99_amplification": 39.581 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 2.437, + "p99_amplification": 47.718 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 2.449, + "p99_amplification": 36.847 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 2.268, + "p99_amplification": 35.96 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 2.308, + "p99_amplification": 33.2 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.978, + "p99_amplification": 33.427 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 2.295, + "p99_amplification": 63.559 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 2.4, + "p99_amplification": 49.747 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 2.49, + "p99_amplification": 51.663 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 2.567, + "p99_amplification": 49.459 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 2.636, + "p99_amplification": 33.398 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 2.427, + 
"p99_amplification": 38.584 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 2.651, + "p99_amplification": 39.126 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 2.733, + "p99_amplification": 26.076 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 2.248, + "p99_amplification": 58.681 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 2.355, + "p99_amplification": 56.519 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 2.444, + "p99_amplification": 44.062 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 2.45, + "p99_amplification": 39.285 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 2.468, + "p99_amplification": 38.436 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 2.276, + "p99_amplification": 41.322 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 2.348, + "p99_amplification": 30.435 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.997, + "p99_amplification": 26.291 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 2.284, + "p99_amplification": 63.864 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 2.387, + "p99_amplification": 56.563 + }, + { + "sku": "b300", + "ep": 8, 
+ "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 2.473, + "p99_amplification": 45.133 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 2.485, + "p99_amplification": 45.819 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 2.477, + "p99_amplification": 54.391 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 2.295, + "p99_amplification": 2.284 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 2.362, + "p99_amplification": 32.636 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 2.007, + "p99_amplification": 36.46 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 2.656, + "p99_amplification": 17.775 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 2.672, + "p99_amplification": 11.428 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 2.743, + "p99_amplification": 8.54 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 3.105, + "p99_amplification": 3.739 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 3.057, + "p99_amplification": 3.182 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 3.052, + "p99_amplification": 2.614 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 3.049, + "p99_amplification": 17.029 + }, + { + "sku": "b300", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 3.254, + "p99_amplification": 9.905 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 3.541, + "p99_amplification": 7.445 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 4.159, + "p99_amplification": 3.9 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 4.311, + "p99_amplification": 4.42 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 4.406, + "p99_amplification": 3.749 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.995, + "p99_amplification": 19.43 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.734, + "p99_amplification": 7.675 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.456, + "p99_amplification": 10.094 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.422, + "p99_amplification": 5.891 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.155, + "p99_amplification": 3.984 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.998, + "p99_amplification": 1.57 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.222, + "p99_amplification": 21.263 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 
2.124, + "p99_amplification": 13.04 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.978, + "p99_amplification": 10.582 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 2.111, + "p99_amplification": 5.169 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.894, + "p99_amplification": 3.103 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.787, + "p99_amplification": 1.603 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.982, + "p99_amplification": 22.729 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.722, + "p99_amplification": 7.684 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.432, + "p99_amplification": 12.28 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.413, + "p99_amplification": 6.004 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.158, + "p99_amplification": 3.936 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.995, + "p99_amplification": 1.505 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 2.7, + "p99_amplification": 13.73 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 2.718, + "p99_amplification": 9.194 + }, + { + "sku": "b300", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 2.771, + "p99_amplification": 8.595 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 3.103, + "p99_amplification": 4.24 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 3.065, + "p99_amplification": 3.199 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 3.055, + "p99_amplification": 2.622 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.972, + "p99_amplification": 19.216 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.719, + "p99_amplification": 12.685 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.443, + "p99_amplification": 13.101 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.412, + "p99_amplification": 5.884 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.157, + "p99_amplification": 4.209 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.995, + "p99_amplification": 1.535 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.983, + "p99_amplification": 14.781 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.722, + "p99_amplification": 10.956 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 
512, + "p50_amplification": 1.446, + "p99_amplification": 12.54 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.408, + "p99_amplification": 5.805 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.153, + "p99_amplification": 4.044 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.995, + "p99_amplification": 1.469 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.005, + "p99_amplification": 1.042 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.054, + "p99_amplification": 1.14 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1, + "p99_amplification": 0.976 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.995, + "p99_amplification": 1.195 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.989, + "p99_amplification": 0.902 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.931, + "p99_amplification": 1.553 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.924, + "p99_amplification": 0.986 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.948, + "p99_amplification": 0.947 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.004, + "p99_amplification": 1.141 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.004, + 
"p99_amplification": 1.034 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.998, + "p99_amplification": 1.197 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.99, + "p99_amplification": 1.199 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.993, + "p99_amplification": 1.036 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.914, + "p99_amplification": 1.063 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.963, + "p99_amplification": 1.212 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.933, + "p99_amplification": 0.924 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 1.015, + "p99_amplification": 1.238 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.001, + "p99_amplification": 1.071 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.997, + "p99_amplification": 0.918 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.995, + "p99_amplification": 1.116 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.991, + "p99_amplification": 1.012 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.991, + "p99_amplification": 1.011 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.987, + "p99_amplification": 1.047 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.001, + "p99_amplification": 1.033 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.011, + "p99_amplification": 1.044 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.993, + "p99_amplification": 0.989 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.987, + "p99_amplification": 1.114 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.995, + "p99_amplification": 1.102 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.992, + "p99_amplification": 1.001 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.984, + "p99_amplification": 0.955 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.009, + "p99_amplification": 1.332 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.025, + "p99_amplification": 1.614 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1, + "p99_amplification": 1.073 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.002, + "p99_amplification": 1.016 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.996, + "p99_amplification": 0.959 + }, + { + "sku": "b300", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.997, + "p99_amplification": 0.955 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1, + "p99_amplification": 1.582 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.993, + "p99_amplification": 1.1 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.043, + "p99_amplification": 0.981 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.997, + "p99_amplification": 0.957 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.96, + "p99_amplification": 1.252 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 1.025, + "p99_amplification": 1.142 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.991, + "p99_amplification": 1.024 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.989, + "p99_amplification": 1.638 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.98, + "p99_amplification": 0.874 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.937, + "p99_amplification": 1.05 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.928, + "p99_amplification": 1.04 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.948, + "p99_amplification": 0.947 + 
}, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.023, + "p99_amplification": 1.087 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1, + "p99_amplification": 0.929 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.987, + "p99_amplification": 0.841 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.988, + "p99_amplification": 0.988 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.002, + "p99_amplification": 1.096 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.003, + "p99_amplification": 1.137 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.961, + "p99_amplification": 1.154 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.003, + "p99_amplification": 1.037 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 1.02, + "p99_amplification": 1.092 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.028, + "p99_amplification": 1.075 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.999, + "p99_amplification": 0.895 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.003, + "p99_amplification": 1.2 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, 
+ "p50_amplification": 0.998, + "p99_amplification": 1.043 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.997, + "p99_amplification": 1.001 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.957, + "p99_amplification": 1.223 + }, + { + "sku": "b300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.007, + "p99_amplification": 1.011 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.941, + "p99_amplification": 0.973 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.969, + "p99_amplification": 0.821 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.017, + "p99_amplification": 0.964 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.096, + "p99_amplification": 1.028 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.145, + "p99_amplification": 1.289 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.206, + "p99_amplification": 1.066 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.923, + "p99_amplification": 0.973 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.899, + "p99_amplification": 0.73 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.924, + "p99_amplification": 1.058 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, 
+ "p50_amplification": 0.982, + "p99_amplification": 0.858 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.029, + "p99_amplification": 1.173 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.092, + "p99_amplification": 0.978 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.993, + "p99_amplification": 0.995 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1, + "p99_amplification": 0.761 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.998, + "p99_amplification": 0.976 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.026, + "p99_amplification": 0.88 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.017, + "p99_amplification": 1.022 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.02, + "p99_amplification": 0.879 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.013, + "p99_amplification": 1.052 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.03, + "p99_amplification": 0.784 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.061, + "p99_amplification": 1.067 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.12, + "p99_amplification": 0.998 + }, + { + "sku": "b300", + 
"ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.158, + "p99_amplification": 1.236 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.231, + "p99_amplification": 1.072 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.996, + "p99_amplification": 1.151 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.003, + "p99_amplification": 0.753 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.988, + "p99_amplification": 0.922 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.007, + "p99_amplification": 0.831 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.992, + "p99_amplification": 1.026 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.001, + "p99_amplification": 0.873 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.938, + "p99_amplification": 1.009 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.972, + "p99_amplification": 0.788 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.014, + "p99_amplification": 1.041 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.095, + "p99_amplification": 1.076 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + 
"p50_amplification": 1.145, + "p99_amplification": 1.219 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.205, + "p99_amplification": 1.063 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.999, + "p99_amplification": 1.006 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.975, + "p99_amplification": 0.783 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.995, + "p99_amplification": 0.999 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.975, + "p99_amplification": 0.899 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.007, + "p99_amplification": 1.024 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.002, + "p99_amplification": 0.877 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.99, + "p99_amplification": 1.091 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.996, + "p99_amplification": 0.797 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.996, + "p99_amplification": 0.96 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.975, + "p99_amplification": 0.81 + }, + { + "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.007, + "p99_amplification": 1.023 + }, + { 
+ "sku": "b300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.002, + "p99_amplification": 0.865 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.168, + "p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.173, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.167, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.181, + "p99_amplification": 0.055 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.17, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.169, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.182, + "p99_amplification": 0.06 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.214, + "p99_amplification": 0.07 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.167, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.17, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.168, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.169, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, 
+ "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.169, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.169, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.179, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.199, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.157, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.158, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.155, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.154, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.155, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.158, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.179, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.204, + "p99_amplification": 0.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.163, + 
"p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.165, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.159, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.159, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.162, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.163, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.182, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.21, + "p99_amplification": 0.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.169, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.171, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.163, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.165, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.166, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + 
"T": 32, + "p50_amplification": 0.166, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.187, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.211, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.165, + "p99_amplification": 0.103 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.168, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.163, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.162, + "p99_amplification": 0.145 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.162, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.164, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.183, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.205, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.172, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.174, + "p99_amplification": 0.04 + }, + { + "sku": 
"gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.169, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.17, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.171, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.171, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.192, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.216, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.171, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.172, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.168, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.169, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.165, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.17, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + 
"p50_amplification": 0.189, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.213, + "p99_amplification": 0.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.161, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.161, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.156, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.157, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.157, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.16, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.179, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.202, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.166, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.166, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.164, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.167, 
+ "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.168, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.166, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.179, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.199, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.161, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.163, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.159, + "p99_amplification": 0.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.158, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.159, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.163, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.183, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.204, + "p99_amplification": 0.06 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + 
"routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.177, + "p99_amplification": 0.07 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.177, + "p99_amplification": 0.063 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.171, + "p99_amplification": 0.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.177, + "p99_amplification": 0.065 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.177, + "p99_amplification": 0.062 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.176, + "p99_amplification": 0.071 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.194, + "p99_amplification": 0.05 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.223, + "p99_amplification": 0.082 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.178, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.177, + "p99_amplification": 0.058 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.172, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.173, + "p99_amplification": 0.055 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.176, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + 
"ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.172, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.195, + "p99_amplification": 0.062 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.218, + "p99_amplification": 0.07 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.166, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.167, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.164, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.165, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.166, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.165, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.184, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.208, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.168, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + 
"p50_amplification": 0.17, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.165, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.165, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.166, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.168, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.191, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.212, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.163, + "p99_amplification": 0.057 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.175, + "p99_amplification": 0.057 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.162, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.16, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.16, + "p99_amplification": 0.055 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.168, + "p99_amplification": 0.062 + }, + { + "sku": "gb300", + "ep": 
4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.186, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.211, + "p99_amplification": 0.069 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.143, + "p99_amplification": 0.028 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.16, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.151, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.157, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.15, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.162, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.16, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.186, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.139, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.157, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.146, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.157, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.151, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.165, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.159, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.184, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.144, + "p99_amplification": 0.028 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.16, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.15, + "p99_amplification": 0.03 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.156, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.149, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.166, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.166, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.187, + "p99_amplification": 0.041 
+ }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.132, + "p99_amplification": 0.027 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.146, + "p99_amplification": 0.028 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.14, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.147, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.14, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.158, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.161, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.188, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.142, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.157, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.15, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.155, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 
0.149, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.167, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.169, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.189, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.147, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.164, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.154, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.161, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.153, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.168, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.164, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.191, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.144, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.163, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.152, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.16, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.15, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.168, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.168, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.188, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.138, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.152, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.143, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.15, + "p99_amplification": 0.03 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.145, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.163, + 
"p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.166, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.185, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.15, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.166, + "p99_amplification": 0.03 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.154, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.163, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.156, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.167, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.166, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.193, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.141, + "p99_amplification": 0.027 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.155, + "p99_amplification": 0.028 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.146, + "p99_amplification": 0.028 + 
}, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.155, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.145, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.164, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.16, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.181, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.15, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.163, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.152, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.162, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.153, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.169, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.171, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 
128, + "p50_amplification": 0.192, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.156, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.174, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.164, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.17, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.161, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.175, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.175, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.201, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.157, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.173, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.16, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.171, + "p99_amplification": 0.05 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.163, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.177, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.176, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.195, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.136, + "p99_amplification": 0.027 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.15, + "p99_amplification": 0.027 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.143, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.149, + "p99_amplification": 0.032 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.142, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.16, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.162, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.187, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.144, + 
"p99_amplification": 0.028 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.157, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.148, + "p99_amplification": 0.029 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.156, + "p99_amplification": 0.03 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.149, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.167, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.168, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.19, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.145, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.163, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.152, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.161, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.152, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": 
"decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.172, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.17, + "p99_amplification": 0.057 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.192, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.206, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.256, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.329, + "p99_amplification": 0.085 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.416, + "p99_amplification": 0.133 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.579, + "p99_amplification": 0.239 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.778, + "p99_amplification": 0.5 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.197, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.233, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.289, + "p99_amplification": 0.075 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.363, + "p99_amplification": 0.117 + }, + { + "sku": "gb300", + "ep": 4, 
+ "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.493, + "p99_amplification": 0.204 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.72, + "p99_amplification": 0.462 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.215, + "p99_amplification": 0.058 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.264, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.337, + "p99_amplification": 0.089 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.416, + "p99_amplification": 0.132 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.581, + "p99_amplification": 0.239 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.747, + "p99_amplification": 0.477 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.214, + "p99_amplification": 0.058 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.264, + "p99_amplification": 0.06 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.335, + "p99_amplification": 0.086 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.428, + "p99_amplification": 0.137 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + 
"p50_amplification": 0.6, + "p99_amplification": 0.246 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.783, + "p99_amplification": 0.501 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.212, + "p99_amplification": 0.057 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.264, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.338, + "p99_amplification": 0.087 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.418, + "p99_amplification": 0.135 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.585, + "p99_amplification": 0.242 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.762, + "p99_amplification": 0.487 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.198, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.252, + "p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.324, + "p99_amplification": 0.084 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.412, + "p99_amplification": 0.131 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.574, + "p99_amplification": 0.236 + }, 
+ { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.777, + "p99_amplification": 0.503 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.214, + "p99_amplification": 0.057 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.264, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.338, + "p99_amplification": 0.088 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.419, + "p99_amplification": 0.134 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.582, + "p99_amplification": 0.241 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.758, + "p99_amplification": 0.487 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.211, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.263, + "p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.336, + "p99_amplification": 0.086 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.417, + "p99_amplification": 0.133 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.582, + "p99_amplification": 0.24 + }, + { + "sku": "gb300", + "ep": 4, + "phase": 
"prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.758, + "p99_amplification": 0.485 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.205, + "p99_amplification": 0.068 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.256, + "p99_amplification": 0.068 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.331, + "p99_amplification": 0.102 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.414, + "p99_amplification": 0.146 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.574, + "p99_amplification": 0.251 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.772, + "p99_amplification": 0.499 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.199, + "p99_amplification": 0.066 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.234, + "p99_amplification": 0.062 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.287, + "p99_amplification": 0.076 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.362, + "p99_amplification": 0.129 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.493, + "p99_amplification": 0.207 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.719, + "p99_amplification": 0.461 + }, + { + "sku": 
"gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.208, + "p99_amplification": 0.077 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.26, + "p99_amplification": 0.063 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.331, + "p99_amplification": 0.099 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.412, + "p99_amplification": 0.147 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.57, + "p99_amplification": 0.252 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.736, + "p99_amplification": 0.487 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.215, + "p99_amplification": 0.077 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.261, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.336, + "p99_amplification": 0.103 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.423, + "p99_amplification": 0.134 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.597, + "p99_amplification": 0.259 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.782, + "p99_amplification": 0.508 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + 
"T": 128, + "p50_amplification": 0.211, + "p99_amplification": 0.058 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.266, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.336, + "p99_amplification": 0.087 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.417, + "p99_amplification": 0.132 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.581, + "p99_amplification": 0.238 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.754, + "p99_amplification": 0.483 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.204, + "p99_amplification": 0.077 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.253, + "p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.327, + "p99_amplification": 0.09 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.412, + "p99_amplification": 0.151 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.573, + "p99_amplification": 0.253 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.774, + "p99_amplification": 0.507 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.212, + 
"p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.264, + "p99_amplification": 0.06 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.333, + "p99_amplification": 0.085 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.417, + "p99_amplification": 0.133 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.578, + "p99_amplification": 0.239 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.751, + "p99_amplification": 0.479 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.212, + "p99_amplification": 0.057 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.265, + "p99_amplification": 0.068 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.336, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.415, + "p99_amplification": 0.133 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.578, + "p99_amplification": 0.239 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.751, + "p99_amplification": 0.481 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.165, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.249, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.346, + "p99_amplification": 0.065 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.461, + "p99_amplification": 0.109 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.705, + "p99_amplification": 5.532 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.988, + "p99_amplification": 0.437 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.157, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.235, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.315, + "p99_amplification": 0.065 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.416, + "p99_amplification": 0.104 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.62, + "p99_amplification": 0.204 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.876, + "p99_amplification": 0.39 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.165, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.253, + "p99_amplification": 0.053 + 
}, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.345, + "p99_amplification": 0.065 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.449, + "p99_amplification": 0.117 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.634, + "p99_amplification": 0.333 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.85, + "p99_amplification": 0.377 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.168, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.26, + "p99_amplification": 0.058 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.362, + "p99_amplification": 0.078 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.484, + "p99_amplification": 0.119 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.711, + "p99_amplification": 0.223 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.994, + "p99_amplification": 0.44 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.171, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.262, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.357, + "p99_amplification": 0.068 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.456, + "p99_amplification": 0.106 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.635, + "p99_amplification": 0.203 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.835, + "p99_amplification": 0.369 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.168, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.248, + "p99_amplification": 0.063 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.347, + "p99_amplification": 0.065 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.46, + "p99_amplification": 0.109 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.702, + "p99_amplification": 0.228 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.987, + "p99_amplification": 0.439 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.166, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.254, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + 
"p50_amplification": 0.343, + "p99_amplification": 0.065 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.445, + "p99_amplification": 0.106 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.63, + "p99_amplification": 0.204 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.84, + "p99_amplification": 0.373 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.172, + "p99_amplification": 0.05 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.264, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.355, + "p99_amplification": 0.075 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.452, + "p99_amplification": 0.111 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.63, + "p99_amplification": 0.211 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.843, + "p99_amplification": 0.391 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.17, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.251, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.347, + "p99_amplification": 0.075 + }, + { + "sku": "gb300", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.464, + "p99_amplification": 0.11 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.706, + "p99_amplification": 0.223 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.981, + "p99_amplification": 0.435 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.163, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.243, + "p99_amplification": 0.053 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.32, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.426, + "p99_amplification": 1.007 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.622, + "p99_amplification": 0.198 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.876, + "p99_amplification": 0.39 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.166, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.256, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.348, + "p99_amplification": 0.22 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.45, + 
"p99_amplification": 0.105 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.636, + "p99_amplification": 3.337 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.849, + "p99_amplification": 0.386 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.172, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.261, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.366, + "p99_amplification": 0.072 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.489, + "p99_amplification": 0.124 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.71, + "p99_amplification": 1.15 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.985, + "p99_amplification": 0.445 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.169, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.259, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.348, + "p99_amplification": 0.065 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.448, + "p99_amplification": 0.11 + }, + { + "sku": "gb300", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.626, + "p99_amplification": 0.198 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.829, + "p99_amplification": 0.366 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.169, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.252, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.35, + "p99_amplification": 0.071 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.471, + "p99_amplification": 1.255 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.707, + "p99_amplification": 0.526 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.98, + "p99_amplification": 0.437 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.17, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.26, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.347, + "p99_amplification": 0.069 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.443, + "p99_amplification": 0.107 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.63, + "p99_amplification": 0.204 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.833, + "p99_amplification": 0.373 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.169, + "p99_amplification": 0.063 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.256, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.343, + "p99_amplification": 0.064 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.441, + "p99_amplification": 0.104 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.629, + "p99_amplification": 2.137 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.833, + "p99_amplification": 0.367 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.471, + "p99_amplification": 0.103 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.477, + "p99_amplification": 0.098 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.461, + "p99_amplification": 0.107 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.46, + "p99_amplification": 0.098 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.462, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 4, + "phase": 
"decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.463, + "p99_amplification": 0.109 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.493, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.493, + "p99_amplification": 0.116 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.437, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.451, + "p99_amplification": 0.094 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.447, + "p99_amplification": 0.102 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.448, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.449, + "p99_amplification": 0.099 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.448, + "p99_amplification": 0.105 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.457, + "p99_amplification": 0.104 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.457, + "p99_amplification": 0.108 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.446, + "p99_amplification": 0.099 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.452, + "p99_amplification": 0.093 + }, + { + "sku": "gb300", + 
"ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.439, + "p99_amplification": 0.111 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.437, + "p99_amplification": 0.093 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.44, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.449, + "p99_amplification": 0.106 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.501, + "p99_amplification": 0.114 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.498, + "p99_amplification": 0.118 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.45, + "p99_amplification": 0.098 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.456, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.438, + "p99_amplification": 0.101 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.44, + "p99_amplification": 0.093 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.441, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.446, + "p99_amplification": 0.107 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.501, + 
"p99_amplification": 0.114 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.502, + "p99_amplification": 0.119 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.461, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.468, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.452, + "p99_amplification": 0.103 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.451, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.452, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.46, + "p99_amplification": 0.106 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.515, + "p99_amplification": 0.124 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.519, + "p99_amplification": 0.121 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.451, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.457, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.44, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": 
"zipf-moderate", + "T": 8, + "p50_amplification": 0.441, + "p99_amplification": 0.092 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.444, + "p99_amplification": 0.096 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.445, + "p99_amplification": 0.104 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.478, + "p99_amplification": 0.108 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.479, + "p99_amplification": 0.113 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.464, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.469, + "p99_amplification": 0.099 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.454, + "p99_amplification": 0.102 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.453, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.457, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.459, + "p99_amplification": 0.107 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.519, + "p99_amplification": 0.117 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.516, 
+ "p99_amplification": 0.122 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.457, + "p99_amplification": 0.115 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.46, + "p99_amplification": 0.096 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.446, + "p99_amplification": 0.102 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.448, + "p99_amplification": 0.096 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.449, + "p99_amplification": 0.096 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.453, + "p99_amplification": 0.105 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.509, + "p99_amplification": 0.116 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.509, + "p99_amplification": 0.12 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.506, + "p99_amplification": 0.083 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.561, + "p99_amplification": 0.089 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.521, + "p99_amplification": 0.088 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.545, + "p99_amplification": 0.094 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.511, + 
"p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.552, + "p99_amplification": 0.096 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.524, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.523, + "p99_amplification": 0.106 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.499, + "p99_amplification": 0.08 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.583, + "p99_amplification": 0.089 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.541, + "p99_amplification": 0.089 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.566, + "p99_amplification": 0.127 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.531, + "p99_amplification": 0.126 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.574, + "p99_amplification": 0.125 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.523, + "p99_amplification": 0.109 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.523, + "p99_amplification": 0.102 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.54, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + 
"p50_amplification": 0.598, + "p99_amplification": 0.092 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.537, + "p99_amplification": 0.101 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.572, + "p99_amplification": 0.111 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.544, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.58, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.574, + "p99_amplification": 0.138 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.575, + "p99_amplification": 0.13 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.523, + "p99_amplification": 0.085 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.58, + "p99_amplification": 0.091 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.536, + "p99_amplification": 0.09 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.558, + "p99_amplification": 0.099 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.521, + "p99_amplification": 0.098 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.567, + "p99_amplification": 0.096 + }, + { + "sku": "gb300", + "ep": 8, + "phase": 
"decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.564, + "p99_amplification": 0.119 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.556, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.516, + "p99_amplification": 0.089 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.556, + "p99_amplification": 0.091 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.508, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.563, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.523, + "p99_amplification": 0.105 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.564, + "p99_amplification": 0.099 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.557, + "p99_amplification": 0.125 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.562, + "p99_amplification": 0.119 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.502, + "p99_amplification": 0.093 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.552, + "p99_amplification": 0.103 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.51, + 
"p99_amplification": 0.087 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.536, + "p99_amplification": 0.094 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.503, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.542, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.513, + "p99_amplification": 0.109 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.51, + "p99_amplification": 0.105 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.517, + "p99_amplification": 0.086 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.566, + "p99_amplification": 0.09 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.519, + "p99_amplification": 0.087 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.546, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.51, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.56, + "p99_amplification": 0.094 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.559, + "p99_amplification": 0.115 + }, + { + "sku": "gb300", + "ep": 8, 
+ "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.565, + "p99_amplification": 0.111 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.519, + "p99_amplification": 0.085 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.579, + "p99_amplification": 0.094 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.522, + "p99_amplification": 0.091 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.556, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.519, + "p99_amplification": 0.099 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.57, + "p99_amplification": 0.097 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.566, + "p99_amplification": 0.126 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.572, + "p99_amplification": 0.122 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.505, + "p99_amplification": 0.145 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.488, + "p99_amplification": 0.129 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.458, + "p99_amplification": 0.14 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.407, + "p99_amplification": 0.15 + }, + { + "sku": "gb300", + 
"ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.429, + "p99_amplification": 0.199 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.432, + "p99_amplification": 0.298 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.478, + "p99_amplification": 0.123 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.468, + "p99_amplification": 0.108 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.442, + "p99_amplification": 0.115 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.406, + "p99_amplification": 0.137 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.414, + "p99_amplification": 0.177 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.426, + "p99_amplification": 0.395 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.51, + "p99_amplification": 0.128 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.511, + "p99_amplification": 0.115 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.48, + "p99_amplification": 0.123 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.411, + "p99_amplification": 0.134 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + 
"p50_amplification": 0.461, + "p99_amplification": 0.194 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.441, + "p99_amplification": 0.29 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.499, + "p99_amplification": 0.126 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.498, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.47, + "p99_amplification": 0.122 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.407, + "p99_amplification": 0.133 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.449, + "p99_amplification": 0.189 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.441, + "p99_amplification": 0.288 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.508, + "p99_amplification": 0.128 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.509, + "p99_amplification": 0.115 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.481, + "p99_amplification": 0.123 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.413, + "p99_amplification": 0.133 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.463, + "p99_amplification": 0.195 + }, + { + "sku": 
"gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.447, + "p99_amplification": 0.29 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.48, + "p99_amplification": 0.122 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.479, + "p99_amplification": 0.108 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.449, + "p99_amplification": 0.116 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.397, + "p99_amplification": 0.129 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.421, + "p99_amplification": 0.178 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.423, + "p99_amplification": 0.276 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.519, + "p99_amplification": 0.131 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.517, + "p99_amplification": 0.118 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.483, + "p99_amplification": 0.124 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.411, + "p99_amplification": 0.132 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.469, + "p99_amplification": 0.198 + }, + { + "sku": "gb300", + "ep": 4, + "phase": 
"prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.445, + "p99_amplification": 0.292 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.511, + "p99_amplification": 0.129 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.514, + "p99_amplification": 0.117 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.484, + "p99_amplification": 0.125 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.412, + "p99_amplification": 0.133 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.467, + "p99_amplification": 0.196 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.444, + "p99_amplification": 0.293 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.408, + "p99_amplification": 0.115 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.48, + "p99_amplification": 0.103 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.499, + "p99_amplification": 0.105 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.524, + "p99_amplification": 0.147 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.621, + "p99_amplification": 0.225 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.735, + "p99_amplification": 0.346 + }, + { + "sku": 
"gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.377, + "p99_amplification": 0.098 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.451, + "p99_amplification": 0.089 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.495, + "p99_amplification": 0.102 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.511, + "p99_amplification": 0.135 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.615, + "p99_amplification": 0.2 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.729, + "p99_amplification": 0.326 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.492, + "p99_amplification": 0.115 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.594, + "p99_amplification": 0.11 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.57, + "p99_amplification": 0.107 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.553, + "p99_amplification": 0.135 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.623, + "p99_amplification": 0.205 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.664, + "p99_amplification": 0.299 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + 
"T": 128, + "p50_amplification": 0.488, + "p99_amplification": 0.11 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.583, + "p99_amplification": 0.102 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.567, + "p99_amplification": 0.106 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.574, + "p99_amplification": 0.136 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.638, + "p99_amplification": 3.102 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.719, + "p99_amplification": 0.319 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.505, + "p99_amplification": 0.114 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.611, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.594, + "p99_amplification": 0.112 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.563, + "p99_amplification": 0.134 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.632, + "p99_amplification": 0.209 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.663, + "p99_amplification": 0.304 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.442, + "p99_amplification": 0.111 + }, + 
{ + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.533, + "p99_amplification": 0.095 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.538, + "p99_amplification": 0.105 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.565, + "p99_amplification": 0.138 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.623, + "p99_amplification": 0.203 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.737, + "p99_amplification": 0.329 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.497, + "p99_amplification": 0.113 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.587, + "p99_amplification": 0.107 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.584, + "p99_amplification": 0.111 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.551, + "p99_amplification": 0.134 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.622, + "p99_amplification": 0.205 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.659, + "p99_amplification": 0.298 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.528, + "p99_amplification": 0.119 + }, + { + "sku": "gb300", + "ep": 8, + 
"phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.63, + "p99_amplification": 0.111 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.609, + "p99_amplification": 0.117 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.575, + "p99_amplification": 0.141 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.649, + "p99_amplification": 0.209 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.671, + "p99_amplification": 0.317 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.183, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.185, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.177, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.18, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.18, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.178, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.183, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.183, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": 
"decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.178, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.179, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.172, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.175, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.174, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.173, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.178, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.179, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.179, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.18, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.178, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.179, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.178, + "p99_amplification": 0.042 + }, + 
{ + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.178, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.182, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.183, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.18, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.183, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.177, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.179, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.177, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.175, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.182, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.183, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.185, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 
0.186, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.182, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.182, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.182, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.181, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.187, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.188, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.184, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.188, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.182, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.183, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.184, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.183, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": 
"decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.189, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.188, + "p99_amplification": 0.05 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.176, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.178, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.174, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.172, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.174, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.175, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.18, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.181, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.183, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.188, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + 
"p50_amplification": 0.182, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.181, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.179, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.181, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.185, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.186, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.18, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.196, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.183, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.191, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.18, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.193, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.182, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.185, + 
"p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.171, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.189, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.176, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.182, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.174, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.188, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.175, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.176, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.173, + "p99_amplification": 0.03 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.194, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.179, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.187, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.175, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.192, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.179, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.181, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.168, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.184, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.174, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.181, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.17, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.183, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.166, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.174, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.175, + "p99_amplification": 0.033 + }, + { + "sku": "gb300", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.191, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.178, + "p99_amplification": 0.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.186, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.174, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.186, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.175, + "p99_amplification": 0.044 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.176, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.201, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.218, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.198, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.213, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.199, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + 
"p50_amplification": 0.211, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.199, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.199, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.18, + "p99_amplification": 0.031 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.197, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.185, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.192, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.181, + "p99_amplification": 0.216 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.191, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.181, + "p99_amplification": 0.043 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.182, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.179, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.199, + "p99_amplification": 0.036 + }, + 
{ + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.186, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.194, + "p99_amplification": 0.039 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.179, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.196, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.181, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.183, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.191, + "p99_amplification": 0.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.197, + "p99_amplification": 0.05 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.209, + "p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.243, + "p99_amplification": 0.079 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.354, + "p99_amplification": 0.148 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 0.499, + "p99_amplification": 0.32 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.178, + "p99_amplification": 0.049 + }, + 
{ + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.185, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.193, + "p99_amplification": 0.055 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.233, + "p99_amplification": 0.076 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.354, + "p99_amplification": 0.147 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.465, + "p99_amplification": 0.298 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.208, + "p99_amplification": 0.056 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.214, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.225, + "p99_amplification": 0.061 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.248, + "p99_amplification": 0.08 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.343, + "p99_amplification": 0.142 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.441, + "p99_amplification": 0.282 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.19, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": 
"zipf-mild", + "T": 256, + "p50_amplification": 0.203, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.216, + "p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.248, + "p99_amplification": 0.08 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.359, + "p99_amplification": 0.149 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 0.469, + "p99_amplification": 0.301 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.188, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.2, + "p99_amplification": 0.049 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.215, + "p99_amplification": 0.058 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.244, + "p99_amplification": 0.078 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.343, + "p99_amplification": 0.142 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.445, + "p99_amplification": 0.285 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.178, + "p99_amplification": 0.057 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.185, + 
"p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.197, + "p99_amplification": 0.062 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.242, + "p99_amplification": 0.078 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.354, + "p99_amplification": 0.147 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 0.496, + "p99_amplification": 0.318 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.185, + "p99_amplification": 0.069 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.198, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.212, + "p99_amplification": 0.073 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.242, + "p99_amplification": 0.089 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.341, + "p99_amplification": 0.148 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.444, + "p99_amplification": 0.287 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.187, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.2, + "p99_amplification": 0.051 + }, + { + 
"sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.214, + "p99_amplification": 0.059 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.243, + "p99_amplification": 0.08 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.342, + "p99_amplification": 0.142 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.444, + "p99_amplification": 0.284 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.155, + "p99_amplification": 0.04 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.21, + "p99_amplification": 0.041 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.296, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 0.471, + "p99_amplification": 0.107 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 0.791, + "p99_amplification": 0.244 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.101, + "p99_amplification": 0.482 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.146, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.197, + "p99_amplification": 0.042 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.288, + "p99_amplification": 
0.052 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 0.454, + "p99_amplification": 0.103 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 0.7, + "p99_amplification": 0.216 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 0.976, + "p99_amplification": 0.427 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.157, + "p99_amplification": 0.046 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.2, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.251, + "p99_amplification": 0.058 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.359, + "p99_amplification": 0.098 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.533, + "p99_amplification": 0.181 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.729, + "p99_amplification": 0.335 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.148, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.204, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.293, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-mild", + "T": 1024, + "p50_amplification": 0.44, + "p99_amplification": 0.1 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 0.721, + "p99_amplification": 0.224 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.049, + "p99_amplification": 0.46 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.156, + "p99_amplification": 0.054 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.208, + "p99_amplification": 0.048 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.271, + "p99_amplification": 0.06 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.352, + "p99_amplification": 0.096 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.519, + "p99_amplification": 0.177 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.703, + "p99_amplification": 0.323 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.148, + "p99_amplification": 0.037 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.203, + "p99_amplification": 0.038 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.294, + "p99_amplification": 0.052 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 0.472, + 
"p99_amplification": 0.108 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 0.791, + "p99_amplification": 0.244 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.102, + "p99_amplification": 0.482 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.146, + "p99_amplification": 0.035 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.196, + "p99_amplification": 0.045 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.245, + "p99_amplification": 0.051 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.349, + "p99_amplification": 0.09 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.516, + "p99_amplification": 0.163 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.703, + "p99_amplification": 0.31 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.142, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.191, + "p99_amplification": 0.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.248, + "p99_amplification": 0.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.345, + "p99_amplification": 0.083 + }, + { + "sku": 
"gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.516, + "p99_amplification": 0.165 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.704, + "p99_amplification": 0.311 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.011, + "p99_amplification": 1.064 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.011, + "p99_amplification": 1.04 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.015, + "p99_amplification": 1.071 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.017, + "p99_amplification": 0.964 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.017, + "p99_amplification": 1.024 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 1.007, + "p99_amplification": 1.004 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.021, + "p99_amplification": 0.987 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.023, + "p99_amplification": 1.105 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.022, + "p99_amplification": 1.118 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.024, + "p99_amplification": 0.907 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.016, + "p99_amplification": 1.035 + }, + { + "sku": "gb300", + "ep": 4, + 
"phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.022, + "p99_amplification": 0.921 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.026, + "p99_amplification": 1.105 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.014, + "p99_amplification": 0.981 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 1.023, + "p99_amplification": 0.987 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.034, + "p99_amplification": 1.012 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.991, + "p99_amplification": 1.078 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.011, + "p99_amplification": 1.067 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 1.014, + "p99_amplification": 0.945 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.998, + "p99_amplification": 0.958 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 1.005, + "p99_amplification": 0.971 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.007, + "p99_amplification": 1.076 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.009, + "p99_amplification": 1.087 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.014, + 
"p99_amplification": 1.118 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.999, + "p99_amplification": 1.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.988, + "p99_amplification": 0.915 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.998, + "p99_amplification": 1.021 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 1.01, + "p99_amplification": 0.945 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.995, + "p99_amplification": 1.001 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.98, + "p99_amplification": 1.03 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.003, + "p99_amplification": 0.842 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.023, + "p99_amplification": 1.267 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.004, + "p99_amplification": 1.052 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.011, + "p99_amplification": 0.997 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.011, + "p99_amplification": 1.066 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.005, + "p99_amplification": 0.914 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + 
"p50_amplification": 1.017, + "p99_amplification": 0.988 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.002, + "p99_amplification": 1.083 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.02, + "p99_amplification": 1.039 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.018, + "p99_amplification": 1.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.996, + "p99_amplification": 1.084 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 1.003, + "p99_amplification": 0.984 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.992, + "p99_amplification": 1.116 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.994, + "p99_amplification": 0.921 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.997, + "p99_amplification": 1.017 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.989, + "p99_amplification": 1.111 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.006, + "p99_amplification": 1.032 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.008, + "p99_amplification": 1.111 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.005, + "p99_amplification": 1.057 + }, + { + "sku": "gb300", + 
"ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.006, + "p99_amplification": 0.948 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.999, + "p99_amplification": 1.081 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.002, + "p99_amplification": 0.976 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.007, + "p99_amplification": 0.987 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.996, + "p99_amplification": 1.108 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1.007, + "p99_amplification": 1.014 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.013, + "p99_amplification": 1.082 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.982, + "p99_amplification": 1.041 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.996, + "p99_amplification": 1.136 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.987, + "p99_amplification": 1.12 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.991, + "p99_amplification": 1.02 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.992, + "p99_amplification": 0.995 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + 
"p50_amplification": 0.988, + "p99_amplification": 1.157 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.996, + "p99_amplification": 0.853 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.995, + "p99_amplification": 0.528 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.062, + "p99_amplification": 0.811 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.158, + "p99_amplification": 0.879 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.091, + "p99_amplification": 0.971 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.173, + "p99_amplification": 0.892 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.084, + "p99_amplification": 0.956 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 1.16, + "p99_amplification": 0.891 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.05, + "p99_amplification": 0.997 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.063, + "p99_amplification": 0.964 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.051, + "p99_amplification": 0.823 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.097, + "p99_amplification": 0.942 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.103, 
+ "p99_amplification": 0.934 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.146, + "p99_amplification": 0.934 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.093, + "p99_amplification": 0.967 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.126, + "p99_amplification": 0.966 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 1.055, + "p99_amplification": 1.044 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.099, + "p99_amplification": 0.856 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.982, + "p99_amplification": 0.928 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.038, + "p99_amplification": 0.94 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 1.033, + "p99_amplification": 0.859 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 1.068, + "p99_amplification": 0.644 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 1.031, + "p99_amplification": 0.648 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.052, + "p99_amplification": 0.64 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.003, + "p99_amplification": 0.764 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.998, + "p99_amplification": 0.681 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.04, + "p99_amplification": 0.758 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 1.166, + "p99_amplification": 0.716 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 1.073, + "p99_amplification": 0.591 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 1.147, + "p99_amplification": 0.628 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 1.077, + "p99_amplification": 0.656 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 1.177, + "p99_amplification": 0.623 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.07, + "p99_amplification": 0.737 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.063, + "p99_amplification": 0.684 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.07, + "p99_amplification": 1.102 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.077, + "p99_amplification": 0.883 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.018, + "p99_amplification": 0.772 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.062, + "p99_amplification": 0.894 + }, + { + "sku": "gb300", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.049, + "p99_amplification": 0.829 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.078, + "p99_amplification": 0.641 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1, + "p99_amplification": 0.798 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.012, + "p99_amplification": 0.938 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.998, + "p99_amplification": 0.86 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 1.093, + "p99_amplification": 0.907 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 1.052, + "p99_amplification": 0.891 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 1.098, + "p99_amplification": 0.68 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 1.037, + "p99_amplification": 0.963 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 1.113, + "p99_amplification": 0.883 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.027, + "p99_amplification": 0.73 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.024, + "p99_amplification": 1.069 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + 
"p50_amplification": 1.026, + "p99_amplification": 0.88 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.116, + "p99_amplification": 0.915 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.092, + "p99_amplification": 0.913 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.125, + "p99_amplification": 0.942 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.025, + "p99_amplification": 0.961 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.125, + "p99_amplification": 0.933 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1.048, + "p99_amplification": 1.077 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.002, + "p99_amplification": 1.067 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 1.086, + "p99_amplification": 0.841 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.19, + "p99_amplification": 1.222 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 1.09, + "p99_amplification": 0.917 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.158, + "p99_amplification": 1.228 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.069, + "p99_amplification": 0.9 + }, + { + "sku": "gb300", + 
"ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 1.153, + "p99_amplification": 0.972 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 1.02, + "p99_amplification": 1.047 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.034, + "p99_amplification": 0.989 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.992, + "p99_amplification": 1.07 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.046, + "p99_amplification": 0.931 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.126, + "p99_amplification": 0.905 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.238, + "p99_amplification": 0.878 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.426, + "p99_amplification": 0.812 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.637, + "p99_amplification": 1.136 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.011, + "p99_amplification": 1.128 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.118, + "p99_amplification": 1.007 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.241, + "p99_amplification": 0.984 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.397, + "p99_amplification": 0.904 + }, + { + 
"sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.679, + "p99_amplification": 0.884 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.986, + "p99_amplification": 1.563 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.05, + "p99_amplification": 1.096 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.032, + "p99_amplification": 1.031 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.007, + "p99_amplification": 1.095 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.005, + "p99_amplification": 1.023 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.014, + "p99_amplification": 1.287 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.001, + "p99_amplification": 1.222 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.021, + "p99_amplification": 0.937 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.021, + "p99_amplification": 0.953 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.048, + "p99_amplification": 0.958 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.089, + "p99_amplification": 0.92 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": 
"zipf-mild", + "T": 2048, + "p50_amplification": 1.175, + "p99_amplification": 0.927 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.258, + "p99_amplification": 1.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.024, + "p99_amplification": 1.096 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.016, + "p99_amplification": 0.937 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.006, + "p99_amplification": 1.036 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.005, + "p99_amplification": 0.982 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.003, + "p99_amplification": 0.976 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.999, + "p99_amplification": 1.057 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.04, + "p99_amplification": 0.987 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.153, + "p99_amplification": 0.911 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.176, + "p99_amplification": 0.976 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.264, + "p99_amplification": 0.853 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.476, + 
"p99_amplification": 0.878 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.652, + "p99_amplification": 1.172 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.008, + "p99_amplification": 1.053 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.007, + "p99_amplification": 0.947 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.003, + "p99_amplification": 1.084 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.002, + "p99_amplification": 1.009 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1, + "p99_amplification": 0.98 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.996, + "p99_amplification": 1.05 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.001, + "p99_amplification": 0.965 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.003, + "p99_amplification": 0.985 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.997, + "p99_amplification": 1.058 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.999, + "p99_amplification": 0.976 + }, + { + "sku": "gb300", + "ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.999, + "p99_amplification": 0.969 + }, + { + "sku": "gb300", + 
"ep": 4, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.996, + "p99_amplification": 1.026 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.801, + "p99_amplification": 1.058 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.127, + "p99_amplification": 0.943 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.344, + "p99_amplification": 0.894 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.672, + "p99_amplification": 0.698 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 2.106, + "p99_amplification": 0.771 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 2.764, + "p99_amplification": 1.253 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.038, + "p99_amplification": 0.927 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.263, + "p99_amplification": 0.876 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.685, + "p99_amplification": 0.852 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 2.221, + "p99_amplification": 0.709 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 2.842, + "p99_amplification": 1.052 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 3.819, + "p99_amplification": 1.693 + }, 
+ { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.767, + "p99_amplification": 1.087 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.905, + "p99_amplification": 0.79 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.942, + "p99_amplification": 0.781 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.957, + "p99_amplification": 0.919 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.96, + "p99_amplification": 0.986 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.977, + "p99_amplification": 0.865 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.869, + "p99_amplification": 1.034 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.031, + "p99_amplification": 0.732 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.151, + "p99_amplification": 0.865 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.28, + "p99_amplification": 0.826 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.442, + "p99_amplification": 0.829 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.739, + "p99_amplification": 0.836 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": 
"zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.77, + "p99_amplification": 1.015 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.925, + "p99_amplification": 0.932 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.951, + "p99_amplification": 0.748 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.969, + "p99_amplification": 0.923 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.967, + "p99_amplification": 1.019 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.985, + "p99_amplification": 0.938 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.913, + "p99_amplification": 1.019 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.117, + "p99_amplification": 0.97 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.398, + "p99_amplification": 0.852 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.676, + "p99_amplification": 0.742 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 2.103, + "p99_amplification": 0.786 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 2.769, + "p99_amplification": 1.272 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 
0.966, + "p99_amplification": 0.645 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.099, + "p99_amplification": 0.692 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.054, + "p99_amplification": 0.661 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.065, + "p99_amplification": 0.676 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.066, + "p99_amplification": 0.754 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.021, + "p99_amplification": 0.768 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.776, + "p99_amplification": 1.171 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.918, + "p99_amplification": 1.01 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.947, + "p99_amplification": 1.06 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.97, + "p99_amplification": 1.023 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.966, + "p99_amplification": 1.036 + }, + { + "sku": "gb300", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.989, + "p99_amplification": 0.991 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.739, + "p99_amplification": 0.749 + }, + { + "sku": "h100", + "ep": 8, + 
"phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.554, + "p99_amplification": 0.74 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.728, + "p99_amplification": 0.509 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.74, + "p99_amplification": 0.753 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.714, + "p99_amplification": 0.519 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.642, + "p99_amplification": 0.494 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.711, + "p99_amplification": 0.585 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.762, + "p99_amplification": 0.51 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.716, + "p99_amplification": 0.735 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.543, + "p99_amplification": 0.726 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.541, + "p99_amplification": 0.518 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.751, + "p99_amplification": 0.752 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.723, + "p99_amplification": 0.539 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.62, + "p99_amplification": 0.513 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-heavy", + "T": 64, + "p50_amplification": 0.666, + "p99_amplification": 0.59 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.73, + "p99_amplification": 0.547 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.532, + "p99_amplification": 0.578 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.532, + "p99_amplification": 0.562 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.548, + "p99_amplification": 0.518 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.563, + "p99_amplification": 1.587 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.634, + "p99_amplification": 0.519 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.654, + "p99_amplification": 0.513 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.647, + "p99_amplification": 0.614 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.678, + "p99_amplification": 0.524 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.76, + "p99_amplification": 0.756 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.559, + "p99_amplification": 0.744 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.543, + "p99_amplification": 0.549 + }, + { + "sku": "h100", 
+ "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.575, + "p99_amplification": 0.772 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.623, + "p99_amplification": 0.523 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.663, + "p99_amplification": 0.494 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.679, + "p99_amplification": 0.599 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.74, + "p99_amplification": 0.52 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.741, + "p99_amplification": 0.742 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.553, + "p99_amplification": 0.746 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.737, + "p99_amplification": 0.515 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.74, + "p99_amplification": 0.77 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.726, + "p99_amplification": 0.522 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.662, + "p99_amplification": 0.511 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.741, + "p99_amplification": 0.589 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.761, + 
"p99_amplification": 0.525 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.739, + "p99_amplification": 0.773 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.558, + "p99_amplification": 0.733 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.567, + "p99_amplification": 0.726 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.597, + "p99_amplification": 0.744 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.71, + "p99_amplification": 0.519 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.642, + "p99_amplification": 0.51 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.707, + "p99_amplification": 0.606 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.787, + "p99_amplification": 1.072 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.533, + "p99_amplification": 0.565 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.534, + "p99_amplification": 0.557 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.554, + "p99_amplification": 0.501 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.563, + "p99_amplification": 0.754 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.631, + "p99_amplification": 0.533 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.651, + "p99_amplification": 0.516 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.64, + "p99_amplification": 0.599 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.676, + "p99_amplification": 0.527 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.533, + "p99_amplification": 0.568 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.534, + "p99_amplification": 0.568 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.554, + "p99_amplification": 0.507 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.536, + "p99_amplification": 0.573 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.624, + "p99_amplification": 0.453 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.641, + "p99_amplification": 0.464 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.644, + "p99_amplification": 0.584 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.683, + "p99_amplification": 0.508 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.753, + "p99_amplification": 0.753 + }, + { + "sku": "h100", + 
"ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.561, + "p99_amplification": 0.76 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.55, + "p99_amplification": 0.525 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.753, + "p99_amplification": 0.751 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.735, + "p99_amplification": 0.529 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.649, + "p99_amplification": 0.496 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.703, + "p99_amplification": 0.736 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.783, + "p99_amplification": 0.552 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.54, + "p99_amplification": 0.734 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.526, + "p99_amplification": 0.559 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.54, + "p99_amplification": 0.526 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.551, + "p99_amplification": 0.75 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.588, + "p99_amplification": 0.537 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.627, + "p99_amplification": 0.526 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.674, + "p99_amplification": 0.587 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.733, + "p99_amplification": 0.552 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.786, + "p99_amplification": 0.772 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.576, + "p99_amplification": 0.766 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.591, + "p99_amplification": 0.541 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.601, + "p99_amplification": 0.764 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.763, + "p99_amplification": 0.546 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.661, + "p99_amplification": 0.531 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.682, + "p99_amplification": 0.599 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.714, + "p99_amplification": 0.543 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.54, + "p99_amplification": 0.574 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.541, + "p99_amplification": 0.598 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.549, + "p99_amplification": 0.52 + }, + { + 
"sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.561, + "p99_amplification": 0.749 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.64, + "p99_amplification": 0.527 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.675, + "p99_amplification": 0.519 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.688, + "p99_amplification": 0.609 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.76, + "p99_amplification": 0.546 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.779, + "p99_amplification": 0.768 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.581, + "p99_amplification": 0.753 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.567, + "p99_amplification": 0.528 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.591, + "p99_amplification": 0.769 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.759, + "p99_amplification": 0.546 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.678, + "p99_amplification": 0.515 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.657, + "p99_amplification": 0.614 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.72, + 
"p99_amplification": 0.527 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.557, + "p99_amplification": 0.772 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.55, + "p99_amplification": 0.758 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.551, + "p99_amplification": 0.522 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.564, + "p99_amplification": 0.781 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.642, + "p99_amplification": 0.532 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.637, + "p99_amplification": 0.523 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.708, + "p99_amplification": 0.622 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.802, + "p99_amplification": 1.574 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.565, + "p99_amplification": 0.739 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.566, + "p99_amplification": 0.767 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.565, + "p99_amplification": 0.524 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.582, + "p99_amplification": 0.75 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.642, + "p99_amplification": 0.534 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.67, + "p99_amplification": 0.528 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.653, + "p99_amplification": 0.599 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.693, + "p99_amplification": 0.522 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.543, + "p99_amplification": 0.597 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.544, + "p99_amplification": 0.575 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.554, + "p99_amplification": 0.526 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.577, + "p99_amplification": 0.752 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.642, + "p99_amplification": 0.545 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.672, + "p99_amplification": 0.528 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.649, + "p99_amplification": 0.597 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.69, + "p99_amplification": 0.54 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.773, + "p99_amplification": 0.838 + }, + { + "sku": "h100", + 
"ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.861, + "p99_amplification": 0.856 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.974, + "p99_amplification": 0.941 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.084, + "p99_amplification": 1.061 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.188, + "p99_amplification": 1.18 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.219, + "p99_amplification": 0.966 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.714, + "p99_amplification": 0.695 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.856, + "p99_amplification": 0.864 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.956, + "p99_amplification": 0.942 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.077, + "p99_amplification": 1.054 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.164, + "p99_amplification": 1.153 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.189, + "p99_amplification": 1.043 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.683, + "p99_amplification": 0.693 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.778, + "p99_amplification": 0.771 + 
}, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.831, + "p99_amplification": 0.822 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.899, + "p99_amplification": 0.88 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.946, + "p99_amplification": 0.937 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.981, + "p99_amplification": 0.799 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.721, + "p99_amplification": 0.746 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.854, + "p99_amplification": 0.886 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.959, + "p99_amplification": 0.939 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.08, + "p99_amplification": 1.068 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.193, + "p99_amplification": 1.184 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.227, + "p99_amplification": 0.973 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.685, + "p99_amplification": 0.696 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.781, + "p99_amplification": 0.798 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + 
"T": 512, + "p50_amplification": 0.815, + "p99_amplification": 0.8 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.885, + "p99_amplification": 0.874 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.933, + "p99_amplification": 0.951 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.962, + "p99_amplification": 0.789 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.754, + "p99_amplification": 0.756 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.864, + "p99_amplification": 0.841 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.966, + "p99_amplification": 0.943 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.076, + "p99_amplification": 1.057 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.185, + "p99_amplification": 1.18 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.218, + "p99_amplification": 0.961 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.691, + "p99_amplification": 0.682 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.751, + "p99_amplification": 0.754 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.823, + 
"p99_amplification": 0.803 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.872, + "p99_amplification": 0.857 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.924, + "p99_amplification": 0.923 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.955, + "p99_amplification": 0.771 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.687, + "p99_amplification": 0.674 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.759, + "p99_amplification": 0.758 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.825, + "p99_amplification": 0.808 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.873, + "p99_amplification": 0.861 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.924, + "p99_amplification": 1.205 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.955, + "p99_amplification": 0.77 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.769, + "p99_amplification": 0.746 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.867, + "p99_amplification": 0.852 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.977, + "p99_amplification": 0.955 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + 
"T": 1024, + "p50_amplification": 1.084, + "p99_amplification": 1.063 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.172, + "p99_amplification": 1.16 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.202, + "p99_amplification": 0.951 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.739, + "p99_amplification": 0.716 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.883, + "p99_amplification": 0.857 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.958, + "p99_amplification": 0.932 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.084, + "p99_amplification": 1.45 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.163, + "p99_amplification": 1.147 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.185, + "p99_amplification": 0.935 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.694, + "p99_amplification": 0.679 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.804, + "p99_amplification": 0.803 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.839, + "p99_amplification": 0.824 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.898, + "p99_amplification": 0.878 + }, + { + "sku": "h100", + "ep": 
8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.942, + "p99_amplification": 0.936 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.966, + "p99_amplification": 0.785 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.735, + "p99_amplification": 0.728 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.869, + "p99_amplification": 2.672 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.958, + "p99_amplification": 0.951 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.077, + "p99_amplification": 1.071 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.178, + "p99_amplification": 1.172 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.207, + "p99_amplification": 0.954 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.691, + "p99_amplification": 0.682 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.764, + "p99_amplification": 0.765 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.824, + "p99_amplification": 0.811 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.876, + "p99_amplification": 0.855 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + 
"p50_amplification": 0.922, + "p99_amplification": 0.921 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.933, + "p99_amplification": 0.873 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.768, + "p99_amplification": 0.748 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.866, + "p99_amplification": 1.502 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.975, + "p99_amplification": 0.953 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.086, + "p99_amplification": 1.058 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.173, + "p99_amplification": 1.162 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.202, + "p99_amplification": 0.954 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.702, + "p99_amplification": 0.695 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.763, + "p99_amplification": 0.763 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.828, + "p99_amplification": 0.802 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.871, + "p99_amplification": 0.853 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.917, + 
"p99_amplification": 0.918 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.947, + "p99_amplification": 0.762 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.699, + "p99_amplification": 0.697 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.774, + "p99_amplification": 0.761 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.828, + "p99_amplification": 0.809 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.871, + "p99_amplification": 0.853 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.915, + "p99_amplification": 0.919 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.946, + "p99_amplification": 0.769 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.212, + "p99_amplification": 1.565 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.209, + "p99_amplification": 1.52 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.19, + "p99_amplification": 0.843 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.222, + "p99_amplification": 1.274 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.236, + "p99_amplification": 0.856 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 1.308, + 
"p99_amplification": 1.069 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.448, + "p99_amplification": 1.343 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.336, + "p99_amplification": 0.889 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.315, + "p99_amplification": 1.74 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.317, + "p99_amplification": 1.295 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.305, + "p99_amplification": 1.156 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.323, + "p99_amplification": 1.701 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.294, + "p99_amplification": 1.247 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.25, + "p99_amplification": 1.087 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 1.276, + "p99_amplification": 1.199 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.202, + "p99_amplification": 1.687 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 1.258, + "p99_amplification": 1.255 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.252, + "p99_amplification": 1.53 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + 
"p50_amplification": 1.259, + "p99_amplification": 1.057 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 1.269, + "p99_amplification": 1.288 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 1.344, + "p99_amplification": 2.219 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.471, + "p99_amplification": 1.013 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.73, + "p99_amplification": 1.482 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.564, + "p99_amplification": 1.194 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.254, + "p99_amplification": 1.202 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 1.26, + "p99_amplification": 1.286 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 1.237, + "p99_amplification": 0.892 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 1.277, + "p99_amplification": 1.264 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 1.303, + "p99_amplification": 0.912 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 1.439, + "p99_amplification": 0.971 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.671, + "p99_amplification": 1.758 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-mild", + "T": 128, + "p50_amplification": 1.502, + "p99_amplification": 0.983 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.298, + "p99_amplification": 1.255 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.291, + "p99_amplification": 1.255 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.294, + "p99_amplification": 1.134 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.304, + "p99_amplification": 1.264 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.356, + "p99_amplification": 0.931 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.504, + "p99_amplification": 1.019 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.75, + "p99_amplification": 1.528 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.582, + "p99_amplification": 1.038 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 1.239, + "p99_amplification": 1.264 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 1.236, + "p99_amplification": 1.276 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 1.22, + "p99_amplification": 0.948 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 1.246, + "p99_amplification": 1.224 + }, + { + "sku": 
"h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 1.256, + "p99_amplification": 0.897 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 1.331, + "p99_amplification": 0.932 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.469, + "p99_amplification": 1.279 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.353, + "p99_amplification": 0.892 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.25, + "p99_amplification": 1.226 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.254, + "p99_amplification": 1.196 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.243, + "p99_amplification": 1.054 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.268, + "p99_amplification": 1.241 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.318, + "p99_amplification": 0.903 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.465, + "p99_amplification": 0.994 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1.7, + "p99_amplification": 1.509 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.562, + "p99_amplification": 1.158 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + 
"T": 1, + "p50_amplification": 1.255, + "p99_amplification": 1.271 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.25, + "p99_amplification": 1.22 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 1.258, + "p99_amplification": 1.107 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.268, + "p99_amplification": 1.207 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.315, + "p99_amplification": 0.915 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 1.462, + "p99_amplification": 0.996 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 1.729, + "p99_amplification": 1.5 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.59, + "p99_amplification": 1.172 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.363, + "p99_amplification": 1.332 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.286, + "p99_amplification": 1.255 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.388, + "p99_amplification": 1.335 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.538, + "p99_amplification": 1.49 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.617, + "p99_amplification": 1.586 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + 
"p50_amplification": 1.615, + "p99_amplification": 1.263 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.154, + "p99_amplification": 1.136 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.25, + "p99_amplification": 1.223 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.36, + "p99_amplification": 1.314 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.504, + "p99_amplification": 1.457 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.603, + "p99_amplification": 1.574 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.6, + "p99_amplification": 1.252 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.585, + "p99_amplification": 3.808 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.441, + "p99_amplification": 1.433 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.249, + "p99_amplification": 1.221 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.305, + "p99_amplification": 1.268 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.338, + "p99_amplification": 1.415 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.296, + "p99_amplification": 1.015 + }, + { + "sku": "h100", 
+ "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.514, + "p99_amplification": 3.455 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.376, + "p99_amplification": 1.347 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.381, + "p99_amplification": 1.334 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.52, + "p99_amplification": 1.502 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.598, + "p99_amplification": 1.594 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.581, + "p99_amplification": 1.238 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.586, + "p99_amplification": 1.5 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.431, + "p99_amplification": 1.41 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.247, + "p99_amplification": 1.212 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.288, + "p99_amplification": 1.248 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.302, + "p99_amplification": 1.278 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.253, + "p99_amplification": 0.993 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + 
"p50_amplification": 1.381, + "p99_amplification": 1.318 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.308, + "p99_amplification": 1.289 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.403, + "p99_amplification": 1.358 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.55, + "p99_amplification": 2.387 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.625, + "p99_amplification": 1.592 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.619, + "p99_amplification": 1.267 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.55, + "p99_amplification": 1.473 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.407, + "p99_amplification": 1.391 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.23, + "p99_amplification": 1.234 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.266, + "p99_amplification": 1.232 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.303, + "p99_amplification": 1.471 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.26, + "p99_amplification": 0.994 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.595, + "p99_amplification": 
1.539 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.464, + "p99_amplification": 1.44 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 1.256, + "p99_amplification": 1.24 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.285, + "p99_amplification": 1.356 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.31, + "p99_amplification": 1.289 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.264, + "p99_amplification": 1.013 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.76, + "p99_amplification": 0.756 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.758, + "p99_amplification": 0.727 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.746, + "p99_amplification": 0.513 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.76, + "p99_amplification": 0.746 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.74, + "p99_amplification": 0.529 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.708, + "p99_amplification": 0.551 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.664, + "p99_amplification": 0.548 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.713, + "p99_amplification": 0.476 + }, + { + "sku": "h100", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.765, + "p99_amplification": 0.75 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.762, + "p99_amplification": 0.764 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.746, + "p99_amplification": 0.746 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.767, + "p99_amplification": 0.8 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.748, + "p99_amplification": 0.576 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.711, + "p99_amplification": 0.513 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.665, + "p99_amplification": 0.567 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.702, + "p99_amplification": 0.497 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.735, + "p99_amplification": 0.755 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.73, + "p99_amplification": 0.727 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.708, + "p99_amplification": 0.525 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.734, + "p99_amplification": 0.729 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.715, + "p99_amplification": 0.52 
+ }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.677, + "p99_amplification": 0.477 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.631, + "p99_amplification": 0.527 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.656, + "p99_amplification": 0.452 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.74, + "p99_amplification": 1.075 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.735, + "p99_amplification": 0.728 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.721, + "p99_amplification": 0.5 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.735, + "p99_amplification": 0.749 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.718, + "p99_amplification": 0.635 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.686, + "p99_amplification": 0.491 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.65, + "p99_amplification": 0.524 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.708, + "p99_amplification": 0.541 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.774, + "p99_amplification": 0.763 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.786, + 
"p99_amplification": 0.778 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.757, + "p99_amplification": 0.558 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.788, + "p99_amplification": 0.981 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.753, + "p99_amplification": 0.551 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.728, + "p99_amplification": 0.755 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.67, + "p99_amplification": 0.544 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.678, + "p99_amplification": 0.51 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.743, + "p99_amplification": 0.742 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.752, + "p99_amplification": 0.857 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.738, + "p99_amplification": 0.506 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.751, + "p99_amplification": 1.053 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.732, + "p99_amplification": 0.525 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.702, + "p99_amplification": 0.502 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-moderate", + "T": 64, + "p50_amplification": 0.654, + "p99_amplification": 0.528 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.705, + "p99_amplification": 0.482 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.72, + "p99_amplification": 0.69 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.719, + "p99_amplification": 0.698 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.703, + "p99_amplification": 0.477 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.721, + "p99_amplification": 0.751 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.701, + "p99_amplification": 0.575 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.666, + "p99_amplification": 0.481 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.628, + "p99_amplification": 0.52 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.646, + "p99_amplification": 0.443 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.769, + "p99_amplification": 0.813 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.765, + "p99_amplification": 0.794 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.756, + "p99_amplification": 
0.702 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.77, + "p99_amplification": 0.805 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.743, + "p99_amplification": 0.574 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.714, + "p99_amplification": 0.536 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.669, + "p99_amplification": 0.574 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.671, + "p99_amplification": 0.478 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.643, + "p99_amplification": 0.605 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.994, + "p99_amplification": 0.952 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.591, + "p99_amplification": 1.523 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 2.2, + "p99_amplification": 2.117 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 2.642, + "p99_amplification": 2.581 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 2.828, + "p99_amplification": 2.199 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.624, + "p99_amplification": 0.583 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.001, + "p99_amplification": 0.949 + 
}, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.454, + "p99_amplification": 1.376 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.962, + "p99_amplification": 1.871 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 2.353, + "p99_amplification": 2.291 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 2.516, + "p99_amplification": 1.957 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.546, + "p99_amplification": 0.741 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.817, + "p99_amplification": 0.8 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.12, + "p99_amplification": 1.09 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.483, + "p99_amplification": 1.452 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.744, + "p99_amplification": 1.704 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.843, + "p99_amplification": 1.452 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.644, + "p99_amplification": 0.766 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.984, + "p99_amplification": 0.957 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + 
"T": 512, + "p50_amplification": 1.418, + "p99_amplification": 1.38 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 2.032, + "p99_amplification": 1.959 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 2.516, + "p99_amplification": 2.458 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 2.745, + "p99_amplification": 2.142 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.551, + "p99_amplification": 0.552 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.8, + "p99_amplification": 0.796 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.093, + "p99_amplification": 1.067 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.431, + "p99_amplification": 1.382 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.68, + "p99_amplification": 1.646 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.777, + "p99_amplification": 1.387 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.64, + "p99_amplification": 0.604 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.993, + "p99_amplification": 0.943 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.585, + "p99_amplification": 1.519 + }, + { + 
"sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 2.201, + "p99_amplification": 2.111 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 2.642, + "p99_amplification": 2.569 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 2.828, + "p99_amplification": 2.202 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.544, + "p99_amplification": 0.547 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.798, + "p99_amplification": 0.795 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.087, + "p99_amplification": 1.059 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.429, + "p99_amplification": 1.39 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.685, + "p99_amplification": 1.65 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.789, + "p99_amplification": 1.402 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.538, + "p99_amplification": 0.549 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.797, + "p99_amplification": 0.975 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 1.086, + "p99_amplification": 1.046 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.427, + "p99_amplification": 1.378 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.686, + "p99_amplification": 1.655 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.791, + "p99_amplification": 1.398 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 2.461, + "p99_amplification": 37.138 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 2.455, + "p99_amplification": 41.06 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 2.552, + "p99_amplification": 23.145 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 2.553, + "p99_amplification": 35.363 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 2.502, + "p99_amplification": 24.264 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 2.664, + "p99_amplification": 24.137 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 2.874, + "p99_amplification": 21.966 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 3.099, + "p99_amplification": 16.347 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 2.383, + "p99_amplification": 37.868 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 2.37, + "p99_amplification": 37.696 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", 
+ "T": 4, + "p50_amplification": 2.467, + "p99_amplification": 25.664 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 2.453, + "p99_amplification": 48.267 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 2.428, + "p99_amplification": 24.929 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 2.77, + "p99_amplification": 23.58 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 3.074, + "p99_amplification": 21.132 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 3.486, + "p99_amplification": 16.668 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 2.424, + "p99_amplification": 37.51 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 2.412, + "p99_amplification": 35.653 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 2.503, + "p99_amplification": 25.756 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 2.505, + "p99_amplification": 35.892 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 2.481, + "p99_amplification": 26.566 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 2.514, + "p99_amplification": 21.181 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 2.312, + "p99_amplification": 24.504 + }, + { + "sku": "h100", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 2.236, + "p99_amplification": 18.272 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 2.459, + "p99_amplification": 37.885 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 2.453, + "p99_amplification": 39.178 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 2.537, + "p99_amplification": 25.01 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 2.537, + "p99_amplification": 33.304 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 2.484, + "p99_amplification": 23.644 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 2.507, + "p99_amplification": 23.117 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 2.44, + "p99_amplification": 22.334 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.524, + "p99_amplification": 19.665 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 2.475, + "p99_amplification": 37.539 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 2.473, + "p99_amplification": 37.428 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 2.554, + "p99_amplification": 22.401 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 2.554, + "p99_amplification": 
41.539 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 2.492, + "p99_amplification": 23.648 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 2.513, + "p99_amplification": 24.597 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 2.361, + "p99_amplification": 23.489 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 2.281, + "p99_amplification": 17.872 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 2.477, + "p99_amplification": 47.657 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 2.485, + "p99_amplification": 37.949 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 2.555, + "p99_amplification": 23.869 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 2.615, + "p99_amplification": 34.742 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 2.558, + "p99_amplification": 22.944 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 2.658, + "p99_amplification": 20.732 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 2.809, + "p99_amplification": 21.815 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 3.059, + "p99_amplification": 16.559 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-moderate+eplb", + "T": 1, + "p50_amplification": 2.471, + "p99_amplification": 34.594 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 2.47, + "p99_amplification": 37.387 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 2.555, + "p99_amplification": 23.238 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 2.544, + "p99_amplification": 35.062 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 2.526, + "p99_amplification": 24.321 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 2.498, + "p99_amplification": 23.175 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 2.36, + "p99_amplification": 23.135 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 2.248, + "p99_amplification": 18.007 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 2.496, + "p99_amplification": 37.897 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 2.491, + "p99_amplification": 38.89 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 2.568, + "p99_amplification": 23.172 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 2.569, + "p99_amplification": 34.386 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 2.535, + "p99_amplification": 
24.534 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 2.531, + "p99_amplification": 22.767 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 2.371, + "p99_amplification": 24.729 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 2.265, + "p99_amplification": 18.003 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 3.006, + "p99_amplification": 20.579 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 3.551, + "p99_amplification": 17.288 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 4.223, + "p99_amplification": 10.742 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 5.073, + "p99_amplification": 5.767 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 5.692, + "p99_amplification": 5.817 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 6.445, + "p99_amplification": 5.314 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 3.519, + "p99_amplification": 18.916 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 4.43, + "p99_amplification": 14.698 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 5.549, + "p99_amplification": 9.391 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 7.093, + 
"p99_amplification": 7.317 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 7.998, + "p99_amplification": 7.921 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 8.429, + "p99_amplification": 6.585 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 2.276, + "p99_amplification": 23.704 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 2.237, + "p99_amplification": 22.879 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 2.086, + "p99_amplification": 16.351 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 2.088, + "p99_amplification": 9.312 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.965, + "p99_amplification": 5.378 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.821, + "p99_amplification": 2.068 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.52, + "p99_amplification": 23.384 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 2.742, + "p99_amplification": 19.216 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 2.932, + "p99_amplification": 13.129 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 3.349, + "p99_amplification": 7.847 + }, + { + "sku": "h100", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 3.406, + "p99_amplification": 3.945 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 3.715, + "p99_amplification": 2.972 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 2.212, + "p99_amplification": 26.976 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 2.15, + "p99_amplification": 19.743 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 2.023, + "p99_amplification": 15.706 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 2.064, + "p99_amplification": 10.088 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.961, + "p99_amplification": 5.024 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.822, + "p99_amplification": 1.979 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 3.027, + "p99_amplification": 19.87 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 3.569, + "p99_amplification": 16.103 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 4.235, + "p99_amplification": 10.951 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 5.082, + "p99_amplification": 5.777 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + 
"p50_amplification": 5.701, + "p99_amplification": 5.784 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 6.453, + "p99_amplification": 5.331 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 2.29, + "p99_amplification": 24.347 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 2.208, + "p99_amplification": 20.812 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 2.075, + "p99_amplification": 16.042 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 2.082, + "p99_amplification": 9.339 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.966, + "p99_amplification": 5.215 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.822, + "p99_amplification": 2.051 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 2.24, + "p99_amplification": 23.851 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 2.181, + "p99_amplification": 20.9 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 2.051, + "p99_amplification": 15.821 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 2.083, + "p99_amplification": 9.28 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.967, + "p99_amplification": 5.198 + }, 
+ { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.823, + "p99_amplification": 2.063 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.429, + "p99_amplification": 1.391 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.994, + "p99_amplification": 1.346 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.976, + "p99_amplification": 0.953 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.005, + "p99_amplification": 1.357 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.976, + "p99_amplification": 1.301 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.965, + "p99_amplification": 1.258 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.978, + "p99_amplification": 0.951 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.029, + "p99_amplification": 0.82 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.441, + "p99_amplification": 1.345 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.087, + "p99_amplification": 3.146 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.017, + "p99_amplification": 0.968 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.38, + "p99_amplification": 1.324 + }, + { + "sku": "h100", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.376, + "p99_amplification": 0.96 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.966, + "p99_amplification": 0.913 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 0.97, + "p99_amplification": 0.96 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.007, + "p99_amplification": 0.786 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.997, + "p99_amplification": 0.992 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.999, + "p99_amplification": 1.015 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 1.009, + "p99_amplification": 1.017 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 1.003, + "p99_amplification": 0.975 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.996, + "p99_amplification": 0.989 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.955, + "p99_amplification": 0.693 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.991, + "p99_amplification": 0.991 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.003, + "p99_amplification": 0.776 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.965, + 
"p99_amplification": 1.003 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.967, + "p99_amplification": 0.952 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.973, + "p99_amplification": 0.956 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.97, + "p99_amplification": 1.758 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.968, + "p99_amplification": 0.97 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 1.007, + "p99_amplification": 0.897 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.002, + "p99_amplification": 1.253 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.028, + "p99_amplification": 0.775 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.035, + "p99_amplification": 1.172 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.01, + "p99_amplification": 1.091 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.007, + "p99_amplification": 0.976 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.01, + "p99_amplification": 1.025 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.009, + "p99_amplification": 1.035 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + 
"p50_amplification": 1.007, + "p99_amplification": 0.937 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.01, + "p99_amplification": 1.282 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.008, + "p99_amplification": 0.797 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.979, + "p99_amplification": 0.98 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.978, + "p99_amplification": 1.279 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.985, + "p99_amplification": 0.968 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.986, + "p99_amplification": 1.077 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.974, + "p99_amplification": 1.072 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.951, + "p99_amplification": 0.679 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.981, + "p99_amplification": 0.986 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.028, + "p99_amplification": 0.752 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.005, + "p99_amplification": 1.44 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.002, + "p99_amplification": 1.478 + }, + { + "sku": "h100", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.009, + "p99_amplification": 0.991 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1, + "p99_amplification": 1.034 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.007, + "p99_amplification": 1 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.983, + "p99_amplification": 0.884 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.995, + "p99_amplification": 0.994 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.998, + "p99_amplification": 0.795 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 1.015, + "p99_amplification": 1.013 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.011, + "p99_amplification": 1.023 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 1.01, + "p99_amplification": 0.982 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.014, + "p99_amplification": 1.056 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.002, + "p99_amplification": 0.992 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.979, + "p99_amplification": 0.707 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.984, + "p99_amplification": 
1.306 + }, + { + "sku": "h100", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.985, + "p99_amplification": 0.797 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.028, + "p99_amplification": 0.986 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.081, + "p99_amplification": 1.062 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.132, + "p99_amplification": 1.107 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.197, + "p99_amplification": 2.1 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.261, + "p99_amplification": 1.287 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.255, + "p99_amplification": 0.992 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.008, + "p99_amplification": 0.967 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.064, + "p99_amplification": 1.063 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.114, + "p99_amplification": 1.153 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.191, + "p99_amplification": 2.022 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.237, + "p99_amplification": 1.228 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.22, + 
"p99_amplification": 0.986 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.994, + "p99_amplification": 0.971 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.021, + "p99_amplification": 1.011 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.011, + "p99_amplification": 1.018 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.025, + "p99_amplification": 1 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.02, + "p99_amplification": 1.018 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.007, + "p99_amplification": 0.81 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.02, + "p99_amplification": 1.131 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.08, + "p99_amplification": 1.059 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.115, + "p99_amplification": 1.083 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.2, + "p99_amplification": 1.202 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.261, + "p99_amplification": 1.254 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.256, + "p99_amplification": 0.993 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + 
"routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.993, + "p99_amplification": 0.979 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.013, + "p99_amplification": 1.003 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.001, + "p99_amplification": 0.999 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.009, + "p99_amplification": 0.99 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.996, + "p99_amplification": 0.996 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.967, + "p99_amplification": 0.779 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.014, + "p99_amplification": 2.898 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.07, + "p99_amplification": 1.057 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.13, + "p99_amplification": 1.825 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.197, + "p99_amplification": 1.196 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.257, + "p99_amplification": 1.25 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.256, + "p99_amplification": 0.993 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 
0.984, + "p99_amplification": 0.949 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.988, + "p99_amplification": 0.991 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.993, + "p99_amplification": 0.977 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.001, + "p99_amplification": 0.975 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.999, + "p99_amplification": 0.996 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.981, + "p99_amplification": 0.787 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.996, + "p99_amplification": 0.961 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.003, + "p99_amplification": 1.053 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.997, + "p99_amplification": 0.977 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.002, + "p99_amplification": 0.985 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.003, + "p99_amplification": 1.211 + }, + { + "sku": "h100", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.982, + "p99_amplification": 0.791 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.654, + "p99_amplification": 0.684 + }, + { + "sku": "h200", + "ep": 8, + "phase": 
"decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.635, + "p99_amplification": 0.733 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.665, + "p99_amplification": 0.536 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.674, + "p99_amplification": 0.613 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.705, + "p99_amplification": 0.951 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.685, + "p99_amplification": 0.702 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.723, + "p99_amplification": 0.936 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.797, + "p99_amplification": 0.825 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.608, + "p99_amplification": 0.712 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.625, + "p99_amplification": 0.7 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.652, + "p99_amplification": 0.472 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.671, + "p99_amplification": 1.468 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.692, + "p99_amplification": 0.68 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.665, + "p99_amplification": 0.751 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-heavy", + "T": 64, + "p50_amplification": 0.729, + "p99_amplification": 0.737 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.791, + "p99_amplification": 0.686 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.649, + "p99_amplification": 0.776 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.638, + "p99_amplification": 0.8 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.675, + "p99_amplification": 0.547 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.679, + "p99_amplification": 0.714 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.68, + "p99_amplification": 0.808 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.665, + "p99_amplification": 0.908 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.703, + "p99_amplification": 0.838 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.766, + "p99_amplification": 0.713 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.656, + "p99_amplification": 0.836 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.634, + "p99_amplification": 0.831 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.656, + "p99_amplification": 0.542 + }, + { + "sku": "h200", 
+ "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.652, + "p99_amplification": 0.62 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.659, + "p99_amplification": 0.823 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.672, + "p99_amplification": 0.824 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.73, + "p99_amplification": 1.849 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.814, + "p99_amplification": 0.655 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.67, + "p99_amplification": 0.674 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.663, + "p99_amplification": 0.997 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.677, + "p99_amplification": 0.589 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.679, + "p99_amplification": 0.648 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.687, + "p99_amplification": 0.671 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.7, + "p99_amplification": 0.851 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.723, + "p99_amplification": 0.854 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.775, + 
"p99_amplification": 0.666 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.633, + "p99_amplification": 1.401 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.623, + "p99_amplification": 0.639 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.651, + "p99_amplification": 0.524 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.652, + "p99_amplification": 0.561 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.656, + "p99_amplification": 0.984 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.679, + "p99_amplification": 0.782 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.732, + "p99_amplification": 0.744 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.791, + "p99_amplification": 0.643 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.656, + "p99_amplification": 0.693 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.646, + "p99_amplification": 0.619 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.666, + "p99_amplification": 0.489 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.675, + "p99_amplification": 0.55 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.665, + "p99_amplification": 0.667 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.689, + "p99_amplification": 0.723 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.7, + "p99_amplification": 0.736 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.777, + "p99_amplification": 0.62 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.664, + "p99_amplification": 0.749 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.661, + "p99_amplification": 1.014 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.678, + "p99_amplification": 0.518 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.678, + "p99_amplification": 0.65 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.66, + "p99_amplification": 0.79 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.703, + "p99_amplification": 1.198 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.704, + "p99_amplification": 0.737 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.778, + "p99_amplification": 0.75 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 0.66, + "p99_amplification": 0.778 + }, + { + "sku": "h200", + "ep": 8, 
+ "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 0.647, + "p99_amplification": 1.17 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 0.674, + "p99_amplification": 0.487 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 0.677, + "p99_amplification": 0.663 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 0.689, + "p99_amplification": 0.77 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 0.697, + "p99_amplification": 0.76 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 0.742, + "p99_amplification": 0.718 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.809, + "p99_amplification": 0.634 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 0.582, + "p99_amplification": 0.706 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.584, + "p99_amplification": 0.637 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 0.634, + "p99_amplification": 0.518 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.64, + "p99_amplification": 0.756 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.689, + "p99_amplification": 0.738 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.65, + "p99_amplification": 0.727 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-heavy", + "T": 64, + "p50_amplification": 0.711, + "p99_amplification": 0.721 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.781, + "p99_amplification": 0.633 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 0.646, + "p99_amplification": 0.841 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 0.656, + "p99_amplification": 0.769 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 0.673, + "p99_amplification": 0.54 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 0.681, + "p99_amplification": 0.735 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 0.676, + "p99_amplification": 0.837 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 0.687, + "p99_amplification": 0.826 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 0.713, + "p99_amplification": 1.082 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.769, + "p99_amplification": 0.673 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 0.684, + "p99_amplification": 0.91 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.653, + "p99_amplification": 0.79 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 0.664, + "p99_amplification": 0.565 + }, + { + "sku": "h200", 
+ "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.678, + "p99_amplification": 0.698 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 0.728, + "p99_amplification": 0.877 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 0.711, + "p99_amplification": 0.865 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 0.741, + "p99_amplification": 0.791 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.812, + "p99_amplification": 0.706 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 0.66, + "p99_amplification": 0.857 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 0.654, + "p99_amplification": 0.749 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 0.665, + "p99_amplification": 0.515 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 0.679, + "p99_amplification": 0.672 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 0.677, + "p99_amplification": 0.722 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 0.695, + "p99_amplification": 0.807 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 0.726, + "p99_amplification": 0.745 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.773, + 
"p99_amplification": 0.666 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 0.652, + "p99_amplification": 0.829 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.625, + "p99_amplification": 0.816 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 0.67, + "p99_amplification": 0.574 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 0.668, + "p99_amplification": 0.649 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 0.659, + "p99_amplification": 0.789 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 0.692, + "p99_amplification": 0.839 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 0.766, + "p99_amplification": 0.878 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.818, + "p99_amplification": 0.698 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 0.672, + "p99_amplification": 0.946 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 0.657, + "p99_amplification": 0.815 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 0.671, + "p99_amplification": 0.737 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 0.685, + "p99_amplification": 0.668 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + 
"routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 0.687, + "p99_amplification": 0.81 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 0.687, + "p99_amplification": 0.813 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.701, + "p99_amplification": 0.796 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.769, + "p99_amplification": 0.673 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 0.681, + "p99_amplification": 0.841 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 0.671, + "p99_amplification": 0.764 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 0.686, + "p99_amplification": 0.552 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 0.689, + "p99_amplification": 0.686 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 0.697, + "p99_amplification": 0.81 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 0.728, + "p99_amplification": 0.968 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 0.725, + "p99_amplification": 0.812 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.793, + "p99_amplification": 0.723 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.768, + "p99_amplification": 0.8 + }, + { + "sku": "h200", + 
"ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.851, + "p99_amplification": 0.78 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.983, + "p99_amplification": 1.298 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.081, + "p99_amplification": 1.064 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.159, + "p99_amplification": 1.157 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.218, + "p99_amplification": 1.304 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.765, + "p99_amplification": 0.842 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.849, + "p99_amplification": 0.827 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.964, + "p99_amplification": 0.97 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.072, + "p99_amplification": 1.037 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.146, + "p99_amplification": 1.224 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.198, + "p99_amplification": 1.163 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.741, + "p99_amplification": 0.793 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.802, + "p99_amplification": 0.748 + 
}, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.859, + "p99_amplification": 0.894 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.915, + "p99_amplification": 0.963 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.94, + "p99_amplification": 0.949 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.972, + "p99_amplification": 0.961 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.788, + "p99_amplification": 1.135 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.875, + "p99_amplification": 0.733 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.979, + "p99_amplification": 0.973 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.081, + "p99_amplification": 1.061 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.178, + "p99_amplification": 1.283 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.231, + "p99_amplification": 1.272 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.759, + "p99_amplification": 0.854 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.784, + "p99_amplification": 0.712 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", 
+ "T": 512, + "p50_amplification": 0.845, + "p99_amplification": 0.938 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.893, + "p99_amplification": 0.934 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.918, + "p99_amplification": 0.965 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.947, + "p99_amplification": 1.128 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.803, + "p99_amplification": 0.895 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.87, + "p99_amplification": 0.803 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.986, + "p99_amplification": 1.065 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.088, + "p99_amplification": 1.092 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.16, + "p99_amplification": 1.254 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.217, + "p99_amplification": 1.284 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.76, + "p99_amplification": 0.834 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.781, + "p99_amplification": 0.698 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.848, + 
"p99_amplification": 0.929 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.895, + "p99_amplification": 0.933 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.91, + "p99_amplification": 0.955 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.96, + "p99_amplification": 1.01 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.77, + "p99_amplification": 0.892 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.768, + "p99_amplification": 0.723 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.847, + "p99_amplification": 0.916 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.888, + "p99_amplification": 0.933 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.908, + "p99_amplification": 0.937 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.965, + "p99_amplification": 1.038 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 0.771, + "p99_amplification": 0.789 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 0.87, + "p99_amplification": 0.804 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 0.981, + "p99_amplification": 0.959 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 
1024, + "p50_amplification": 1.087, + "p99_amplification": 1.065 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.158, + "p99_amplification": 1.162 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.213, + "p99_amplification": 1.22 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 0.759, + "p99_amplification": 0.765 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 0.845, + "p99_amplification": 0.708 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 0.965, + "p99_amplification": 0.978 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.073, + "p99_amplification": 1.061 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.143, + "p99_amplification": 1.313 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.195, + "p99_amplification": 1.177 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 0.771, + "p99_amplification": 0.751 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.814, + "p99_amplification": 0.665 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 0.868, + "p99_amplification": 0.862 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 0.927, + "p99_amplification": 0.914 + }, + { + "sku": "h200", + "ep": 8, 
+ "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 0.948, + "p99_amplification": 0.934 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 0.976, + "p99_amplification": 1.035 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 0.815, + "p99_amplification": 0.845 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 0.881, + "p99_amplification": 0.724 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 0.987, + "p99_amplification": 0.957 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.089, + "p99_amplification": 1.068 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.174, + "p99_amplification": 1.167 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.224, + "p99_amplification": 1.222 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.759, + "p99_amplification": 0.818 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.802, + "p99_amplification": 0.71 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 0.861, + "p99_amplification": 1.254 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 0.902, + "p99_amplification": 0.904 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 
0.922, + "p99_amplification": 0.931 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 0.948, + "p99_amplification": 0.927 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 0.815, + "p99_amplification": 0.792 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 0.879, + "p99_amplification": 0.722 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 0.996, + "p99_amplification": 0.969 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.101, + "p99_amplification": 1.126 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.161, + "p99_amplification": 1.153 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.214, + "p99_amplification": 1.178 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 0.764, + "p99_amplification": 0.753 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.788, + "p99_amplification": 0.755 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 0.851, + "p99_amplification": 0.91 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 0.894, + "p99_amplification": 0.879 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.912, + "p99_amplification": 0.965 + }, + { + 
"sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 0.955, + "p99_amplification": 1.275 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 0.764, + "p99_amplification": 0.741 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.778, + "p99_amplification": 0.656 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.851, + "p99_amplification": 0.841 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 0.898, + "p99_amplification": 0.907 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.912, + "p99_amplification": 0.975 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 0.955, + "p99_amplification": 0.958 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.602, + "p99_amplification": 3.119 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.57, + "p99_amplification": 1.75 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.675, + "p99_amplification": 1.364 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.574, + "p99_amplification": 1.552 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.658, + "p99_amplification": 3.07 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 1.657, + "p99_amplification": 1.994 + }, + { + "sku": 
"h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.738, + "p99_amplification": 1.97 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.584, + "p99_amplification": 1.386 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.383, + "p99_amplification": 1.475 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.508, + "p99_amplification": 1.589 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.526, + "p99_amplification": 1.095 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.559, + "p99_amplification": 1.545 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.545, + "p99_amplification": 1.953 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 1.543, + "p99_amplification": 1.873 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 1.399, + "p99_amplification": 1.514 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.309, + "p99_amplification": 1.295 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 1.599, + "p99_amplification": 1.954 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.557, + "p99_amplification": 1.666 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 1.529, + "p99_amplification": 1.343 + }, 
+ { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 1.565, + "p99_amplification": 1.525 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 1.608, + "p99_amplification": 1.666 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.715, + "p99_amplification": 1.862 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.926, + "p99_amplification": 1.935 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.738, + "p99_amplification": 1.803 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.541, + "p99_amplification": 1.702 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 1.482, + "p99_amplification": 2.139 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 1.539, + "p99_amplification": 2.319 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 1.508, + "p99_amplification": 1.633 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 1.574, + "p99_amplification": 1.825 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 1.703, + "p99_amplification": 2.163 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.939, + "p99_amplification": 1.961 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.673, + 
"p99_amplification": 1.425 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.518, + "p99_amplification": 1.518 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.505, + "p99_amplification": 1.47 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.538, + "p99_amplification": 1.182 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 1.545, + "p99_amplification": 1.216 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.606, + "p99_amplification": 1.519 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.71, + "p99_amplification": 1.74 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.994, + "p99_amplification": 1.874 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.735, + "p99_amplification": 1.616 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 1.654, + "p99_amplification": 1.848 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 1.606, + "p99_amplification": 1.946 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 1.568, + "p99_amplification": 1.236 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 1.631, + "p99_amplification": 1.346 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": 
"zipf-moderate", + "T": 16, + "p50_amplification": 1.651, + "p99_amplification": 1.897 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 1.602, + "p99_amplification": 1.734 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.72, + "p99_amplification": 1.91 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.504, + "p99_amplification": 1.253 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.615, + "p99_amplification": 1.952 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.581, + "p99_amplification": 1.903 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.568, + "p99_amplification": 1.414 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.617, + "p99_amplification": 1.446 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.683, + "p99_amplification": 1.968 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.776, + "p99_amplification": 2.062 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 1.997, + "p99_amplification": 2.059 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.749, + "p99_amplification": 1.511 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 1.605, + 
"p99_amplification": 1.578 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.54, + "p99_amplification": 1.682 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 1.569, + "p99_amplification": 1.004 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.58, + "p99_amplification": 1.415 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.675, + "p99_amplification": 1.517 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 1.74, + "p99_amplification": 2.088 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 1.997, + "p99_amplification": 2.218 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.777, + "p99_amplification": 1.649 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.505, + "p99_amplification": 1.624 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.319, + "p99_amplification": 1.171 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.383, + "p99_amplification": 1.534 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.483, + "p99_amplification": 1.602 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.508, + "p99_amplification": 1.64 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.535, + 
"p99_amplification": 1.797 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.245, + "p99_amplification": 1.84 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.257, + "p99_amplification": 1.173 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.356, + "p99_amplification": 1.496 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.452, + "p99_amplification": 1.427 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.503, + "p99_amplification": 1.683 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.526, + "p99_amplification": 1.78 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.742, + "p99_amplification": 1.756 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 1.532, + "p99_amplification": 1.353 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.256, + "p99_amplification": 1.396 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.285, + "p99_amplification": 1.386 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.264, + "p99_amplification": 1.39 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.247, + "p99_amplification": 2.293 + }, + { + "sku": "h200", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.664, + "p99_amplification": 2.103 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.398, + "p99_amplification": 1.297 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.4, + "p99_amplification": 1.52 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.461, + "p99_amplification": 2.852 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.503, + "p99_amplification": 2.33 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.504, + "p99_amplification": 1.499 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.7, + "p99_amplification": 1.817 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 1.448, + "p99_amplification": 1.272 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.216, + "p99_amplification": 1.419 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.263, + "p99_amplification": 1.302 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.233, + "p99_amplification": 1.324 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.22, + "p99_amplification": 1.34 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.54, + 
"p99_amplification": 1.672 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.346, + "p99_amplification": 1.253 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.408, + "p99_amplification": 1.714 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.514, + "p99_amplification": 1.703 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 1.52, + "p99_amplification": 1.564 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.564, + "p99_amplification": 1.677 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.719, + "p99_amplification": 1.817 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 1.49, + "p99_amplification": 1.662 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.216, + "p99_amplification": 1.321 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.258, + "p99_amplification": 1.318 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.25, + "p99_amplification": 1.344 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.228, + "p99_amplification": 1.292 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.759, + "p99_amplification": 1.867 + }, + { + "sku": 
"h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 1.514, + "p99_amplification": 1.408 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 1.234, + "p99_amplification": 1.376 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.255, + "p99_amplification": 1.296 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.252, + "p99_amplification": 1.47 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.225, + "p99_amplification": 1.345 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 3.367, + "p99_amplification": 28.306 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 3.282, + "p99_amplification": 33.44 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 3.448, + "p99_amplification": 18.228 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 3.481, + "p99_amplification": 23.417 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 3.693, + "p99_amplification": 26.393 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 3.378, + "p99_amplification": 24.932 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 3.313, + "p99_amplification": 20.015 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 3.557, + "p99_amplification": 14.366 + }, + { + "sku": "h200", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 3.383, + "p99_amplification": 28.141 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 3.342, + "p99_amplification": 27.284 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 3.582, + "p99_amplification": 18.835 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 3.518, + "p99_amplification": 22.277 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 3.636, + "p99_amplification": 26.361 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 3.542, + "p99_amplification": 25.62 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 3.748, + "p99_amplification": 19.226 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 4.177, + "p99_amplification": 14.813 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 3.058, + "p99_amplification": 29.623 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 2.891, + "p99_amplification": 28.174 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 3.086, + "p99_amplification": 18.678 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 3.052, + "p99_amplification": 22.452 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 3.139, + "p99_amplification": 
27.707 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 3.044, + "p99_amplification": 27.12 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 2.82, + "p99_amplification": 23.331 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 2.563, + "p99_amplification": 16.936 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 3.062, + "p99_amplification": 29.511 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 3.113, + "p99_amplification": 34.192 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 3.067, + "p99_amplification": 19.528 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 3.305, + "p99_amplification": 21.596 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 3.169, + "p99_amplification": 26.304 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 3.159, + "p99_amplification": 25.845 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 2.867, + "p99_amplification": 21.392 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.816, + "p99_amplification": 15.182 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 3.082, + "p99_amplification": 30.467 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + 
"p50_amplification": 3.029, + "p99_amplification": 27.463 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 3.194, + "p99_amplification": 17.519 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + "p50_amplification": 3.06, + "p99_amplification": 22.269 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 3.122, + "p99_amplification": 27.069 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 3.284, + "p99_amplification": 27.063 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 2.788, + "p99_amplification": 21.525 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 2.587, + "p99_amplification": 17.034 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 3.578, + "p99_amplification": 28.472 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 3.42, + "p99_amplification": 25.937 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 3.475, + "p99_amplification": 18.736 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 3.572, + "p99_amplification": 19.75 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 3.677, + "p99_amplification": 24.554 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 3.531, + "p99_amplification": 24.007 + }, + { + "sku": "h200", + "ep": 
8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 3.397, + "p99_amplification": 19.317 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 3.535, + "p99_amplification": 15.67 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 3.031, + "p99_amplification": 29.838 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 3.089, + "p99_amplification": 27.203 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 3.142, + "p99_amplification": 18.776 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 3.313, + "p99_amplification": 22.2 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 3.371, + "p99_amplification": 26.765 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 3.156, + "p99_amplification": 26.983 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 2.851, + "p99_amplification": 22.225 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 2.579, + "p99_amplification": 17.4 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 3.023, + "p99_amplification": 27.332 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 3.179, + "p99_amplification": 25.952 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + 
"p50_amplification": 3.145, + "p99_amplification": 16.296 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 3.379, + "p99_amplification": 20.791 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 3.316, + "p99_amplification": 26.614 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 3.367, + "p99_amplification": 24.372 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 2.924, + "p99_amplification": 20.814 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 2.505, + "p99_amplification": 16.337 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 3.287, + "p99_amplification": 16.139 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 3.74, + "p99_amplification": 9.928 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 4.427, + "p99_amplification": 8.424 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 5.133, + "p99_amplification": 5.546 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 5.607, + "p99_amplification": 5.8 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 6.484, + "p99_amplification": 6.497 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 3.931, + "p99_amplification": 14.928 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + 
"p50_amplification": 4.77, + "p99_amplification": 9.006 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 5.958, + "p99_amplification": 7.605 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 7.229, + "p99_amplification": 7.255 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 7.905, + "p99_amplification": 8.053 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 8.471, + "p99_amplification": 8.168 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 2.598, + "p99_amplification": 18.355 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 2.482, + "p99_amplification": 11.348 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 2.368, + "p99_amplification": 10.368 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 2.201, + "p99_amplification": 6.656 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.972, + "p99_amplification": 4.244 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.791, + "p99_amplification": 2.413 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 2.729, + "p99_amplification": 17.487 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 2.855, + "p99_amplification": 10.668 + }, + { + "sku": 
"h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 3.019, + "p99_amplification": 9.545 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 3.318, + "p99_amplification": 5.808 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 3.266, + "p99_amplification": 3.746 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 3.693, + "p99_amplification": 3.687 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 2.696, + "p99_amplification": 18.11 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 2.501, + "p99_amplification": 11.56 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 2.363, + "p99_amplification": 9.986 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 2.264, + "p99_amplification": 6.799 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 1.967, + "p99_amplification": 3.973 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.811, + "p99_amplification": 2.288 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 3.423, + "p99_amplification": 14.62 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 3.781, + "p99_amplification": 9.262 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, 
+ "p50_amplification": 4.448, + "p99_amplification": 8.322 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 5.169, + "p99_amplification": 5.626 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 5.619, + "p99_amplification": 5.779 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 6.483, + "p99_amplification": 6.528 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 2.481, + "p99_amplification": 18.164 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 2.364, + "p99_amplification": 12.559 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 2.226, + "p99_amplification": 12.241 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 2.139, + "p99_amplification": 6.797 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 1.947, + "p99_amplification": 4.114 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.78, + "p99_amplification": 2.391 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 2.41, + "p99_amplification": 17.321 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 2.288, + "p99_amplification": 12.015 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 2.143, + "p99_amplification": 
11.491 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 2.09, + "p99_amplification": 6.863 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 1.89, + "p99_amplification": 3.937 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.754, + "p99_amplification": 2.368 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.061, + "p99_amplification": 1.372 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.068, + "p99_amplification": 1.127 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.028, + "p99_amplification": 0.928 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.05, + "p99_amplification": 1.163 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.034, + "p99_amplification": 1.032 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 32, + "p50_amplification": 1.044, + "p99_amplification": 1.077 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 64, + "p50_amplification": 1.038, + "p99_amplification": 1.152 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.054, + "p99_amplification": 0.951 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.135, + "p99_amplification": 1.401 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 1.048, + "p99_amplification": 1.563 + }, + { + "sku": "h200", + 
"ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.019, + "p99_amplification": 0.839 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 1.032, + "p99_amplification": 1.045 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 1.034, + "p99_amplification": 1.318 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 32, + "p50_amplification": 0.995, + "p99_amplification": 1.241 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 64, + "p50_amplification": 1.025, + "p99_amplification": 1.184 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.027, + "p99_amplification": 0.959 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 1, + "p50_amplification": 1.117, + "p99_amplification": 1.384 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 2, + "p50_amplification": 1.029, + "p99_amplification": 1.166 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 4, + "p50_amplification": 1.051, + "p99_amplification": 0.901 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 8, + "p50_amplification": 1.05, + "p99_amplification": 1.083 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 16, + "p50_amplification": 1.035, + "p99_amplification": 1.245 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 32, + "p50_amplification": 1.033, + "p99_amplification": 1.197 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 64, + "p50_amplification": 1.01, + 
"p99_amplification": 1.196 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.007, + "p99_amplification": 0.904 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 1, + "p50_amplification": 1.012, + "p99_amplification": 1.21 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 2, + "p50_amplification": 0.973, + "p99_amplification": 1.178 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 4, + "p50_amplification": 1.083, + "p99_amplification": 0.902 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 8, + "p50_amplification": 0.998, + "p99_amplification": 0.916 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 16, + "p50_amplification": 1.088, + "p99_amplification": 1.468 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 32, + "p50_amplification": 1.027, + "p99_amplification": 1.236 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 64, + "p50_amplification": 1.042, + "p99_amplification": 1.218 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.063, + "p99_amplification": 0.963 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 1, + "p50_amplification": 1.024, + "p99_amplification": 0.957 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 2, + "p50_amplification": 1.027, + "p99_amplification": 1.153 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 4, + "p50_amplification": 1.04, + "p99_amplification": 0.77 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 8, + 
"p50_amplification": 1.019, + "p99_amplification": 0.819 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 16, + "p50_amplification": 1.038, + "p99_amplification": 1.463 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 32, + "p50_amplification": 1.011, + "p99_amplification": 1.042 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 64, + "p50_amplification": 1.011, + "p99_amplification": 1.081 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 1.012, + "p99_amplification": 0.916 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 1, + "p50_amplification": 1.024, + "p99_amplification": 1.028 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 2, + "p50_amplification": 0.995, + "p99_amplification": 0.96 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 4, + "p50_amplification": 1.052, + "p99_amplification": 0.701 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 8, + "p50_amplification": 1.025, + "p99_amplification": 0.912 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 16, + "p50_amplification": 1.021, + "p99_amplification": 1.061 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 32, + "p50_amplification": 1.037, + "p99_amplification": 1.125 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 64, + "p50_amplification": 1.039, + "p99_amplification": 0.975 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.063, + "p99_amplification": 0.945 + }, + { + "sku": "h200", + "ep": 8, + 
"phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 1, + "p50_amplification": 1.024, + "p99_amplification": 3.84 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 2, + "p50_amplification": 1.045, + "p99_amplification": 1.199 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 4, + "p50_amplification": 1.049, + "p99_amplification": 0.892 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 8, + "p50_amplification": 1.055, + "p99_amplification": 1.079 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 16, + "p50_amplification": 1.009, + "p99_amplification": 0.972 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 32, + "p50_amplification": 1.071, + "p99_amplification": 1.414 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 64, + "p50_amplification": 0.997, + "p99_amplification": 1.142 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.008, + "p99_amplification": 0.91 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 1, + "p50_amplification": 1.096, + "p99_amplification": 1.353 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 2, + "p50_amplification": 1.022, + "p99_amplification": 1.291 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 4, + "p50_amplification": 1.105, + "p99_amplification": 0.958 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 8, + "p50_amplification": 1.001, + "p99_amplification": 0.934 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 16, + "p50_amplification": 1.046, + 
"p99_amplification": 1.234 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 32, + "p50_amplification": 1.041, + "p99_amplification": 1.332 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 64, + "p50_amplification": 1.012, + "p99_amplification": 1.259 + }, + { + "sku": "h200", + "ep": 8, + "phase": "decode", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.001, + "p99_amplification": 0.988 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 128, + "p50_amplification": 1.027, + "p99_amplification": 1.077 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 256, + "p50_amplification": 1.034, + "p99_amplification": 0.932 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 512, + "p50_amplification": 1.122, + "p99_amplification": 1.206 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 1024, + "p50_amplification": 1.196, + "p99_amplification": 1.303 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 2048, + "p50_amplification": 1.226, + "p99_amplification": 1.24 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf", + "T": 4096, + "p50_amplification": 1.25, + "p99_amplification": 1.221 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 128, + "p50_amplification": 1.011, + "p99_amplification": 1.014 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 256, + "p50_amplification": 1.036, + "p99_amplification": 0.85 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 512, + "p50_amplification": 1.1, + "p99_amplification": 1.149 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 1024, + "p50_amplification": 1.168, + 
"p99_amplification": 1.157 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 2048, + "p50_amplification": 1.209, + "p99_amplification": 1.257 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy", + "T": 4096, + "p50_amplification": 1.226, + "p99_amplification": 1.207 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 128, + "p50_amplification": 1.001, + "p99_amplification": 1.059 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 256, + "p50_amplification": 0.991, + "p99_amplification": 0.927 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 512, + "p50_amplification": 1.016, + "p99_amplification": 1.093 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 1024, + "p50_amplification": 1.029, + "p99_amplification": 1.117 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 2048, + "p50_amplification": 1.021, + "p99_amplification": 1.069 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-heavy+eplb", + "T": 4096, + "p50_amplification": 1.019, + "p99_amplification": 1.033 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 128, + "p50_amplification": 1.031, + "p99_amplification": 1.161 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 256, + "p50_amplification": 1.067, + "p99_amplification": 0.948 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 512, + "p50_amplification": 1.119, + "p99_amplification": 1.206 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 1024, + "p50_amplification": 1.184, + "p99_amplification": 1.211 + }, + { + "sku": "h200", + "ep": 8, + "phase": 
"prefill", + "routing": "zipf-mild", + "T": 2048, + "p50_amplification": 1.233, + "p99_amplification": 1.265 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild", + "T": 4096, + "p50_amplification": 1.262, + "p99_amplification": 1.25 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 128, + "p50_amplification": 0.989, + "p99_amplification": 0.99 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 256, + "p50_amplification": 0.995, + "p99_amplification": 0.968 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 512, + "p50_amplification": 1.003, + "p99_amplification": 1.007 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 1024, + "p50_amplification": 1.001, + "p99_amplification": 0.978 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 2048, + "p50_amplification": 0.987, + "p99_amplification": 0.979 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-mild+eplb", + "T": 4096, + "p50_amplification": 1.001, + "p99_amplification": 0.986 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 128, + "p50_amplification": 1.039, + "p99_amplification": 1.159 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 256, + "p50_amplification": 1.04, + "p99_amplification": 0.906 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 512, + "p50_amplification": 1.118, + "p99_amplification": 1.136 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 1024, + "p50_amplification": 1.176, + "p99_amplification": 1.173 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 2048, + "p50_amplification": 
1.225, + "p99_amplification": 1.339 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate", + "T": 4096, + "p50_amplification": 1.248, + "p99_amplification": 1.226 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 128, + "p50_amplification": 1.024, + "p99_amplification": 1.395 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 256, + "p50_amplification": 0.998, + "p99_amplification": 2.317 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 512, + "p50_amplification": 1.007, + "p99_amplification": 1.017 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 1024, + "p50_amplification": 1.008, + "p99_amplification": 1.038 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 2048, + "p50_amplification": 0.989, + "p99_amplification": 1.083 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf-moderate+eplb", + "T": 4096, + "p50_amplification": 1.004, + "p99_amplification": 1.072 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 128, + "p50_amplification": 1.005, + "p99_amplification": 1.122 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 256, + "p50_amplification": 0.987, + "p99_amplification": 0.956 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 512, + "p50_amplification": 0.999, + "p99_amplification": 1.1 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 1024, + "p50_amplification": 1.01, + "p99_amplification": 1.238 + }, + { + "sku": "h200", + "ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 2048, + "p50_amplification": 0.992, + "p99_amplification": 1.033 + }, + { + "sku": "h200", + 
"ep": 8, + "phase": "prefill", + "routing": "zipf+eplb", + "T": 4096, + "p50_amplification": 1.007, + "p99_amplification": 1.02 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 1, + "p50_amplification": 1.036, + "p99_amplification": 1.806 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 2, + "p50_amplification": 1.013, + "p99_amplification": 0.918 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 4, + "p50_amplification": 1.044, + "p99_amplification": 0.944 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 8, + "p50_amplification": 1.067, + "p99_amplification": 0.972 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf", + "T": 16, + "p50_amplification": 1.053, + "p99_amplification": 0.978 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 1, + "p50_amplification": 1.034, + "p99_amplification": 1.643 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 2, + "p50_amplification": 0.975, + "p99_amplification": 0.883 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 4, + "p50_amplification": 1.036, + "p99_amplification": 0.975 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 8, + "p50_amplification": 0.985, + "p99_amplification": 0.953 + }, + { + "sku": "mi355x", + "ep": 8, + "phase": "decode", + "routing": "zipf-heavy", + "T": 16, + "p50_amplification": 0.981, + "p99_amplification": 0.943 + } + ] + }, + "nccl": [ + { + "id": "cxn-c969040f", + "identity": "nccl|mi325x|all_gather|mi325x-xgmi|xgmi|8|nccl-tests-v1", + "op": "all_gather", + "sku": "mi325x", + "runner": "mi325x-amds_04", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": 
"standardized", + "comparisonKey": "f73bf6497a834947", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 94.0214, + "status": "valid", + "valid": true, + "colorKey": "mi325x_c969040f", + "label": "MI325X · mi325x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T16:47:31.488827+00:00", + "run": { + "id": "28606364747", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606364747", + "createdAt": "2026-07-02T16:40:32Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 61.58, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 61.58, + "inPlaceUs": 57.51, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 58.92, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 58.92, + "inPlaceUs": 58.71, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 58.78, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 58.78, + "inPlaceUs": 57.82, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 57.85, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 57.85, + "inPlaceUs": 56.42, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 58, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 58, + "inPlaceUs": 56.45, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 58.28, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.06, + "outOfPlaceUs": 58.28, + "inPlaceUs": 56.58, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 57.24, + "algBandwidthGbps": 0.14, + "busBandwidthGbps": 0.13, + "outOfPlaceUs": 59.4, + "inPlaceUs": 57.24, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 56.52, + "algBandwidthGbps": 0.29, + "busBandwidthGbps": 0.25, + "outOfPlaceUs": 
59.61, + "inPlaceUs": 56.52, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 56.55, + "algBandwidthGbps": 0.58, + "busBandwidthGbps": 0.51, + "outOfPlaceUs": 60.02, + "inPlaceUs": 56.55, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 58.1, + "algBandwidthGbps": 1.13, + "busBandwidthGbps": 0.99, + "outOfPlaceUs": 61.39, + "inPlaceUs": 58.1, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 62.08, + "algBandwidthGbps": 2.11, + "busBandwidthGbps": 1.85, + "outOfPlaceUs": 63.84, + "inPlaceUs": 62.08, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 66.15, + "algBandwidthGbps": 3.96, + "busBandwidthGbps": 3.47, + "outOfPlaceUs": 67.84, + "inPlaceUs": 66.15, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 66.51, + "algBandwidthGbps": 7.88, + "busBandwidthGbps": 6.9, + "outOfPlaceUs": 67.46, + "inPlaceUs": 66.51, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 59.24, + "algBandwidthGbps": 17.7, + "busBandwidthGbps": 15.49, + "outOfPlaceUs": 59.24, + "inPlaceUs": 59.65, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 62.77, + "algBandwidthGbps": 33.41, + "busBandwidthGbps": 29.23, + "outOfPlaceUs": 62.77, + "inPlaceUs": 62.99, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 67.98, + "algBandwidthGbps": 61.7, + "busBandwidthGbps": 53.99, + "outOfPlaceUs": 67.98, + "inPlaceUs": 68.21, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 63.12, + "algBandwidthGbps": 132.9, + "busBandwidthGbps": 116.29, + "outOfPlaceUs": 63.12, + "inPlaceUs": 64.56, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 76.47, + "algBandwidthGbps": 219.4, + "busBandwidthGbps": 191.98, + "outOfPlaceUs": 76.47, + "inPlaceUs": 76.59, + 
"correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 124.5, + "algBandwidthGbps": 269.58, + "busBandwidthGbps": 235.88, + "outOfPlaceUs": 125.3, + "inPlaceUs": 124.5, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 223.4, + "algBandwidthGbps": 300.45, + "busBandwidthGbps": 262.89, + "outOfPlaceUs": 225.5, + "inPlaceUs": 223.4, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 421.9, + "algBandwidthGbps": 318.1, + "busBandwidthGbps": 278.33, + "outOfPlaceUs": 427.5, + "inPlaceUs": 421.9, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 813.9, + "algBandwidthGbps": 329.83, + "busBandwidthGbps": 288.6, + "outOfPlaceUs": 831, + "inPlaceUs": 813.9, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1614.7, + "algBandwidthGbps": 332.49, + "busBandwidthGbps": 290.93, + "outOfPlaceUs": 1638.8, + "inPlaceUs": 1614.7, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 3212.5, + "algBandwidthGbps": 334.24, + "busBandwidthGbps": 292.46, + "outOfPlaceUs": 3250.9, + "inPlaceUs": 3212.5, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 6451.2, + "algBandwidthGbps": 332.88, + "busBandwidthGbps": 291.27, + "outOfPlaceUs": 6547.5, + "inPlaceUs": 6451.2, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 13163, + "algBandwidthGbps": 326.29, + "busBandwidthGbps": 285.5, + "outOfPlaceUs": 13178, + "inPlaceUs": 13163, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 26516, + "algBandwidthGbps": 323.96, + "busBandwidthGbps": 283.46, + "outOfPlaceUs": 26708, + "inPlaceUs": 26516, + "correct": true + } + ] + }, + { + "id": "cxn-d3435e9a", + "identity": "nccl|mi325x|all_gather|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "all_gather", + "sku": "mi325x", + 
"runner": "mi325x-amds_05", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "8f8417874bf37410", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 93.6351, + "status": "valid", + "valid": true, + "colorKey": "mi325x_d3435e9a", + "label": "MI325X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T15:23:32.859178+00:00", + "run": { + "id": "28601042764", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601042764", + "createdAt": "2026-07-02T15:16:13Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 60.57, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 60.57, + "inPlaceUs": 58.19, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 59.07, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 59.07, + "inPlaceUs": 59.32, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 58.94, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 58.94, + "inPlaceUs": 59.39, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 58.56, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 58.56, + "inPlaceUs": 57.64, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 58.63, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 58.63, + "inPlaceUs": 57.53, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 59.28, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.06, + "outOfPlaceUs": 59.28, + "inPlaceUs": 57.61, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 60.55, + "algBandwidthGbps": 0.14, + "busBandwidthGbps": 0.12, + "outOfPlaceUs": 60.55, + "inPlaceUs": 58.23, + 
"correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 57.69, + "algBandwidthGbps": 0.28, + "busBandwidthGbps": 0.25, + "outOfPlaceUs": 60.5, + "inPlaceUs": 57.69, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 57.29, + "algBandwidthGbps": 0.57, + "busBandwidthGbps": 0.5, + "outOfPlaceUs": 60.41, + "inPlaceUs": 57.29, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 58.62, + "algBandwidthGbps": 1.12, + "busBandwidthGbps": 0.98, + "outOfPlaceUs": 62.47, + "inPlaceUs": 58.62, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 63.55, + "algBandwidthGbps": 2.06, + "busBandwidthGbps": 1.8, + "outOfPlaceUs": 65.24, + "inPlaceUs": 63.55, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 67.63, + "algBandwidthGbps": 3.88, + "busBandwidthGbps": 3.39, + "outOfPlaceUs": 68.82, + "inPlaceUs": 67.63, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 67.23, + "algBandwidthGbps": 7.8, + "busBandwidthGbps": 6.82, + "outOfPlaceUs": 68.97, + "inPlaceUs": 67.23, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 60.93, + "algBandwidthGbps": 17.21, + "busBandwidthGbps": 15.06, + "outOfPlaceUs": 60.93, + "inPlaceUs": 61.18, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 63.97, + "algBandwidthGbps": 32.78, + "busBandwidthGbps": 28.69, + "outOfPlaceUs": 64.28, + "inPlaceUs": 63.97, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 69.57, + "algBandwidthGbps": 60.29, + "busBandwidthGbps": 52.75, + "outOfPlaceUs": 69.57, + "inPlaceUs": 70.09, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 66.28, + "algBandwidthGbps": 126.56, + "busBandwidthGbps": 110.74, + "outOfPlaceUs": 66.28, + "inPlaceUs": 66.77, + "correct": true + }, + { + "sizeBytes": 
16777216, + "dtype": "float", + "latencyUs": 77.15, + "algBandwidthGbps": 217.47, + "busBandwidthGbps": 190.29, + "outOfPlaceUs": 77.26, + "inPlaceUs": 77.15, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 125.4, + "algBandwidthGbps": 267.61, + "busBandwidthGbps": 234.15, + "outOfPlaceUs": 127, + "inPlaceUs": 125.4, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 224.2, + "algBandwidthGbps": 299.3, + "busBandwidthGbps": 261.89, + "outOfPlaceUs": 227.6, + "inPlaceUs": 224.2, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 421.9, + "algBandwidthGbps": 318.11, + "busBandwidthGbps": 278.35, + "outOfPlaceUs": 429.7, + "inPlaceUs": 421.9, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 817.2, + "algBandwidthGbps": 328.49, + "busBandwidthGbps": 287.42, + "outOfPlaceUs": 833.3, + "inPlaceUs": 817.2, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1614.6, + "algBandwidthGbps": 332.51, + "busBandwidthGbps": 290.95, + "outOfPlaceUs": 1630.4, + "inPlaceUs": 1614.6, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 3213.6, + "algBandwidthGbps": 334.12, + "busBandwidthGbps": 292.35, + "outOfPlaceUs": 3249, + "inPlaceUs": 3213.6, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 6450.4, + "algBandwidthGbps": 332.92, + "busBandwidthGbps": 291.31, + "outOfPlaceUs": 6541.1, + "inPlaceUs": 6450.4, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 13092, + "algBandwidthGbps": 328.07, + "busBandwidthGbps": 287.06, + "outOfPlaceUs": 13215, + "inPlaceUs": 13092, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 26431, + "algBandwidthGbps": 324.99, + "busBandwidthGbps": 284.37, + "outOfPlaceUs": 26551, + "inPlaceUs": 26431, + "correct": 
true + } + ] + }, + { + "id": "cxn-e1de3b53", + "identity": "nccl|mi355x|all_gather|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "all_gather", + "sku": "mi355x", + "runner": "mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "8f8417874bf37410", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 112.398, + "status": "valid", + "valid": true, + "colorKey": "mi355x_e1de3b53", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T21:03:03.109527+00:00", + "run": { + "id": "28620413991", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28620413991", + "createdAt": "2026-07-02T20:44:56Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 188.3, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 188.3, + "inPlaceUs": 201.4, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 171.9, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 171.9, + "inPlaceUs": 199.5, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 172.1, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 172.1, + "inPlaceUs": 202.5, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 169.5, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 169.5, + "inPlaceUs": 167.4, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 171.5, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 171.5, + "inPlaceUs": 167.7, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 172.8, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 172.8, + "inPlaceUs": 167.5, + "correct": true + }, + { + 
"sizeBytes": 8192, + "dtype": "float", + "latencyUs": 167.6, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 204.8, + "inPlaceUs": 167.6, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 168, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 206.3, + "inPlaceUs": 168, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 167.3, + "algBandwidthGbps": 0.2, + "busBandwidthGbps": 0.17, + "outOfPlaceUs": 206.3, + "inPlaceUs": 167.3, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 173.6, + "algBandwidthGbps": 0.38, + "busBandwidthGbps": 0.33, + "outOfPlaceUs": 230.2, + "inPlaceUs": 173.6, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 184.4, + "algBandwidthGbps": 0.71, + "busBandwidthGbps": 0.62, + "outOfPlaceUs": 220.4, + "inPlaceUs": 184.4, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 133.6, + "algBandwidthGbps": 1.96, + "busBandwidthGbps": 1.72, + "outOfPlaceUs": 203.6, + "inPlaceUs": 133.6, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 133.9, + "algBandwidthGbps": 3.91, + "busBandwidthGbps": 3.43, + "outOfPlaceUs": 136.4, + "inPlaceUs": 133.9, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 120.6, + "algBandwidthGbps": 8.7, + "busBandwidthGbps": 7.61, + "outOfPlaceUs": 120.6, + "inPlaceUs": 138.3, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 126.9, + "algBandwidthGbps": 16.53, + "busBandwidthGbps": 14.46, + "outOfPlaceUs": 126.9, + "inPlaceUs": 145.6, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 137.9, + "algBandwidthGbps": 30.41, + "busBandwidthGbps": 26.61, + "outOfPlaceUs": 137.9, + "inPlaceUs": 155.6, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + 
"latencyUs": 138.9, + "algBandwidthGbps": 60.38, + "busBandwidthGbps": 52.83, + "outOfPlaceUs": 138.9, + "inPlaceUs": 156.2, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 131.8, + "algBandwidthGbps": 127.25, + "busBandwidthGbps": 111.35, + "outOfPlaceUs": 131.8, + "inPlaceUs": 149.4, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 132.3, + "algBandwidthGbps": 253.72, + "busBandwidthGbps": 222, + "outOfPlaceUs": 132.7, + "inPlaceUs": 132.3, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 174.5, + "algBandwidthGbps": 384.64, + "busBandwidthGbps": 336.56, + "outOfPlaceUs": 174.5, + "inPlaceUs": 174.6, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 320.8, + "algBandwidthGbps": 418.43, + "busBandwidthGbps": 366.12, + "outOfPlaceUs": 322, + "inPlaceUs": 320.8, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 616.9, + "algBandwidthGbps": 435.15, + "busBandwidthGbps": 380.75, + "outOfPlaceUs": 625.7, + "inPlaceUs": 616.9, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1205.6, + "algBandwidthGbps": 445.33, + "busBandwidthGbps": 389.67, + "outOfPlaceUs": 1205.6, + "inPlaceUs": 1206.4, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2401.3, + "algBandwidthGbps": 447.15, + "busBandwidthGbps": 391.26, + "outOfPlaceUs": 2401.3, + "inPlaceUs": 2401.5, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 4738.5, + "algBandwidthGbps": 453.2, + "busBandwidthGbps": 396.55, + "outOfPlaceUs": 4746.9, + "inPlaceUs": 4738.5, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 9405.6, + "algBandwidthGbps": 456.64, + "busBandwidthGbps": 399.56, + "outOfPlaceUs": 9405.6, + "inPlaceUs": 9424.3, + "correct": true + }, + { + "sizeBytes": 
8589934592, + "dtype": "float", + "latencyUs": 18652, + "algBandwidthGbps": 460.54, + "busBandwidthGbps": 402.97, + "outOfPlaceUs": 18652, + "inPlaceUs": 18914, + "correct": true + } + ] + }, + { + "id": "cxn-3eb3b655", + "identity": "allreduce-fw|mi325x|aiter|mi325x-xgmi|xgmi|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "mi325x", + "runner": "mi325x-amds_01", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "40354035d3fa14d9", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "mi325x_3eb3b655", + "label": "MI325X · aiter (fw-AR · ws8)", + "generatedAt": "2026-07-02T17:59:30.492807+00:00", + "run": { + "id": "28610750008", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28610750008", + "createdAt": "2026-07-02T17:54:35Z", + "sha": "8b91e30edfd944a33f19f9b742fda40356098e0c" + }, + "rows": [ + { + "sizeBytes": 1024, + "dtype": "bf16", + "latencyUs": 22.675, + "algBandwidthGbps": 0.045, + "busBandwidthGbps": 0.079, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 23.467, + "algBandwidthGbps": 0.175, + "busBandwidthGbps": 0.305, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 45.913, + "algBandwidthGbps": 0.357, + "busBandwidthGbps": 0.624, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 25.453, + "algBandwidthGbps": 2.575, + "busBandwidthGbps": 4.506, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 22.23, + "algBandwidthGbps": 11.792, + "busBandwidthGbps": 20.637, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + 
"sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 22.434, + "algBandwidthGbps": 46.741, + "busBandwidthGbps": 81.796, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 45.403, + "algBandwidthGbps": 92.379, + "busBandwidthGbps": 161.664, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 127.229, + "algBandwidthGbps": 131.866, + "busBandwidthGbps": 230.765, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 480.443, + "algBandwidthGbps": 139.681, + "busBandwidthGbps": 244.442, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-bcdec830", + "identity": "nccl|mi325x|all_reduce|mi325x-xgmi|xgmi|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "mi325x", + "runner": "mi325x-amds_04", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "3e025054047137aa", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 105.848, + "status": "valid", + "valid": true, + "colorKey": "mi325x_bcdec830", + "label": "MI325X · mi325x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T16:46:14.015734+00:00", + "run": { + "id": "28606364747", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606364747", + "createdAt": "2026-07-02T16:40:32Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 46.45, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 46.45, + "inPlaceUs": 42.74, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 44.89, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 44.89, + "inPlaceUs": 43.22, + "correct": true + 
}, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 43.32, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 43.32, + "inPlaceUs": 43.19, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 42.8, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 42.8, + "inPlaceUs": 44.56, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 43.24, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 43.24, + "inPlaceUs": 44.72, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 42.99, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 42.99, + "inPlaceUs": 44.38, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 43.03, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 43.03, + "inPlaceUs": 44.64, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 35.84, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 35.84, + "inPlaceUs": 39.46, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 39.43, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 39.43, + "inPlaceUs": 39.34, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 39.96, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.18, + "outOfPlaceUs": 39.96, + "inPlaceUs": 39.2, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 38.09, + "algBandwidthGbps": 0.22, + "busBandwidthGbps": 0.38, + "outOfPlaceUs": 39.8, + "inPlaceUs": 38.09, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 39.33, + "algBandwidthGbps": 0.42, + "busBandwidthGbps": 0.73, + "outOfPlaceUs": 40.18, + "inPlaceUs": 39.33, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 39.59, + "algBandwidthGbps": 
0.83, + "busBandwidthGbps": 1.45, + "outOfPlaceUs": 39.76, + "inPlaceUs": 39.59, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 38.23, + "algBandwidthGbps": 1.71, + "busBandwidthGbps": 3, + "outOfPlaceUs": 40.45, + "inPlaceUs": 38.23, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 44.06, + "algBandwidthGbps": 2.97, + "busBandwidthGbps": 5.21, + "outOfPlaceUs": 44.06, + "inPlaceUs": 44.56, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 43.54, + "algBandwidthGbps": 6.02, + "busBandwidthGbps": 10.54, + "outOfPlaceUs": 45.48, + "inPlaceUs": 43.54, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 45.57, + "algBandwidthGbps": 11.51, + "busBandwidthGbps": 20.13, + "outOfPlaceUs": 45.77, + "inPlaceUs": 45.57, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 44.64, + "algBandwidthGbps": 23.49, + "busBandwidthGbps": 41.11, + "outOfPlaceUs": 44.64, + "inPlaceUs": 46.34, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 46, + "algBandwidthGbps": 45.59, + "busBandwidthGbps": 79.79, + "outOfPlaceUs": 46.54, + "inPlaceUs": 46, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 57.88, + "algBandwidthGbps": 72.47, + "busBandwidthGbps": 126.82, + "outOfPlaceUs": 57.88, + "inPlaceUs": 58.29, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 89.54, + "algBandwidthGbps": 93.68, + "busBandwidthGbps": 163.95, + "outOfPlaceUs": 89.54, + "inPlaceUs": 90.45, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 152.4, + "algBandwidthGbps": 110.1, + "busBandwidthGbps": 192.68, + "outOfPlaceUs": 152.4, + "inPlaceUs": 153.4, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 222.3, + "algBandwidthGbps": 150.93, + "busBandwidthGbps": 
264.13, + "outOfPlaceUs": 222.3, + "inPlaceUs": 231.5, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 412.9, + "algBandwidthGbps": 162.52, + "busBandwidthGbps": 284.41, + "outOfPlaceUs": 412.9, + "inPlaceUs": 415, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 798.2, + "algBandwidthGbps": 168.14, + "busBandwidthGbps": 294.25, + "outOfPlaceUs": 800.8, + "inPlaceUs": 798.2, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 1569.4, + "algBandwidthGbps": 171.04, + "busBandwidthGbps": 299.32, + "outOfPlaceUs": 1569.4, + "inPlaceUs": 1570.1, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 3123.2, + "algBandwidthGbps": 171.9, + "busBandwidthGbps": 300.82, + "outOfPlaceUs": 3136.2, + "inPlaceUs": 3123.2, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 6217.8, + "algBandwidthGbps": 172.69, + "busBandwidthGbps": 302.2, + "outOfPlaceUs": 6241.6, + "inPlaceUs": 6217.8, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 12452, + "algBandwidthGbps": 172.46, + "busBandwidthGbps": 301.8, + "outOfPlaceUs": 12452, + "inPlaceUs": 12453, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 25055, + "algBandwidthGbps": 171.42, + "busBandwidthGbps": 299.98, + "outOfPlaceUs": 25173, + "inPlaceUs": 25055, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 50065, + "algBandwidthGbps": 171.58, + "busBandwidthGbps": 300.26, + "outOfPlaceUs": 50091, + "inPlaceUs": 50065, + "correct": true + } + ] + }, + { + "id": "cxn-dc2c02e9", + "identity": "nccl|mi325x|all_reduce|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "mi325x", + "runner": "mi325x-amds_05", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + 
"comparisonClass": "standardized", + "comparisonKey": "643cf957198f1634", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 105.51, + "status": "valid", + "valid": true, + "colorKey": "mi325x_dc2c02e9", + "label": "MI325X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T15:22:16.275882+00:00", + "run": { + "id": "28601042764", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601042764", + "createdAt": "2026-07-02T15:16:13Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 46.53, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 46.53, + "inPlaceUs": 42.91, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 44.11, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 44.11, + "inPlaceUs": 43.23, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 42.68, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 42.68, + "inPlaceUs": 42.75, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 43.08, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 43.08, + "inPlaceUs": 45.29, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 43.77, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 43.77, + "inPlaceUs": 45.08, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 43.71, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 43.71, + "inPlaceUs": 44.71, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 43.22, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 43.22, + "inPlaceUs": 45.2, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 36.3, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 
36.3, + "inPlaceUs": 39.54, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 39.64, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 39.64, + "inPlaceUs": 39.77, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 39.86, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.18, + "outOfPlaceUs": 39.86, + "inPlaceUs": 39.73, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 38.59, + "algBandwidthGbps": 0.21, + "busBandwidthGbps": 0.37, + "outOfPlaceUs": 39.88, + "inPlaceUs": 38.59, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 40.57, + "algBandwidthGbps": 0.4, + "busBandwidthGbps": 0.71, + "outOfPlaceUs": 40.57, + "inPlaceUs": 40.16, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 40.3, + "algBandwidthGbps": 0.81, + "busBandwidthGbps": 1.42, + "outOfPlaceUs": 40.3, + "inPlaceUs": 40.29, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 38.68, + "algBandwidthGbps": 1.69, + "busBandwidthGbps": 2.97, + "outOfPlaceUs": 40.41, + "inPlaceUs": 38.68, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 44.21, + "algBandwidthGbps": 2.96, + "busBandwidthGbps": 5.19, + "outOfPlaceUs": 44.21, + "inPlaceUs": 45.46, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 44.28, + "algBandwidthGbps": 5.92, + "busBandwidthGbps": 10.36, + "outOfPlaceUs": 46.14, + "inPlaceUs": 44.28, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 46.78, + "algBandwidthGbps": 11.21, + "busBandwidthGbps": 19.62, + "outOfPlaceUs": 46.78, + "inPlaceUs": 46.88, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 45.1, + "algBandwidthGbps": 23.25, + "busBandwidthGbps": 40.69, + "outOfPlaceUs": 45.1, + "inPlaceUs": 46.71, + "correct": true + }, + { + 
"sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 47.12, + "algBandwidthGbps": 44.51, + "busBandwidthGbps": 77.89, + "outOfPlaceUs": 47.24, + "inPlaceUs": 47.12, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 58.31, + "algBandwidthGbps": 71.93, + "busBandwidthGbps": 125.88, + "outOfPlaceUs": 58.31, + "inPlaceUs": 59.08, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 89.2, + "algBandwidthGbps": 94.05, + "busBandwidthGbps": 164.58, + "outOfPlaceUs": 89.2, + "inPlaceUs": 90.71, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 153.4, + "algBandwidthGbps": 109.33, + "busBandwidthGbps": 191.33, + "outOfPlaceUs": 153.4, + "inPlaceUs": 154, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 223.9, + "algBandwidthGbps": 149.89, + "busBandwidthGbps": 262.31, + "outOfPlaceUs": 223.9, + "inPlaceUs": 231.4, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 415.4, + "algBandwidthGbps": 161.54, + "busBandwidthGbps": 282.7, + "outOfPlaceUs": 415.9, + "inPlaceUs": 415.4, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 802.1, + "algBandwidthGbps": 167.33, + "busBandwidthGbps": 292.84, + "outOfPlaceUs": 803, + "inPlaceUs": 802.1, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 1574.9, + "algBandwidthGbps": 170.44, + "busBandwidthGbps": 298.28, + "outOfPlaceUs": 1577.3, + "inPlaceUs": 1574.9, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 3122.6, + "algBandwidthGbps": 171.93, + "busBandwidthGbps": 300.88, + "outOfPlaceUs": 3145.7, + "inPlaceUs": 3122.6, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 6256.8, + "algBandwidthGbps": 171.61, + "busBandwidthGbps": 300.32, + "outOfPlaceUs": 6290.1, + "inPlaceUs": 6256.8, + "correct": true 
+ }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 12444, + "algBandwidthGbps": 172.57, + "busBandwidthGbps": 301.99, + "outOfPlaceUs": 12447, + "inPlaceUs": 12444, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 24994, + "algBandwidthGbps": 171.84, + "busBandwidthGbps": 300.71, + "outOfPlaceUs": 24994, + "inPlaceUs": 25043, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 50047, + "algBandwidthGbps": 171.64, + "busBandwidthGbps": 300.37, + "outOfPlaceUs": 50112, + "inPlaceUs": 50047, + "correct": true + } + ] + }, + { + "id": "cxn-b39a1256", + "identity": "allreduce-fw|mi325x|nccl|mi325x-xgmi|xgmi|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "mi325x", + "runner": "mi325x-amds_01", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "5c5fd77e233bf053", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "mi325x_b39a1256", + "label": "MI325X · nccl (fw-AR · ws8)", + "generatedAt": "2026-07-02T17:59:30.492807+00:00", + "run": { + "id": "28610750008", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28610750008", + "createdAt": "2026-07-02T17:54:35Z", + "sha": "8b91e30edfd944a33f19f9b742fda40356098e0c" + }, + "rows": [ + { + "sizeBytes": 1024, + "dtype": "bf16", + "latencyUs": 25.886, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.069, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 27.979, + "algBandwidthGbps": 0.146, + "busBandwidthGbps": 0.256, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 27.776, + "algBandwidthGbps": 0.59, + "busBandwidthGbps": 1.032, + "outOfPlaceUs": null, + "inPlaceUs": null, + 
"correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 27.571, + "algBandwidthGbps": 2.377, + "busBandwidthGbps": 4.16, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 30.215, + "algBandwidthGbps": 8.676, + "busBandwidthGbps": 15.183, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 38.78, + "algBandwidthGbps": 27.039, + "busBandwidthGbps": 47.318, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 67.727, + "algBandwidthGbps": 61.929, + "busBandwidthGbps": 108.376, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 166.295, + "algBandwidthGbps": 100.888, + "busBandwidthGbps": 176.554, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 435.4, + "algBandwidthGbps": 154.132, + "busBandwidthGbps": 269.73, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-2bfe03c0", + "identity": "allreduce-fw|mi325x|sglang|mi325x-xgmi|xgmi|8|allreduce-fw-v1", + "op": "all_reduce", + "sku": "mi325x", + "runner": "mi325x-amds_01", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": null, + "dtype": "bf16", + "comparisonClass": null, + "comparisonKey": "085ac2eb0efcca23", + "measurementContract": "allreduce-fw-v1", + "avgBusBandwidthGbps": null, + "status": "valid", + "valid": true, + "colorKey": "mi325x_2bfe03c0", + "label": "MI325X · sglang (fw-AR · ws8)", + "generatedAt": "2026-07-02T17:59:30.492807+00:00", + "run": { + "id": "28610750008", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28610750008", + "createdAt": "2026-07-02T17:54:35Z", + "sha": 
"8b91e30edfd944a33f19f9b742fda40356098e0c" + }, + "rows": [ + { + "sizeBytes": 1024, + "dtype": "bf16", + "latencyUs": 24.126, + "algBandwidthGbps": 0.042, + "busBandwidthGbps": 0.074, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "bf16", + "latencyUs": 21.676, + "algBandwidthGbps": 0.189, + "busBandwidthGbps": 0.331, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "bf16", + "latencyUs": 23.04, + "algBandwidthGbps": 0.711, + "busBandwidthGbps": 1.244, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "bf16", + "latencyUs": 43.394, + "algBandwidthGbps": 1.51, + "busBandwidthGbps": 2.643, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "bf16", + "latencyUs": 21.869, + "algBandwidthGbps": 11.987, + "busBandwidthGbps": 20.977, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "bf16", + "latencyUs": 21.374, + "algBandwidthGbps": 49.059, + "busBandwidthGbps": 85.853, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "bf16", + "latencyUs": 44.959, + "algBandwidthGbps": 93.292, + "busBandwidthGbps": 163.26, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "bf16", + "latencyUs": 126.764, + "algBandwidthGbps": 132.35, + "busBandwidthGbps": 231.613, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "bf16", + "latencyUs": 479.152, + "algBandwidthGbps": 140.058, + "busBandwidthGbps": 245.101, + "outOfPlaceUs": null, + "inPlaceUs": null, + "correct": true + } + ] + }, + { + "id": "cxn-be6147f8", + "identity": "nccl|mi355x|all_reduce|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "all_reduce", + "sku": "mi355x", + "runner": 
"mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "643cf957198f1634", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 133.824, + "status": "valid", + "valid": true, + "colorKey": "mi355x_be6147f8", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T21:00:52.884543+00:00", + "run": { + "id": "28620413991", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28620413991", + "createdAt": "2026-07-02T20:44:56Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "sizeBytes": 8, + "dtype": "float", + "latencyUs": 68.12, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 68.12, + "inPlaceUs": 52.73, + "correct": true + }, + { + "sizeBytes": 16, + "dtype": "float", + "latencyUs": 54.16, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 54.16, + "inPlaceUs": 53.86, + "correct": true + }, + { + "sizeBytes": 32, + "dtype": "float", + "latencyUs": 54.29, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 54.29, + "inPlaceUs": 54.19, + "correct": true + }, + { + "sizeBytes": 64, + "dtype": "float", + "latencyUs": 53.93, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 53.93, + "inPlaceUs": 64.09, + "correct": true + }, + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 53.75, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 53.75, + "inPlaceUs": 63.73, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 54.12, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 54.12, + "inPlaceUs": 64.27, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 53.88, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 53.88, + "inPlaceUs": 64.26, + "correct": true + }, + { + "sizeBytes": 
1024, + "dtype": "float", + "latencyUs": 38.41, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 38.41, + "inPlaceUs": 51.19, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 51.24, + "algBandwidthGbps": 0.04, + "busBandwidthGbps": 0.07, + "outOfPlaceUs": 51.24, + "inPlaceUs": 52.09, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 51.6, + "algBandwidthGbps": 0.08, + "busBandwidthGbps": 0.14, + "outOfPlaceUs": 51.6, + "inPlaceUs": 51.86, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 41.83, + "algBandwidthGbps": 0.2, + "busBandwidthGbps": 0.34, + "outOfPlaceUs": 50.93, + "inPlaceUs": 41.83, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 50.98, + "algBandwidthGbps": 0.32, + "busBandwidthGbps": 0.56, + "outOfPlaceUs": 50.98, + "inPlaceUs": 51.91, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 51.51, + "algBandwidthGbps": 0.64, + "busBandwidthGbps": 1.11, + "outOfPlaceUs": 51.51, + "inPlaceUs": 52.31, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 42.18, + "algBandwidthGbps": 1.55, + "busBandwidthGbps": 2.72, + "outOfPlaceUs": 51.22, + "inPlaceUs": 42.18, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 54.61, + "algBandwidthGbps": 2.4, + "busBandwidthGbps": 4.2, + "outOfPlaceUs": 54.61, + "inPlaceUs": 66.16, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 55.5, + "algBandwidthGbps": 4.72, + "busBandwidthGbps": 8.27, + "outOfPlaceUs": 64.29, + "inPlaceUs": 55.5, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 65.36, + "algBandwidthGbps": 8.02, + "busBandwidthGbps": 14.04, + "outOfPlaceUs": 65.36, + "inPlaceUs": 66.38, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 55.5, + 
"algBandwidthGbps": 18.89, + "busBandwidthGbps": 33.06, + "outOfPlaceUs": 55.5, + "inPlaceUs": 66.11, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 65.91, + "algBandwidthGbps": 31.82, + "busBandwidthGbps": 55.69, + "outOfPlaceUs": 65.91, + "inPlaceUs": 66.11, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 58.51, + "algBandwidthGbps": 71.68, + "busBandwidthGbps": 125.44, + "outOfPlaceUs": 58.73, + "inPlaceUs": 58.51, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 73.88, + "algBandwidthGbps": 113.55, + "busBandwidthGbps": 198.71, + "outOfPlaceUs": 79.15, + "inPlaceUs": 73.88, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 118.6, + "algBandwidthGbps": 141.42, + "busBandwidthGbps": 247.48, + "outOfPlaceUs": 118.6, + "inPlaceUs": 126.9, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 182.6, + "algBandwidthGbps": 183.77, + "busBandwidthGbps": 321.6, + "outOfPlaceUs": 183.7, + "inPlaceUs": 182.6, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 324.1, + "algBandwidthGbps": 207.04, + "busBandwidthGbps": 362.31, + "outOfPlaceUs": 325.3, + "inPlaceUs": 324.1, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 614.1, + "algBandwidthGbps": 218.57, + "busBandwidthGbps": 382.5, + "outOfPlaceUs": 623.6, + "inPlaceUs": 614.1, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 1191.8, + "algBandwidthGbps": 225.23, + "busBandwidthGbps": 394.15, + "outOfPlaceUs": 1191.8, + "inPlaceUs": 1192.9, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 2349.2, + "algBandwidthGbps": 228.53, + "busBandwidthGbps": 399.93, + "outOfPlaceUs": 2349.2, + "inPlaceUs": 2351.6, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + 
"latencyUs": 4672.2, + "algBandwidthGbps": 229.81, + "busBandwidthGbps": 402.18, + "outOfPlaceUs": 4672.3, + "inPlaceUs": 4672.2, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 9226.1, + "algBandwidthGbps": 232.76, + "busBandwidthGbps": 407.33, + "outOfPlaceUs": 9226.1, + "inPlaceUs": 9243.8, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 18517, + "algBandwidthGbps": 231.94, + "busBandwidthGbps": 405.9, + "outOfPlaceUs": 18517, + "inPlaceUs": 18524, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 37095, + "algBandwidthGbps": 231.57, + "busBandwidthGbps": 405.24, + "outOfPlaceUs": 37095, + "inPlaceUs": 37121, + "correct": true + } + ] + }, + { + "id": "cxn-d26cfe11", + "identity": "nccl|mi325x|alltoall|mi325x-xgmi|xgmi|8|nccl-tests-v1", + "op": "alltoall", + "sku": "mi325x", + "runner": "mi325x-amds_04", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "fa0bad2230ebdbb0", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 94.0539, + "status": "valid", + "valid": true, + "colorKey": "mi325x_d26cfe11", + "label": "MI325X · mi325x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T16:50:04.211738+00:00", + "run": { + "id": "28606364747", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606364747", + "createdAt": "2026-07-02T16:40:32Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 33.61, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 33.61, + "inPlaceUs": 65.96, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 40.25, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 40.25, + "inPlaceUs": 64.81, + "correct": true + }, + { + "sizeBytes": 512, 
+ "dtype": "float", + "latencyUs": 42.46, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 42.46, + "inPlaceUs": 66.28, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 39.89, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 39.89, + "inPlaceUs": 64.54, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 40.37, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 40.37, + "inPlaceUs": 66.77, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 39.98, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.09, + "outOfPlaceUs": 39.98, + "inPlaceUs": 64.42, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 41.98, + "algBandwidthGbps": 0.2, + "busBandwidthGbps": 0.17, + "outOfPlaceUs": 41.98, + "inPlaceUs": 64.75, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 43.69, + "algBandwidthGbps": 0.38, + "busBandwidthGbps": 0.33, + "outOfPlaceUs": 43.69, + "inPlaceUs": 66.92, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 42.77, + "algBandwidthGbps": 0.77, + "busBandwidthGbps": 0.67, + "outOfPlaceUs": 42.77, + "inPlaceUs": 65.58, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 43, + "algBandwidthGbps": 1.52, + "busBandwidthGbps": 1.33, + "outOfPlaceUs": 43, + "inPlaceUs": 68.56, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 43.36, + "algBandwidthGbps": 3.02, + "busBandwidthGbps": 2.64, + "outOfPlaceUs": 43.36, + "inPlaceUs": 68.81, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 47.53, + "algBandwidthGbps": 5.52, + "busBandwidthGbps": 4.83, + "outOfPlaceUs": 47.53, + "inPlaceUs": 73.56, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 47.73, + "algBandwidthGbps": 
10.98, + "busBandwidthGbps": 9.61, + "outOfPlaceUs": 47.73, + "inPlaceUs": 76.29, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 47.5, + "algBandwidthGbps": 22.07, + "busBandwidthGbps": 19.32, + "outOfPlaceUs": 47.5, + "inPlaceUs": 66.36, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 49.8, + "algBandwidthGbps": 42.11, + "busBandwidthGbps": 36.84, + "outOfPlaceUs": 49.8, + "inPlaceUs": 71.07, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 48.38, + "algBandwidthGbps": 86.7, + "busBandwidthGbps": 75.86, + "outOfPlaceUs": 48.38, + "inPlaceUs": 74.32, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 50.86, + "algBandwidthGbps": 164.94, + "busBandwidthGbps": 144.33, + "outOfPlaceUs": 50.86, + "inPlaceUs": 74.87, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 75.21, + "algBandwidthGbps": 223.07, + "busBandwidthGbps": 195.19, + "outOfPlaceUs": 75.21, + "inPlaceUs": 81.69, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 133.6, + "algBandwidthGbps": 251.2, + "busBandwidthGbps": 219.8, + "outOfPlaceUs": 133.6, + "inPlaceUs": 142.1, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 239.6, + "algBandwidthGbps": 280.11, + "busBandwidthGbps": 245.1, + "outOfPlaceUs": 240.3, + "inPlaceUs": 239.6, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 439, + "algBandwidthGbps": 305.75, + "busBandwidthGbps": 267.53, + "outOfPlaceUs": 439, + "inPlaceUs": 439.1, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 830, + "algBandwidthGbps": 323.42, + "busBandwidthGbps": 282.99, + "outOfPlaceUs": 837.5, + "inPlaceUs": 830, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1609.7, + "algBandwidthGbps": 333.52, + 
"busBandwidthGbps": 291.83, + "outOfPlaceUs": 1631.5, + "inPlaceUs": 1609.7, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 3180.3, + "algBandwidthGbps": 337.62, + "busBandwidthGbps": 295.42, + "outOfPlaceUs": 3225.4, + "inPlaceUs": 3180.3, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 6306.6, + "algBandwidthGbps": 340.51, + "busBandwidthGbps": 297.95, + "outOfPlaceUs": 6411.6, + "inPlaceUs": 6306.6, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 12566, + "algBandwidthGbps": 341.79, + "busBandwidthGbps": 299.07, + "outOfPlaceUs": 12778, + "inPlaceUs": 12566, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 25027, + "algBandwidthGbps": 343.22, + "busBandwidthGbps": 300.32, + "outOfPlaceUs": 25436, + "inPlaceUs": 25027, + "correct": true + } + ] + }, + { + "id": "cxn-d457bdd8", + "identity": "nccl|mi325x|alltoall|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "alltoall", + "sku": "mi325x", + "runner": "mi325x-amds_05", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "67a9b0532a278ee9", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 94.5562, + "status": "valid", + "valid": true, + "colorKey": "mi325x_d457bdd8", + "label": "MI325X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T15:26:01.530613+00:00", + "run": { + "id": "28601042764", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601042764", + "createdAt": "2026-07-02T15:16:13Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 32.95, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 32.95, + "inPlaceUs": 63.38, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 
37.5, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 37.5, + "inPlaceUs": 59.94, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 37.72, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 37.72, + "inPlaceUs": 62.18, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 35.97, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 35.97, + "inPlaceUs": 59.44, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 36.08, + "algBandwidthGbps": 0.06, + "busBandwidthGbps": 0.05, + "outOfPlaceUs": 36.08, + "inPlaceUs": 61.45, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 36.38, + "algBandwidthGbps": 0.11, + "busBandwidthGbps": 0.1, + "outOfPlaceUs": 36.38, + "inPlaceUs": 59.72, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 37.53, + "algBandwidthGbps": 0.22, + "busBandwidthGbps": 0.19, + "outOfPlaceUs": 37.53, + "inPlaceUs": 60.04, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 39.27, + "algBandwidthGbps": 0.42, + "busBandwidthGbps": 0.37, + "outOfPlaceUs": 39.27, + "inPlaceUs": 61.59, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 38.5, + "algBandwidthGbps": 0.85, + "busBandwidthGbps": 0.74, + "outOfPlaceUs": 38.5, + "inPlaceUs": 59.82, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 39.3, + "algBandwidthGbps": 1.67, + "busBandwidthGbps": 1.46, + "outOfPlaceUs": 39.3, + "inPlaceUs": 62.97, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 38.67, + "algBandwidthGbps": 3.39, + "busBandwidthGbps": 2.97, + "outOfPlaceUs": 38.67, + "inPlaceUs": 63.94, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 42.91, + "algBandwidthGbps": 6.11, + "busBandwidthGbps": 5.35, + 
"outOfPlaceUs": 42.91, + "inPlaceUs": 69.58, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 43.19, + "algBandwidthGbps": 12.14, + "busBandwidthGbps": 10.62, + "outOfPlaceUs": 43.19, + "inPlaceUs": 70.94, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 42.16, + "algBandwidthGbps": 24.87, + "busBandwidthGbps": 21.76, + "outOfPlaceUs": 42.16, + "inPlaceUs": 61.47, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 44.66, + "algBandwidthGbps": 46.96, + "busBandwidthGbps": 41.09, + "outOfPlaceUs": 44.66, + "inPlaceUs": 66.26, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 43.98, + "algBandwidthGbps": 95.37, + "busBandwidthGbps": 83.45, + "outOfPlaceUs": 43.98, + "inPlaceUs": 69.83, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 47.52, + "algBandwidthGbps": 176.53, + "busBandwidthGbps": 154.46, + "outOfPlaceUs": 47.52, + "inPlaceUs": 70.52, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 75.3, + "algBandwidthGbps": 222.8, + "busBandwidthGbps": 194.95, + "outOfPlaceUs": 75.3, + "inPlaceUs": 81.65, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 134.6, + "algBandwidthGbps": 249.36, + "busBandwidthGbps": 218.19, + "outOfPlaceUs": 134.6, + "inPlaceUs": 141.8, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 239.9, + "algBandwidthGbps": 279.71, + "busBandwidthGbps": 244.74, + "outOfPlaceUs": 240.3, + "inPlaceUs": 239.9, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 439, + "algBandwidthGbps": 305.74, + "busBandwidthGbps": 267.52, + "outOfPlaceUs": 439.2, + "inPlaceUs": 439, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 831.4, + "algBandwidthGbps": 322.87, + "busBandwidthGbps": 282.51, + 
"outOfPlaceUs": 839.8, + "inPlaceUs": 831.4, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1611.9, + "algBandwidthGbps": 333.06, + "busBandwidthGbps": 291.43, + "outOfPlaceUs": 1634.5, + "inPlaceUs": 1611.9, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 3186.5, + "algBandwidthGbps": 336.96, + "busBandwidthGbps": 294.84, + "outOfPlaceUs": 3233.7, + "inPlaceUs": 3186.5, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 6325.1, + "algBandwidthGbps": 339.52, + "busBandwidthGbps": 297.08, + "outOfPlaceUs": 6431.6, + "inPlaceUs": 6325.1, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 12584, + "algBandwidthGbps": 341.3, + "busBandwidthGbps": 298.64, + "outOfPlaceUs": 12788, + "inPlaceUs": 12584, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 25060, + "algBandwidthGbps": 342.78, + "busBandwidthGbps": 299.93, + "outOfPlaceUs": 25481, + "inPlaceUs": 25060, + "correct": true + } + ] + }, + { + "id": "cxn-35eb6655", + "identity": "nccl|mi355x|alltoall|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "alltoall", + "sku": "mi355x", + "runner": "mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "67a9b0532a278ee9", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 108.473, + "status": "valid", + "valid": true, + "colorKey": "mi355x_35eb6655", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T21:07:15.764921+00:00", + "run": { + "id": "28620413991", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28620413991", + "createdAt": "2026-07-02T20:44:56Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 36.82, + 
"algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 36.82, + "inPlaceUs": 76.94, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 51.09, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0, + "outOfPlaceUs": 51.09, + "inPlaceUs": 75.54, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 51.05, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 51.05, + "inPlaceUs": 87.12, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 42.41, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 42.41, + "inPlaceUs": 75.09, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 41.3, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 41.3, + "inPlaceUs": 85.55, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 42.58, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.08, + "outOfPlaceUs": 42.58, + "inPlaceUs": 74.89, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 52.43, + "algBandwidthGbps": 0.16, + "busBandwidthGbps": 0.14, + "outOfPlaceUs": 52.43, + "inPlaceUs": 75.41, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 53.44, + "algBandwidthGbps": 0.31, + "busBandwidthGbps": 0.27, + "outOfPlaceUs": 53.44, + "inPlaceUs": 85.2, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 53.88, + "algBandwidthGbps": 0.61, + "busBandwidthGbps": 0.53, + "outOfPlaceUs": 53.88, + "inPlaceUs": 74.32, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 52.97, + "algBandwidthGbps": 1.24, + "busBandwidthGbps": 1.08, + "outOfPlaceUs": 52.97, + "inPlaceUs": 86.39, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 52.92, + "algBandwidthGbps": 2.48, + "busBandwidthGbps": 2.17, + "outOfPlaceUs": 52.92, 
+ "inPlaceUs": 79.58, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 66.13, + "algBandwidthGbps": 3.96, + "busBandwidthGbps": 3.47, + "outOfPlaceUs": 66.13, + "inPlaceUs": 84.91, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 65.81, + "algBandwidthGbps": 7.97, + "busBandwidthGbps": 6.97, + "outOfPlaceUs": 65.81, + "inPlaceUs": 94.84, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 58.1, + "algBandwidthGbps": 18.05, + "busBandwidthGbps": 15.79, + "outOfPlaceUs": 58.1, + "inPlaceUs": 77.07, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 67.74, + "algBandwidthGbps": 30.96, + "busBandwidthGbps": 27.09, + "outOfPlaceUs": 67.74, + "inPlaceUs": 90.11, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 67.49, + "algBandwidthGbps": 62.14, + "busBandwidthGbps": 54.38, + "outOfPlaceUs": 67.49, + "inPlaceUs": 84.41, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 69.93, + "algBandwidthGbps": 119.96, + "busBandwidthGbps": 104.96, + "outOfPlaceUs": 69.93, + "inPlaceUs": 85.15, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 72.54, + "algBandwidthGbps": 231.27, + "busBandwidthGbps": 202.36, + "outOfPlaceUs": 72.54, + "inPlaceUs": 96.99, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 104.7, + "algBandwidthGbps": 320.47, + "busBandwidthGbps": 280.41, + "outOfPlaceUs": 104.7, + "inPlaceUs": 108.7, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 196.7, + "algBandwidthGbps": 341.23, + "busBandwidthGbps": 298.58, + "outOfPlaceUs": 196.7, + "inPlaceUs": 196.7, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 365.8, + "algBandwidthGbps": 366.89, + "busBandwidthGbps": 321.03, + "outOfPlaceUs": 365.8, + 
"inPlaceUs": 366.2, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 697.5, + "algBandwidthGbps": 384.86, + "busBandwidthGbps": 336.75, + "outOfPlaceUs": 698.3, + "inPlaceUs": 697.5, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1357.7, + "algBandwidthGbps": 395.43, + "busBandwidthGbps": 346, + "outOfPlaceUs": 1357.7, + "inPlaceUs": 1358.4, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2675.5, + "algBandwidthGbps": 401.32, + "busBandwidthGbps": 351.16, + "outOfPlaceUs": 2675.5, + "inPlaceUs": 2690.1, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 5295.3, + "algBandwidthGbps": 405.55, + "busBandwidthGbps": 354.85, + "outOfPlaceUs": 5299.7, + "inPlaceUs": 5295.3, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 10549, + "algBandwidthGbps": 407.13, + "busBandwidthGbps": 356.24, + "outOfPlaceUs": 10549, + "inPlaceUs": 10681, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 21029, + "algBandwidthGbps": 408.48, + "busBandwidthGbps": 357.42, + "outOfPlaceUs": 21029, + "inPlaceUs": 21322, + "correct": true + } + ] + }, + { + "id": "cxn-2e3a9193", + "identity": "nccl|mi325x|reduce_scatter|mi325x-xgmi|xgmi|8|nccl-tests-v1", + "op": "reduce_scatter", + "sku": "mi325x", + "runner": "mi325x-amds_04", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "8cdb35dd0e30e856", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 97.7791, + "status": "valid", + "valid": true, + "colorKey": "mi325x_2e3a9193", + "label": "MI325X · mi325x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T16:48:45.172485+00:00", + "run": { + "id": "28606364747", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606364747", + "createdAt": "2026-07-02T16:40:32Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 48.6, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 48.6, + "inPlaceUs": 44.27, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 43.33, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 43.33, + "inPlaceUs": 44.93, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 42.93, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 42.93, + "inPlaceUs": 45.2, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 42.43, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 42.43, + "inPlaceUs": 43.04, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 43.11, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 43.11, + "inPlaceUs": 42.96, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 42.94, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.08, + "outOfPlaceUs": 42.94, + "inPlaceUs": 42.59, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 43.37, + "algBandwidthGbps": 0.19, + "busBandwidthGbps": 0.17, + "outOfPlaceUs": 44.61, + "inPlaceUs": 43.37, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 43.55, + "algBandwidthGbps": 0.38, + "busBandwidthGbps": 0.33, + "outOfPlaceUs": 45.34, + "inPlaceUs": 43.55, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 44.71, + "algBandwidthGbps": 0.73, + "busBandwidthGbps": 0.64, + "outOfPlaceUs": 45.98, + "inPlaceUs": 44.71, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 45.48, + "algBandwidthGbps": 
1.44, + "busBandwidthGbps": 1.26, + "outOfPlaceUs": 47.34, + "inPlaceUs": 45.48, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 48.95, + "algBandwidthGbps": 2.68, + "busBandwidthGbps": 2.34, + "outOfPlaceUs": 49.91, + "inPlaceUs": 48.95, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 53.91, + "algBandwidthGbps": 4.86, + "busBandwidthGbps": 4.26, + "outOfPlaceUs": 53.96, + "inPlaceUs": 53.91, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 53.43, + "algBandwidthGbps": 9.81, + "busBandwidthGbps": 8.59, + "outOfPlaceUs": 53.43, + "inPlaceUs": 53.82, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 57.69, + "algBandwidthGbps": 18.18, + "busBandwidthGbps": 15.9, + "outOfPlaceUs": 57.69, + "inPlaceUs": 59.15, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 60.73, + "algBandwidthGbps": 34.53, + "busBandwidthGbps": 30.22, + "outOfPlaceUs": 60.73, + "inPlaceUs": 63.09, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 58.17, + "algBandwidthGbps": 72.11, + "busBandwidthGbps": 63.1, + "outOfPlaceUs": 58.17, + "inPlaceUs": 60.45, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 63.1, + "algBandwidthGbps": 132.94, + "busBandwidthGbps": 116.33, + "outOfPlaceUs": 63.1, + "inPlaceUs": 63.56, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 82.93, + "algBandwidthGbps": 202.31, + "busBandwidthGbps": 177.02, + "outOfPlaceUs": 82.93, + "inPlaceUs": 84.01, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 129, + "algBandwidthGbps": 260.13, + "busBandwidthGbps": 227.61, + "outOfPlaceUs": 129, + "inPlaceUs": 130.6, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 223, + "algBandwidthGbps": 300.96, + "busBandwidthGbps": 
263.34, + "outOfPlaceUs": 223, + "inPlaceUs": 223.2, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 409.6, + "algBandwidthGbps": 327.68, + "busBandwidthGbps": 286.72, + "outOfPlaceUs": 409.6, + "inPlaceUs": 412.5, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 782.4, + "algBandwidthGbps": 343.11, + "busBandwidthGbps": 300.22, + "outOfPlaceUs": 782.4, + "inPlaceUs": 787.2, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1529.4, + "algBandwidthGbps": 351.03, + "busBandwidthGbps": 307.15, + "outOfPlaceUs": 1529.4, + "inPlaceUs": 1537.6, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 3053.5, + "algBandwidthGbps": 351.64, + "busBandwidthGbps": 307.68, + "outOfPlaceUs": 3053.5, + "inPlaceUs": 3069.9, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 6018.4, + "algBandwidthGbps": 356.82, + "busBandwidthGbps": 312.21, + "outOfPlaceUs": 6018.4, + "inPlaceUs": 6037.8, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 12158, + "algBandwidthGbps": 353.28, + "busBandwidthGbps": 309.12, + "outOfPlaceUs": 12195, + "inPlaceUs": 12158, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 24511, + "algBandwidthGbps": 350.46, + "busBandwidthGbps": 306.65, + "outOfPlaceUs": 24549, + "inPlaceUs": 24511, + "correct": true + } + ] + }, + { + "id": "cxn-45c3fe06", + "identity": "nccl|mi325x|reduce_scatter|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "reduce_scatter", + "sku": "mi325x", + "runner": "mi325x-amds_05", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + "comparisonClass": "standardized", + "comparisonKey": "fd5d1a361a3ebfa3", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 97.7756, + "status": "valid", + "valid": true, + 
"colorKey": "mi325x_45c3fe06", + "label": "MI325X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T15:24:44.415286+00:00", + "run": { + "id": "28601042764", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601042764", + "createdAt": "2026-07-02T15:16:13Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 47.05, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 47.05, + "inPlaceUs": 44.37, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 42.59, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 42.59, + "inPlaceUs": 45.02, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 42.29, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 42.29, + "inPlaceUs": 44.23, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 42.46, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 42.46, + "inPlaceUs": 42.99, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 42.77, + "algBandwidthGbps": 0.05, + "busBandwidthGbps": 0.04, + "outOfPlaceUs": 42.77, + "inPlaceUs": 42.93, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 43.06, + "algBandwidthGbps": 0.1, + "busBandwidthGbps": 0.08, + "outOfPlaceUs": 43.06, + "inPlaceUs": 42.74, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 43.19, + "algBandwidthGbps": 0.19, + "busBandwidthGbps": 0.17, + "outOfPlaceUs": 44.97, + "inPlaceUs": 43.19, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 43.51, + "algBandwidthGbps": 0.38, + "busBandwidthGbps": 0.33, + "outOfPlaceUs": 44.47, + "inPlaceUs": 43.51, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 43.77, + "algBandwidthGbps": 0.75, + 
"busBandwidthGbps": 0.65, + "outOfPlaceUs": 45.58, + "inPlaceUs": 43.77, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 45.31, + "algBandwidthGbps": 1.45, + "busBandwidthGbps": 1.27, + "outOfPlaceUs": 46.8, + "inPlaceUs": 45.31, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 48.43, + "algBandwidthGbps": 2.71, + "busBandwidthGbps": 2.37, + "outOfPlaceUs": 49.77, + "inPlaceUs": 48.43, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 53.49, + "algBandwidthGbps": 4.9, + "busBandwidthGbps": 4.29, + "outOfPlaceUs": 53.49, + "inPlaceUs": 53.73, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 53.28, + "algBandwidthGbps": 9.84, + "busBandwidthGbps": 8.61, + "outOfPlaceUs": 53.28, + "inPlaceUs": 53.8, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 58.94, + "algBandwidthGbps": 17.79, + "busBandwidthGbps": 15.57, + "outOfPlaceUs": 58.94, + "inPlaceUs": 60.52, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 61.87, + "algBandwidthGbps": 33.9, + "busBandwidthGbps": 29.66, + "outOfPlaceUs": 61.87, + "inPlaceUs": 63.63, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 58.39, + "algBandwidthGbps": 71.83, + "busBandwidthGbps": 62.85, + "outOfPlaceUs": 58.39, + "inPlaceUs": 59.85, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 63.19, + "algBandwidthGbps": 132.75, + "busBandwidthGbps": 116.16, + "outOfPlaceUs": 63.19, + "inPlaceUs": 64.95, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 82.58, + "algBandwidthGbps": 203.17, + "busBandwidthGbps": 177.77, + "outOfPlaceUs": 82.58, + "inPlaceUs": 83.59, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 129.3, + "algBandwidthGbps": 259.54, + "busBandwidthGbps": 227.1, + 
"outOfPlaceUs": 129.3, + "inPlaceUs": 130.2, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 223.1, + "algBandwidthGbps": 300.74, + "busBandwidthGbps": 263.15, + "outOfPlaceUs": 223.1, + "inPlaceUs": 223.4, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 410.7, + "algBandwidthGbps": 326.81, + "busBandwidthGbps": 285.96, + "outOfPlaceUs": 410.7, + "inPlaceUs": 412.2, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 783.6, + "algBandwidthGbps": 342.55, + "busBandwidthGbps": 299.73, + "outOfPlaceUs": 783.9, + "inPlaceUs": 783.6, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1529.6, + "algBandwidthGbps": 350.99, + "busBandwidthGbps": 307.12, + "outOfPlaceUs": 1529.6, + "inPlaceUs": 1530.9, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 3053.9, + "algBandwidthGbps": 351.6, + "busBandwidthGbps": 307.65, + "outOfPlaceUs": 3053.9, + "inPlaceUs": 3056.8, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 6021.3, + "algBandwidthGbps": 356.65, + "busBandwidthGbps": 312.06, + "outOfPlaceUs": 6046.5, + "inPlaceUs": 6021.3, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 12174, + "algBandwidthGbps": 352.8, + "busBandwidthGbps": 308.7, + "outOfPlaceUs": 12179, + "inPlaceUs": 12174, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 24538, + "algBandwidthGbps": 350.07, + "busBandwidthGbps": 306.31, + "outOfPlaceUs": 24539, + "inPlaceUs": 24538, + "correct": true + } + ] + }, + { + "id": "cxn-9383336f", + "identity": "nccl|mi355x|reduce_scatter|mi355x-xgmi|xgmi|8|nccl-tests-v1", + "op": "reduce_scatter", + "sku": "mi355x", + "runner": "mi355x-amds_01", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "worldSize": 8, + "nodes": 1, + "dtype": "float", + 
"comparisonClass": "standardized", + "comparisonKey": "fd5d1a361a3ebfa3", + "measurementContract": "nccl-tests-v1", + "avgBusBandwidthGbps": 116.267, + "status": "valid", + "valid": true, + "colorKey": "mi355x_9383336f", + "label": "MI355X · mi355x-xgmi · xgmi (ws8)", + "generatedAt": "2026-07-02T21:05:02.299146+00:00", + "run": { + "id": "28620413991", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28620413991", + "createdAt": "2026-07-02T20:44:56Z", + "sha": "b21a7206b5bc58aee42f8dddc71e5fe3d9197779" + }, + "rows": [ + { + "sizeBytes": 128, + "dtype": "float", + "latencyUs": 61.56, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 61.56, + "inPlaceUs": 66.2, + "correct": true + }, + { + "sizeBytes": 256, + "dtype": "float", + "latencyUs": 58.57, + "algBandwidthGbps": 0, + "busBandwidthGbps": 0, + "outOfPlaceUs": 58.57, + "inPlaceUs": 68.45, + "correct": true + }, + { + "sizeBytes": 512, + "dtype": "float", + "latencyUs": 58.33, + "algBandwidthGbps": 0.01, + "busBandwidthGbps": 0.01, + "outOfPlaceUs": 58.33, + "inPlaceUs": 69.02, + "correct": true + }, + { + "sizeBytes": 1024, + "dtype": "float", + "latencyUs": 57.95, + "algBandwidthGbps": 0.02, + "busBandwidthGbps": 0.02, + "outOfPlaceUs": 57.95, + "inPlaceUs": 58.23, + "correct": true + }, + { + "sizeBytes": 2048, + "dtype": "float", + "latencyUs": 58.53, + "algBandwidthGbps": 0.03, + "busBandwidthGbps": 0.03, + "outOfPlaceUs": 58.53, + "inPlaceUs": 59.41, + "correct": true + }, + { + "sizeBytes": 4096, + "dtype": "float", + "latencyUs": 58.74, + "algBandwidthGbps": 0.07, + "busBandwidthGbps": 0.06, + "outOfPlaceUs": 58.74, + "inPlaceUs": 58.47, + "correct": true + }, + { + "sizeBytes": 8192, + "dtype": "float", + "latencyUs": 59.51, + "algBandwidthGbps": 0.14, + "busBandwidthGbps": 0.12, + "outOfPlaceUs": 68.27, + "inPlaceUs": 59.51, + "correct": true + }, + { + "sizeBytes": 16384, + "dtype": "float", + "latencyUs": 58.72, + "algBandwidthGbps": 0.28, + "busBandwidthGbps": 
0.24, + "outOfPlaceUs": 68.26, + "inPlaceUs": 58.72, + "correct": true + }, + { + "sizeBytes": 32768, + "dtype": "float", + "latencyUs": 59.72, + "algBandwidthGbps": 0.55, + "busBandwidthGbps": 0.48, + "outOfPlaceUs": 69.54, + "inPlaceUs": 59.72, + "correct": true + }, + { + "sizeBytes": 65536, + "dtype": "float", + "latencyUs": 61.25, + "algBandwidthGbps": 1.07, + "busBandwidthGbps": 0.94, + "outOfPlaceUs": 69.57, + "inPlaceUs": 61.25, + "correct": true + }, + { + "sizeBytes": 131072, + "dtype": "float", + "latencyUs": 64.33, + "algBandwidthGbps": 2.04, + "busBandwidthGbps": 1.78, + "outOfPlaceUs": 74.19, + "inPlaceUs": 64.33, + "correct": true + }, + { + "sizeBytes": 262144, + "dtype": "float", + "latencyUs": 70.18, + "algBandwidthGbps": 3.74, + "busBandwidthGbps": 3.27, + "outOfPlaceUs": 70.18, + "inPlaceUs": 70.05, + "correct": true + }, + { + "sizeBytes": 524288, + "dtype": "float", + "latencyUs": 69.52, + "algBandwidthGbps": 7.54, + "busBandwidthGbps": 6.6, + "outOfPlaceUs": 70.19, + "inPlaceUs": 69.52, + "correct": true + }, + { + "sizeBytes": 1048576, + "dtype": "float", + "latencyUs": 74.84, + "algBandwidthGbps": 14.01, + "busBandwidthGbps": 12.26, + "outOfPlaceUs": 74.84, + "inPlaceUs": 85.09, + "correct": true + }, + { + "sizeBytes": 2097152, + "dtype": "float", + "latencyUs": 77.8, + "algBandwidthGbps": 26.96, + "busBandwidthGbps": 23.59, + "outOfPlaceUs": 77.8, + "inPlaceUs": 87.66, + "correct": true + }, + { + "sizeBytes": 4194304, + "dtype": "float", + "latencyUs": 76.81, + "algBandwidthGbps": 54.61, + "busBandwidthGbps": 47.78, + "outOfPlaceUs": 76.81, + "inPlaceUs": 87.45, + "correct": true + }, + { + "sizeBytes": 8388608, + "dtype": "float", + "latencyUs": 80.26, + "algBandwidthGbps": 104.51, + "busBandwidthGbps": 91.45, + "outOfPlaceUs": 80.26, + "inPlaceUs": 91.58, + "correct": true + }, + { + "sizeBytes": 16777216, + "dtype": "float", + "latencyUs": 84.58, + "algBandwidthGbps": 198.36, + "busBandwidthGbps": 173.57, + "outOfPlaceUs": 84.58, + 
"inPlaceUs": 90.12, + "correct": true + }, + { + "sizeBytes": 33554432, + "dtype": "float", + "latencyUs": 115, + "algBandwidthGbps": 291.69, + "busBandwidthGbps": 255.22, + "outOfPlaceUs": 117.5, + "inPlaceUs": 115, + "correct": true + }, + { + "sizeBytes": 67108864, + "dtype": "float", + "latencyUs": 186.5, + "algBandwidthGbps": 359.89, + "busBandwidthGbps": 314.9, + "outOfPlaceUs": 192.3, + "inPlaceUs": 186.5, + "correct": true + }, + { + "sizeBytes": 134217728, + "dtype": "float", + "latencyUs": 323.2, + "algBandwidthGbps": 415.3, + "busBandwidthGbps": 363.39, + "outOfPlaceUs": 343, + "inPlaceUs": 323.2, + "correct": true + }, + { + "sizeBytes": 268435456, + "dtype": "float", + "latencyUs": 602.6, + "algBandwidthGbps": 445.44, + "busBandwidthGbps": 389.76, + "outOfPlaceUs": 659, + "inPlaceUs": 602.6, + "correct": true + }, + { + "sizeBytes": 536870912, + "dtype": "float", + "latencyUs": 1174, + "algBandwidthGbps": 457.3, + "busBandwidthGbps": 400.14, + "outOfPlaceUs": 1260.3, + "inPlaceUs": 1174, + "correct": true + }, + { + "sizeBytes": 1073741824, + "dtype": "float", + "latencyUs": 2313.2, + "algBandwidthGbps": 464.18, + "busBandwidthGbps": 406.16, + "outOfPlaceUs": 2484, + "inPlaceUs": 2313.2, + "correct": true + }, + { + "sizeBytes": 2147483648, + "dtype": "float", + "latencyUs": 4567.5, + "algBandwidthGbps": 470.17, + "busBandwidthGbps": 411.4, + "outOfPlaceUs": 4853.7, + "inPlaceUs": 4567.5, + "correct": true + }, + { + "sizeBytes": 4294967296, + "dtype": "float", + "latencyUs": 9235.7, + "algBandwidthGbps": 465.04, + "busBandwidthGbps": 406.91, + "outOfPlaceUs": 9618.4, + "inPlaceUs": 9235.7, + "correct": true + }, + { + "sizeBytes": 8589934592, + "dtype": "float", + "latencyUs": 19111, + "algBandwidthGbps": 449.47, + "busBandwidthGbps": 393.29, + "outOfPlaceUs": 19140, + "inPlaceUs": 19111, + "correct": true + } + ] + } + ], + "offload": [ + { + "id": "cxt-d1096a8a", + "identity": "offload|mi325x|mi325x-xgmi|xgmi|d2h|pageable|us", + "cohortIdentity": 
"offload|mi325x|mi325x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "d2h", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 57.271, + "latencyUnit": "us", + "colorKey": "mi325x_d1096a8a", + "label": "MI325X · d2h · pageable", + "generatedAt": "2026-07-02T16:44:49.417774+00:00", + "run": { + "id": "28606360825", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606360825", + "createdAt": "2026-07-02T16:40:28Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.293, + "latency": 13.9577, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.126, + "latency": 14.5517, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.61, + "latency": 14.2157, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 10.78, + "latency": 24.3174, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 14.561, + "latency": 72.0125, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 47.697, + "latency": 87.9362, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.223, + "latency": 309.4095, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 55.671, + "latency": 1205.4586, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 55.799, + "latency": 4810.7525, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-99afb7ef", + "identity": "offload|mi325x|mi355x-xgmi|xgmi|d2h|pageable|us", + "cohortIdentity": "offload|mi325x|mi355x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": 
"mi355x-xgmi", + "transport": "xgmi", + "operation": "d2h", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 57.172, + "latencyUnit": "us", + "colorKey": "mi325x_99afb7ef", + "label": "MI325X · d2h · pageable", + "generatedAt": "2026-07-02T15:35:14.168488+00:00", + "run": { + "id": "28601862129", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601862129", + "createdAt": "2026-07-02T15:28:44Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.29, + "latency": 14.1136, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.117, + "latency": 14.6656, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.645, + "latency": 14.1077, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 10.741, + "latency": 24.4053, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 14.583, + "latency": 71.9021, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 47.758, + "latency": 87.8237, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.255, + "latency": 309.2299, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 55.693, + "latency": 1204.9806, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 55.834, + "latency": 4807.7297, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-7fb2f34d", + "identity": "offload|mi325x|mi325x-xgmi|xgmi|d2h|pinned|us", + "cohortIdentity": "offload|mi325x|mi325x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "d2h", + "subtype": "pinned", + "valid": true, + 
"status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 57.271, + "latencyUnit": "us", + "colorKey": "mi325x_7fb2f34d", + "label": "MI325X · d2h · pinned", + "generatedAt": "2026-07-02T16:44:49.417774+00:00", + "run": { + "id": "28606360825", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606360825", + "createdAt": "2026-07-02T16:40:28Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.061, + "latency": 3.8599, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.197, + "latency": 3.9039, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 6.572, + "latency": 9.9718, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 19.413, + "latency": 13.5037, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 38.343, + "latency": 27.3474, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 49.838, + "latency": 84.1582, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.891, + "latency": 305.6474, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 56.35, + "latency": 1190.9344, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 56.725, + "latency": 4732.2521, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-c3e4c62c", + "identity": "offload|mi325x|mi355x-xgmi|xgmi|d2h|pinned|us", + "cohortIdentity": "offload|mi325x|mi355x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "d2h", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 
57.172, + "latencyUnit": "us", + "colorKey": "mi325x_c3e4c62c", + "label": "MI325X · d2h · pinned", + "generatedAt": "2026-07-02T15:35:14.168488+00:00", + "run": { + "id": "28601862129", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601862129", + "createdAt": "2026-07-02T15:28:44Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.026, + "latency": 3.9939, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 3.91, + "latency": 4.1899, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 6.58, + "latency": 9.9597, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 19.174, + "latency": 13.6716, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 38.515, + "latency": 27.2253, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 49.89, + "latency": 84.0718, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.9, + "latency": 305.596, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 56.352, + "latency": 1190.8907, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 56.734, + "latency": 4731.4968, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-a03107ca", + "identity": "offload|mi325x|mi325x-xgmi|xgmi|h2d|pageable|us", + "cohortIdentity": "offload|mi325x|mi325x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "h2d", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 57.271, + "latencyUnit": "us", + "colorKey": "mi325x_a03107ca", + "label": "MI325X · h2d · pageable", + 
"generatedAt": "2026-07-02T16:44:49.417774+00:00", + "run": { + "id": "28606360825", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606360825", + "createdAt": "2026-07-02T16:40:28Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.297, + "latency": 13.8037, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.186, + "latency": 13.8116, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.499, + "latency": 14.5676, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 11.123, + "latency": 23.5675, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 17.418, + "latency": 60.2006, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 47.857, + "latency": 87.6421, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.655, + "latency": 306.9633, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 56.397, + "latency": 1189.928, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 56.555, + "latency": 4746.4439, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-482a9557", + "identity": "offload|mi325x|mi355x-xgmi|xgmi|h2d|pageable|us", + "cohortIdentity": "offload|mi325x|mi355x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "h2d", + "subtype": "pageable", + "valid": true, + "status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 57.172, + "latencyUnit": "us", + "colorKey": "mi325x_482a9557", + "label": "MI325X · h2d · pageable", + "generatedAt": "2026-07-02T15:35:14.168488+00:00", + "run": { + "id": "28601862129", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601862129", + "createdAt": "2026-07-02T15:28:44Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 0.298, + "latency": 13.7496, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 1.163, + "latency": 14.0856, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 4.488, + "latency": 14.6036, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 10.431, + "latency": 25.1313, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 17.667, + "latency": 59.3525, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 47.924, + "latency": 87.5197, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.92, + "latency": 305.4838, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 56.492, + "latency": 1187.9442, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 56.613, + "latency": 4741.608, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-2813428d", + "identity": "offload|mi325x|mi325x-xgmi|xgmi|h2d|pinned|us", + "cohortIdentity": "offload|mi325x|mi325x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "h2d", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 57.271, + "latencyUnit": "us", + "colorKey": "mi325x_2813428d", + "label": "MI325X · h2d · pinned", + "generatedAt": "2026-07-02T16:44:49.417774+00:00", + "run": { + "id": "28606360825", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606360825", + "createdAt": 
"2026-07-02T16:40:28Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 4096, + "bandwidthGbps": 1.002, + "latency": 4.0859, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 4.303, + "latency": 3.8079, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 5.361, + "latency": 12.2257, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 16.042, + "latency": 16.3416, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 31.136, + "latency": 33.6773, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 47.803, + "latency": 87.7419, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 54.56, + "latency": 307.5028, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 56.677, + "latency": 1184.0636, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.271, + "latency": 4687.1136, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-af53bff4", + "identity": "offload|mi325x|mi355x-xgmi|xgmi|h2d|pinned|us", + "cohortIdentity": "offload|mi325x|mi355x-xgmi|xgmi", + "family": "offload", + "sku": "mi325x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "h2d", + "subtype": "pinned", + "valid": true, + "status": "valid", + "note": "peak 57 GB/s · copy/compute overlap 0% · 2 NUMA node(s)", + "peakBandwidthGbps": 57.172, + "latencyUnit": "us", + "colorKey": "mi325x_af53bff4", + "label": "MI325X · h2d · pinned", + "generatedAt": "2026-07-02T15:35:14.168488+00:00", + "run": { + "id": "28601862129", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601862129", + "createdAt": "2026-07-02T15:28:44Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + 
"sizeBytes": 4096, + "bandwidthGbps": 0.944, + "latency": 4.3398, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16384, + "bandwidthGbps": 3.958, + "latency": 4.1399, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 65536, + "bandwidthGbps": 5.372, + "latency": 12.1996, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 16.006, + "latency": 16.3776, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 31.094, + "latency": 33.7231, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 47.23, + "latency": 88.8057, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 55.387, + "latency": 302.9079, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 56.887, + "latency": 1179.689, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 57.172, + "latency": 4695.2663, + "sizeClass": null, + "correct": null + } + ] + } + ], + "copyEngine": [ + { + "id": "cxt-980f8ffd", + "identity": "copy-engine|mi325x|mi325x-xgmi|xgmi|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|mi325x|mi325x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 27587 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27587.3, + "latencyUnit": "us", + "colorKey": "mi325x_980f8ffd", + "label": "MI325X · dtod · copy-engine", + "generatedAt": "2026-07-02T16:43:33.750537+00:00", + "run": { + "id": "28606352980", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606352980", + "createdAt": "2026-07-02T16:40:21Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 6.261, + "latency": 
10.4677, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 24.418, + "latency": 10.7357, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 98.442, + "latency": 10.6517, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 387.177, + "latency": 10.833, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1572.122, + "latency": 10.6717, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6257.237, + "latency": 10.725, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 26304.221, + "latency": 10.205, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-58ef9054", + "identity": "copy-engine|mi325x|mi355x-xgmi|xgmi|dtod|copy-engine|us", + "cohortIdentity": "copy-engine|mi325x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi325x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "dtod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 28068 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 28067.867, + "latencyUnit": "us", + "colorKey": "mi325x_58ef9054", + "label": "MI325X · dtod · copy-engine", + "generatedAt": "2026-07-02T15:34:02.287523+00:00", + "run": { + "id": "28601849075", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601849075", + "createdAt": "2026-07-02T15:28:32Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 6.445, + "latency": 10.1678, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 25.799, + "latency": 10.1611, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 99.186, + "latency": 10.5718, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 
4194304, + "bandwidthGbps": 404.969, + "latency": 10.3571, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1620.919, + "latency": 10.3504, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6534.18, + "latency": 10.2704, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 26617.035, + "latency": 10.0851, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-816d3fa9", + "identity": "copy-engine|mi325x|mi325x-xgmi|xgmi|dtod|sm|us", + "cohortIdentity": "copy-engine|mi325x|mi325x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 27587 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27587.3, + "latencyUnit": "us", + "colorKey": "mi325x_816d3fa9", + "label": "MI325X · dtod · sm", + "generatedAt": "2026-07-02T16:43:33.750537+00:00", + "run": { + "id": "28606352980", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606352980", + "createdAt": "2026-07-02T16:40:21Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 6.141, + "latency": 10.6717, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 23.723, + "latency": 11.0504, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 93.537, + "latency": 11.2103, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 387.99, + "latency": 10.8103, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1512.96, + "latency": 11.089, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6175.075, + "latency": 10.8677, + "sizeClass": null, + "correct": 
null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 24537.734, + "latency": 10.9397, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-1498e19a", + "identity": "copy-engine|mi325x|mi355x-xgmi|xgmi|dtod|sm|us", + "cohortIdentity": "copy-engine|mi325x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi325x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "dtod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 28068 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 28067.867, + "latencyUnit": "us", + "colorKey": "mi325x_1498e19a", + "label": "MI325X · dtod · sm", + "generatedAt": "2026-07-02T15:34:02.287523+00:00", + "run": { + "id": "28601849075", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601849075", + "createdAt": "2026-07-02T15:28:32Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 6.194, + "latency": 10.5811, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 24.012, + "latency": 10.9171, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 97.988, + "latency": 10.7011, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 391.755, + "latency": 10.7064, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1548.514, + "latency": 10.8344, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6091.355, + "latency": 11.0171, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 24703.18, + "latency": 10.8664, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-01062ca1", + "identity": "copy-engine|mi325x|mi325x-xgmi|xgmi|htod|copy-engine|us", + "cohortIdentity": "copy-engine|mi325x|mi325x-xgmi|xgmi", + "family": "copy-engine", + 
"sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "htod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 27587 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27587.3, + "latencyUnit": "us", + "colorKey": "mi325x_01062ca1", + "label": "MI325X · htod · copy-engine", + "generatedAt": "2026-07-02T16:43:33.750537+00:00", + "run": { + "id": "28606352980", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606352980", + "createdAt": "2026-07-02T16:40:21Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.432, + "latency": 8.8184, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 29.341, + "latency": 8.9344, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 122.539, + "latency": 8.5571, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 481.971, + "latency": 8.7024, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1915.55, + "latency": 8.7584, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 7531.521, + "latency": 8.9104, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 27587.3, + "latency": 9.7304, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-08e7f418", + "identity": "copy-engine|mi325x|mi355x-xgmi|xgmi|htod|copy-engine|us", + "cohortIdentity": "copy-engine|mi325x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi325x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "htod", + "subtype": "copy-engine", + "valid": true, + "status": "valid", + "note": "peak 28068 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 28067.867, + "latencyUnit": "us", + "colorKey": "mi325x_08e7f418", + 
"label": "MI325X · htod · copy-engine", + "generatedAt": "2026-07-02T15:34:02.287523+00:00", + "run": { + "id": "28601849075", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601849075", + "createdAt": "2026-07-02T15:28:32Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 7.388, + "latency": 8.8705, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 29.28, + "latency": 8.9531, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 122.481, + "latency": 8.5611, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 481.083, + "latency": 8.7185, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1889.369, + "latency": 8.8798, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 7618.071, + "latency": 8.8092, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 28067.867, + "latency": 9.5638, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-8f61c985", + "identity": "copy-engine|mi325x|mi325x-xgmi|xgmi|htod|sm|us", + "cohortIdentity": "copy-engine|mi325x|mi325x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 27587 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 27587.3, + "latencyUnit": "us", + "colorKey": "mi325x_8f61c985", + "label": "MI325X · htod · sm", + "generatedAt": "2026-07-02T16:43:33.750537+00:00", + "run": { + "id": "28606352980", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606352980", + "createdAt": "2026-07-02T16:40:21Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 65536, + 
"bandwidthGbps": 6.37, + "latency": 10.2877, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 24.297, + "latency": 10.789, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 98.85, + "latency": 10.6077, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 405.232, + "latency": 10.3504, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1606.234, + "latency": 10.4451, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6597.584, + "latency": 10.1717, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 26726.661, + "latency": 10.0437, + "sizeClass": null, + "correct": null + } + ] + }, + { + "id": "cxt-eab06156", + "identity": "copy-engine|mi325x|mi355x-xgmi|xgmi|htod|sm|us", + "cohortIdentity": "copy-engine|mi325x|mi355x-xgmi|xgmi", + "family": "copy-engine", + "sku": "mi325x", + "topologyClass": "mi355x-xgmi", + "transport": "xgmi", + "operation": "htod", + "subtype": "sm", + "valid": true, + "status": "valid", + "note": "peak 28068 GB/s · copy-engine uses near-zero SMs: no", + "peakBandwidthGbps": 28067.867, + "latencyUnit": "us", + "colorKey": "mi325x_eab06156", + "label": "MI325X · htod · sm", + "generatedAt": "2026-07-02T15:34:02.287523+00:00", + "run": { + "id": "28601849075", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28601849075", + "createdAt": "2026-07-02T15:28:32Z", + "sha": "2e55a47c26d43c2f47e54c826062f13916a847da" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 6.593, + "latency": 9.9398, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 25.213, + "latency": 10.3971, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 102.63, + "latency": 10.2171, + "sizeClass": null, + "correct": null + }, + { + 
"sizeBytes": 4194304, + "bandwidthGbps": 412.456, + "latency": 10.1691, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 1658.084, + "latency": 10.1184, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 6638.461, + "latency": 10.1091, + "sizeClass": null, + "correct": null + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 27354.715, + "latency": 9.8131, + "sizeClass": null, + "correct": null + } + ] + } + ], + "kvCache": [ + { + "id": "cxt-e248878d", + "identity": "kv-cache|mi325x|mi325x-xgmi|xgmi|dtod-remote|contiguous/mori-io|ms", + "cohortIdentity": "kv-cache|mi325x|xgmi", + "family": "kv-cache", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "dtod-remote", + "subtype": "contiguous/mori-io", + "valid": true, + "status": "valid", + "note": "wired: mori-io", + "peakBandwidthGbps": null, + "latencyUnit": "ms", + "colorKey": "mi325x_e248878d", + "label": "MI325X · dtod-remote · contiguous/mori-io", + "generatedAt": "2026-07-02T19:05:17.669169+00:00", + "run": { + "id": "28614653583", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28614653583", + "createdAt": "2026-07-02T19:00:54Z", + "sha": "d2522cc0ac83d776f9b1420a1d79ea697507c5fe" + }, + "rows": [ + { + "sizeBytes": 65536, + "bandwidthGbps": 3.15, + "latency": 0.02083, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 262144, + "bandwidthGbps": 11.3, + "latency": 0.02321, + "sizeClass": "decode", + "correct": true + }, + { + "sizeBytes": 1048576, + "bandwidthGbps": 24.29, + "latency": 0.04317, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 39.28, + "latency": 0.10679, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 45.87, + "latency": 0.36578, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 47.77, + 
"latency": 1.40485, + "sizeClass": "prefill", + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 48.23, + "latency": 5.56527, + "sizeClass": "prefill", + "correct": true + } + ] + } + ], + "rlMesh": [ + { + "id": "cxt-ec80f252", + "identity": "rl-mesh|mi325x|mi325x-xgmi|xgmi|generator_to_trainer|paired|ms", + "cohortIdentity": "rl-mesh|mi325x|xgmi", + "family": "rl-mesh", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "generator_to_trainer", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 49 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 48.78, + "latencyUnit": "ms", + "colorKey": "mi325x_ec80f252", + "label": "MI325X · gen->trn · paired", + "generatedAt": "2026-07-02T16:44:37.843965+00:00", + "run": { + "id": "28606356962", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606356962", + "createdAt": "2026-07-02T16:40:24Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 14.94, + "latency": 0.0702, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 34.96, + "latency": 0.11998, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 44.42, + "latency": 0.37768, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 47.58, + "latency": 1.4103, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 48.47, + "latency": 5.5384, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 48.68, + "latency": 22.05901, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-495592df", + "identity": "rl-mesh|mi325x|mi325x-xgmi|xgmi|generator_to_trainer|redistribute|ms", + "cohortIdentity": "rl-mesh|mi325x|xgmi", + "family": "rl-mesh", + "sku": "mi325x", + "topologyClass": 
"mi325x-xgmi", + "transport": "xgmi", + "operation": "generator_to_trainer", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 49 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 48.78, + "latencyUnit": "ms", + "colorKey": "mi325x_495592df", + "label": "MI325X · gen->trn · redistribute", + "generatedAt": "2026-07-02T16:44:37.843965+00:00", + "run": { + "id": "28606356962", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606356962", + "createdAt": "2026-07-02T16:40:24Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.11, + "latency": 9.55698, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 21.31, + "latency": 0.19681, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 36.43, + "latency": 0.46049, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 44.92, + "latency": 1.49384, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 47.51, + "latency": 5.65002, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 48.36, + "latency": 22.20277, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-4608506a", + "identity": "rl-mesh|mi325x|mi325x-xgmi|xgmi|trainer_to_generator|paired|ms", + "cohortIdentity": "rl-mesh|mi325x|xgmi", + "family": "rl-mesh", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "trainer_to_generator", + "subtype": "paired", + "valid": true, + "status": "valid", + "note": "peak 49 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 48.78, + "latencyUnit": "ms", + "colorKey": "mi325x_4608506a", + "label": "MI325X · trn->gen · paired", + "generatedAt": "2026-07-02T16:44:37.843965+00:00", + "run": { + "id": "28606356962", + "url": 
"https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606356962", + "createdAt": "2026-07-02T16:40:24Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 14.87, + "latency": 0.0705, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 35, + "latency": 0.11984, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 16777216, + "bandwidthGbps": 44.48, + "latency": 0.37721, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 47.65, + "latency": 1.40849, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 48.52, + "latency": 5.53277, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 48.73, + "latency": 22.03509, + "sizeClass": null, + "correct": true + } + ] + }, + { + "id": "cxt-ecea5367", + "identity": "rl-mesh|mi325x|mi325x-xgmi|xgmi|trainer_to_generator|redistribute|ms", + "cohortIdentity": "rl-mesh|mi325x|xgmi", + "family": "rl-mesh", + "sku": "mi325x", + "topologyClass": "mi325x-xgmi", + "transport": "xgmi", + "operation": "trainer_to_generator", + "subtype": "redistribute", + "valid": true, + "status": "valid", + "note": "peak 49 GB/s · world=8: trainer 4 <-> generator 4", + "peakBandwidthGbps": 48.78, + "latencyUnit": "ms", + "colorKey": "mi325x_ecea5367", + "label": "MI325X · trn->gen · redistribute", + "generatedAt": "2026-07-02T16:44:37.843965+00:00", + "run": { + "id": "28606356962", + "url": "https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28606356962", + "createdAt": "2026-07-02T16:40:24Z", + "sha": "6f71e44945ffc668e5c22545d6dec55d91af0670" + }, + "rows": [ + { + "sizeBytes": 1048576, + "bandwidthGbps": 0.01, + "latency": 89.74005, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 4194304, + "bandwidthGbps": 21.85, + "latency": 0.19198, + "sizeClass": null, + "correct": true + }, + { + 
"sizeBytes": 16777216, + "bandwidthGbps": 37.36, + "latency": 0.44912, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 67108864, + "bandwidthGbps": 45.44, + "latency": 1.47697, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 268435456, + "bandwidthGbps": 48.07, + "latency": 5.5837, + "sizeClass": null, + "correct": true + }, + { + "sizeBytes": 1073741824, + "bandwidthGbps": 48.78, + "latency": 22.01165, + "sizeClass": null, + "correct": true + } + ] + } + ], + "scannedRuns": 68, + "scannedArtifacts": 74, + "contributingRuns": 24, + "generatedAt": "2026-07-03T00:02:47.482Z" +} diff --git a/packages/app/scripts/generate-collectivex-data.ts b/packages/app/scripts/generate-collectivex-data.ts new file mode 100644 index 00000000..713a8b45 --- /dev/null +++ b/packages/app/scripts/generate-collectivex-data.ts @@ -0,0 +1,55 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { resolve } from 'node:path'; + +import { generateCollectiveXSnapshot } from '../src/lib/collectivex-snapshot'; + +import type { CollectiveXApiResponse } from '@/components/collectivex/types'; + +function argumentValue(name: string): string | undefined { + const index = process.argv.indexOf(name); + return index === -1 ? undefined : process.argv[index + 1]; +} + +async function main() { + const token = process.env.GITHUB_TOKEN || process.env.GH_TOKEN; + if (!token) throw new Error('GITHUB_TOKEN or GH_TOKEN is required'); + + const sourceRunId = + argumentValue('--source-run-id') || process.env.COLLECTIVEX_SOURCE_RUN_ID || undefined; + const outputPath = resolve(__dirname, '..', 'public', 'data', 'collectivex.json'); + const previous = await readFile(outputPath, 'utf8').catch(() => null); + + // Carry the committed snapshot forward by default: GitHub run discovery only sees runs + // that still exist, so a from-scratch rebuild silently drops every series whose source + // run was deleted or whose artifact expired. 
--rebuild (or COLLECTIVEX_REBUILD=1) is the + // explicit way to retire stale data. + const rebuild = process.argv.includes('--rebuild') || process.env.COLLECTIVEX_REBUILD === '1'; + let previousSnapshot: CollectiveXApiResponse | null = null; + if (!rebuild && previous) { + try { + previousSnapshot = JSON.parse(previous) as CollectiveXApiResponse; + } catch { + console.warn('Existing collectivex.json is unparseable; rebuilding from scratch.'); + } + } + + const snapshot = await generateCollectiveXSnapshot({ token, sourceRunId, previousSnapshot }); + const contents = `${JSON.stringify(snapshot, null, 2)}\n`; + + await mkdir(resolve(outputPath, '..'), { recursive: true }); + if (previous === contents) { + console.log(`CollectiveX snapshot is unchanged: ${outputPath}`); + return; + } + + await writeFile(outputPath, contents, 'utf8'); + const rowCount = snapshot.series.reduce((total, series) => total + series.rows.length, 0); + console.log( + `Wrote ${snapshot.series.length} CollectiveX series (${rowCount} rows, ${snapshot.failures.length} failures) from ${snapshot.contributingRuns} runs to ${outputPath}`, + ); +} + +main().catch((error: unknown) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/packages/app/src/app/(dashboard)/collectivex/page.tsx b/packages/app/src/app/(dashboard)/collectivex/page.tsx new file mode 100644 index 00000000..d3380bd9 --- /dev/null +++ b/packages/app/src/app/(dashboard)/collectivex/page.tsx @@ -0,0 +1,10 @@ +import type { Metadata } from 'next'; + +import CollectiveXDisplay from '@/components/collectivex/CollectiveXDisplay'; +import { tabMetadata } from '@/lib/tab-meta'; + +export const metadata: Metadata = tabMetadata('collectivex'); + +export default function CollectiveXPage() { + return ; +} diff --git a/packages/app/src/app/sitemap.ts b/packages/app/src/app/sitemap.ts index d1717aa3..57cbda8c 100644 --- a/packages/app/src/app/sitemap.ts +++ b/packages/app/src/app/sitemap.ts @@ -12,6 +12,7 @@ const TABS = [ 
'reliability', 'gpu-specs', 'gpu-metrics', + 'collectivex', ] as const; export default async function sitemap(): Promise { diff --git a/packages/app/src/components/collectivex/CollectiveXChart.tsx b/packages/app/src/components/collectivex/CollectiveXChart.tsx new file mode 100644 index 00000000..eff16d89 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXChart.tsx @@ -0,0 +1,282 @@ +'use client'; + +import * as d3 from 'd3'; +import { useMemo } from 'react'; + +import { D3Chart } from '@/lib/d3-chart/D3Chart'; + +import { sparseLogTicks } from './axis'; +import { chartPoints } from './data'; +import type { + CollectiveXChartPoint, + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXSeries, + CollectiveXScale, + CollectiveXXAxis, + CollectiveXYAxis, +} from './types'; + +interface CollectiveXChartProps { + chartId: string; + series: CollectiveXSeries[]; + colors: Record; + operation: CollectiveXOperation; + percentile: CollectiveXPercentile; + xAxis: CollectiveXXAxis; + yAxis: CollectiveXYAxis; + xScaleType: CollectiveXScale; + yScaleType: CollectiveXScale; + prefillFloor?: number; + compact?: boolean; + caption?: React.ReactNode; + legendElement?: React.ReactNode; + testId?: string; +} + +const OPERATION_LABELS: Record = { + dispatch: 'Dispatch', + combine: 'Combine', + roundtrip: 'Round trip (measured)', + 'isolated-sum': 'Isolated sum (Σp, not measured)', +}; + +const X_AXIS_LABELS: Record = { + 'tokens-per-rank': 'Source tokens / rank', + 'global-tokens': 'Global source tokens', +}; + +const Y_AXIS_LABELS: Record = { + latency: 'Latency (µs)', + 'tokens-per-second': 'Tokens / s', + 'payload-rate': 'Logical routed payload rate (GB/s)', +}; + +function paddedDomain(values: number[], scaleType: CollectiveXScale): [number, number] { + if (values.length === 0) return scaleType === 'log' ? [1, 10] : [0, 1]; + const min = d3.min(values) ?? 0; + const max = d3.max(values) ?? 
1; + if (min === max) { + if (scaleType === 'log') return [Math.max(min / 2, Number.MIN_VALUE), max * 2]; + const padding = Math.max(Math.abs(min) * 0.1, 1); + return [min - padding, max + padding]; + } + if (scaleType === 'log') return [min / 1.08, max * 1.08]; + const padding = (max - min) * 0.06; + return [Math.max(0, min - padding), max + padding]; +} + +/** Compact numeric label: values >= 1e3 are scaled to a k/M/G suffix (one decimal while the scaled value is below 10, none otherwise); values in [10, 1e3) print as integers; values in [1, 10) keep one decimal below 3 and none from 3 up; anything below 1 prints with two decimals. */ +function formatCompact(value: number): string { + if (value >= 1e9) return `${(value / 1e9).toFixed(value < 1e10 ? 1 : 0)}G`; + if (value >= 1e6) return `${(value / 1e6).toFixed(value < 1e7 ? 1 : 0)}M`; + if (value >= 1e3) return `${(value / 1e3).toFixed(value < 1e4 ? 1 : 0)}k`; + if (value >= 10) return value.toFixed(0); + if (value >= 1) return value.toFixed(value < 3 ? 1 : 0); + return value.toFixed(2); +} + +/** Token-count label: integers are printed in full with en-US digit grouping; non-integers fall back to formatCompact. */ +function formatTokenCount(value: number): string { + return Number.isInteger(value) ? value.toLocaleString('en-US') : formatCompact(value); +} + +function formatMetric(value: number, yAxis: CollectiveXYAxis): string { + if (yAxis === 'latency') return `${value.toFixed(value >= 100 ? 0 : 1)} µs`; + if (yAxis === 'tokens-per-second') return `${formatCompact(value)} tok/s`; + return `${value.toFixed(value >= 100 ?
0 : 2)} GB/s`; +} + +/** Escapes the five HTML-significant characters (& < > " ') as entities so series-derived strings can be interpolated into the tooltip HTML string without being parsed as markup. The ampersand is replaced first so the entities produced by the later replacements are not escaped a second time. */ +function escapeHtml(value: string): string { + return value + .replaceAll('&', '&amp;') + .replaceAll('<', '&lt;') + .replaceAll('>', '&gt;') + .replaceAll('"', '&quot;') + .replaceAll("'", '&#39;'); +} + +export function CollectiveXChart({ + chartId, + series, + colors, + operation, + percentile, + xAxis, + yAxis, + xScaleType, + yScaleType, + prefillFloor = 128, + compact = false, + caption, + legendElement, + testId, +}: CollectiveXChartProps) { + const points = useMemo( + () => chartPoints(series, operation, percentile, xAxis, yAxis, prefillFloor), + [series, operation, percentile, xAxis, yAxis, prefillFloor], + ); + const seriesById = useMemo(() => new Map(series.map((item) => [item.id, item])), [series]); + const lines = useMemo(() => { + const result: Record = {}; + for (const point of points) { + (result[point.seriesId] ??= []).push({ x: point.x, y: point.y }); + } + for (const line of Object.values(result)) { + line.sort((a, b) => a.x - b.x); + } + return result; + }, [points]); + + const xDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.x), + xScaleType, + ), + [points, xScaleType], + ); + const yDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.y), + yScaleType, + ), + [points, yScaleType], + ); + const xTickValues = useMemo( + () => [...new Set(points.map((point) => point.x))].toSorted((a, b) => a - b), + [points], + ); + + const noDataOverlay = + points.length === 0 ? ( +
+

No matching CollectiveX series.

+
+ ) : undefined; + + return ( + + chartId={chartId} + data={points} + height={compact ? 260 : 560} + margin={ + compact + ? { top: 16, right: 12, bottom: 48, left: 62 } + : { top: 24, right: 20, bottom: 62, left: 78 } + } + watermark={compact ? 'none' : 'logo'} + testId={testId} + grabCursor={!compact} + instructions={ + compact + ? '' + : 'Shift+Scroll to zoom · Drag to pan · Double-click to reset · Click a point to pin tooltip' + } + xScale={ + xScaleType === 'log' + ? { type: 'log', domain: xDomain, base: 2, nice: false } + : { type: 'linear', domain: xDomain, nice: true } + } + yScale={{ type: yScaleType, domain: yDomain, nice: yScaleType === 'linear' }} + xAxis={{ + label: `${X_AXIS_LABELS[xAxis]}${xScaleType === 'log' ? ' (log2)' : ''}`, + tickCount: compact ? 5 : 8, + tickValues: xTickValues, + tickFormat: (value) => formatTokenCount(Number(value)), + }} + yAxis={{ + label: Y_AXIS_LABELS[yAxis], + tickCount: compact ? 4 : 5, + tickValues: + yScaleType === 'log' + ? (scale) => sparseLogTicks(scale.domain().map(Number), compact ? 4 : 5) + : undefined, + tickFormat: (value) => formatCompact(Number(value)), + }} + layers={[ + { + type: 'line', + key: 'collectivex-lines', + lines, + config: { + getColor: (key) => colors[seriesById.get(key)?.colorKey ?? ''] ?? '#888', + getStrokeDasharray: (key) => + seriesById.get(key)?.shape.dispatchDtype === 'bf16' ? null : '6 4', + strokeWidth: compact ? 1.75 : 2.25, + curve: d3.curveMonotoneX, + }, + }, + { + type: 'point', + key: 'collectivex-points', + data: points, + config: { + getCx: () => 0, + getCy: () => 0, + getX: (point) => point.x, + getY: (point) => point.y, + getColor: (point) => colors[point.colorKey] ?? '#888', + getRadius: () => (compact ? 2.5 : 3.5), + stroke: 'var(--background)', + strokeWidth: compact ? 0.75 : 1, + keyFn: (point) => `${point.seriesId}-${point.x}`, + maxPoints: Infinity, + }, + }, + ]} + zoom={ + compact + ? 
undefined + : { + enabled: true, + axes: 'both', + scaleExtent: [1, 20], + resetEventName: `collectivex_zoom_reset_${chartId}`, + } + } + tooltip={{ + rulerType: 'crosshair', + attachToLayer: 1, + content: (point, isPinned) => { + const color = colors[point.colorKey] ?? '#888'; + const row = point.row; + const runLabel = point.series.run.id ? `Run ${point.series.run.id}` : 'Run unavailable'; + const roundtripLabel = row.roundtripMeasured + ? 'measured' + : 'legacy isolated-sum fallback'; + const workload = + point.series.workloadId ?? point.series.traceSignature ?? 'not canonical'; + const sourceSha = point.series.run.sha?.slice(0, 10) ?? 'unknown'; + const imageDigest = point.series.imageDigest?.slice(0, 19) ?? 'unknown'; + return `
+ ${isPinned ? '
Click elsewhere to dismiss
' : ''} +
${escapeHtml(point.seriesLabel)}
+
${escapeHtml(OPERATION_LABELS[operation])} ${percentile}: ${formatMetric(point.y, yAxis)} · ${escapeHtml(point.series.publicationStatus)}
+
${row.tokensPerRank} tokens/rank · ${row.globalTokens} global tokens
+
Dispatch p50/p90/p99: ${row.dispatch.p50.toFixed(1)} / ${row.dispatch.p90.toFixed(1)} / ${row.dispatch.p99.toFixed(1)} µs
+
Combine p50/p90/p99: ${row.combine.p50.toFixed(1)} / ${row.combine.p90.toFixed(1)} / ${row.combine.p99.toFixed(1)} µs
+
Round trip p50/p90/p99: ${row.roundtrip.p50.toFixed(1)} / ${row.roundtrip.p90.toFixed(1)} / ${row.roundtrip.p99.toFixed(1)} µs (${roundtripLabel})
+
Fan-out: ${row.fanoutMean?.toFixed(2) ?? 'n/a'} · recv max: ${row.recvTokensMax ?? 'n/a'}${row.stragglerRank === null ? '' : ` · straggler: r${row.stragglerRank}`} · correctness: ${row.correct ? 'pass' : 'fail'}
+
${escapeHtml(point.series.measurementContract)} · ${escapeHtml(point.series.suite)} · ${escapeHtml(point.series.topologyClass)}
+
dispatch=${escapeHtml(point.series.shape.dispatchDtype)} · combine=${escapeHtml(point.series.shape.combineQuantMode)} · activation=${escapeHtml(point.series.shape.activationProfile)}
+
workload=${escapeHtml(workload)} · source=${escapeHtml(sourceSha)}
+
${escapeHtml(runLabel)} · image=${escapeHtml(imageDigest)}
+
`; + }, + getRulerX: (point, scale) => + (scale as d3.ScaleLinear | d3.ScaleLogarithmic)(point.x), + getRulerY: (point, scale) => scale(point.y), + onHoverStart: (selection) => { + selection.attr('r', compact ? 4 : 6); + }, + onHoverEnd: (selection) => { + selection.attr('r', compact ? 2.5 : 3.5); + }, + }} + transitionDuration={200} + legendElement={legendElement} + noDataOverlay={noDataOverlay} + caption={caption} + /> + ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXDecision.tsx b/packages/app/src/components/collectivex/CollectiveXDecision.tsx new file mode 100644 index 00000000..7ad1dd8d --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXDecision.tsx @@ -0,0 +1,195 @@ +'use client'; + +import { ExternalLink } from 'lucide-react'; + +import { Card } from '@/components/ui/card'; +import { track } from '@/lib/analytics'; + +import type { CollectiveXDecisionSummary, CollectiveXSummaryCard } from './types'; + +interface CollectiveXDecisionProps { + cards: CollectiveXSummaryCard[]; + decision: CollectiveXDecisionSummary; +} + +function budgetValue(value: number | null | undefined): string { + return value === null || value === undefined ? '-' : value.toLocaleString('en-US'); +} + +function EmptyTable({ label }: { label: string }) { + return ( + +

No {label} data available yet.

+
+ ); +} + +export function CollectiveXDecision({ cards, decision }: CollectiveXDecisionProps) { + return ( +
+
+ {cards.map((card) => { + const body = ( + +

+ {card.title} +

+

+ {card.value} +

+

{card.sub}

+
+ ); + if (!card.href) return
{body}
; + return ( + track('collectivex_decision_card_opened', { title: card.title })} + className="block rounded-xl focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring" + > + {body} + + ); + })} +
+ +
+ +

Max tokens under round-trip p99 budget

+

+ Official headline cells only. Values are the largest measured source tokens per rank + whose measured round-trip p99 stays under the budget. +

+ {decision.maxTokensUnderBudget.length === 0 ? ( +

No budget cell is satisfied yet.

+ ) : ( +
+ + + + + + + + + {decision.budgetsUs.map((budget) => ( + + ))} + + + + {decision.maxTokensUnderBudget.map((row) => ( + + + + + + + {decision.budgetsUs.map((budget) => ( + + ))} + + ))} + +
SKU + Backend + PhaseDtypeEP + {'<= '} + {budget} us +
{row.sku.toUpperCase()}{row.backend}{row.phase}{row.dispatchDtype}{row.epSize === null ? '-' : `EP${row.epSize}`} + {budgetValue(row.budgets[String(budget)])} +
+
+ )} +
+ + +

Lowest dispatch p99 recommendations

+

+ Per SKU and phase at T=64 for decode or T=256 for prefill. +

+ {decision.recommendations.length === 0 ? ( +

No recommendation cells yet.

+ ) : ( +
+ + + + + + + + + + + + {decision.recommendations.map((row) => ( + + + + + + + + ))} + +
SKUPhaseT + p99 us + + Config +
{row.sku.toUpperCase()}{row.phase}{row.atTokensPerRank} + {row.lowestP99DispatchUs.toFixed(1)} + {row.config}
+
+ )} +
+
+ +
+ {[ + ['LL crossover rows', decision.llCrossover.length], + ['Resource Pareto cells', decision.resourcePareto.length], + ['Topology penalty cells', decision.topologyPenalty.length], + ['Skew penalty cells', decision.skewPenalty.length], + ].map(([label, count]) => ( + +

{count}

+

{label}

+
+ ))} +
+ + {decision.llCrossover.length === 0 && + decision.resourcePareto.length === 0 && + decision.topologyPenalty.length === 0 && + decision.skewPenalty.length === 0 ? ( + + ) : ( + +

+ Analysis outputs + +

+

+ Heavy analysis rows are generated into the static snapshot for auditability. The compact + counts above match the source report outputs; detailed EP evidence lives in the Evidence + tab. +

+
+ )} +
+ ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXDisplay.tsx b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx new file mode 100644 index 00000000..cd3ecd72 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXDisplay.tsx @@ -0,0 +1,1484 @@ +'use client'; + +import { ExternalLink, Loader2, RefreshCw } from 'lucide-react'; +import { useCallback, useEffect, useMemo, useState } from 'react'; + +import { Button } from '@/components/ui/button'; +import { Card } from '@/components/ui/card'; +import ChartLegend from '@/components/ui/chart-legend'; +import { Label } from '@/components/ui/label'; +import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle'; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; +import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'; +import { useCollectiveX } from '@/hooks/api/use-collectivex'; +import { useThemeColors } from '@/hooks/useThemeColors'; +import { track } from '@/lib/analytics'; +import { getModelSortIndex } from '@/lib/constants'; + +import { CollectiveXChart } from './CollectiveXChart'; +import { CollectiveXDecision } from './CollectiveXDecision'; +import { CollectiveXHeatmap } from './CollectiveXHeatmap'; +import { CollectiveXScaling } from './CollectiveXScaling'; +import { CollectiveXSizePanel } from './CollectiveXSizePanel'; +import { + CollectiveXCoverageTable, + CollectiveXFailureTable, + CollectiveXSensitivityTable, +} from './CollectiveXTables'; +import { + collectiveXBackendLabel, + collectiveXDecisionSummary, + collectiveXPrefillFloor, + collectiveXSeriesLabel, + collectiveXShapeKey, + collectiveXSummaryCards, + comparisonDifferences, + publicationMatches, +} from './data'; +import type { + CollectiveXDecisionSummary, + CollectiveXOperation, + CollectiveXPercentile, + CollectiveXPhase, + CollectiveXPublicationFilter, + CollectiveXScale, + 
CollectiveXSeries, + CollectiveXSuite, + CollectiveXXAxis, + CollectiveXYAxis, + CollectiveXSummaryCard, +} from './types'; + +const OPERATION_OPTIONS: SegmentedToggleOption[] = [ + { value: 'dispatch', label: 'Dispatch' }, + { value: 'combine', label: 'Combine' }, + { value: 'roundtrip', label: 'Round trip' }, + { value: 'isolated-sum', label: 'Isolated sum' }, +]; + +const OVERVIEW_OPERATIONS: CollectiveXOperation[] = ['dispatch', 'combine', 'roundtrip']; + +const PHASE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'decode', label: 'Decode' }, + { value: 'prefill', label: 'Prefill' }, +]; + +const PERCENTILE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'p50', label: 'p50' }, + { value: 'p90', label: 'p90' }, + { value: 'p99', label: 'p99' }, +]; + +const SUITE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'all', label: 'All' }, + { value: 'backend-default', label: 'Backend default' }, + { value: 'resource-constrained', label: 'Resource constrained' }, +]; + +const PUBLICATION_OPTIONS: SegmentedToggleOption[] = [ + { value: 'official-headline', label: 'Official headline' }, + { value: 'publishable', label: 'Publishable' }, + { value: 'official', label: 'Official only' }, + { value: 'all', label: 'All' }, +]; + +const SCALE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'log', label: 'Log' }, + { value: 'linear', label: 'Linear' }, +]; + +const BASE_PRECISION_OPTIONS: SegmentedToggleOption[] = [ + { value: 'all', label: 'All' }, + { value: 'bf16', label: 'BF16' }, + { value: 'fp8', label: 'FP8' }, +]; + +const BASE_EP_OPTIONS: SegmentedToggleOption[] = [ + { value: 'all', label: 'All' }, + { value: '8', label: 'EP8' }, +]; + +const HEADLINE_SHAPE_KEY = '7168/8/256'; + +const COLLECTIVEX_TABS = [ + { value: 'ep', label: 'EP dispatch / combine' }, + { value: 'decision', label: 'Decision' }, + { value: 'evidence', label: 'Evidence' }, + { value: 'all-reduce', label: 'All-reduce' }, + { value: 'all-gather', label: 'All-gather' }, + { value: 'offload', label: 
'CPU-GPU offload' }, + { value: 'kv-cache', label: 'KV-cache transfer' }, + { value: 'copy-engine', label: 'Copy-engine / SDMA' }, + { value: 'rl-mesh', label: 'RL mesh' }, +] as const; + +type CollectiveXTab = (typeof COLLECTIVEX_TABS)[number]['value']; + +const ACTIVATION_ORDER = [ + 'normal', + 'zeros', + 'small-amplitude', + 'wide-dynamic-range', + 'fp8-saturation', +]; + +// Hardware-generation order (Hopper -> Blackwell -> Grace-Blackwell -> AMD) so the SKU +// selector reads as a progression rather than alphabetically interleaving vendors. +/* NOTE(review): the committed snapshot data in this change contains sku "mi325x", which is not listed here and therefore ranks after every listed SKU (including mi355x) - confirm that is intended. */ +const SKU_ORDER = ['h100', 'h200', 'b200', 'b300', 'gb200', 'gb300', 'mi355x']; + +/** Sort rank of a SKU: its index in SKU_ORDER, or Number.MAX_SAFE_INTEGER when the SKU is not listed, so unlisted SKUs sort after all listed ones. */ +function skuSortRank(value: string): number { + const index = SKU_ORDER.indexOf(value); + return index === -1 ? Number.MAX_SAFE_INTEGER : index; +} + +const ACTIVATION_LABELS: Record = { + normal: 'Normal', + zeros: 'Zeros', + 'small-amplitude': 'Small amplitude', + 'wide-dynamic-range': 'Wide dynamic range', + 'fp8-saturation': 'FP8 saturation', +}; + +const OPERATION_LABELS: Record = { + dispatch: 'Dispatch', + combine: 'Combine', + roundtrip: 'Round trip (measured)', + 'isolated-sum': 'Isolated sum (Σp, not measured)', +}; + +const Y_AXIS_LABELS: Record = { + latency: 'Latency', + 'tokens-per-second': 'Tokens / second', + 'payload-rate': 'Logical payload rate', +}; + +const BACKEND_ORDER = ['deepep', 'deepep v2', 'deepep-hybrid', 'mori', 'nccl-ep', 'uccl']; + +/** Formats a date string as a medium date plus short time in UTC using the 'en' locale; a value that does not parse to a valid Date is returned unchanged. */ +function formatDate(value: string): string { + const date = new Date(value); + if (Number.isNaN(date.getTime())) return value; + return new Intl.DateTimeFormat('en', { + dateStyle: 'medium', + timeStyle: 'short', + timeZone: 'UTC', + }).format(date); +} + +function formatActivation(value: string): string { + return ACTIVATION_LABELS[value] ??
value; +} + +/** Option label for the model/shape selector: the model name followed by its shape key in parentheses. */ +function formatModelShapeOption(item: CollectiveXSeries): string { + const key = collectiveXShapeKey(item.shape); + return `${item.model} (${key})`; +} + +/** Series label for legends: the base series label, with the activation profile appended whenever it is not 'normal'. */ +function displaySeriesLabel(item: CollectiveXSeries): string { + const label = collectiveXSeriesLabel(item); + if (item.shape.activationProfile === 'normal') return label; + return `${label} · ${formatActivation(item.shape.activationProfile)} activation`; +} + +/** Backend filter value for a series, derived from its backend, backend version and kernel generation via collectiveXBackendLabel. */ +function backendFilterValue(item: CollectiveXSeries): string { + return collectiveXBackendLabel(item.backend, item.backendVersion, item.shape.kernelGeneration); +} + +/** Sort rank of a backend label: its index in BACKEND_ORDER, or Number.MAX_SAFE_INTEGER when it is not listed. */ +function backendSortRank(value: string): number { + const index = BACKEND_ORDER.indexOf(value); + return index === -1 ? Number.MAX_SAFE_INTEGER : index; +} + +/** Collects the distinct source runs behind the given series, keyed by run id (series without a run id are skipped; a later series replaces the entry stored for the same id), and returns them newest first by createdAt. */ +function uniqueRuns(series: CollectiveXSeries[]) { + const byId = new Map(); + for (const item of series) { + if (item.run.id) byId.set(item.run.id, item.run); + } + /* Date.parse returns NaN for a missing or unparseable createdAt; subtracting NaN would make the comparator return NaN and leave the order inconsistent. Mapping NaN to 0 keeps every comparison finite and places undated runs last. */ + const createdAtMs = (value: string | null | undefined): number => { + const parsed = Date.parse(value ?? ''); + return Number.isNaN(parsed) ? 0 : parsed; + }; + return [...byId.values()].toSorted( + (a, b) => createdAtMs(b.createdAt) - createdAtMs(a.createdAt), + ); +} + +function ControlGroup({ label, children }: { label: string; children: React.ReactNode }) { + return ( +
+ + {children} +
+ ); +} + +function InlineLegend({ + series, + colors, +}: { + series: CollectiveXSeries[]; + colors: Record; +}) { + const entries = [ + ...new Map( + series.map((item) => [`${item.colorKey}|${displaySeriesLabel(item)}`, item]), + ).values(), + ]; + return ( +
+ {entries.map((item) => ( + + + {displaySeriesLabel(item)} + + ))} +
+ ); +} + +function LineStyleKey() { + return ( +
+ + + BF16 + + + + FP8 + +
+ ); +} + +export default function CollectiveXDisplay() { + const { data, error, isLoading, isFetching, refetch } = useCollectiveX(); + const [activeTab, setActiveTab] = useState('ep'); + const [operation, setOperation] = useState('roundtrip'); + const [phase, setPhase] = useState('decode'); + const [modelShape, setModelShape] = useState(HEADLINE_SHAPE_KEY); + const [backendFilter, setBackendFilter] = useState('all'); + const [skuFilter, setSkuFilter] = useState('all'); + const [precision, setPrecision] = useState('bf16'); + const [epFilter, setEpFilter] = useState('8'); + const [activation, setActivation] = useState('all'); + const [percentile, setPercentile] = useState('p99'); + const [suite, setSuite] = useState('resource-constrained'); + const [routing, setRouting] = useState('uniform'); + const [publication, setPublication] = useState('all'); + const [xAxis, setXAxis] = useState('tokens-per-rank'); + const [yAxis, setYAxis] = useState('latency'); + const [xScaleType, setXScaleType] = useState('log'); + const [yScaleType, setYScaleType] = useState('log'); + const [activeSeriesIds, setActiveSeriesIds] = useState>(new Set()); + const [isLegendExpanded, setIsLegendExpanded] = useState(true); + const [highContrast, setHighContrast] = useState(false); + + const series = data?.series ?? []; + const ncclSeries = data?.nccl ?? []; + const offloadSeries = data?.offload ?? []; + const copyEngineSeries = data?.copyEngine ?? []; + const kvCacheSeries = data?.kvCache ?? []; + const rlMeshSeries = data?.rlMesh ?? []; + const summaryCards: CollectiveXSummaryCard[] = useMemo( + () => data?.summaryCards ?? collectiveXSummaryCards(series, data?.failures ?? []), + [data?.failures, data?.summaryCards, series], + ); + const decision: CollectiveXDecisionSummary = useMemo( + () => data?.decision ?? 
collectiveXDecisionSummary(series), + [data?.decision, series], + ); + const prefillFloor = useMemo(() => collectiveXPrefillFloor(series), [series]); + + useEffect(() => { + if (series.length > 0) setActiveSeriesIds(new Set(series.map((item) => item.id))); + }, [series]); + + useEffect(() => { + const readHash = () => { + const rawHash = window.location.hash.replace('#', ''); + const tab = rawHash.startsWith('tab-') ? rawHash.slice(4) : rawHash; + if (COLLECTIVEX_TABS.some((item) => item.value === tab)) { + setActiveTab(tab as CollectiveXTab); + } + }; + readHash(); + window.addEventListener('hashchange', readHash); + return () => window.removeEventListener('hashchange', readHash); + }, []); + + const routingOptions = useMemo(() => { + const values = [...new Set(series.map((item) => item.shape.routingLabel))].toSorted((a, b) => { + if (a === 'uniform') return -1; + if (b === 'uniform') return 1; + return a.localeCompare(b); + }); + return ['all', ...values]; + }, [series]); + + const modelShapeOptions = useMemo(() => { + const byShape = new Map(); + for (const item of series) { + const key = collectiveXShapeKey(item.shape); + if (!byShape.has(key)) byShape.set(key, formatModelShapeOption(item)); + } + const shapes = [...byShape.entries()].toSorted(([a], [b]) => { + if (a === HEADLINE_SHAPE_KEY) return -1; + if (b === HEADLINE_SHAPE_KEY) return 1; + return a.localeCompare(b, undefined, { numeric: true }); + }); + return [['all', 'All shapes'], ...shapes] as [string, string][]; + }, [series]); + + const precisionOptions = useMemo[]>(() => { + const baseValues = new Set(BASE_PRECISION_OPTIONS.map((option) => option.value)); + const extraValues = [...new Set(series.map((item) => item.shape.dispatchDtype))] + .filter((value) => !baseValues.has(value)) + .toSorted(); + return [ + ...BASE_PRECISION_OPTIONS, + ...extraValues.map((value) => ({ value, label: value.toUpperCase() })), + ]; + }, [series]); + + const backendOptions = useMemo(() => { + const values = [...new 
Set(series.map(backendFilterValue))].toSorted((a, b) => { + const rankDiff = backendSortRank(a) - backendSortRank(b); + return rankDiff === 0 ? a.localeCompare(b) : rankDiff; + }); + return ['all', ...values]; + }, [series]); + + const skuOptions = useMemo(() => { + const values = [...new Set(series.map((item) => item.sku))].toSorted((a, b) => { + const rankDiff = skuSortRank(a) - skuSortRank(b); + return rankDiff === 0 ? a.localeCompare(b) : rankDiff; + }); + return ['all', ...values]; + }, [series]); + + const epOptions = useMemo[]>(() => { + const baseValues = new Set(BASE_EP_OPTIONS.map((option) => option.value)); + const extraValues = [...new Set(series.map((item) => item.epSize))] + .filter((value): value is number => value !== null) + .map(String) + .filter((value) => !baseValues.has(value)) + .toSorted((a, b) => Number(a) - Number(b)); + return [...BASE_EP_OPTIONS, ...extraValues.map((value) => ({ value, label: `EP${value}` }))]; + }, [series]); + + const activationOptions = useMemo(() => { + const values = [...new Set(series.map((item) => item.shape.activationProfile))].toSorted( + (a, b) => { + const aIndex = ACTIVATION_ORDER.indexOf(a); + const bIndex = ACTIVATION_ORDER.indexOf(b); + if (aIndex !== -1 || bIndex !== -1) { + return ( + (aIndex === -1 ? Number.MAX_SAFE_INTEGER : aIndex) - + (bIndex === -1 ? Number.MAX_SAFE_INTEGER : bIndex) + ); + } + return a.localeCompare(b); + }, + ); + return ['all', ...values]; + }, [series]); + + useEffect(() => { + if (series.length === 0) return; + if (routingOptions.includes(routing)) return; + setRouting(routingOptions.includes('uniform') ? 'uniform' : 'all'); + }, [routing, routingOptions, series.length]); + + useEffect(() => { + if (series.length === 0 || modelShape === 'all') return; + if (modelShapeOptions.some(([value]) => value === modelShape)) return; + setModelShape( + modelShapeOptions.some(([value]) => value === HEADLINE_SHAPE_KEY) + ? 
HEADLINE_SHAPE_KEY + : 'all', + ); + }, [modelShape, modelShapeOptions, series.length]); + + useEffect(() => { + if (series.length === 0 || precision === 'all') return; + if (precisionOptions.some((option) => option.value === precision)) return; + setPrecision('all'); + }, [precision, precisionOptions, series.length]); + + useEffect(() => { + if (series.length === 0 || backendFilter === 'all') return; + if (backendOptions.includes(backendFilter)) return; + setBackendFilter('all'); + }, [backendFilter, backendOptions, series.length]); + + useEffect(() => { + if (series.length === 0 || skuFilter === 'all') return; + if (skuOptions.includes(skuFilter)) return; + setSkuFilter('all'); + }, [skuFilter, skuOptions, series.length]); + + useEffect(() => { + if (series.length === 0 || epFilter === 'all') return; + if (epOptions.some((option) => option.value === epFilter)) return; + setEpFilter('all'); + }, [epFilter, epOptions, series.length]); + + useEffect(() => { + if (series.length === 0 || activation === 'all') return; + if (activationOptions.includes(activation)) return; + setActivation('all'); + }, [activation, activationOptions, series.length]); + + const dimensionFilteredSeries = useMemo( + () => + series.filter( + (item) => + (modelShape === 'all' || collectiveXShapeKey(item.shape) === modelShape) && + (skuFilter === 'all' || item.sku === skuFilter) && + (backendFilter === 'all' || backendFilterValue(item) === backendFilter) && + (epFilter === 'all' || String(item.epSize) === epFilter) && + (precision === 'all' || item.shape.dispatchDtype === precision) && + (activation === 'all' || item.shape.activationProfile === activation), + ), + [activation, backendFilter, epFilter, modelShape, precision, series, skuFilter], + ); + + const filteredSeries = useMemo( + () => + dimensionFilteredSeries.filter( + (item) => + (suite === 'all' || item.suite === suite) && + (routing === 'all' || item.shape.routingLabel === routing) && + publicationMatches(item, publication, 
modelShape), + ), + [dimensionFilteredSeries, modelShape, publication, routing, suite], + ); + const phaseSeries = useMemo( + () => filteredSeries.filter((item) => item.phase === phase), + [filteredSeries, phase], + ); + + // NOTE: an earlier effect silently RELAXED filters (suite -> precision -> backend -> EP -> SKU + // -> publication) whenever the selection matched zero series. That made the Publication toggle + // look dead — clicking "Official headline" with no official data instantly snapped back to + // "All" after mutating other filters the user had chosen. Zero-match selections now stay put + // and render an explicit empty state with a one-click reset instead (see the EP tab below). + const showEverything = useCallback(() => { + setModelShape('all'); + setSkuFilter('all'); + setBackendFilter('all'); + setPrecision('all'); + setEpFilter('all'); + setActivation('all'); + setSuite('all'); + setRouting('all'); + setPublication('all'); + track('collectivex_filters_reset', {}); + }, []); + + const activePhaseSeries = useMemo( + () => phaseSeries.filter((item) => activeSeriesIds.has(item.id)), + [activeSeriesIds, phaseSeries], + ); + const heatmapSeries = useMemo( + () => + dimensionFilteredSeries.filter( + (item) => + item.phase === phase && + (suite === 'all' || item.suite === suite) && + publicationMatches(item, publication, modelShape), + ), + [dimensionFilteredSeries, modelShape, phase, publication, suite], + ); + const scalingColorSeries = useMemo( + () => + dimensionFilteredSeries.filter( + (item) => + item.shape.routing === 'uniform' && + !item.shape.eplbEnabled && + item.mode === 'normal' && + item.measurementContract === 'layout-and-dispatch-v1' && + item.suite === 'backend-default' && + publicationMatches(item, 'publishable'), + ), + [dimensionFilteredSeries], + ); + const colorKeys = useMemo( + () => [ + ...new Set( + [ + ...filteredSeries, + ...scalingColorSeries, + ...ncclSeries, + ...offloadSeries, + ...copyEngineSeries, + ...kvCacheSeries, + 
...rlMeshSeries, + ].map((item) => item.colorKey), + ), + ], + [ + copyEngineSeries, + filteredSeries, + kvCacheSeries, + ncclSeries, + offloadSeries, + rlMeshSeries, + scalingColorSeries, + ], + ); + const { resolveColor, getCssColor } = useThemeColors({ + highContrast, + activeKeys: colorKeys, + hcKeys: colorKeys, + hcVendorKeyFor: (key) => key.split('_')[0], + }); + const colors = useMemo( + () => Object.fromEntries(colorKeys.map((key) => [key, getCssColor(resolveColor(key, key))])), + [colorKeys, getCssColor, resolveColor], + ); + + const toggleSeries = useCallback((id: string) => { + setActiveSeriesIds((previous) => { + const next = new Set(previous); + if (next.has(id)) next.delete(id); + else next.add(id); + return next; + }); + track('collectivex_series_toggled', { series: id }); + }, []); + + const comparisonWarnings = useMemo( + () => comparisonDifferences(activePhaseSeries), + [activePhaseSeries], + ); + const runs = useMemo(() => uniqueRuns(series), [series]); + const hardwareCount = new Set(activePhaseSeries.map((item) => item.sku)).size; + const selectedModelLabel = + modelShapeOptions.find(([value]) => value === modelShape)?.[1] ?? 
modelShape; + const overviewGroups = useMemo(() => { + const availablePhases = PHASE_OPTIONS.map((option) => option.value).filter((candidate) => + filteredSeries.some((item) => item.phase === candidate), + ); + return availablePhases.flatMap((overviewPhase) => { + const phaseCandidates = filteredSeries.filter((item) => item.phase === overviewPhase); + const epSizes = [...new Set(phaseCandidates.map((item) => item.epSize))].toSorted((a, b) => { + if (a === b) return 0; + if (a === null) return 1; + if (b === null) return -1; + return a - b; + }); + return epSizes.map((epSize) => ({ + phase: overviewPhase, + epSize, + series: phaseCandidates.filter( + (item) => item.epSize === epSize && activeSeriesIds.has(item.id), + ), + })); + }); + }, [activeSeriesIds, filteredSeries]); + const hasLegacyP90Fallback = + percentile === 'p90' && activePhaseSeries.some((item) => item.schemaVersion < 3); + const hasUnmeasuredRoundtrip = + operation === 'roundtrip' && + activePhaseSeries.some((item) => item.rows.some((row) => !row.roundtripMeasured)); + const routingIdentityProven = + series.length > 0 && series.every((item) => item.routingConsistent === true); + const pooledSamples = [ + ...new Set( + series.flatMap((item) => + item.rows + .map((row) => row.samplesPooled) + .filter((value): value is number => value !== null), + ), + ), + ].toSorted((a, b) => a - b); + const placementCount = new Set(heatmapSeries.map((item) => item.placement.kind)).size; + const eplbExample = series.find( + (item) => item.eplbImbalanceBefore !== null && item.eplbImbalanceAfter !== null, + ); + const allReduceSeries = useMemo( + () => ncclSeries.filter((item) => item.op === 'all_reduce'), + [ncclSeries], + ); + const allGatherSeries = useMemo( + () => ncclSeries.filter((item) => item.op === 'all_gather'), + [ncclSeries], + ); + + const legendItems = useMemo( + () => + phaseSeries + .toSorted( + (a, b) => + getModelSortIndex(a.sku) - getModelSortIndex(b.sku) || + a.label.localeCompare(b.label) || + 
a.identity.localeCompare(b.identity), + ) + .map((item) => ({ + name: item.id, + label: + modelShape === 'all' + ? `[${item.model}] ${displaySeriesLabel(item)}` + : displaySeriesLabel(item), + color: colors[item.colorKey] ?? 'var(--muted-foreground)', + isActive: activeSeriesIds.has(item.id), + title: `${item.model} · ${item.publicationStatus} · ${item.shape.routingLabel} · ${item.topologyClass} · ${item.measurementContract} · ${formatActivation(item.shape.activationProfile)} activation`, + onClick: () => toggleSeries(item.id), + })), + [activeSeriesIds, colors, modelShape, phaseSeries, toggleSeries], + ); + + const handleRefresh = useCallback(() => { + track('collectivex_data_refreshed'); + void refetch(); + }, [refetch]); + + const handleTabChange = useCallback((value: string) => { + const tab = value as CollectiveXTab; + setActiveTab(tab); + track('collectivex_tab_changed', { tab }); + if (typeof window !== 'undefined') { + window.history.replaceState(null, '', `#tab-${tab}`); + } + }, []); + + if (isLoading) { + return ( + + +

Loading CollectiveX artifacts...

+
+ ); + } + + if (error || !data) { + return ( + +

CollectiveX data unavailable

+

+ {error instanceof Error ? error.message : 'Failed to load CollectiveX artifacts.'} +

+ +
+ ); + } + + return ( +
+ +
+
+
+

CollectiveX

+ + Experimental + +
+

+ Cross-vendor MoE expert-parallel dispatch, combine, and independently measured + round-trip benchmarks from the{' '} + collectivex branch. +

+

+ Uniform routing is the controlled cross-hardware headline. Skewed routing, EPLB, + activation profiles, resource budgets, topology, EP degree, and timing contract are + sensitivity dimensions and remain part of each line's identity. +

+
+ +
+ +
+
+

{activePhaseSeries.length}

+

Visible configurations

+
+
+

{hardwareCount}

+

Visible hardware

+
+
+

{series.length}

+

Retained sweeps

+
+
+

{data.failures.length}

+

Quarantined cases

+
+
+

{data.contributingRuns}

+

Source workflow runs

+
+
+

{formatDate(data.generatedAt)}

+

Newest result (UTC)

+
+
+
+ + +
+

+ Controls +

+

+ Data filters select the plotted cohort; axis controls set the projection. Mismatched + benchmark dimensions are never overlaid silently. +

+
+ + + +
+ + { + setOperation(value); + track('collectivex_operation_changed', { operation: value }); + }} + ariaLabel="CollectiveX operation" + testId="collectivex-operation-toggle" + className="flex-wrap" + /> + +
+ + { + setPhase(value); + track('collectivex_phase_changed', { phase: value }); + }} + ariaLabel="CollectiveX phase" + testId="collectivex-phase-toggle" + /> + + + { + setPercentile(value); + track('collectivex_percentile_changed', { percentile: value }); + }} + ariaLabel="CollectiveX percentile" + testId="collectivex-percentile-toggle" + /> + +
+ + { + setSuite(value); + track('collectivex_suite_changed', { suite: value }); + }} + ariaLabel="CollectiveX comparison suite" + testId="collectivex-suite-toggle" + className="flex-wrap" + /> + +
+ + + + + + +
+ + { + setPrecision(value); + track('collectivex_precision_changed', { precision: value }); + }} + ariaLabel="CollectiveX dispatch dtype" + testId="collectivex-precision-toggle" + className="flex-wrap" + buttonClassName="min-h-8 justify-center px-3" + /> + +
+ + { + setEpFilter(value); + track('collectivex_ep_degree_changed', { ep: value }); + }} + ariaLabel="CollectiveX EP degree" + testId="collectivex-ep-toggle" + className="flex-wrap" + /> + + + + +
+ + { + setPublication(value); + track('collectivex_publication_changed', { publication: value }); + }} + ariaLabel="CollectiveX publication status" + testId="collectivex-publication-toggle" + className="flex-wrap" + /> + +
+ {/* The activation-value axis was retired from the sweep (single profile = normal; + measured latency-neutral under bf16 combine), so hide the selector once the data + carries only one profile — it reappears automatically if a value-sensitive + quantized-combine sweep ever lands. */} + {activationOptions.length > 2 && ( + + + + )} + + + + + { + setXScaleType(value); + track('collectivex_x_scale_changed', { scale: value }); + }} + ariaLabel="CollectiveX x scale" + testId="collectivex-x-scale-toggle" + /> + + + + + + { + setYScaleType(value); + track('collectivex_y_scale_changed', { scale: value }); + }} + ariaLabel="CollectiveX y scale" + testId="collectivex-y-scale-toggle" + /> + +
+
+
+ + + + {COLLECTIVEX_TABS.map((tab) => ( + + {tab.label} + + ))} + + + + {series.length > 0 && phaseSeries.length === 0 && ( + +
+

+ No series match the current filters + {publication === 'all' + ? '.' + : ` — the "${ + PUBLICATION_OPTIONS.find((option) => option.value === publication)?.label ?? + publication + }" cohort has no data under the other selections (most sweep results are seeded-runtime, not official).`} +

+ +
+
+ )} + + +

+ {OPERATION_LABELS[operation]} · {phase} · {percentile} + {modelShape === 'all' ? ' · all shapes' : ` · ${selectedModelLabel}`} + {skuFilter === 'all' ? '' : ` · ${skuFilter.toUpperCase()}`} + {backendFilter === 'all' ? '' : ` · ${backendFilter}`} + {precision === 'all' ? '' : ` · ${precision.toUpperCase()}`} + {epFilter === 'all' ? '' : ` · EP${epFilter}`} + {activation === 'all' ? '' : ` · ${formatActivation(activation)} activation`} + {routing === 'all' ? '' : ` · ${routing}`} +

+

+ {Y_AXIS_LABELS[yAxis]} versus{' '} + {xAxis === 'tokens-per-rank' + ? 'source tokens per rank' + : 'global source tokens'} + . FP8 lines are dashed. +

+ + } + legendElement={ + { + setActiveSeriesIds((previous) => { + const next = new Set(previous); + next.delete(id); + return next; + }); + track('collectivex_series_hidden', { series: id }); + }} + isLegendExpanded={isLegendExpanded} + onExpandedChange={(expanded) => { + setIsLegendExpanded(expanded); + track('collectivex_legend_expanded', { expanded }); + }} + switches={[ + { + id: 'collectivex-high-contrast', + label: 'High Contrast', + checked: highContrast, + onCheckedChange: (checked) => { + setHighContrast(checked); + track('collectivex_high_contrast_toggled', { enabled: checked }); + }, + }, + ]} + keyIndicators={} + actions={ + activePhaseSeries.length < phaseSeries.length + ? [ + { + id: 'collectivex-reset-filter', + label: 'Reset filter', + onClick: () => { + setActiveSeriesIds((previous) => { + const next = new Set(previous); + for (const item of phaseSeries) next.add(item.id); + return next; + }); + track('collectivex_legend_filter_reset'); + }, + }, + ] + : [] + } + /> + } + /> + + {comparisonWarnings.length > 0 && ( +

+ Not directly comparable: selected lines vary in{' '} + {comparisonWarnings.join(', ')}. Isolate a fixed workload and timing contract before + ranking hardware. +

+ )} + {hasLegacyP90Fallback && ( +

+ Some legacy artifacts did not record p90; those points use p50 in the p90 view. +

+ )} + {hasUnmeasuredRoundtrip && ( +

+ Legacy rows without an independently measured round trip fall back to the isolated + dispatch-plus-combine percentile sum and are identified in the tooltip. +

+ )} + {operation === 'isolated-sum' && ( +

+ Isolated sum is dispatch percentile + combine percentile. It is not a measured + chained operation and should not be used as a round-trip SLO. +

+ )} + {yAxis === 'payload-rate' && ( +

+ Logical payload rate counts routed activation bytes divided by latency. It is not + wire, algorithmic, or bus bandwidth and excludes indices, scales, metadata, padding, + and protocol overhead. +

+ )} + {xScaleType === 'log' && ( +

+ The token sweep is geometric, so log2 spacing is intentional. Axis labels are + restricted to token counts that were actually measured. +

+ )} +
+ +
+
+

Latency overview

+

+ Separate dispatch, combine, and independently measured round-trip panels for each + phase and EP degree. Prefill panels contain only their own measured large-token + range. +

+
+ {overviewGroups.length === 0 ? ( + +

+ No latency panels match the current backend, precision, activation, suite, + routing, and publication filters. +

+
+ ) : ( + overviewGroups.map((group) => { + const epLabel = group.epSize === null ? 'EP unknown' : `EP${group.epSize}`; + return ( + +

+ {group.phase} · {epLabel} · {percentile} latency +

+ +
+ {OVERVIEW_OPERATIONS.map((overviewOperation) => ( +
+

+ {OPERATION_LABELS[overviewOperation]} +

+ +
+ ))} +
+
+ ); + }) + )} +
+ +
+
+

Scaling

+

+ Strong and weak scaling are distinct experiments with separately labeled fixed-work + contracts. Backend, precision, and activation filters apply; a chart appears once a + SKU has matched measurements at two EP degrees. +

+
+ + +
+ +
+
+

Heatmaps

+

+ Dispatch p50 across EP, routing, and resource dimensions for the current phase, + backend, precision, activation, suite, and publication filters. The routing selector + is intentionally not applied here. +

+
+
+ + + + {placementCount > 1 && ( + + )} +
+
+
+ + + + + + + + + + + +

Provenance

+

+ Snapshot v{data.snapshotVersion} scanned {data.scannedRuns} workflow runs and{' '} + {data.scannedArtifacts} CollectiveX artifacts, then retained the best/newest record + for each fixed configuration. The generated static JSON is committed by the Update + CollectiveX Data workflow; no database is involved. +

+

+ Routing identity is{' '} + {routingIdentityProven ? 'proven across ranks' : 'not proven for every series'}. + Latency values are pooled per-iteration cross-rank maxima + {pooledSamples.length > 0 ? ` (${pooledSamples.join('/')} samples per point)` : ''}. + Correctness is a round-trip reconstruction smoke check, not a complete per-token + routing proof. Backend, topology, EP degree, dtype, mode, activation, combine + quantization, resource budget, workload ID, and measurement contract remain part of + the result. + {eplbExample + ? ` One EPLB cohort reduced recorded rank-load imbalance from ${eplbExample.eplbImbalanceBefore?.toFixed(1)}× to ${eplbExample.eplbImbalanceAfter?.toFixed(1)}×.` + : ''} +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+
+  );
+}
diff --git a/packages/app/src/components/collectivex/CollectiveXHeatmap.tsx b/packages/app/src/components/collectivex/CollectiveXHeatmap.tsx
new file mode 100644
index 00000000..384592ff
--- /dev/null
+++ b/packages/app/src/components/collectivex/CollectiveXHeatmap.tsx
@@ -0,0 +1,174 @@
+'use client';
+
+import * as d3 from 'd3';
+import { useMemo } from 'react';
+
+import { Card } from '@/components/ui/card';
+import { D3Chart } from '@/lib/d3-chart/D3Chart';
+
+import { collectiveXHeatmapCells } from './data';
+import type {
+  CollectiveXHeatmapCell,
+  CollectiveXHeatmapDimension,
+  CollectiveXPhase,
+  CollectiveXSeries,
+} from './types';
+
+interface CollectiveXHeatmapProps {
+  series: CollectiveXSeries[];
+  dimension: CollectiveXHeatmapDimension;
+  phase: CollectiveXPhase;
+  prefillFloor: number;
+}
+
+// Axis/title label for each heatmap dimension.
+const DIMENSION_LABELS = {
+  ep: 'EP',
+  routing: 'Routing skew',
+  resource: 'Resource regime',
+  placement: 'Placement',
+} satisfies Record<CollectiveXHeatmapDimension, string>;
+
+/** Compact number text: thousands as "1.2k" / "12k", smaller values with 0-1 decimals. */
+function compactNumber(value: number): string {
+  if (value >= 1000) return `${(value / 1000).toFixed(value < 10_000 ? 1 : 0)}k`;
+  return value.toFixed(value >= 100 ? 0 : 1);
+}
+
+/**
+ * Band-scale heatmap of the cells produced by `collectiveXHeatmapCells`: one column per
+ * tokens-per-rank value and one row per distinct `cell.row` label, colored by `latencyUs`
+ * on a log scale.
+ */
+export function CollectiveXHeatmap({
+  series,
+  dimension,
+  phase,
+  prefillFloor,
+}: CollectiveXHeatmapProps) {
+  const cells = useMemo(
+    () => collectiveXHeatmapCells(series, dimension, prefillFloor),
+    [dimension, prefillFloor, series],
+  );
+  const columns = useMemo(
+    () =>
+      [...new Set(cells.map((cell) => cell.tokensPerRank))].toSorted((a, b) => a - b).map(String),
+    [cells],
+  );
+  // Numeric-aware ordering before the reverse: a bare toSorted() compares UTF-16 code units,
+  // which misorders labels that embed multi-digit numbers (e.g. "16" ahead of "8").
+  const rows = useMemo(
+    () =>
+      [...new Set(cells.map((cell) => cell.row))]
+        .toSorted((a, b) => a.localeCompare(b, undefined, { numeric: true }))
+        .toReversed(),
+    [cells],
+  );
+  const values = cells.map((cell) => cell.latencyUs);
+  const minValue = d3.min(values) ?? 1;
+  const maxValue = d3.max(values) ??
minValue; + const color = useMemo(() => { + if (minValue === maxValue) return () => d3.interpolateRdYlGn(0.5); + const normalize = d3.scaleLog().domain([minValue, maxValue]).range([0, 1]).clamp(true); + return (value: number) => d3.interpolateRdYlGn(1 - normalize(value)); + }, [maxValue, minValue]); + const chartHeight = Math.max(190, rows.length * 34 + 84); + const title = `${DIMENSION_LABELS[dimension]} × tokens/rank`; + + return ( + + {cells.length === 0 ? ( + <> +

{title}

+

+ No heatmap cells are available for this phase and filter set. +

+ + ) : ( + + chartId={`collectivex-heatmap-${dimension}-${phase}`} + testId={`collectivex-heatmap-${dimension}-chart`} + data={cells} + height={chartHeight} + margin={{ + top: 16, + right: 12, + bottom: 48, + left: dimension === 'routing' ? 150 : 100, + }} + watermark="none" + grabCursor={false} + instructions="" + xScale={{ type: 'band', domain: columns, padding: 0.06 }} + yScale={{ type: 'band', domain: rows, padding: 0.08 }} + xAxis={{ + label: 'Source tokens / rank', + tickFormat: (value) => compactNumber(Number(value)), + }} + yAxis={{ label: DIMENSION_LABELS[dimension] }} + layers={[ + { + type: 'custom', + key: `collectivex-heatmap-${dimension}-cells`, + render: (group, context) => { + const xScale = context.xScale as d3.ScaleBand; + const yScale = context.yScale as d3.ScaleBand; + const selection = group + .selectAll( + `.collectivex-heat-cell-${dimension}`, + ) + .data(cells, (cell) => cell.id) + .join('rect') + .attr('class', `collectivex-heat-cell collectivex-heat-cell-${dimension}`) + .attr('x', (cell) => xScale(String(cell.tokensPerRank)) ?? 0) + .attr('y', (cell) => yScale(cell.row) ?? 0) + .attr('width', xScale.bandwidth()) + .attr('height', yScale.bandwidth()) + .attr('rx', 2) + .attr('fill', (cell) => color(cell.latencyUs)); + + group + .selectAll( + `.collectivex-heat-label-${dimension}`, + ) + .data(cells, (cell) => cell.id) + .join('text') + .attr('class', `collectivex-heat-label-${dimension}`) + .attr( + 'x', + (cell) => (xScale(String(cell.tokensPerRank)) ?? 0) + xScale.bandwidth() / 2, + ) + .attr('y', (cell) => (yScale(cell.row) ?? 0) + yScale.bandwidth() / 2 + 3) + .attr('text-anchor', 'middle') + .attr('font-size', 9) + .attr('font-weight', 600) + .attr('fill', '#111827') + .attr('pointer-events', 'none') + .text((cell) => compactNumber(cell.latencyUs)); + + return selection; + }, + }, + ]} + tooltip={{ + rulerType: 'none', + attachToLayer: 0, + content: (cell) => + `
${DIMENSION_LABELS[dimension]}: ${cell.row}
${cell.tokensPerRank} tokens/rank
${cell.latencyUs.toFixed(2)} µs dispatch p50
`, + }} + caption={ + <> +

+ {title} · dispatch p50 · {phase} +

+

+ Each cell is the fastest measured matching series. Green is lower latency; blank + cells were not measured. +

+ + } + /> + )} +
+ ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXScaling.tsx b/packages/app/src/components/collectivex/CollectiveXScaling.tsx new file mode 100644 index 00000000..c39ec653 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXScaling.tsx @@ -0,0 +1,140 @@ +'use client'; + +import * as d3 from 'd3'; +import { useMemo } from 'react'; + +import { Card } from '@/components/ui/card'; +import { D3Chart } from '@/lib/d3-chart/D3Chart'; + +import { collectiveXScalingPoints } from './data'; +import type { CollectiveXScalingKind, CollectiveXScalingPoint, CollectiveXSeries } from './types'; + +interface CollectiveXScalingProps { + series: CollectiveXSeries[]; + colors: Record; + kind: CollectiveXScalingKind; +} + +const SCALING_COPY = { + weak: { + title: 'Weak scaling · fixed 64 tokens/rank', + detail: 'Ideal behavior is flat latency as EP degree increases.', + }, + strong: { + title: 'Strong scaling · fixed 512 global tokens', + detail: 'Ideal behavior falls approximately as 1/EP.', + }, +} satisfies Record; + +function paddedLatencyDomain(points: CollectiveXScalingPoint[]): [number, number] { + if (points.length === 0) return [0, 1]; + const max = d3.max(points, (point) => point.latencyUs) ?? 1; + return [0, max * 1.08]; +} + +export function CollectiveXScaling({ series, colors, kind }: CollectiveXScalingProps) { + const points = useMemo(() => collectiveXScalingPoints(series, kind), [series, kind]); + const lines = useMemo(() => { + const output: Record = {}; + for (const point of points) { + (output[point.sku] ??= []).push({ x: point.epSize, y: point.latencyUs }); + } + for (const line of Object.values(output)) line.sort((a, b) => a.x - b.x); + return output; + }, [points]); + const colorBySku = useMemo( + () => + Object.fromEntries( + points.map((point) => [point.sku, colors[point.colorKey] ?? 
'var(--muted-foreground)']), + ), + [colors, points], + ); + const epTicks = useMemo( + () => [...new Set(points.map((point) => point.epSize))].toSorted((a, b) => a - b), + [points], + ); + const copy = SCALING_COPY[kind]; + + return ( + + {points.length === 0 ? ( + <> +

{copy.title}

+

+ No SKU has a matched publishable backend-default cohort at two or more EP degrees yet. +

+ + ) : ( + + chartId={`collectivex-scaling-${kind}`} + testId={`collectivex-scaling-${kind}-chart`} + data={points} + height={330} + margin={{ top: 20, right: 18, bottom: 54, left: 72 }} + watermark="logo" + grabCursor={false} + instructions="" + xScale={{ + type: 'linear', + domain: [Math.min(...epTicks), Math.max(...epTicks)], + nice: false, + }} + yScale={{ type: 'linear', domain: paddedLatencyDomain(points), nice: true }} + xAxis={{ + label: 'EP degree', + tickValues: epTicks, + tickFormat: (value) => `EP${Number(value)}`, + }} + yAxis={{ + label: 'Dispatch p50 (µs)', + tickCount: 6, + tickFormat: (value) => Number(value).toFixed(0), + }} + layers={[ + { + type: 'line', + key: `collectivex-${kind}-lines`, + lines, + config: { + getColor: (sku) => colorBySku[sku] ?? 'var(--muted-foreground)', + strokeWidth: 2.25, + curve: d3.curveLinear, + }, + }, + { + type: 'point', + key: `collectivex-${kind}-points`, + data: points, + config: { + getCx: () => 0, + getCy: () => 0, + getX: (point) => point.epSize, + getY: (point) => point.latencyUs, + getColor: (point) => colorBySku[point.sku] ?? 'var(--muted-foreground)', + getRadius: () => 4, + stroke: 'var(--background)', + strokeWidth: 1, + keyFn: (point) => point.id, + maxPoints: Infinity, + }, + }, + ]} + tooltip={{ + rulerType: 'crosshair', + attachToLayer: 1, + content: (point) => + `
${point.sku.toUpperCase()} EP${point.epSize}
${point.latencyUs.toFixed(2)} µs dispatch p50
${kind} scaling contract
`, + getRulerX: (point, scale) => (scale as d3.ScaleLinear)(point.epSize), + getRulerY: (point, scale) => scale(point.latencyUs), + }} + caption={ + <> +

{copy.title}

+

{copy.detail}

+ + } + /> + )} +
+  );
+}
diff --git a/packages/app/src/components/collectivex/CollectiveXSizeChart.tsx b/packages/app/src/components/collectivex/CollectiveXSizeChart.tsx
new file mode 100644
index 00000000..2cdf9fda
--- /dev/null
+++ b/packages/app/src/components/collectivex/CollectiveXSizeChart.tsx
@@ -0,0 +1,362 @@
+'use client';
+
+import * as d3 from 'd3';
+import { useMemo } from 'react';
+
+import { D3Chart } from '@/lib/d3-chart/D3Chart';
+
+import { sparseLogTicks } from './axis';
+import type {
+  CollectiveXCollectiveMetric,
+  CollectiveXCollectiveRow,
+  CollectiveXCollectiveSeries,
+  CollectiveXScale,
+  CollectiveXTransferMetric,
+  CollectiveXTransferRow,
+  CollectiveXTransferSeries,
+} from './types';
+
+type CollectiveXSizeSeries = CollectiveXCollectiveSeries | CollectiveXTransferSeries;
+type CollectiveXSizeMetric = CollectiveXCollectiveMetric | CollectiveXTransferMetric;
+type CollectiveXSizeRow = CollectiveXCollectiveRow | CollectiveXTransferRow;
+
+interface CollectiveXSizeChartProps {
+  chartId: string;
+  series: CollectiveXSizeSeries[];
+  colors: Record<string, string>;
+  metric: CollectiveXSizeMetric;
+  xScaleType: CollectiveXScale;
+  yScaleType: CollectiveXScale;
+  caption?: React.ReactNode;
+  legendElement?: React.ReactNode;
+  testId?: string;
+}
+
+interface CollectiveXSizePoint {
+  seriesId: string;
+  seriesLabel: string;
+  colorKey: string;
+  valid: boolean;
+  status: string;
+  note: string | null;
+  x: number;
+  y: number;
+  metric: CollectiveXSizeMetric;
+  row: CollectiveXSizeRow;
+  series: CollectiveXSizeSeries;
+}
+
+/** Narrows to a collective series by the presence of the `op` field. */
+function isCollectiveSeries(series: CollectiveXSizeSeries): series is CollectiveXCollectiveSeries {
+  return 'op' in series;
+}
+
+/** Narrows to a collective row by the presence of the `latencyUs` field. */
+function isCollectiveRow(row: CollectiveXSizeRow): row is CollectiveXCollectiveRow {
+  return 'latencyUs' in row;
+}
+
+/**
+ * Axis domain for `values` with a small margin. No values: [1, 10] for log, [0, 1] for linear.
+ * A single distinct value is widened around itself so the domain is never zero-width. Log
+ * lower bounds are floored at Number.MIN_VALUE; the linear lower bound of a multi-value domain
+ * is clamped at 0.
+ */
+function paddedDomain(values: number[], scaleType: CollectiveXScale): [number, number] {
+  if (values.length === 0) return scaleType === 'log' ? [1, 10] : [0, 1];
+  const min = d3.min(values) ?? 0;
+  const max = d3.max(values) ?? 1;
+  if (min === max) {
+    if (scaleType === 'log') return [Math.max(min / 2, Number.MIN_VALUE), max * 2];
+    const padding = Math.max(Math.abs(min) * 0.1, 1);
+    return [min - padding, max + padding];
+  }
+  if (scaleType === 'log') return [Math.max(min / 1.08, Number.MIN_VALUE), max * 1.08];
+  const padding = (max - min) * 0.06;
+  return [Math.max(0, min - padding), max + padding];
+}
+
+/** Compact numeric text: G/M/k suffixes for large values, two decimals below 1. */
+function formatCompact(value: number): string {
+  if (value >= 1e9) return `${(value / 1e9).toFixed(value < 1e10 ? 1 : 0)}G`;
+  if (value >= 1e6) return `${(value / 1e6).toFixed(value < 1e7 ? 1 : 0)}M`;
+  if (value >= 1e3) return `${(value / 1e3).toFixed(value < 1e4 ? 1 : 0)}k`;
+  if (value >= 10) return value.toFixed(0);
+  if (value >= 1) return value.toFixed(value < 3 ? 1 : 0);
+  return value.toFixed(2);
+}
+
+/** Byte count as text with decimal (1e3-based) B/KB/MB/GB units. */
+function formatBytes(value: number): string {
+  if (value >= 1e9) return `${(value / 1e9).toFixed(value < 1e10 ? 1 : 0)}GB`;
+  if (value >= 1e6) return `${(value / 1e6).toFixed(value < 1e7 ? 1 : 0)}MB`;
+  if (value >= 1e3) return `${(value / 1e3).toFixed(value < 1e4 ? 1 : 0)}KB`;
+  return `${value.toFixed(0)}B`;
+}
+
+/**
+ * Y-axis title for `metric`. Latency is labeled in ms as soon as any non-collective series
+ * reports `latencyUnit === 'ms'`; otherwise in us.
+ */
+function metricLabel(metric: CollectiveXSizeMetric, series: CollectiveXSizeSeries[]): string {
+  if (metric === 'bus-bandwidth' || metric === 'bandwidth') return 'Bandwidth (GB/s)';
+  const unit = series.some((item) => !isCollectiveSeries(item) && item.latencyUnit === 'ms')
+    ? 'ms'
+    : 'us';
+  return `Latency (${unit})`;
+}
+
+/** Value plotted for `metric` from `row`; null when the row has no value for that metric. */
+function yValue(row: CollectiveXSizeRow, metric: CollectiveXSizeMetric): number | null {
+  if (isCollectiveRow(row)) {
+    if (metric === 'bus-bandwidth') return row.busBandwidthGbps;
+    if (metric === 'latency') return row.latencyUs;
+    return null;
+  }
+  if (metric === 'bandwidth') return row.bandwidthGbps;
+  if (metric === 'latency') return row.latency;
+  return null;
+}
+
+/**
+ * Escapes the five HTML-significant characters so `value` can be embedded in an HTML string
+ * as text. `&` is replaced first so the entities emitted for the other characters are not
+ * escaped a second time.
+ */
+function escapeHtml(value: string): string {
+  return value
+    .replaceAll('&', '&amp;')
+    .replaceAll('<', '&lt;')
+    .replaceAll('>', '&gt;')
+    .replaceAll('"', '&quot;')
+    .replaceAll("'", '&#39;');
+}
+
+/** Formats a metric value with its unit: GB/s for bandwidth metrics, else the latency unit. */
+function formatMetricValue(
+  value: number,
+  metric: CollectiveXSizeMetric,
+  point: CollectiveXSizePoint,
+) {
+  if (metric === 'bus-bandwidth' || metric === 'bandwidth') {
+    return `${formatCompact(value)} GB/s`;
+  }
+  const unit = isCollectiveSeries(point.series) ? 'us' : point.series.latencyUnit;
+  return `${value.toFixed(value >= 100 ? 1 : 3)} ${unit}`;
+}
+
+function rowTooltip(row: CollectiveXSizeRow, series: CollectiveXSizeSeries): string {
+  if (isCollectiveRow(row)) {
+    return [
+      `busbw=${row.busBandwidthGbps === null ? 'n/a' : `${formatCompact(row.busBandwidthGbps)} GB/s`}`,
+      row.algBandwidthGbps === null ? '' : `algbw=${formatCompact(row.algBandwidthGbps)} GB/s`,
+      row.latencyUs === null ? '' : `time=${row.latencyUs.toFixed(3)} us`,
+      row.outOfPlaceUs === null && row.inPlaceUs === null
+        ? ''
+        : `out-of-place=${row.outOfPlaceUs?.toFixed(3) ?? '?'} us · in-place=${row.inPlaceUs?.toFixed(3) ?? '?'} us`,
+      row.dtype ? `dtype=${row.dtype}` : '',
+      row.correct === false ? 'correctness check failed' : '',
+    ]
+      .filter(Boolean)
+      .join('
'); + } + return [ + `bandwidth=${row.bandwidthGbps === null ? 'n/a' : `${formatCompact(row.bandwidthGbps)} GB/s`}`, + row.latency === null + ? '' + : `latency=${row.latency.toFixed(3)} ${isCollectiveSeries(series) ? 'us' : series.latencyUnit}`, + row.sizeClass ? `size class=${escapeHtml(row.sizeClass)}` : '', + row.correct === false ? 'correctness check failed' : '', + ] + .filter(Boolean) + .join('
'); +} + +export function CollectiveXSizeChart({ + chartId, + series, + colors, + metric, + xScaleType, + yScaleType, + caption, + legendElement, + testId, +}: CollectiveXSizeChartProps) { + const points = useMemo( + () => + series.flatMap((item) => + item.rows.flatMap((row) => { + const y = yValue(row, metric); + if (row.sizeBytes <= 0 || y === null) return []; + if ( + (metric === 'bus-bandwidth' || metric === 'bandwidth' || yScaleType === 'log') && + y <= 0 + ) { + return []; + } + return [ + { + seriesId: item.id, + seriesLabel: item.label, + colorKey: item.colorKey, + valid: item.valid, + status: item.status, + note: isCollectiveSeries(item) ? null : item.note, + x: row.sizeBytes, + y, + metric, + row, + series: item, + }, + ]; + }), + ), + [metric, series, yScaleType], + ); + const seriesById = useMemo(() => new Map(series.map((item) => [item.id, item])), [series]); + const lines = useMemo(() => { + const result: Record = {}; + for (const point of points) { + (result[point.seriesId] ??= []).push({ x: point.x, y: point.y }); + } + for (const line of Object.values(result)) { + line.sort((a, b) => a.x - b.x); + } + return result; + }, [points]); + + const xDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.x), + xScaleType, + ), + [points, xScaleType], + ); + const yDomain = useMemo( + () => + paddedDomain( + points.map((point) => point.y), + yScaleType, + ), + [points, yScaleType], + ); + const label = metricLabel(metric, series); + + const noDataOverlay = + points.length === 0 ? ( +
+

No matching CollectiveX data.

+
+ ) : undefined; + + return ( + + chartId={chartId} + data={points} + height={460} + margin={{ top: 24, right: 20, bottom: 62, left: 78 }} + watermark="logo" + testId={testId} + grabCursor + instructions="Shift+Scroll to zoom · Drag to pan · Double-click to reset · Click a point to pin tooltip" + xScale={ + xScaleType === 'log' + ? { type: 'log', domain: xDomain, base: 10, nice: false } + : { type: 'linear', domain: xDomain, nice: true } + } + yScale={{ type: yScaleType, domain: yDomain, nice: yScaleType === 'linear' }} + xAxis={{ + label: `Transfer size (bytes)${xScaleType === 'log' ? ' (log)' : ''}`, + tickCount: 7, + tickValues: + xScaleType === 'log' + ? (scale) => sparseLogTicks(scale.domain().map(Number), 7) + : undefined, + tickFormat: (value) => formatBytes(Number(value)), + }} + yAxis={{ + label: `${label}${yScaleType === 'log' ? ' (log)' : ''}`, + tickCount: 5, + tickValues: + yScaleType === 'log' + ? (scale) => sparseLogTicks(scale.domain().map(Number), 5) + : undefined, + tickFormat: (value) => formatCompact(Number(value)), + }} + layers={[ + { + type: 'line', + key: 'collectivex-size-lines', + lines, + config: { + getColor: (key) => { + const item = seriesById.get(key); + if (!item?.valid) return '#666'; + return colors[item.colorKey] ?? '#888'; + }, + getStrokeDasharray: (key) => (seriesById.get(key)?.valid ? null : '3 4'), + strokeWidth: 2.25, + curve: d3.curveMonotoneX, + }, + }, + { + type: 'point', + key: 'collectivex-size-points', + data: points, + config: { + getCx: () => 0, + getCy: () => 0, + getX: (point) => point.x, + getY: (point) => point.y, + getColor: (point) => (point.valid ? (colors[point.colorKey] ?? 
'#888') : '#666'), + getRadius: () => 3.5, + stroke: 'var(--background)', + strokeWidth: 1, + keyFn: (point) => `${point.seriesId}-${point.x}`, + maxPoints: Infinity, + }, + }, + ]} + zoom={{ + enabled: true, + axes: 'both', + scaleExtent: [1, 20], + resetEventName: `collectivex_zoom_reset_${chartId}`, + }} + tooltip={{ + rulerType: 'crosshair', + attachToLayer: 1, + content: (point, isPinned) => { + const color = point.valid ? (colors[point.colorKey] ?? '#888') : '#666'; + const seriesMeta = isCollectiveSeries(point.series) + ? `${point.series.topologyClass}${point.series.transport ? ` · ${point.series.transport}` : ''} · world=${point.series.worldSize ?? '?'}` + : `${point.series.topologyClass}${point.series.transport ? ` · ${point.series.transport}` : ''}`; + return `
+ ${isPinned ? '
Click elsewhere to dismiss
' : ''} +
${escapeHtml(point.seriesLabel)}${point.valid ? '' : ' [invalid - excluded]'}
+
${escapeHtml(label)}: ${formatMetricValue(point.y, metric, point)}
+
size=${formatBytes(point.x)}
+
${rowTooltip(point.row, point.series)}
+
${escapeHtml(seriesMeta)} · status=${escapeHtml(point.status)}
+ ${point.note ? `
${escapeHtml(point.note)}
` : ''} +
`; + }, + getRulerX: (point, scale) => + (scale as d3.ScaleLinear | d3.ScaleLogarithmic)(point.x), + getRulerY: (point, scale) => scale(point.y), + onHoverStart: (selection) => { + selection.attr('r', 6); + }, + onHoverEnd: (selection) => { + selection.attr('r', 3.5); + }, + }} + transitionDuration={200} + legendElement={legendElement} + noDataOverlay={noDataOverlay} + caption={caption} + /> + ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXSizePanel.tsx b/packages/app/src/components/collectivex/CollectiveXSizePanel.tsx new file mode 100644 index 00000000..0a311b00 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXSizePanel.tsx @@ -0,0 +1,201 @@ +'use client'; + +import { useMemo, useState } from 'react'; + +import { Card } from '@/components/ui/card'; +import { SegmentedToggle, type SegmentedToggleOption } from '@/components/ui/segmented-toggle'; +import { track } from '@/lib/analytics'; + +import { CollectiveXSizeChart } from './CollectiveXSizeChart'; +import type { + CollectiveXCollectiveMetric, + CollectiveXCollectiveSeries, + CollectiveXScale, + CollectiveXTransferMetric, + CollectiveXTransferSeries, +} from './types'; + +type SizePanelSeries = CollectiveXCollectiveSeries | CollectiveXTransferSeries; +type SizePanelMode = 'collective' | 'transfer'; +type SizePanelMetric = CollectiveXCollectiveMetric | CollectiveXTransferMetric; + +interface CollectiveXSizePanelProps { + panelId: string; + title: string; + description: string; + emptyLabel: string; + footnote: string; + mode: SizePanelMode; + series: SizePanelSeries[]; + colors: Record; + defaultMetric?: SizePanelMetric; + defaultYScale?: CollectiveXScale; +} + +const COLLECTIVE_METRIC_OPTIONS: SegmentedToggleOption[] = [ + { value: 'bus-bandwidth', label: 'Bus bandwidth' }, + { value: 'latency', label: 'Latency' }, +]; + +const TRANSFER_METRIC_OPTIONS: SegmentedToggleOption[] = [ + { value: 'bandwidth', label: 'Bandwidth' }, + { value: 'latency', label: 'Latency' }, 
+]; + +const SCALE_OPTIONS: SegmentedToggleOption[] = [ + { value: 'log', label: 'Log' }, + { value: 'linear', label: 'Linear' }, +]; + +function SizeLegend({ + series, + colors, +}: { + series: SizePanelSeries[]; + colors: Record; +}) { + const entries = useMemo( + () => + [ + ...new Map(series.map((item) => [`${item.colorKey}|${item.label}`, item])).values(), + ].toSorted((a, b) => a.sku.localeCompare(b.sku) || a.label.localeCompare(b.label)), + [series], + ); + if (entries.length === 0) return null; + return ( +
+ {entries.map((item) => { + const color = item.valid ? (colors[item.colorKey] ?? '#888') : '#666'; + return ( + + + {item.label} + {item.valid ? '' : ' (invalid - excluded)'} + + ); + })} +
+ ); +} + +export function CollectiveXSizePanel({ + panelId, + title, + description, + emptyLabel, + footnote, + mode, + series, + colors, + defaultMetric, + defaultYScale, +}: CollectiveXSizePanelProps) { + const [metric, setMetric] = useState( + defaultMetric ?? (mode === 'collective' ? 'bus-bandwidth' : 'bandwidth'), + ); + const [xScaleType, setXScaleType] = useState('log'); + const [yScaleType, setYScaleType] = useState( + defaultYScale ?? (mode === 'collective' ? 'linear' : 'log'), + ); + const metricOptions = mode === 'collective' ? COLLECTIVE_METRIC_OPTIONS : TRANSFER_METRIC_OPTIONS; + + if (series.length === 0) { + return ( + +

{title}

+

+ No {emptyLabel} results yet. This tab populates automatically when matching CollectiveX + artifacts land in the generated static snapshot. +

+
+ ); + } + + return ( +
+ +

{title}

+

{description}

+
+
+

+ Metric +

+ []} + onValueChange={(value) => { + setMetric(value); + track('collectivex_size_metric_changed', { panelId, metric: value }); + }} + ariaLabel={`${title} metric`} + testId={`collectivex-${panelId}-metric-toggle`} + /> +
+
+

+ X scale +

+ { + setXScaleType(value); + track('collectivex_size_x_scale_changed', { panelId, scale: value }); + }} + ariaLabel={`${title} x scale`} + testId={`collectivex-${panelId}-x-scale-toggle`} + /> +
+
+

+ Y scale +

+ { + setYScaleType(value); + track('collectivex_size_y_scale_changed', { panelId, scale: value }); + }} + ariaLabel={`${title} y scale`} + testId={`collectivex-${panelId}-y-scale-toggle`} + /> +
+
+
+ + + +

{title}

+

+ One line per fixed configuration. Invalid runs are dashed and dimmed rather than + silently dropped. +

+ + } + /> + +

{footnote}

+
+
+ ); +} diff --git a/packages/app/src/components/collectivex/CollectiveXTables.tsx b/packages/app/src/components/collectivex/CollectiveXTables.tsx new file mode 100644 index 00000000..b827df12 --- /dev/null +++ b/packages/app/src/components/collectivex/CollectiveXTables.tsx @@ -0,0 +1,322 @@ +'use client'; + +import { ExternalLink } from 'lucide-react'; +import { useMemo } from 'react'; + +import { Badge } from '@/components/ui/badge'; +import { Card } from '@/components/ui/card'; +import { type DataTableColumn, DataTable } from '@/components/ui/data-table'; +import { track } from '@/lib/analytics'; +import { cn } from '@/lib/utils'; + +import { distributionSensitivity } from './data'; +import type { + CollectiveXFailure, + CollectiveXPublicationStatus, + CollectiveXSensitivity, + CollectiveXSeries, +} from './types'; + +const STATUS_CLASSES = { + official: 'border-emerald-600/40 bg-emerald-500/15 text-emerald-700 dark:text-emerald-300', + 'comparable-experimental': + 'border-amber-600/40 bg-amber-500/15 text-amber-700 dark:text-amber-300', + legacy: 'border-slate-500/40 bg-slate-500/15 text-slate-700 dark:text-slate-300', + diagnostic: 'border-violet-600/40 bg-violet-500/15 text-violet-700 dark:text-violet-300', + invalid: 'border-red-600/40 bg-red-500/15 text-red-700 dark:text-red-300', + failed: 'border-red-700/50 bg-red-700/20 text-red-800 dark:text-red-300', +} satisfies Record; + +function StatusBadge({ status }: { status: CollectiveXPublicationStatus }) { + return ( + + {status} + + ); +} + +function seriesConfig(series: CollectiveXSeries): string { + const extras = [ + series.shape.combineQuantMode === 'none' ? '' : `cq:${series.shape.combineQuantMode}`, + series.shape.activationProfile === 'normal' ? 
'' : series.shape.activationProfile, + ].filter(Boolean); + return [ + series.shape.dispatchDtype, + series.mode, + series.measurementContract.replace('-v1', ''), + ...extras, + ].join('/'); +} + +function RunLink({ id, url, event }: { id: string | null; url: string | null; event: string }) { + if (!id || !url) return -; + return ( + track(event, { runId: id })} + className="inline-flex items-center gap-1 text-muted-foreground hover:text-foreground" + > + {id} + + + ); +} + +export function CollectiveXSensitivityTable({ series }: { series: CollectiveXSeries[] }) { + const sensitivity = useMemo(() => distributionSensitivity(series), [series]); + const columns = useMemo[]>( + () => [ + { + header: 'SKU', + cell: (row) => row.sku.toUpperCase(), + sortValue: (row) => row.sku, + className: 'font-medium whitespace-nowrap', + }, + { + header: 'Backend', + cell: (row) => row.backend, + sortValue: (row) => row.backend, + }, + { + header: 'Phase', + cell: (row) => row.phase, + sortValue: (row) => row.phase, + }, + { + header: 'Config', + cell: (row) => `${row.dispatchDtype}·${row.mode}·${row.contract.replace('-v1', '')}`, + sortValue: (row) => `${row.dispatchDtype}|${row.mode}|${row.contract}`, + className: 'whitespace-nowrap', + }, + { + header: 'Headline p99 µs', + align: 'right', + cell: (row) => `${row.headlineP99RangeUs[0]}–${row.headlineP99RangeUs[1]}`, + sortValue: (row) => row.headlineP99RangeUs[1], + className: 'tabular-nums whitespace-nowrap', + }, + { + header: 'Worst dist @T', + cell: (row) => `${row.worstDistribution} @${row.worstAtTokensPerRank}`, + sortValue: (row) => row.worstDistribution, + className: 'whitespace-nowrap', + }, + { + header: 'Sensitivity', + align: 'right', + cell: (row) => ( + = 1.5 + ? STATUS_CLASSES.failed + : row.sensitivityRatio >= 1.2 + ? 
STATUS_CLASSES['comparable-experimental'] + : STATUS_CLASSES.official, + )} + > + {row.sensitivityRatio.toFixed(2)}× + + ), + sortValue: (row) => row.sensitivityRatio, + className: 'tabular-nums', + }, + { + header: 'EPLB zipf→+eplb', + align: 'right', + cell: (row) => + row.eplbZipfRatio === null || row.eplbRecoveredRatio === null + ? '-' + : `${row.eplbZipfRatio.toFixed(2)}→${row.eplbRecoveredRatio.toFixed(2)}×`, + sortValue: (row) => row.eplbRecoveredRatio ?? 0, + className: 'tabular-nums whitespace-nowrap', + }, + ], + [], + ); + + return ( + +

+ Distribution sensitivity{' '} + · not the headline +

+

+ Round-trip p99 for the worst stressor divided by uniform at matched tokens/rank. Uniform + remains the cross-hardware headline. +

+ +
+ ); +} + +export function CollectiveXFailureTable({ failures }: { failures: CollectiveXFailure[] }) { + const columns = useMemo[]>( + () => [ + { + header: 'SKU', + cell: (row) => row.sku.toUpperCase(), + sortValue: (row) => row.sku, + className: 'font-medium whitespace-nowrap', + }, + { + header: 'Backend', + cell: (row) => row.backend, + sortValue: (row) => row.backend, + }, + { + header: 'Phase', + cell: (row) => row.phase ?? '-', + sortValue: (row) => row.phase ?? '', + }, + { + header: 'Config', + cell: (row) => row.config, + sortValue: (row) => row.config, + className: 'whitespace-nowrap', + }, + { + header: 'Status', + cell: (row) => , + sortValue: (row) => row.publicationStatus, + }, + { + header: 'Reason / failure mode', + cell: (row) => row.reason, + sortValue: (row) => row.reason, + }, + { + header: 'RC', + align: 'right', + cell: (row) => row.returnCode ?? '-', + sortValue: (row) => row.returnCode ?? 0, + className: 'tabular-nums', + }, + { + header: 'Run', + cell: (row) => ( + + ), + sortValue: (row) => row.run.id ?? '', + className: 'whitespace-nowrap', + }, + ], + [], + ); + + return ( + +

Failed / quarantined cases

+

+ Failed cases and diagnostic or invalid runs are preserved here instead of disappearing from + the aggregation. +

+ {failures.length === 0 ? ( +

+ No failed or quarantined cases are present in this snapshot. +

+ ) : ( + + )} +
+ ); +} + +export function CollectiveXCoverageTable({ series }: { series: CollectiveXSeries[] }) { + const columns = useMemo[]>( + () => [ + { + header: 'SKU', + cell: (row) => row.sku.toUpperCase(), + sortValue: (row) => row.sku, + className: 'font-medium whitespace-nowrap', + }, + { + header: 'EP', + align: 'right', + cell: (row) => row.epSize ?? '-', + sortValue: (row) => row.epSize ?? 0, + className: 'tabular-nums', + }, + { + header: 'Config', + cell: (row) => seriesConfig(row), + sortValue: seriesConfig, + className: 'whitespace-nowrap', + }, + { + header: 'Phase', + cell: (row) => row.phase, + sortValue: (row) => row.phase, + }, + { + header: 'Routing', + cell: (row) => row.shape.routingLabel, + sortValue: (row) => row.shape.routingLabel, + className: 'whitespace-nowrap', + }, + { + header: 'Workload', + cell: (row) => + row.workloadId ? ( + {row.workloadId.slice(0, 18)} + ) : ( + wid=null + ), + sortValue: (row) => row.workloadId ?? '', + className: 'font-mono text-xs whitespace-nowrap', + }, + { + header: 'Status', + cell: (row) => , + sortValue: (row) => row.publicationStatus, + }, + { + header: 'Correct pts', + align: 'right', + cell: (row) => + `${row.rows.filter((measurement) => measurement.correct).length}/${row.rows.length}`, + sortValue: (row) => + row.rows.filter((measurement) => measurement.correct).length / row.rows.length, + className: 'tabular-nums whitespace-nowrap', + }, + { + header: 'Run', + cell: (row) => ( + + ), + sortValue: (row) => row.run.id ?? '', + className: 'whitespace-nowrap', + }, + ], + [], + ); + + return ( + +

Coverage

+

+ Machine-derived publication status for every retained configuration. A null workload ID is + non-canonical and cannot appear in the publishable or official views. +

+ +
+ ); +} diff --git a/packages/app/src/components/collectivex/axis.test.ts b/packages/app/src/components/collectivex/axis.test.ts new file mode 100644 index 00000000..a5b17156 --- /dev/null +++ b/packages/app/src/components/collectivex/axis.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from 'vitest'; + +import { sparseLogTicks } from './axis'; + +describe('sparseLogTicks', () => { + it('uses sparse 1-2-5 ticks for a typical latency domain', () => { + expect(sparseLogTicks([48, 225], 5)).toEqual([50, 100, 200]); + }); + + it('caps wide domains without restoring dense minor ticks', () => { + expect(sparseLogTicks([0.1, 1000], 5)).toEqual([0.1, 1, 10, 100, 1000]); + }); + + it('falls back to a small geometric set for a narrow domain', () => { + expect(sparseLogTicks([52, 65], 4)).toEqual([52, 58, 65]); + }); + + it('handles reversed and invalid domains', () => { + expect(sparseLogTicks([225, 48], 5)).toEqual([50, 100, 200]); + expect(sparseLogTicks([0, Number.NaN], 5)).toEqual([]); + }); +}); diff --git a/packages/app/src/components/collectivex/axis.ts b/packages/app/src/components/collectivex/axis.ts new file mode 100644 index 00000000..d3d21250 --- /dev/null +++ b/packages/app/src/components/collectivex/axis.ts @@ -0,0 +1,51 @@ +const LOG_MANTISSAS = [1, 2, 5] as const; + +function evenlySpaced(values: T[], count: number): T[] { + if (values.length <= count) return values; + if (count <= 1) return [values[Math.floor(values.length / 2)]]; + + const selected: T[] = []; + for (let index = 0; index < count; index += 1) { + selected.push(values[Math.round((index * (values.length - 1)) / (count - 1))]); + } + return selected; +} + +function fallbackLogTicks(min: number, max: number, maxTicks: number): number[] { + const count = Math.min(maxTicks, 3); + const logMin = Math.log(min); + const logSpan = Math.log(max) - logMin; + const ticks = Array.from({ length: count }, (_, index) => { + const value = Math.exp(logMin + (logSpan * index) / Math.max(1, count - 1)); 
+ return Number(value.toPrecision(2)); + }); + return [...new Set(ticks)].filter((value) => value >= min && value <= max); +} + +/** + * Generate sparse 1-2-5 log ticks instead of D3's dense minor-tick sequence. + * The callback is evaluated against the current visible domain, including zoom. + */ +export function sparseLogTicks(domain: number[], maxTicks: number): number[] { + const numericDomain = domain.filter((value) => Number.isFinite(value) && value > 0); + if (numericDomain.length < 2 || maxTicks <= 0) return []; + + const min = Math.min(...numericDomain); + const max = Math.max(...numericDomain); + if (min === max) return [min]; + + const ticks: number[] = []; + const firstExponent = Math.floor(Math.log10(min)); + const lastExponent = Math.ceil(Math.log10(max)); + + for (let exponent = firstExponent; exponent <= lastExponent; exponent += 1) { + const magnitude = 10 ** exponent; + for (const mantissa of LOG_MANTISSAS) { + const value = mantissa * magnitude; + if (value >= min && value <= max) ticks.push(value); + } + } + + const candidates = ticks.length >= 2 ? 
ticks : fallbackLogTicks(min, max, maxTicks); + return evenlySpaced(candidates, maxTicks); +} diff --git a/packages/app/src/components/collectivex/data.test.ts b/packages/app/src/components/collectivex/data.test.ts new file mode 100644 index 00000000..a0611b73 --- /dev/null +++ b/packages/app/src/components/collectivex/data.test.ts @@ -0,0 +1,721 @@ +import { describe, expect, it } from 'vitest'; + +import { + chartPoints, + collectiveXConfigIdentity, + collectiveXHeatmapCells, + collectiveXPrefillFloor, + collectiveXScalingPoints, + collectiveXSeriesLabel, + comparisonDifferences, + distributionSensitivity, + metricValue, + normalizeCollectiveXAllReduceFrameworkDocument, + normalizeCollectiveXArtifactDocument, + normalizeCollectiveXDocument, + normalizeCollectiveXNcclDocument, + normalizeCollectiveXTransferDocument, + publicationMatches, + selectLatestCollectiveXTransferSeries, + selectLatestCollectiveXRecords, +} from './data'; +import type { CollectiveXSeries } from './types'; + +function percentileSet(p50: number, p90: number, p99: number) { + return { p50, p90, p95: p90 + (p99 - p90) / 2, p99 }; +} + +function row( + tokensPerRank = 1, + dispatch = percentileSet(40, 45, 50), + combine = percentileSet(20, 25, 30), + roundtrip = percentileSet(55, 65, 80), +) { + return { + tokens_per_rank: tokensPerRank, + global_tokens: tokensPerRank * 8, + dispatch, + combine, + roundtrip, + isolated_sum: percentileSet( + dispatch.p50 + combine.p50, + dispatch.p90 + combine.p90, + dispatch.p99 + combine.p99, + ), + dispatch_logical_bytes: tokensPerRank * 8000, + combine_logical_bytes: tokensPerRank * 4000, + fanout_mean: 5.5, + recv_tokens_max: 7, + per_rank_dispatch_us: { slowest_rank: 3 }, + samples_pooled: 600, + trials: 3, + correct: true, + }; +} + +function rawDocument(overrides: Record = {}) { + return { + schema_version: 4, + family: 'moe', + generated_at: '2026-06-27T08:31:09Z', + status: 'valid', + publication_status: 'official', + comparison_key: 'abc123', + runner: 
'mi355x-amds_04', + backend: 'mori', + phase: 'decode', + mode: 'normal', + resource_mode: 'normalized', + comparison_class: 'standardized', + measurement_contract: 'layout-and-dispatch-v1', + topology_class: 'mi355x-xgmi', + transport: 'xgmi', + world_size: 8, + ep_size: 8, + shape: { + hidden: 7168, + topk: 8, + experts: 256, + experts_per_rank: 32, + dispatch_dtype: 'bf16', + routing: 'uniform', + activation_profile: 'normal', + quant: { + combine_quant_mode: 'none', + }, + }, + resource_profile: { + requested_fraction: 0.18, + achieved_fraction: 0.2, + configured_units: 52, + device_units: 304, + resource_class: 'normalized', + conformance_class: 'resource-constrained', + fixed_kernel: true, + pareto_eligible: true, + }, + placement: { + kind: 'packed', + nodes: 1, + gpus_per_node: 8, + scale_up_domain: 8, + }, + eplb: { + enabled: false, + }, + backend_provenance: { + mori_commit: 'deadbeef', + }, + reproduction: { + image_digest: 'sha256:image', + routing_step: 0, + uneven_tokens: 'none', + git_run: { + run_id: '28156624181', + source_sha: 'cad380a65a01254ab5a470402ef247b8745d4243', + repo: 'SemiAnalysisAI/InferenceX', + }, + }, + workload: { + source: 'canonical-serialized', + workload_id: 'set:8:fixture', + trace_signature: 'trace-1', + }, + routing_identity: { + consistent_across_ranks: true, + trace_signature: 'trace-1', + }, + rows: [row(1), row(2, percentileSet(50, 55, 60), percentileSet(25, 30, 35))], + ...overrides, + }; +} + +function normalized(overrides: Record = {}): CollectiveXSeries { + const result = normalizeCollectiveXDocument(rawDocument(overrides)); + if (!result) throw new Error('test fixture failed to normalize'); + return result; +} + +describe('normalizeCollectiveXDocument', () => { + it('normalizes measured v4 operations, workload identity, resources, and provenance', () => { + const series = normalized(); + + expect(series).toMatchObject({ + sku: 'mi355x', + runner: 'mi355x-amds_04', + backend: 'mori', + phase: 'decode', + 
publicationStatus: 'official', + resourceMode: 'normalized', + suite: 'resource-constrained', + routingConsistent: true, + traceSignature: 'trace-1', + workloadId: 'set:8:fixture', + backendVersion: 'deadbeef', + imageDigest: 'sha256:image', + repository: 'SemiAnalysisAI/InferenceX', + resourceProfile: { + requestedFraction: 0.18, + achievedFraction: 0.2, + configuredUnits: 52, + deviceUnits: 304, + fixedKernel: true, + paretoEligible: true, + }, + placement: { + kind: 'packed', + nodes: 1, + gpusPerNode: 8, + scaleUpDomain: 8, + }, + }); + expect(series.id).toMatch(/^cx-/u); + expect(series.label).toContain('MI355X EP8'); + expect(series.run).toMatchObject({ + id: '28156624181', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/28156624181', + }); + expect(series.rows[0]).toMatchObject({ + dispatch: { p50: 40, p90: 45, p95: 47.5, p99: 50 }, + combine: { p50: 20, p90: 25, p95: 27.5, p99: 30 }, + roundtrip: { p50: 55, p90: 65, p95: 72.5, p99: 80 }, + isolatedSum: { p50: 60, p90: 70, p95: 75, p99: 80 }, + roundtripMeasured: true, + stragglerRank: 3, + dispatchLogicalBytes: 8000, + combineLogicalBytes: 4000, + samplesPooled: 600, + trials: 3, + }); + }); + + it('labels DeepEP v2 as a distinct backend generation', () => { + const baseShape = rawDocument().shape as Record; + const v1 = rawDocument({ + backend: 'deepep', + backend_provenance: { deepep_version: '1.2.1' }, + }); + const v2 = rawDocument({ + backend: 'deepep', + backend_provenance: { deepep_version: '2.0.0+af9a040' }, + shape: { + ...baseShape, + kernel_gen: 'v2', + }, + }); + const series = normalized(v2); + + expect(series.label).toContain('deepep v2'); + expect(series.shape.kernelGeneration).toBe('v2'); + expect(collectiveXSeriesLabel({ ...series, label: 'MI355X EP8 · deepep · bf16' })).toContain( + 'deepep v2', + ); + expect( + chartPoints([series], 'dispatch', 'p99', 'tokens-per-rank', 'latency')[0]?.seriesLabel, + ).toContain('deepep v2'); + 
expect(collectiveXConfigIdentity(v1)).not.toBe(collectiveXConfigIdentity(v2)); + }); + + it('supports legacy flat rows without mislabeling the isolated sum as measured', () => { + const series = normalized({ + schema_version: 1, + publication_status: undefined, + resource_mode: undefined, + phase: 'prefill', + reproduction: undefined, + workload: undefined, + routing_identity: undefined, + rows: [ + { + tokens_per_rank: 128, + global_tokens: 1024, + dispatch_us_p50: 100, + dispatch_us_p99: 150, + combine_us_p50: 80, + combine_us_p99: 120, + serial_us_p50: 180, + serial_us_p99: 270, + dispatch_bytes: 2048, + correct: true, + }, + ], + }); + + expect(series.publicationStatus).toBe('legacy'); + expect(series.resourceMode).toBe('tuned'); + expect(series.rows[0].dispatch).toEqual({ p50: 100, p90: 100, p95: 100, p99: 150 }); + expect(series.rows[0].isolatedSum).toEqual({ + p50: 180, + p90: 180, + p95: 180, + p99: 270, + }); + expect(series.rows[0].roundtrip).toEqual(series.rows[0].isolatedSum); + expect(series.rows[0].roundtripMeasured).toBe(false); + }); + + it('uses workflow context when older documents lack run linkage', () => { + const series = normalizeCollectiveXDocument(rawDocument({ reproduction: undefined }), { + run: { + id: '123', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/123', + createdAt: '2026-06-24T00:00:00Z', + sha: 'feedface', + }, + }); + + expect(series?.run).toEqual({ + id: '123', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/123', + createdAt: '2026-06-24T00:00:00Z', + sha: 'feedface', + }); + }); + + it('does not treat a missing correctness flag as a pass', () => { + const document = rawDocument(); + const rows = document.rows as Record[]; + delete rows[0].correct; + expect(normalizeCollectiveXDocument(document)?.rows[0].correct).toBe(false); + }); + + it('rejects non-MoE documents, missing phases, and empty rows', () => { + expect(normalizeCollectiveXDocument(rawDocument({ family: 'nccl' }))).toBeNull(); 
+ expect(normalizeCollectiveXDocument(rawDocument({ phase: undefined }))).toBeNull(); + expect(normalizeCollectiveXDocument(rawDocument({ rows: [] }))).toBeNull(); + }); +}); + +describe('normalizeCollectiveX non-EP families', () => { + it('normalizes NCCL and allreduce framework documents into collective size sweeps', () => { + const nccl = normalizeCollectiveXNcclDocument({ + family: 'nccl', + generated_at: '2026-06-27T08:02:00Z', + status: 'valid', + runner: 'b300-nv_01', + op: 'all_reduce', + topology_class: 'b300-nvlink-island', + transport: 'nvlink', + world_size: 8, + rows: [ + { + size_bytes: 1024, + dtype: 'bf16', + busbw_gbps: 50, + out_of_place: { time_us: 11, algbw_gbps: 25, busbw_gbps: 45 }, + in_place: { time_us: 9, algbw_gbps: 30, busbw_gbps: 50 }, + correct: true, + }, + ], + }); + const allreduceFw = normalizeCollectiveXAllReduceFrameworkDocument({ + family: 'allreduce-fw', + generated_at: '2026-06-27T08:03:00Z', + status: 'valid', + runner: 'b300-nv_01', + transport: 'nvlink', + measurement_contract: 'framework-allreduce-v1', + groups: [ + { + impl: 'flashinfer-oneshot', + topology_class: 'b300-nvlink-island', + world_size: 8, + dtype: 'bf16', + rows: [ + { + size_bytes: 2048, + latency_us: 13, + algbw_gbps: 20, + busbw_gbps: 40, + correct: true, + }, + ], + }, + ], + }); + + expect(nccl).toHaveLength(1); + expect(nccl[0]).toMatchObject({ + op: 'all_reduce', + sku: 'b300', + topologyClass: 'b300-nvlink-island', + rows: [ + { + sizeBytes: 1024, + latencyUs: 9, + algBandwidthGbps: 30, + busBandwidthGbps: 50, + }, + ], + }); + expect(allreduceFw[0]).toMatchObject({ + op: 'all_reduce', + label: 'B300 · flashinfer-oneshot (fw-AR · ws8)', + rows: [{ sizeBytes: 2048, latencyUs: 13, busBandwidthGbps: 40 }], + }); + }); + + it('normalizes transfer families and keeps only the newest KV-cache cohort', () => { + const older = normalizeCollectiveXTransferDocument({ + family: 'kv-cache', + generated_at: '2026-06-27T08:00:00Z', + status: 'valid', + runner: 
'h100-dgxc_01', + transport: 'nvlink', + groups: [ + { + direction: 'dtoh', + layout: 'paged', + backend: 'old', + rows: [{ transfer_bytes: 1024, bandwidth_gb_s: 10, time_ms: 0.5, correct: true }], + }, + ], + }); + const newer = normalizeCollectiveXTransferDocument({ + family: 'kv-cache', + generated_at: '2026-06-27T09:00:00Z', + status: 'valid', + runner: 'h100-dgxc_01', + transport: 'nvlink', + groups: [ + { + direction: 'dtoh', + layout: 'contiguous', + backend: 'new', + rows: [{ transfer_bytes: 1024, bandwidth_gb_s: 20, time_ms: 0.25, correct: true }], + }, + ], + }); + const selected = selectLatestCollectiveXTransferSeries([...older, ...newer]); + + expect(selected).toHaveLength(1); + expect(selected[0]).toMatchObject({ + family: 'kv-cache', + label: 'H100 · dtoh · contiguous/new', + latencyUnit: 'ms', + rows: [{ sizeBytes: 1024, bandwidthGbps: 20, latency: 0.25 }], + }); + }); +}); + +describe('configuration identity and publication selection', () => { + it('keeps workload, routing, activation, quantization, and resource variants distinct', () => { + const base = rawDocument(); + const variants = [ + rawDocument({ resource_profile: { requested_fraction: 0.35 } }), + rawDocument({ workload: { workload_id: 'set:8:other', trace_signature: 'trace-2' } }), + rawDocument({ + shape: { + ...(base.shape as Record), + activation_profile: 'zeros', + }, + }), + rawDocument({ + shape: { + ...(base.shape as Record), + quant: { combine_quant_mode: 'fp8' }, + }, + }), + rawDocument({ + eplb: { enabled: true }, + reproduction: { + ...(base.reproduction as Record), + routing_step: 2, + uneven_tokens: 'rank-skew', + }, + }), + ]; + + const identities = new Set([base, ...variants].map(collectiveXConfigIdentity)); + expect(identities.size).toBe(variants.length + 1); + }); + + it('formats temporal and uneven routing labels like the generated source report', () => { + const series = normalized({ + reproduction: { + ...(rawDocument().reproduction as Record), + routing_step: 2, + 
uneven_tokens: 'empty-rank', + }, + }); + + expect(series.shape.routingLabel).toBe('uniform@s2·empty-rank'); + }); + + it('uses the source ranking: good before failed, then publication rank, then newest', () => { + const official = normalizeCollectiveXArtifactDocument( + rawDocument({ generated_at: '2026-06-25T00:00:00Z' }), + ); + const newerDiagnostic = normalizeCollectiveXArtifactDocument( + rawDocument({ + generated_at: '2026-06-27T00:00:00Z', + publication_status: 'diagnostic', + }), + ); + const failedOnly = normalizeCollectiveXArtifactDocument( + rawDocument({ + generated_at: '2026-06-28T00:00:00Z', + runner: 'h100-dgxc_01', + record_type: 'failed-case', + publication_status: 'failed', + status: 'failed', + rows: [], + failure: { failure_mode: 'timeout', return_code: 124 }, + }), + ); + const records = [official, newerDiagnostic, failedOnly].filter( + (record): record is NonNullable => record !== null, + ); + + const selected = selectLatestCollectiveXRecords(records); + + expect(selected.series).toHaveLength(1); + expect(selected.series[0].publicationStatus).toBe('official'); + expect(selected.failures).toHaveLength(1); + expect(selected.failures[0]).toMatchObject({ + sku: 'h100', + reason: 'timeout', + returnCode: 124, + }); + }); + + it('merges same-config series that differ only by token ladder (trace signature)', () => { + // Two suites measured the SAME config over different token ladders: the trace signature + // (the trailing identity component) differs, but the config is one benchmark. The chart + // must draw ONE line whose rows are the union at distinct token counts, with the best- + // ranked (publication, then newest) doc winning overlapping token counts and provenance. 
+ const standard = normalizeCollectiveXArtifactDocument( + rawDocument({ generated_at: '2026-07-01T00:00:00Z' }), + ); + const ladderVariant = normalizeCollectiveXArtifactDocument( + rawDocument({ + generated_at: '2026-07-02T00:00:00Z', + publication_status: 'comparable-experimental', + workload: { + source: 'canonical-serialized', + workload_id: 'set:8:fixture-b', + trace_signature: 'trace-2', + }, + rows: [row(2, percentileSet(500, 550, 600)), row(8), row(64)], + }), + ); + const records = [standard, ladderVariant].filter( + (record): record is NonNullable => record !== null, + ); + + const selected = selectLatestCollectiveXRecords(records); + + expect(selected.series).toHaveLength(1); + const merged = selected.series[0]; + // official outranks comparable-experimental: its provenance and its overlapping T=2 row win. + expect(merged.publicationStatus).toBe('official'); + expect(merged.rows.map((item) => item.tokensPerRank)).toEqual([1, 2, 8, 64]); + expect(merged.rows[1].dispatch.p50).toBe(50); + }); + + it('keeps genuinely different configs unmerged', () => { + const base = normalizeCollectiveXArtifactDocument(rawDocument()); + const otherRouting = normalizeCollectiveXArtifactDocument( + rawDocument({ + shape: { + ...(rawDocument().shape as Record), + routing: 'zipf', + }, + workload: { + source: 'canonical-serialized', + workload_id: 'set:8:zipf', + trace_signature: 'trace-3', + }, + }), + ); + const records = [base, otherRouting].filter( + (record): record is NonNullable => record !== null, + ); + + expect(selectLatestCollectiveXRecords(records).series).toHaveLength(2); + }); + + it('requires canonical workload identity in publishable and official views', () => { + const official = normalized(); + const noWorkload = { ...official, workloadId: null }; + const diagnostic = { ...official, publicationStatus: 'diagnostic' as const }; + + expect(publicationMatches(official, 'publishable')).toBe(true); + expect(publicationMatches(official, 'official')).toBe(true); + 
expect(publicationMatches(official, 'official-headline', '7168/8/256')).toBe(true); + expect(publicationMatches(official, 'official-headline', '6144/8/256')).toBe(false); + expect(publicationMatches(noWorkload, 'publishable')).toBe(false); + expect(publicationMatches(noWorkload, 'official-headline', '7168/8/256')).toBe(false); + expect(publicationMatches(diagnostic, 'publishable')).toBe(false); + expect(publicationMatches(diagnostic, 'all')).toBe(true); + }); +}); + +describe('chart and report transforms', () => { + it('calculates all four operation metrics without conflating round trip and isolated sum', () => { + const measurement = normalized().rows[0]; + + expect(metricValue(measurement, 'dispatch', 'p50', 'latency')).toBe(40); + expect(metricValue(measurement, 'combine', 'p50', 'tokens-per-second')).toBeCloseTo(400_000); + expect(metricValue(measurement, 'roundtrip', 'p50', 'latency')).toBe(55); + expect(metricValue(measurement, 'isolated-sum', 'p50', 'payload-rate')).toBe(0.2); + }); + + it('uses only measured prefill rows at or above the shared prefill floor', () => { + const prefill = normalized({ + phase: 'prefill', + rows: [row(1), row(128)], + }); + const deepEpPrefill = normalized({ + runner: 'h100-dgxc_01', + backend: 'deepep', + phase: 'prefill', + rows: [row(128), row(512)], + }); + const floor = collectiveXPrefillFloor([prefill, deepEpPrefill]); + const points = chartPoints([prefill], 'dispatch', 'p99', 'tokens-per-rank', 'latency', floor); + + expect(floor).toBe(128); + expect(points.map((point) => point.x)).toEqual([128]); + }); + + it('builds separate weak and strong scaling contracts from matched EP cohorts', () => { + const ep4 = normalized({ + runner: 'h100-dgxc_01', + backend: 'deepep', + resource_mode: 'tuned', + ep_size: 4, + world_size: 4, + rows: [ + { ...row(64), global_tokens: 256 }, + { ...row(128), global_tokens: 512 }, + ], + }); + const ep8 = normalized({ + runner: 'h100-dgxc_02', + backend: 'deepep', + resource_mode: 'tuned', + 
ep_size: 8, + world_size: 8, + rows: [{ ...row(64), global_tokens: 512 }], + }); + + expect(collectiveXScalingPoints([ep4, ep8], 'weak').map((point) => point.epSize)).toEqual([ + 4, 8, + ]); + expect(collectiveXScalingPoints([ep4, ep8], 'strong').map((point) => point.epSize)).toEqual([ + 4, 8, + ]); + }); + + it('builds heatmap cells from the fastest matching measured point', () => { + const slower = normalized({ rows: [row(8, percentileSet(50, 55, 60))] }); + const faster = normalized({ + generated_at: '2026-06-28T00:00:00Z', + workload: { workload_id: 'set:8:other', trace_signature: 'trace-2' }, + rows: [row(8, percentileSet(30, 35, 40))], + }); + + expect(collectiveXHeatmapCells([slower, faster], 'ep')).toEqual([ + expect.objectContaining({ row: 'EP8', tokensPerRank: 8, latencyUs: 30 }), + ]); + }); + + it('reports distribution sensitivity against uniform using measured round trip p99', () => { + const routingSeries = (routing: string, p99: number, options: { eplb?: boolean } = {}) => + normalized({ + shape: { + ...(rawDocument().shape as Record), + routing, + }, + eplb: { enabled: options.eplb ?? false }, + workload: { + workload_id: `set:8:${routing}-${options.eplb ? 'eplb' : 'plain'}`, + trace_signature: `trace-${routing}-${options.eplb ? 
'eplb' : 'plain'}`, + }, + rows: [ + row(8, percentileSet(40, 45, 50), percentileSet(20, 25, 30), percentileSet(60, 70, p99)), + ], + }); + const sensitivity = distributionSensitivity([ + routingSeries('uniform', 100), + routingSeries('zipf', 200), + routingSeries('zipf', 120, { eplb: true }), + routingSeries('balanced-rank-local', 50), + ]); + + expect(sensitivity).toEqual([ + expect.objectContaining({ + sensitivityRatio: 2, + worstDistribution: 'zipf', + worstAtTokensPerRank: 8, + bestCaseRatio: 0.5, + eplbZipfRatio: 2, + eplbRecoveredRatio: 1.2, + }), + ]); + }); + + it('matches the source report overwrite order for repeated routing/token points', () => { + const repeated = ( + runner: string, + routing: string, + p99: number, + generatedAt: string, + ): CollectiveXSeries => + normalized({ + runner, + generated_at: generatedAt, + shape: { + ...(rawDocument().shape as Record), + routing, + }, + workload: { + workload_id: `set:8:${runner}-${routing}`, + trace_signature: `trace-${runner}-${routing}`, + }, + rows: [ + row(8, percentileSet(40, 45, 50), percentileSet(20, 25, 30), percentileSet(60, 70, p99)), + ], + }); + const sensitivity = distributionSensitivity([ + repeated('h100-dgxc_03', 'zipf', 400, '2026-06-27T00:03:00Z'), + repeated('h100-dgxc_02', 'uniform', 200, '2026-06-27T00:02:00Z'), + repeated('h100-dgxc_01', 'uniform', 100, '2026-06-27T00:01:00Z'), + ]); + + expect(sensitivity).toEqual([ + expect.objectContaining({ + headlineP99RangeUs: [200, 200], + sensitivityRatio: 2, + worstDistribution: 'zipf', + }), + ]); + }); + + it('reports fields that make selected lines non-comparable', () => { + const base = normalized(); + const different = normalized({ + runner: 'h100-dgxc_01', + resource_mode: 'tuned', + measurement_contract: 'cached-layout-comm-only-v1', + ep_size: 4, + world_size: 4, + topology_class: 'h100-nvlink-island', + reproduction: { + ...(rawDocument().reproduction as Record), + git_run: { source_sha: 'other-sha' }, + }, + }); + + 
import type {
  CollectiveXChartPoint,
  CollectiveXBudgetDecisionRow,
  CollectiveXCollectiveRow,
  CollectiveXCollectiveSeries,
  CollectiveXDecisionSummary,
  CollectiveXFailure,
  CollectiveXHeatmapCell,
  CollectiveXHeatmapDimension,
  CollectiveXOperation,
  CollectiveXPercentile,
  CollectiveXPercentiles,
  CollectiveXPhase,
  CollectiveXPublicationFilter,
  CollectiveXPublicationStatus,
  CollectiveXRecommendationRow,
  CollectiveXRow,
  CollectiveXRunSource,
  CollectiveXScalingKind,
  CollectiveXScalingPoint,
  CollectiveXShape,
  CollectiveXSummaryCard,
  CollectiveXSensitivity,
  CollectiveXSeries,
  CollectiveXTransferFamily,
  CollectiveXTransferLatencyUnit,
  CollectiveXTransferRow,
  CollectiveXTransferSeries,
  CollectiveXXAxis,
  CollectiveXYAxis,
} from './types';

/** Optional workflow-run metadata a caller can pass alongside a raw document. */
interface NormalizeContext {
  run?: Partial<CollectiveXRunSource>;
}

/** One normalized artifact: its config identity plus the series and/or failure derived from it. */
export interface CollectiveXArtifactRecord {
  identity: string;
  generatedAt: string;
  publicationStatus: CollectiveXPublicationStatus;
  isFailed: boolean;
  series: CollectiveXSeries | null;
  failure: CollectiveXFailure | null;
}

/** Type guard: `value` is a non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Returns `value` when it is a string, otherwise `fallback`. */
function stringValue(value: unknown, fallback = ''): string {
  return typeof value === 'string' ? value : fallback;
}

/** Returns `value` when it is a finite number (rejects NaN/±Infinity and non-numbers), else null. */
function finiteNumber(value: unknown): number | null {
  return typeof value === 'number' && Number.isFinite(value) ? value : null;
}

/** Returns `value` when it is a finite number strictly greater than zero, else null. */
function positiveNumber(value: unknown): number | null {
  const number = finiteNumber(value);
  return number !== null && number > 0 ? number : null;
}

/** Truncates a finite number toward zero; anything else yields `fallback`. */
function integerValue(value: unknown, fallback = 0): number {
  const number = finiteNumber(value);
  return number === null ? fallback : Math.trunc(number);
}

/** Returns `value` when it is a boolean, else null (keeps "absent" distinct from `false`). */
function nullableBoolean(value: unknown): boolean | null {
  return typeof value === 'boolean' ? value : null;
}

/** Strict flag: only the literal `true` counts; missing or truthy non-booleans are `false`. */
function booleanValue(value: unknown): boolean {
  return value === true;
}

/** Maps a raw `publication_status` onto the known statuses; anything unrecognized is `'legacy'`. */
function publicationStatus(value: unknown): CollectiveXPublicationStatus {
  switch (value) {
    case 'official':
    case 'comparable-experimental':
    case 'diagnostic':
    case 'invalid':
    case 'failed': {
      return value;
    }
    default: {
      return 'legacy';
    }
  }
}

/**
 * Reads a percentile set from a row.
 *
 * The nested object `row[nestedKey]` (`{ p50, p90, p95, p99 }`) is preferred; each missing
 * percentile falls back to the flat field `${flatKey}_us_<percentile>`.
 *
 * @param row - Raw row object.
 * @param nestedKey - Key of the nested percentile object.
 * @param flatKey - Prefix of the flat fields; defaults to `nestedKey`.
 * @returns The percentile set, or null when no p50 is available in either form.
 */
function readPercentiles(
  row: Record<string, unknown>,
  nestedKey: string,
  flatKey = nestedKey,
): CollectiveXPercentiles | null {
  // Read through a local so the narrowing does not rely on indexed-access narrowing.
  const candidate = row[nestedKey];
  const nested = isRecord(candidate) ? candidate : null;
  const p50 = finiteNumber(nested?.p50) ?? finiteNumber(row[`${flatKey}_us_p50`]);
  if (p50 === null) return null;

  // Every missing percentile inherits the nearest lower one, so the set never decreases
  // because of a fallback. (p99 previously fell back to p50, which could put it below a
  // recorded p90/p95.)
  const p90 = finiteNumber(nested?.p90) ?? finiteNumber(row[`${flatKey}_us_p90`]) ?? p50;
  const p95 = finiteNumber(nested?.p95) ?? finiteNumber(row[`${flatKey}_us_p95`]) ?? p90;
  const p99 = finiteNumber(nested?.p99) ?? finiteNumber(row[`${flatKey}_us_p99`]) ?? p95;
  return { p50, p90, p95, p99 };
}

/** Adds two percentile sets percentile-by-percentile. */
function addPercentiles(
  left: CollectiveXPercentiles,
  right: CollectiveXPercentiles,
): CollectiveXPercentiles {
  return {
    p50: left.p50 + right.p50,
    p90: left.p90 + right.p90,
    p95: left.p95 + right.p95,
    p99: left.p99 + right.p99,
  };
}

/**
 * Normalizes one raw measurement row.
 *
 * @param raw - Candidate row; anything that is not a plain object is rejected.
 * @param epSize - Used to derive `globalTokens` (`tokens_per_rank * epSize`) when the row
 *   has no positive `global_tokens`.
 * @returns The normalized row, or null when `tokens_per_rank` is not positive, global tokens
 *   cannot be determined, or the dispatch/combine percentiles are missing.
 */
function normalizeRow(raw: unknown, epSize: number | null): CollectiveXRow | null {
  if (!isRecord(raw)) return null;

  const tokensPerRank = positiveNumber(raw.tokens_per_rank);
  const globalTokens =
    positiveNumber(raw.global_tokens) ??
    (tokensPerRank !== null && epSize !== null ? tokensPerRank * epSize : null);
  if (tokensPerRank === null || globalTokens === null) return null;

  const dispatch = readPercentiles(raw, 'dispatch');
  const combine = readPercentiles(raw, 'combine');
  if (!dispatch || !combine) return null;

  // Isolated sum: recorded value (nested `isolated_sum` or flat `serial_us_*`), else dispatch + combine.
  const recordedIsolatedSum = readPercentiles(raw, 'isolated_sum', 'serial');
  const isolatedSum = recordedIsolatedSum ?? addPercentiles(dispatch, combine);
  // Round trip falls back to the isolated sum; `roundtripMeasured` records which one it is.
  const measuredRoundtrip = readPercentiles(raw, 'roundtrip');
  const roundtrip = measuredRoundtrip ?? isolatedSum;

  const routedBytes =
    finiteNumber(raw.dispatch_logical_bytes) ??
    finiteNumber(raw.routed_bytes_total) ??
    finiteNumber(raw.dispatch_bytes) ??
    0;
  const combineBytes =
    finiteNumber(raw.combine_logical_bytes) ?? finiteNumber(raw.combine_bytes_total) ?? 0;
  const perRankDispatch = isRecord(raw.per_rank_dispatch_us) ? raw.per_rank_dispatch_us : {};

  return {
    tokensPerRank,
    globalTokens,
    dispatch,
    combine,
    roundtrip,
    isolatedSum,
    roundtripMeasured: measuredRoundtrip !== null,
    // Negative byte counts are clamped to zero.
    dispatchLogicalBytes: Math.max(0, routedBytes),
    combineLogicalBytes: Math.max(0, combineBytes),
    fanoutMean: finiteNumber(raw.fanout_mean),
    recvTokensMax: finiteNumber(raw.recv_tokens_max) ?? finiteNumber(raw.recv_tokens),
    stragglerRank: finiteNumber(perRankDispatch.slowest_rank),
    // A missing correctness flag is never treated as a pass.
    correct: booleanValue(raw.correct),
    samplesPooled: finiteNumber(raw.samples_pooled),
    trials: finiteNumber(raw.trials),
  };
}

/**
 * Derives the SKU from a runner name: the text before the first `_`, then before the first
 * `-`, lower-cased (e.g. `h100-dgxc_01` -> `h100`). An empty result becomes `'unknown'`.
 */
function skuFromRunner(runner: string): string {
  return runner.split('_')[0]?.split('-')[0]?.toLowerCase() || 'unknown';
}
/**
 * Classifies the kernel generation implied by a backend and its version string.
 *
 * Only `deepep` has generations; every other backend yields `'n-a'`. A deepep version that is
 * `2`, starts with `2.`, or contains the word `v2` (case-insensitive) is `'v2'`; otherwise `'v1'`.
 */
function backendKernelGeneration(backend: string, version: string | null): string {
  if (backend !== 'deepep') return 'n-a';
  if (version && (/^2(?:\.|$)/u.test(version) || /\bv2\b/iu.test(version))) return 'v2';
  return 'v1';
}

/**
 * Display name for a backend: `"<backend> v2"` for generation `v2`, otherwise the bare name.
 *
 * @param kernelGeneration - Explicit generation; when empty/absent it is derived from
 *   `backend` and `version`.
 */
export function collectiveXBackendLabel(
  backend: string,
  version: string | null,
  kernelGeneration?: string | null,
): string {
  const generation = kernelGeneration || backendKernelGeneration(backend, version);
  return generation === 'v2' ? `${backend} v2` : backend;
}

/**
 * Returns the series label with its `· <backend> ·` segment upgraded to the generation-aware
 * backend label. The label is returned unchanged when the backend label needs no suffix or
 * the segment is not present.
 */
export function collectiveXSeriesLabel(
  series: Pick<CollectiveXSeries, 'backend' | 'backendVersion' | 'label'> & {
    shape?: Pick<CollectiveXShape, 'kernelGeneration'>;
  },
): string {
  const backendLabel = collectiveXBackendLabel(
    series.backend,
    series.backendVersion,
    series.shape?.kernelGeneration,
  );
  if (backendLabel === series.backend) return series.label;

  const backendSegment = `· ${series.backend} ·`;
  if (!series.label.includes(backendSegment)) return series.label;
  // Replacer function: a plain replacement string would expand `$&`, `$1`, ... sequences
  // if the backend name happened to contain them.
  return series.label.replace(backendSegment, () => `· ${backendLabel} ·`);
}

/**
 * 32-bit FNV-1a style hash over the string's code points, rendered as 8 lowercase hex digits.
 * Deterministic, so it can be used for stable ids and color keys.
 */
function stableHash(value: string): string {
  let hash = 2166136261;
  for (const character of value) {
    hash ^= character.codePointAt(0) ?? 0;
    hash = Math.imul(hash, 16777619);
  }
  // Math.imul yields a signed 32-bit integer; `>>> 0` reinterprets it as unsigned.
  return (hash >>> 0).toString(16).padStart(8, '0');
}

/** Display names keyed by `hidden/topk/experts`. */
const MODEL_NAMES = new Map<string, string>([
  ['7168/8/256', 'DeepSeek-V3/V4'],
  ['6144/8/256', 'MiniMax-M3'],
  ['7168/8/384', 'Kimi-K2'],
  ['4096/8/128', 'Qwen3.5'],
  ['7168/8/288', 'DeepSeek-V3 (EPLB physical)'],
]);
'?'}`; +} + +function modelName(shape: { hidden: number | null; topk: number | null; experts: number | null }) { + const key = collectiveXShapeKey(shape); + return MODEL_NAMES.get(key) ?? `shape ${key}`; +} + +function runSource( + raw: Record, + generatedAt: string, + context: NormalizeContext, +): CollectiveXRunSource { + const reproduction = isRecord(raw.reproduction) ? raw.reproduction : {}; + const gitRun = isRecord(reproduction.git_run) ? reproduction.git_run : {}; + const runId = context.run?.id || stringValue(gitRun.run_id) || null; + + return { + id: runId, + url: + context.run?.url || + (runId + ? `https://github.com/SemiAnalysisAI/InferenceX/actions/runs/${encodeURIComponent(runId)}` + : null), + createdAt: context.run?.createdAt || generatedAt || null, + sha: context.run?.sha || stringValue(gitRun.source_sha) || null, + }; +} + +function rawConfig(raw: Record) { + const runner = stringValue(raw.runner, 'unknown'); + const shape = isRecord(raw.shape) ? raw.shape : {}; + const quant = isRecord(shape.quant) ? shape.quant : {}; + const reproduction = isRecord(raw.reproduction) ? raw.reproduction : {}; + const resourceProfile = isRecord(raw.resource_profile) ? raw.resource_profile : {}; + const workload = isRecord(raw.workload) ? raw.workload : {}; + const routingIdentity = isRecord(raw.routing_identity) ? raw.routing_identity : {}; + const eplb = isRecord(raw.eplb) ? raw.eplb : {}; + const placement = isRecord(raw.placement) ? raw.placement : {}; + const backend = stringValue(raw.backend, 'unknown'); + const version = backendVersion(raw); + const kernelGeneration = + stringValue(shape.kernel_gen) || backendKernelGeneration(backend, version); + const phase: CollectiveXPhase | null = + raw.phase === 'decode' || raw.phase === 'prefill' ? raw.phase : null; + const routing = stringValue(shape.routing, 'unknown'); + const eplbEnabled = booleanValue(eplb.enabled) || booleanValue(shape.eplb); + const routingStep = integerValue(reproduction.routing_step ?? 
shape.routing_step); + const unevenTokens = + stringValue(reproduction.uneven_tokens) || stringValue(shape.uneven_tokens) || 'none'; + const routingLabel = `${routing}${eplbEnabled ? '+eplb' : ''}${ + routingStep === 0 ? '' : `@s${routingStep}` + }${unevenTokens === 'none' ? '' : `·${unevenTokens}`}`; + const traceSignature = + stringValue(workload.trace_signature) || stringValue(routingIdentity.trace_signature) || null; + + return { + runner, + sku: skuFromRunner(runner), + backend, + backendVersion: version, + backendKernelGeneration: kernelGeneration, + phase, + mode: stringValue(raw.mode, 'normal'), + resourceMode: stringValue(raw.resource_mode) || 'tuned', + comparisonClass: stringValue(raw.comparison_class, 'standardized'), + measurementContract: stringValue(raw.measurement_contract, 'unknown'), + topologyClass: stringValue(raw.topology_class, 'unknown'), + transport: stringValue(raw.transport, 'unknown'), + worldSize: positiveNumber(raw.world_size), + epSize: positiveNumber(raw.ep_size) ?? 
positiveNumber(raw.world_size), + hidden: positiveNumber(shape.hidden), + topk: positiveNumber(shape.topk), + experts: positiveNumber(shape.experts), + model: modelName({ + hidden: positiveNumber(shape.hidden), + topk: positiveNumber(shape.topk), + experts: positiveNumber(shape.experts), + }), + dispatchDtype: stringValue(shape.dispatch_dtype, 'unknown'), + routing, + routingLabel, + routingStep, + unevenTokens, + eplbEnabled, + activationProfile: stringValue(shape.activation_profile, 'normal'), + combineQuantMode: stringValue(quant.combine_quant_mode, 'none'), + requestedFraction: finiteNumber(resourceProfile.requested_fraction), + achievedFraction: finiteNumber(resourceProfile.achieved_fraction), + configuredUnits: finiteNumber(resourceProfile.configured_units), + deviceUnits: finiteNumber(resourceProfile.device_units), + resourceClass: stringValue(resourceProfile.resource_class, 'unknown'), + conformanceClass: stringValue(resourceProfile.conformance_class, 'unknown'), + fixedKernel: booleanValue(resourceProfile.fixed_kernel), + paretoEligible: booleanValue(resourceProfile.pareto_eligible), + placementKind: stringValue(placement.kind, 'packed'), + placementNodes: positiveNumber(placement.nodes), + placementGpusPerNode: positiveNumber(placement.gpus_per_node), + scaleUpDomain: positiveNumber(placement.scale_up_domain), + traceSignature, + workloadId: stringValue(workload.workload_id) || null, + workloadSource: stringValue(workload.source) || null, + routingConsistent: nullableBoolean(routingIdentity.consistent_across_ranks), + eplbImbalanceBefore: finiteNumber(eplb.imbalance_before), + eplbImbalanceAfter: finiteNumber(eplb.imbalance_after), + imageDigest: stringValue(reproduction.image_digest) || null, + repository: + stringValue(isRecord(reproduction.git_run) ? 
/**
 * Builds the `|`-joined identity string for a raw document's configuration.
 *
 * Components come from `rawConfig(raw)`; missing numeric/nullable parts contribute an empty
 * segment. The trace signature is the trailing component.
 */
export function collectiveXConfigIdentity(raw: Record<string, unknown>): string {
  const config = rawConfig(raw);
  return [
    config.sku,
    config.backend,
    config.backendKernelGeneration,
    config.hidden ?? '',
    config.topk ?? '',
    config.experts ?? '',
    config.dispatchDtype,
    config.mode,
    config.measurementContract,
    config.routingLabel,
    config.epSize ?? '',
    config.phase ?? '',
    config.activationProfile,
    config.combineQuantMode,
    config.unevenTokens,
    config.routingStep,
    config.resourceMode,
    config.requestedFraction ?? '',
    config.traceSignature ?? '',
  ].join('|');
}

/**
 * Color key for a configuration: `<sku>_<hash>`, where the hash covers only the listed
 * fields — a narrower set than the full config identity.
 */
function colorKey(config: ReturnType<typeof rawConfig>): string {
  return `${config.sku}_${stableHash(
    [
      config.sku,
      config.backend,
      config.backendKernelGeneration,
      config.dispatchDtype,
      config.mode,
      config.resourceMode,
      config.measurementContract,
      config.epSize ?? '',
      config.routingLabel,
    ].join('|'),
  )}`;
}

/**
 * Human-readable series label: `<SKU> EP<n> · <backend label> · <dtype>`, followed by optional
 * mode/resource/contract suffixes and, for non-uniform routing, ` · <routing label>`.
 */
function buildLabel(config: ReturnType<typeof rawConfig>): string {
  const suffixes = [
    config.mode === 'll' ? 'LL' : '',
    config.resourceMode === 'normalized' ? '(norm)' : '',
    config.resourceMode === 'default' ? '(def)' : '',
    config.measurementContract === 'cached-layout-comm-only-v1' ? '[cl]' : '',
  ].filter(Boolean);
  // Uniform routing is the baseline and is left out of the label.
  const routing = config.routingLabel === 'uniform' ? '' : ` · ${config.routingLabel}`;
  return `${config.sku.toUpperCase()} EP${config.epSize ?? '?'} · ${collectiveXBackendLabel(
    config.backend,
    config.backendVersion,
    config.backendKernelGeneration,
  )} · ${config.dispatchDtype}${suffixes.length > 0 ? ` ${suffixes.join(' ')}` : ''}${routing}`;
}
'resource-constrained' : 'backend-default', + comparisonClass: config.comparisonClass, + measurementContract: config.measurementContract, + topologyClass: config.topologyClass, + transport: config.transport, + worldSize: config.worldSize, + epSize: config.epSize, + label: buildLabel(config), + model: config.model, + shape: { + hidden: config.hidden, + topk: config.topk, + experts: config.experts, + routing: config.routing, + routingLabel: config.routingLabel, + routingStep: config.routingStep, + unevenTokens: config.unevenTokens, + eplbEnabled: config.eplbEnabled, + dispatchDtype: config.dispatchDtype, + kernelGeneration: config.backendKernelGeneration, + activationProfile: config.activationProfile, + combineQuantMode: config.combineQuantMode, + }, + resourceProfile: { + requestedFraction: config.requestedFraction, + achievedFraction: config.achievedFraction, + configuredUnits: config.configuredUnits, + deviceUnits: config.deviceUnits, + resourceClass: config.resourceClass, + conformanceClass: config.conformanceClass, + fixedKernel: config.fixedKernel, + paretoEligible: config.paretoEligible, + }, + placement: { + kind: config.placementKind, + nodes: config.placementNodes, + gpusPerNode: config.placementGpusPerNode, + scaleUpDomain: config.scaleUpDomain, + }, + routingConsistent: config.routingConsistent, + traceSignature: config.traceSignature, + workloadId: config.workloadId, + workloadSource: config.workloadSource, + eplbImbalanceBefore: config.eplbImbalanceBefore, + eplbImbalanceAfter: config.eplbImbalanceAfter, + backendVersion: config.backendVersion, + imageDigest: config.imageDigest, + repository: config.repository, + run: runSource(raw, generatedAt, context), + rows, + }; +} + +export function normalizeCollectiveXFailure( + raw: unknown, + context: NormalizeContext = {}, +): CollectiveXFailure | null { + if (!isRecord(raw) || raw.family !== 'moe') return null; + + const status = publicationStatus(raw.publication_status); + const recordType = 
/**
 * Normalizes a raw MoE artifact into a record carrying both its series and failure views.
 *
 * @param raw - Parsed artifact document.
 * @param context - Optional run metadata forwarded to the series/failure normalizers.
 * @returns Null when `raw` is not a `family: 'moe'` object or yields neither a series nor a
 *   failure. `isFailed` is true for `record_type: 'failed-case'`, `status: 'failed'`, or a
 *   document with no rows.
 */
export function normalizeCollectiveXArtifactDocument(
  raw: unknown,
  context: NormalizeContext = {},
): CollectiveXArtifactRecord | null {
  if (!isRecord(raw) || raw.family !== 'moe') return null;

  const series = normalizeCollectiveXDocument(raw, context);
  const failure = normalizeCollectiveXFailure(raw, context);
  if (!series && !failure) return null;

  const rows = Array.isArray(raw.rows) ? raw.rows : [];
  return {
    identity: collectiveXConfigIdentity(raw),
    // Shared helper instead of a duplicated copy of the same fallback chain.
    generatedAt: generatedAtValue(raw, context),
    publicationStatus: publicationStatus(raw.publication_status),
    isFailed:
      stringValue(raw.record_type) === 'failed-case' ||
      raw.status === 'failed' ||
      rows.length === 0,
    series,
    failure,
  };
}

/**
 * Timestamp for a document: its own `generated_at`, else the context run's `createdAt`,
 * else the Unix epoch as an ISO string.
 */
function generatedAtValue(raw: Record<string, unknown>, context: NormalizeContext): string {
  return stringValue(raw.generated_at) || context.run?.createdAt || new Date(0).toISOString();
}

/** Reads the runner name (default `'unknown'`) and derives its SKU. */
function artifactRunnerSku(raw: Record<string, unknown>): { runner: string; sku: string } {
  const runner = stringValue(raw.runner, 'unknown');
  return { runner, sku: skuFromRunner(runner) };
}

/** Color key of the form `<sku>_<hash of identity>`. */
function colorKeyFromIdentity(sku: string, identity: string): string {
  return `${sku}_${stableHash(identity)}`;
}
candidate.in_place : {}; + const bestBusBandwidth = finiteNumber(candidate.busbw_gbps); + const inPlaceBusBandwidth = finiteNumber(inPlace.busbw_gbps); + const outOfPlaceBusBandwidth = finiteNumber(outOfPlace.busbw_gbps); + const useInPlace = + bestBusBandwidth !== null && + inPlaceBusBandwidth === bestBusBandwidth && + outOfPlaceBusBandwidth !== bestBusBandwidth; + const latencyUs = useInPlace + ? finiteNumber(inPlace.time_us) + : finiteNumber(outOfPlace.time_us); + if (sizeBytes === null || latencyUs === null) return null; + return { + sizeBytes, + dtype: stringValue(candidate.dtype) || null, + latencyUs, + algBandwidthGbps: useInPlace + ? finiteNumber(inPlace.algbw_gbps) + : finiteNumber(outOfPlace.algbw_gbps), + busBandwidthGbps: bestBusBandwidth, + outOfPlaceUs: finiteNumber(outOfPlace.time_us), + inPlaceUs: finiteNumber(inPlace.time_us), + correct: nullableBoolean(candidate.correct), + }; + }) + .filter((row): row is CollectiveXCollectiveRow => row !== null) + .toSorted((a, b) => a.sizeBytes - b.sizeBytes); + if (rows.length === 0) return []; + + const identity = [ + 'nccl', + sku, + op, + topologyClass, + transport, + worldSize ?? '', + stringValue(raw.measurement_contract), + ].join('|'); + return [ + { + id: `cxn-${stableHash(identity)}`, + identity, + op, + sku, + runner, + topologyClass, + transport, + worldSize, + nodes: positiveNumber(raw.nodes), + dtype: rows[0]?.dtype ?? null, + comparisonClass: stringValue(raw.comparison_class) || null, + comparisonKey: stringValue(raw.comparison_key) || null, + measurementContract: stringValue(raw.measurement_contract) || null, + avgBusBandwidthGbps: finiteNumber(isRecord(raw.summary) ? raw.summary.avg_busbw_gbps : null), + status, + valid: status === 'valid', + colorKey: colorKeyFromIdentity(sku, identity), + label: `${sku.toUpperCase()} · ${topologyClass}${transport ? ` · ${transport}` : ''} (ws${worldSize ?? 
'?'})`, + generatedAt, + run: runSource(raw, generatedAt, context), + rows, + }, + ]; +} + +export function normalizeCollectiveXAllReduceFrameworkDocument( + raw: unknown, + context: NormalizeContext = {}, +): CollectiveXCollectiveSeries[] { + if (!isRecord(raw) || raw.family !== 'allreduce-fw' || !Array.isArray(raw.groups)) return []; + + const { runner, sku } = artifactRunnerSku(raw); + const generatedAt = generatedAtValue(raw, context); + const transport = stringValue(raw.transport); + const status = stringValue(raw.status, 'unknown'); + return raw.groups.flatMap((group) => { + if (!isRecord(group)) return []; + const impl = stringValue(group.impl, 'unknown'); + const topologyClass = + stringValue(group.topology_class) || stringValue(raw.topology_class, 'unknown'); + const worldSize = positiveNumber(group.world_size) ?? positiveNumber(raw.world_size); + const dtype = stringValue(group.dtype) || stringValue(raw.dtype) || null; + const rows = (Array.isArray(group.rows) ? group.rows : []) + .map((candidate) => { + if (!isRecord(candidate)) return null; + const sizeBytes = positiveNumber(candidate.size_bytes); + const latencyUs = finiteNumber(candidate.latency_us); + const busBandwidthGbps = finiteNumber(candidate.busbw_gbps); + if (sizeBytes === null || (latencyUs === null && !busBandwidthGbps)) return null; + return { + sizeBytes, + dtype, + latencyUs, + algBandwidthGbps: finiteNumber(candidate.algbw_gbps), + busBandwidthGbps, + outOfPlaceUs: null, + inPlaceUs: null, + correct: nullableBoolean(candidate.correct), + }; + }) + .filter((row): row is CollectiveXCollectiveRow => row !== null) + .toSorted((a, b) => a.sizeBytes - b.sizeBytes); + if (rows.length === 0) return []; + + const identity = [ + 'allreduce-fw', + sku, + impl, + topologyClass, + transport, + worldSize ?? 
'', + stringValue(raw.measurement_contract), + ].join('|'); + return [ + { + id: `cxn-${stableHash(identity)}`, + identity, + op: 'all_reduce', + sku, + runner, + topologyClass, + transport, + worldSize, + nodes: positiveNumber(raw.nodes), + dtype, + comparisonClass: stringValue(raw.comparison_class) || null, + comparisonKey: stringValue(group.comparison_key) || stringValue(raw.comparison_key) || null, + measurementContract: stringValue(raw.measurement_contract) || null, + avgBusBandwidthGbps: null, + status, + valid: status === 'valid', + colorKey: colorKeyFromIdentity(sku, identity), + label: `${sku.toUpperCase()} · ${impl} (fw-AR · ws${worldSize ?? '?'})`, + generatedAt, + run: runSource(raw, generatedAt, context), + rows, + }, + ]; + }); +} + +function normalizeTransferRows( + rows: unknown[], + keys: { size: string; bandwidth: string; latency: string }, +): CollectiveXTransferRow[] { + return rows + .map((candidate) => { + if (!isRecord(candidate)) return null; + const sizeBytes = positiveNumber(candidate[keys.size]); + const bandwidthGbps = finiteNumber(candidate[keys.bandwidth]); + if (sizeBytes === null || bandwidthGbps === null) return null; + return { + sizeBytes, + bandwidthGbps, + latency: finiteNumber(candidate[keys.latency]), + sizeClass: stringValue(candidate.size_class) || null, + correct: nullableBoolean(candidate.correct), + }; + }) + .filter((row): row is CollectiveXTransferRow => row !== null) + .toSorted((a, b) => a.sizeBytes - b.sizeBytes); +} + +function transferSeries( + raw: Record, + context: NormalizeContext, + family: CollectiveXTransferFamily, + operation: string | null, + subtype: string | null, + rows: CollectiveXTransferRow[], + latencyUnit: CollectiveXTransferLatencyUnit, + note: string, + topologyClass: string, + cohortIdentity: string, +): CollectiveXTransferSeries | null { + if (rows.length === 0) return null; + const { sku } = artifactRunnerSku(raw); + const generatedAt = generatedAtValue(raw, context); + const transport = 
stringValue(raw.transport); + const identity = [ + family, + sku, + topologyClass, + transport, + operation ?? '', + subtype ?? '', + latencyUnit, + ].join('|'); + const status = stringValue(raw.status, 'unknown'); + return { + id: `cxt-${stableHash(identity)}`, + identity, + cohortIdentity, + family, + sku, + topologyClass, + transport, + operation, + subtype, + valid: status === 'valid', + status, + note, + peakBandwidthGbps: + finiteNumber(raw.peak_bandwidth_gbps) ?? finiteNumber(raw.peak_bandwidth_gb_s), + latencyUnit, + colorKey: colorKeyFromIdentity(sku, identity), + label: `${sku.toUpperCase()} · ${operation ?? '?'} · ${subtype ?? '?'}`, + generatedAt, + run: runSource(raw, generatedAt, context), + rows, + }; +} + +export function normalizeCollectiveXTransferDocument( + raw: unknown, + context: NormalizeContext = {}, +): CollectiveXTransferSeries[] { + if (!isRecord(raw)) return []; + + const family = raw.family; + if (family === 'offload' && Array.isArray(raw.rows)) { + const diagnostics = isRecord(raw.diagnostics) ? raw.diagnostics : {}; + const overlap = isRecord(diagnostics.overlap_with_compute) + ? diagnostics.overlap_with_compute + : {}; + const numa = isRecord(diagnostics.numa) ? diagnostics.numa : {}; + const notes = [ + finiteNumber(raw.peak_bandwidth_gbps) === null + ? '' + : `peak ${Math.round(finiteNumber(raw.peak_bandwidth_gbps) ?? 0)} GB/s`, + finiteNumber(overlap.overlap_pct) === null + ? '' + : `copy/compute overlap ${Math.round(finiteNumber(overlap.overlap_pct) ?? 0)}%`, + finiteNumber(numa.node_count) === null + ? '' + : `${Math.round(finiteNumber(numa.node_count) ?? 0)} NUMA node(s)`, + ].filter(Boolean); + const byLine = new Map(); + for (const row of raw.rows) { + if (!isRecord(row)) continue; + const key = `${stringValue(row.op)}|${stringValue(row.host_memory)}`; + const values = byLine.get(key) ?? 
[]; + values.push(row); + byLine.set(key, values); + } + return [...byLine.entries()].flatMap(([key, values]) => { + const [operation, hostMemory] = key.split('|'); + const rows = normalizeTransferRows(values, { + size: 'size_bytes', + bandwidth: 'bandwidth_gbps', + latency: 'latency_us', + }); + const series = transferSeries( + raw, + context, + 'offload', + operation || null, + hostMemory || null, + rows, + 'us', + notes.join(' · '), + stringValue(raw.topology_class, 'unknown'), + [ + 'offload', + artifactRunnerSku(raw).sku, + stringValue(raw.topology_class, 'unknown'), + stringValue(raw.transport), + ].join('|'), + ); + return series ? [series] : []; + }); + } + + if (family === 'copy-engine' && Array.isArray(raw.rows)) { + const notes = [ + finiteNumber(raw.peak_bandwidth_gbps) === null + ? '' + : `peak ${Math.round(finiteNumber(raw.peak_bandwidth_gbps) ?? 0)} GB/s`, + typeof raw.copy_engine_uses_near_zero_sms === 'boolean' + ? `copy-engine uses near-zero SMs: ${raw.copy_engine_uses_near_zero_sms ? 'yes' : 'no'}` + : '', + ].filter(Boolean); + const byLine = new Map(); + for (const row of raw.rows) { + if (!isRecord(row)) continue; + const key = `${stringValue(row.op)}|${stringValue(row.engine)}`; + const values = byLine.get(key) ?? []; + values.push(row); + byLine.set(key, values); + } + return [...byLine.entries()].flatMap(([key, values]) => { + const [operation, engine] = key.split('|'); + const rows = normalizeTransferRows(values, { + size: 'size_bytes', + bandwidth: 'bandwidth_gbps', + latency: 'latency_us', + }); + const series = transferSeries( + raw, + context, + 'copy-engine', + operation || null, + engine || null, + rows, + 'us', + notes.join(' · '), + stringValue(raw.topology_class, 'unknown'), + [ + 'copy-engine', + artifactRunnerSku(raw).sku, + stringValue(raw.topology_class, 'unknown'), + stringValue(raw.transport), + ].join('|'), + ); + return series ? 
[series] : []; + }); + } + + if (family === 'kv-cache' && Array.isArray(raw.groups)) { + const unwired = Array.isArray(raw.declared_unwired_backends) + ? raw.declared_unwired_backends.filter((value): value is string => typeof value === 'string') + : []; + const wired = Array.isArray(raw.wired_backends) + ? raw.wired_backends.filter((value): value is string => typeof value === 'string') + : []; + const note = [ + wired.length > 0 ? `wired: ${wired.join(', ')}` : '', + unwired.length > 0 ? `declared-unwired: ${unwired.join(', ')}` : '', + ] + .filter(Boolean) + .join(' · '); + return raw.groups.flatMap((group) => { + if (!isRecord(group)) return []; + const operation = stringValue(group.direction) || null; + const layout = stringValue(group.layout); + const backend = stringValue(group.backend); + const rows = normalizeTransferRows(Array.isArray(group.rows) ? group.rows : [], { + size: 'transfer_bytes', + bandwidth: 'bandwidth_gb_s', + latency: 'time_ms', + }); + const series = transferSeries( + raw, + context, + 'kv-cache', + operation, + `${layout || '?'}/${backend || '?'}`, + rows, + 'ms', + note, + stringValue(group.topology_class) || stringValue(raw.transport, 'unknown'), + ['kv-cache', artifactRunnerSku(raw).sku, stringValue(raw.transport)].join('|'), + ); + return series ? [series] : []; + }); + } + + if (family === 'rl-mesh' && Array.isArray(raw.groups)) { + const note = [ + finiteNumber(raw.peak_bandwidth_gb_s) === null + ? '' + : `peak ${Math.round(finiteNumber(raw.peak_bandwidth_gb_s) ?? 0)} GB/s`, + positiveNumber(raw.world_size) === null + ? 
'' + : `world=${positiveNumber(raw.world_size)}: trainer ${positiveNumber( + raw.trainer_ranks, + )} <-> generator ${positiveNumber(raw.generator_ranks)}`, + ] + .filter(Boolean) + .join(' · '); + const shortDirection: Record = { + trainer_to_generator: 'trn->gen', + generator_to_trainer: 'gen->trn', + }; + return raw.groups.flatMap((group) => { + if (!isRecord(group)) return []; + const direction = stringValue(group.direction); + const pattern = stringValue(group.pattern); + const rows = normalizeTransferRows(Array.isArray(group.rows) ? group.rows : [], { + size: 'transfer_bytes', + bandwidth: 'bandwidth_gb_s', + latency: 'time_ms', + }); + const series = transferSeries( + raw, + context, + 'rl-mesh', + direction || null, + pattern || null, + rows, + 'ms', + note, + stringValue(group.topology_class) || stringValue(raw.transport, 'unknown'), + ['rl-mesh', artifactRunnerSku(raw).sku, stringValue(raw.transport)].join('|'), + ); + if (!series) return []; + series.label = `${series.sku.toUpperCase()} · ${(shortDirection[direction] ?? 
direction) || '?'} · ${pattern || '?'}`; + return [series]; + }); + } + + return []; +} + +function compareGeneratedAt(left: { generatedAt: string }, right: { generatedAt: string }): number { + return Date.parse(left.generatedAt) - Date.parse(right.generatedAt); +} + +export function selectLatestCollectiveXCollectiveSeries( + series: CollectiveXCollectiveSeries[], +): CollectiveXCollectiveSeries[] { + const selected = new Map(); + for (const item of series) { + const previous = selected.get(item.identity); + if (!previous || compareGeneratedAt(item, previous) > 0) selected.set(item.identity, item); + } + return [...selected.values()].toSorted( + (a, b) => + a.op.localeCompare(b.op) || + a.sku.localeCompare(b.sku) || + a.label.localeCompare(b.label) || + a.identity.localeCompare(b.identity), + ); +} + +export function selectLatestCollectiveXTransferSeries( + series: CollectiveXTransferSeries[], +): CollectiveXTransferSeries[] { + const latestCohorts = new Map(); + for (const item of series) { + const previous = latestCohorts.get(item.cohortIdentity); + if (!previous || compareGeneratedAt(item, previous) > 0) { + latestCohorts.set(item.cohortIdentity, item); + } + } + const latestByCohort = new Map( + [...latestCohorts.values()].map((item) => [item.cohortIdentity, item.generatedAt]), + ); + const selected = new Map(); + for (const item of series) { + if (item.generatedAt !== latestByCohort.get(item.cohortIdentity)) continue; + const previous = selected.get(item.identity); + if (!previous || compareGeneratedAt(item, previous) > 0) selected.set(item.identity, item); + } + return [...selected.values()].toSorted( + (a, b) => + a.family.localeCompare(b.family) || + a.sku.localeCompare(b.sku) || + a.label.localeCompare(b.label) || + a.identity.localeCompare(b.identity), + ); +} + +function publicationRank(status: CollectiveXPublicationStatus): number { + return ( + { + official: 4, + 'comparable-experimental': 3, + diagnostic: 2, + legacy: 1, + invalid: 0, + failed: 0, + } 
satisfies Record + )[status]; +} + +function compareRecords(left: CollectiveXArtifactRecord, right: CollectiveXArtifactRecord): number { + const validDelta = Number(!left.isFailed) - Number(!right.isFailed); + if (validDelta !== 0) return validDelta; + const publicationDelta = + publicationRank(left.publicationStatus) - publicationRank(right.publicationStatus); + if (publicationDelta !== 0) return publicationDelta; + return Date.parse(left.generatedAt) - Date.parse(right.generatedAt); +} + +// The trailing component of collectiveXConfigIdentity is the routing traceSignature. Two docs of +// the SAME benchmark config measured over DIFFERENT token ladders (e.g. the standard dispatch/ +// combine suite and an EP-degree/rack-scale suite that share a config but sweep different token +// points) hash different traces, so identity-level dedup keeps both and the chart draws the same +// config twice. Everything before the trailing signature is the mergeable config identity. +function configMergeKey(identity: string): string { + const cut = identity.lastIndexOf('|'); + return cut === -1 ? identity : identity.slice(0, cut); +} + +// Merge same-config series that differ only by token ladder (trace signature). The routing seed +// and distribution are identical for a matched config, so the routing at a given token count is +// the same trace prefix — rows at DISTINCT token counts are one curve, and overlapping token +// counts keep the best-ranked (publication rank, then newest) series' row. The winner also keeps +// label/provenance; merged rows stay sorted by token count. 
+export function mergeSameConfigCollectiveXSeries(series: CollectiveXSeries[]): CollectiveXSeries[] { + const groups = new Map(); + for (const item of series) { + const key = configMergeKey(item.identity); + const group = groups.get(key); + if (group) group.push(item); + else groups.set(key, [item]); + } + const merged: CollectiveXSeries[] = []; + for (const group of groups.values()) { + if (group.length === 1) { + merged.push(group[0]); + continue; + } + const ranked = group.toSorted( + (a, b) => + publicationRank(b.publicationStatus) - publicationRank(a.publicationStatus) || + Date.parse(b.generatedAt) - Date.parse(a.generatedAt), + ); + const [base, ...rest] = ranked; + const seen = new Set(base.rows.map((row) => row.tokensPerRank)); + const rows = [...base.rows]; + for (const other of rest) { + for (const row of other.rows) { + if (seen.has(row.tokensPerRank)) continue; + seen.add(row.tokensPerRank); + rows.push(row); + } + } + merged.push({ ...base, rows: rows.toSorted((a, b) => a.tokensPerRank - b.tokensPerRank) }); + } + return merged; +} + +export function selectLatestCollectiveXRecords(records: CollectiveXArtifactRecord[]): { + series: CollectiveXSeries[]; + failures: CollectiveXFailure[]; +} { + const selected = new Map(); + for (const record of records) { + const previous = selected.get(record.identity); + if (!previous || compareRecords(record, previous) > 0) selected.set(record.identity, record); + } + + const values = [...selected.values()]; + return { + series: mergeSameConfigCollectiveXSeries( + values.flatMap((record) => (record.series ? [record.series] : [])), + ).toSorted( + (a, b) => + a.sku.localeCompare(b.sku) || + a.backend.localeCompare(b.backend) || + a.phase.localeCompare(b.phase) || + a.label.localeCompare(b.label) || + a.identity.localeCompare(b.identity), + ), + failures: values + .flatMap((record) => (record.failure ? [record.failure] : [])) + .toSorted( + (a, b) => + a.sku.localeCompare(b.sku) || + (a.phase ?? 
'').localeCompare(b.phase ?? '') || + a.config.localeCompare(b.config), + ), + }; +} + +export function selectLatestCollectiveXSeries(series: CollectiveXSeries[]): CollectiveXSeries[] { + const records = series.map((item) => ({ + identity: item.identity, + generatedAt: item.generatedAt, + publicationStatus: item.publicationStatus, + isFailed: item.status === 'failed', + series: item, + failure: null, + })); + return selectLatestCollectiveXRecords(records).series; +} + +export function publicationMatches( + series: CollectiveXSeries, + filter: CollectiveXPublicationFilter, + selectedShapeKey = 'all', +): boolean { + if (filter === 'all') return true; + if (filter === 'official-headline') { + return ( + series.publicationStatus === 'official' && + series.workloadId !== null && + (selectedShapeKey === 'all' || collectiveXShapeKey(series.shape) === selectedShapeKey) + ); + } + if (filter === 'official') { + return series.publicationStatus === 'official' && series.workloadId !== null; + } + return ( + !['diagnostic', 'invalid', 'failed'].includes(series.publicationStatus) && + series.workloadId !== null + ); +} + +export function collectiveXPrefillFloor(series: CollectiveXSeries[]): number { + const values = series + .filter((item) => item.phase === 'prefill' && item.backend === 'deepep') + .flatMap((item) => item.rows.map((row) => row.tokensPerRank)); + return values.length > 0 ? Math.min(...values) : 128; +} + +export function operationPercentiles( + row: CollectiveXRow, + operation: CollectiveXOperation, +): CollectiveXPercentiles { + if (operation === 'isolated-sum') return row.isolatedSum; + return row[operation]; +} + +export function metricValue( + row: CollectiveXRow, + operation: CollectiveXOperation, + percentileKey: CollectiveXPercentile, + yAxis: CollectiveXYAxis, +): number { + const latencyUs = operationPercentiles(row, operation)[percentileKey]; + if (yAxis === 'latency') return latencyUs; + if (yAxis === 'tokens-per-second') { + return latencyUs > 0 ? 
row.globalTokens / (latencyUs * 1e-6) : 0; + } + + const bytes = + operation === 'dispatch' + ? row.dispatchLogicalBytes + : operation === 'combine' + ? row.combineLogicalBytes + : row.dispatchLogicalBytes + row.combineLogicalBytes; + return latencyUs > 0 ? bytes / (latencyUs * 1e3) : 0; +} + +export function chartPoints( + series: CollectiveXSeries[], + operation: CollectiveXOperation, + percentileKey: CollectiveXPercentile, + xAxis: CollectiveXXAxis, + yAxis: CollectiveXYAxis, + prefillFloor = 128, +): CollectiveXChartPoint[] { + return series.flatMap((item) => + item.rows + .filter((row) => item.phase !== 'prefill' || row.tokensPerRank >= prefillFloor) + .map((row) => ({ + seriesId: item.id, + seriesLabel: collectiveXSeriesLabel(item), + colorKey: item.colorKey, + x: xAxis === 'tokens-per-rank' ? row.tokensPerRank : row.globalTokens, + y: metricValue(row, operation, percentileKey, yAxis), + operation, + percentile: percentileKey, + row, + series: item, + })) + .filter((point) => point.x > 0 && point.y > 0), + ); +} + +export function comparisonDifferences(series: CollectiveXSeries[]): string[] { + if (series.length < 2) return []; + const warnings: string[] = []; + const different = (getValue: (item: CollectiveXSeries) => unknown) => + new Set(series.map(getValue)).size > 1; + + if (different((item) => item.topologyClass)) warnings.push('topology'); + if ( + different((item) => + collectiveXBackendLabel(item.backend, item.backendVersion, item.shape.kernelGeneration), + ) + ) { + warnings.push('backend generation'); + } + if (different((item) => item.epSize)) warnings.push('EP degree'); + if (different((item) => item.shape.dispatchDtype)) warnings.push('dispatch dtype'); + if (different((item) => item.mode)) warnings.push('kernel mode'); + if (different((item) => item.resourceMode)) warnings.push('resource mode'); + if (different((item) => item.resourceProfile.requestedFraction)) { + warnings.push('requested resource fraction'); + } + if (different((item) => 
item.measurementContract)) warnings.push('measurement contract'); + if (different((item) => item.comparisonClass)) warnings.push('comparison class'); + if (different((item) => item.shape.hidden)) warnings.push('hidden size'); + if (different((item) => item.shape.topk)) warnings.push('top-k'); + if (different((item) => item.shape.experts)) warnings.push('expert count'); + if (different((item) => item.shape.combineQuantMode)) warnings.push('combine quantization'); + if (different((item) => item.shape.activationProfile)) warnings.push('activation profile'); + if (different((item) => item.workloadId)) warnings.push('workload ID'); + if (different((item) => item.run.sha)) warnings.push('source SHA'); + + const tracesByRouting = new Map>(); + for (const item of series) { + const traces = tracesByRouting.get(item.shape.routingLabel) ?? new Set(); + traces.add(item.traceSignature ?? 'unknown'); + tracesByRouting.set(item.shape.routingLabel, traces); + } + if ([...tracesByRouting.values()].some((traces) => traces.size > 1)) { + warnings.push('workload trace'); + } + return warnings; +} + +export function collectiveXScalingPoints( + series: CollectiveXSeries[], + kind: CollectiveXScalingKind, +): CollectiveXScalingPoint[] { + const anchorTokensPerRank = 64; + const anchorGlobalTokens = 512; + const candidates = series.filter( + (item) => + item.shape.routing === 'uniform' && + !item.shape.eplbEnabled && + item.mode === 'normal' && + item.measurementContract === 'layout-and-dispatch-v1' && + item.suite === 'backend-default' && + publicationMatches(item, 'publishable') && + item.epSize !== null, + ); + + const bySkuAndEp = new Map(); + for (const item of candidates) { + const key = `${item.sku}|${item.epSize}`; + const group = bySkuAndEp.get(key) ?? 
[]; + group.push(item); + bySkuAndEp.set(key, group); + } + + const points: CollectiveXScalingPoint[] = []; + for (const [key, group] of bySkuAndEp) { + const [sku, rawEpSize] = key.split('|'); + const epSize = Number(rawEpSize); + const matching = group.flatMap((item) => { + const row = + kind === 'weak' + ? item.rows.find((candidate) => candidate.tokensPerRank === anchorTokensPerRank) + : (item.rows.find((candidate) => candidate.globalTokens === anchorGlobalTokens) ?? + item.rows.find( + (candidate) => candidate.tokensPerRank === Math.round(anchorGlobalTokens / epSize), + )); + return row + ? [ + { + item, + latencyUs: row.dispatch.p50, + }, + ] + : []; + }); + if (matching.length === 0) continue; + + const best = matching.reduce((current, candidate) => + candidate.latencyUs < current.latencyUs ? candidate : current, + ); + points.push({ + id: `cxscale-${kind}-${sku}-${epSize}`, + kind, + sku, + colorKey: best.item.colorKey, + epSize, + latencyUs: best.latencyUs, + }); + } + + const skuCounts = new Map>(); + for (const point of points) { + const epSizes = skuCounts.get(point.sku) ?? new Set(); + epSizes.add(point.epSize); + skuCounts.set(point.sku, epSizes); + } + + return points + .filter((point) => (skuCounts.get(point.sku)?.size ?? 0) >= 2) + .toSorted((a, b) => a.sku.localeCompare(b.sku) || a.epSize - b.epSize); +} + +export function collectiveXHeatmapCells( + series: CollectiveXSeries[], + dimension: CollectiveXHeatmapDimension, + prefillFloor = 128, +): CollectiveXHeatmapCell[] { + const cells = new Map(); + const rowLabel = (item: CollectiveXSeries): string | null => { + switch (dimension) { + case 'ep': { + return item.epSize === null ? 
null : `EP${item.epSize}`; + } + case 'routing': { + return item.shape.routingLabel; + } + case 'resource': { + return item.resourceMode; + } + case 'placement': { + return item.placement.kind || 'packed'; + } + } + }; + + for (const item of series) { + const row = rowLabel(item); + if (!row) continue; + for (const measurement of item.rows) { + if (item.phase === 'prefill' && measurement.tokensPerRank < prefillFloor) continue; + const key = `${row}|${measurement.tokensPerRank}`; + const previous = cells.get(key); + if (!previous || measurement.dispatch.p50 < previous.latencyUs) { + cells.set(key, { + id: `cxheat-${dimension}-${stableHash(key)}`, + row, + tokensPerRank: measurement.tokensPerRank, + latencyUs: measurement.dispatch.p50, + }); + } + } + } + + return [...cells.values()].toSorted( + (a, b) => a.row.localeCompare(b.row) || a.tokensPerRank - b.tokensPerRank, + ); +} + +function round(value: number, digits: number): number { + const factor = 10 ** digits; + return Math.round(value * factor) / factor; +} + +export function distributionSensitivity(series: CollectiveXSeries[]): CollectiveXSensitivity[] { + const groups = new Map>>(); + const metadata = new Map< + string, + Omit< + CollectiveXSensitivity, + | 'id' + | 'headlineP99RangeUs' + | 'sensitivityRatio' + | 'worstDistribution' + | 'worstAtTokensPerRank' + | 'bestCaseRatio' + | 'eplbZipfRatio' + | 'eplbRecoveredRatio' + > + >(); + + // The Python report scans sorted result paths and merges repeated routing/T points in that + // order. Result filenames are runner/backend/phase/timestamp, so preserve the same overwrite + // order here to keep the generated report and frontend summary identical. + const orderedSeries = series.toSorted( + (a, b) => + (a.runner ?? '').localeCompare(b.runner ?? 
'') || + a.backend.localeCompare(b.backend) || + a.phase.localeCompare(b.phase) || + a.generatedAt.localeCompare(b.generatedAt), + ); + + for (const item of orderedSeries) { + const groupKey = [ + item.sku, + item.backend, + item.phase, + item.shape.dispatchDtype, + item.mode, + item.measurementContract, + item.epSize ?? '', + item.shape.combineQuantMode, + item.shape.activationProfile, + ].join('|'); + metadata.set(groupKey, { + sku: item.sku, + backend: item.backend, + phase: item.phase, + dispatchDtype: item.shape.dispatchDtype, + mode: item.mode, + contract: item.measurementContract, + epSize: item.epSize, + combineQuantMode: item.shape.combineQuantMode, + activationProfile: item.shape.activationProfile, + }); + const routing = `${item.shape.routing}${item.shape.eplbEnabled ? '+eplb' : ''}`; + const byRouting = groups.get(groupKey) ?? new Map>(); + const values = byRouting.get(routing) ?? new Map(); + for (const row of item.rows) { + if (row.roundtripMeasured) values.set(row.tokensPerRank, row.roundtrip.p99); + } + byRouting.set(routing, values); + groups.set(groupKey, byRouting); + } + + const output: CollectiveXSensitivity[] = []; + for (const [groupKey, byRouting] of groups) { + const headline = byRouting.get('uniform'); + const meta = metadata.get(groupKey); + if (!headline || !meta) continue; + + let worst: { ratio: number; routing: string; tokens: number } | null = null; + let bestCaseRatio: number | null = null; + const maxRatioByRouting = new Map(); + + for (const [routing, values] of byRouting) { + if (routing === 'uniform') continue; + const ratios = [...values.entries()] + .filter(([tokens]) => headline.has(tokens) && (headline.get(tokens) ?? 0) > 0) + .map(([tokens, value]) => ({ + tokens, + ratio: value / (headline.get(tokens) ?? 1), + })); + if (ratios.length === 0) continue; + + const max = ratios.reduce((current, candidate) => + candidate.ratio > current.ratio ? 
candidate : current, + ); + maxRatioByRouting.set(routing, max.ratio); + const baseRouting = routing.replace('+eplb', ''); + if (baseRouting === 'balanced-rank-local') { + bestCaseRatio = Math.min(...ratios.map((item) => item.ratio)); + } else if (!routing.endsWith('+eplb') && (!worst || max.ratio > worst.ratio)) { + worst = { ratio: max.ratio, routing, tokens: max.tokens }; + } + } + + if (!worst) continue; + const headlineValues = [...headline.values()]; + output.push({ + id: `cxs-${stableHash(groupKey)}`, + ...meta, + headlineP99RangeUs: [ + round(Math.min(...headlineValues), 2), + round(Math.max(...headlineValues), 2), + ], + sensitivityRatio: round(worst.ratio, 4), + worstDistribution: worst.routing, + worstAtTokensPerRank: worst.tokens, + bestCaseRatio: bestCaseRatio === null ? null : round(bestCaseRatio, 4), + eplbZipfRatio: + maxRatioByRouting.get('zipf') === undefined + ? null + : round(maxRatioByRouting.get('zipf') ?? 0, 4), + eplbRecoveredRatio: + maxRatioByRouting.get('zipf+eplb') === undefined + ? null + : round(maxRatioByRouting.get('zipf+eplb') ?? 0, 4), + }); + } + + return output.toSorted( + (a, b) => + a.sku.localeCompare(b.sku) || + a.backend.localeCompare(b.backend) || + a.phase.localeCompare(b.phase) || + a.dispatchDtype.localeCompare(b.dispatchDtype), + ); +} + +const HEADLINE_SHAPE_KEY = '7168/8/256'; +const ROUNDTRIP_BUDGETS_US = [100, 250, 500]; + +function isHeadlineSeries(series: CollectiveXSeries): boolean { + return ( + series.publicationStatus === 'official' && + series.workloadId !== null && + collectiveXShapeKey(series.shape) === HEADLINE_SHAPE_KEY && + series.shape.routing === 'uniform' + ); +} + +function roundtripP99(row: CollectiveXRow): number | null { + return row.roundtripMeasured ? 
row.roundtrip.p99 : null; +} + +function bestRoundtripP99( + series: CollectiveXSeries[], + predicate: (series: CollectiveXSeries) => boolean, +): { latencyUs: number; series: CollectiveXSeries; tokensPerRank: number } | null { + let best: { latencyUs: number; series: CollectiveXSeries; tokensPerRank: number } | null = null; + for (const item of series) { + if (!isHeadlineSeries(item) || !predicate(item)) continue; + const targetTokens = item.phase === 'decode' ? 64 : 256; + const row = item.rows.find((candidate) => candidate.tokensPerRank === targetTokens); + const latencyUs = row ? roundtripP99(row) : null; + if (latencyUs === null) continue; + if (!best || latencyUs < best.latencyUs) { + best = { latencyUs, series: item, tokensPerRank: targetTokens }; + } + } + return best; +} + +function formatBestRoundtripCard( + title: string, + best: { latencyUs: number; series: CollectiveXSeries; tokensPerRank: number } | null, +): CollectiveXSummaryCard { + if (!best) { + return { + title, + value: 'no data', + sub: 'no official headline cell at this phase/EP', + }; + } + return { + title, + value: `${best.series.backend} · ${best.series.sku.toUpperCase()}`, + sub: `${best.latencyUs.toFixed(0)} us RT p99 · ${best.series.shape.dispatchDtype} · T=${best.tokensPerRank}`, + }; +} + +export function maxTokensUnderBudget( + series: CollectiveXSeries[], + budgetsUs = ROUNDTRIP_BUDGETS_US, +): CollectiveXBudgetDecisionRow[] { + const cells = new Map(); + for (const item of series) { + if (!isHeadlineSeries(item)) continue; + const key = [ + item.sku, + item.backend, + item.phase, + item.shape.dispatchDtype, + item.epSize ?? '', + item.mode, + ].join('|'); + const cell = cells.get(key) ?? 
{ series: item, points: [] }; + for (const row of item.rows) { + const latency = roundtripP99(row); + if (latency !== null) cell.points.push([row.tokensPerRank, latency]); + } + cells.set(key, cell); + } + + return [...cells.entries()] + .flatMap(([, cell]) => { + const budgets = Object.fromEntries( + budgetsUs.map((budget) => { + const satisfyingTokens = cell.points + .filter(([, latency]) => latency <= budget) + .map(([tokens]) => tokens); + return [ + String(budget), + satisfyingTokens.length > 0 ? Math.max(...satisfyingTokens) : null, + ]; + }), + ) satisfies Record; + if (Object.values(budgets).every((value) => value === null)) return []; + return [ + { + id: `cxb-${stableHash(cell.series.identity)}`, + sku: cell.series.sku, + backend: cell.series.backend, + phase: cell.series.phase, + dispatchDtype: cell.series.shape.dispatchDtype, + epSize: cell.series.epSize, + mode: cell.series.mode, + budgets, + }, + ]; + }) + .toSorted( + (a, b) => + a.sku.localeCompare(b.sku) || + a.backend.localeCompare(b.backend) || + a.phase.localeCompare(b.phase) || + a.dispatchDtype.localeCompare(b.dispatchDtype) || + (a.epSize ?? 0) - (b.epSize ?? 0), + ); +} + +export function decisionRecommendations( + series: CollectiveXSeries[], +): CollectiveXRecommendationRow[] { + const bySkuPhase = new Map(); + for (const item of series) { + const key = `${item.sku}|${item.phase}`; + const group = bySkuPhase.get(key) ?? []; + group.push(item); + bySkuPhase.set(key, group); + } + + return [...bySkuPhase.entries()] + .flatMap(([key, group]) => { + const [sku, phase] = key.split('|') as [string, CollectiveXPhase]; + const targetTokens = phase === 'decode' ? 64 : 256; + const candidates = group.flatMap((item) => { + const row = item.rows.find((candidate) => candidate.tokensPerRank === targetTokens); + const value = row?.dispatch.p99; + return value + ? 
[ + { + value, + item, + }, + ] + : []; + }); + if (candidates.length === 0) return []; + const best = candidates.reduce((current, candidate) => + candidate.value < current.value ? candidate : current, + ); + return [ + { + id: `cxr-${stableHash(`${sku}|${phase}`)}`, + sku, + phase, + atTokensPerRank: targetTokens, + lowestP99DispatchUs: round(best.value, 1), + config: [ + best.item.shape.dispatchDtype, + best.item.mode, + best.item.measurementContract, + best.item.shape.routingLabel, + best.item.resourceMode, + ].join('/'), + epSize: best.item.epSize, + }, + ]; + }) + .toSorted((a, b) => a.sku.localeCompare(b.sku) || a.phase.localeCompare(b.phase)); +} + +export function llCrossoverRows(series: CollectiveXSeries[]): Record[] { + const normal = new Map(); + for (const item of series) { + if ( + item.mode === 'normal' && + item.shape.routing === 'uniform' && + item.measurementContract === 'layout-and-dispatch-v1' + ) { + normal.set(`${item.sku}|${item.epSize ?? ''}|${item.shape.dispatchDtype}`, item); + } + } + + const rows: Record[] = []; + for (const ll of series) { + if (ll.mode !== 'll' || ll.shape.routing !== 'uniform') continue; + const matchedNormal = normal.get(`${ll.sku}|${ll.epSize ?? 
''}|${ll.shape.dispatchDtype}`); + if (!matchedNormal) continue; + for (const stat of ['p50', 'p99'] as const) { + let crossover: number | string = 'never-in-range'; + for (const row of ll.rows) { + const normalRow = matchedNormal.rows.find( + (candidate) => candidate.tokensPerRank === row.tokensPerRank, + ); + if (!normalRow || !row.roundtripMeasured || !normalRow.roundtripMeasured) continue; + if (normalRow.roundtrip[stat] < row.roundtrip[stat]) { + crossover = row.tokensPerRank; + break; + } + } + rows.push({ + sku: ll.sku, + ep: ll.epSize, + dtype: ll.shape.dispatchDtype, + stat, + basis: 'measured-roundtrip', + normal_faster_at_T: crossover, + }); + } + } + return rows; +} + +export function skewPenaltyRows(series: CollectiveXSeries[]): Record[] { + const uniform = new Map(); + for (const item of series) { + if (item.shape.routing === 'uniform') { + uniform.set( + [ + item.sku, + item.epSize ?? '', + item.phase, + item.mode, + item.shape.dispatchDtype, + item.measurementContract, + ].join('|'), + item, + ); + } + } + + return series.flatMap((item) => { + const routing = `${item.shape.routing}${item.shape.eplbEnabled ? '+eplb' : ''}`; + if (!routing.startsWith('zipf')) return []; + const matched = uniform.get( + [ + item.sku, + item.epSize ?? 
'', + item.phase, + item.mode, + item.shape.dispatchDtype, + item.measurementContract, + ].join('|'), + ); + if (!matched) return []; + return item.rows.flatMap((row) => { + const uniformRow = matched.rows.find( + (candidate) => candidate.tokensPerRank === row.tokensPerRank, + ); + if (!uniformRow || uniformRow.dispatch.p50 <= 0 || uniformRow.dispatch.p99 <= 0) return []; + return [ + { + sku: item.sku, + ep: item.epSize, + phase: item.phase, + routing, + T: row.tokensPerRank, + p50_amplification: round(row.dispatch.p50 / uniformRow.dispatch.p50, 3), + p99_amplification: round(row.dispatch.p99 / uniformRow.dispatch.p99, 3), + }, + ]; + }); + }); +} + +export function topologyPenaltyRows(series: CollectiveXSeries[]): Record[] { + const byConfig = new Map>(); + for (const item of series) { + if ( + item.shape.routing !== 'uniform' || + item.mode !== 'normal' || + item.measurementContract !== 'layout-and-dispatch-v1' || + item.epSize === null + ) { + continue; + } + const key = `${item.sku}|${item.phase}|${item.shape.dispatchDtype}`; + const group = byConfig.get(key) ?? 
new Map(); + group.set(item.epSize, item); + byConfig.set(key, group); + } + + return [...byConfig.values()].flatMap((group) => { + if (group.size < 2) return []; + const epSizes = [...group.keys()].toSorted((a, b) => a - b); + const lowEp = epSizes.at(0); + const highEp = epSizes.at(-1); + if (lowEp === undefined || highEp === undefined) return []; + const low = group.get(lowEp); + const high = group.get(highEp); + if (!low || !high) return []; + return low.rows.flatMap((lowRow) => { + const highRow = high.rows.find( + (candidate) => candidate.tokensPerRank === lowRow.tokensPerRank, + ); + if (!highRow || lowRow.dispatch.p50 <= 0) return []; + return [ + { + sku: low.sku, + phase: low.phase, + dtype: low.shape.dispatchDtype, + T: lowRow.tokensPerRank, + [`ep${lowEp}_p50`]: round(lowRow.dispatch.p50, 1), + [`ep${highEp}_p50`]: round(highRow.dispatch.p50, 1), + penalty_pct: round( + (100 * (highRow.dispatch.p50 - lowRow.dispatch.p50)) / lowRow.dispatch.p50, + 1, + ), + }, + ]; + }); + }); +} + +export function resourceParetoRows(series: CollectiveXSeries[]): Record[] { + const cells = new Map< + string, + Map + >(); + for (const item of series) { + if ( + item.mode !== 'normal' || + item.shape.routing !== 'uniform' || + item.measurementContract !== 'layout-and-dispatch-v1' || + item.resourceProfile.fixedKernel || + item.resourceProfile.achievedFraction === null + ) { + continue; + } + for (const row of item.rows) { + const key = `${item.sku}|${item.phase}|${item.shape.dispatchDtype}|${row.tokensPerRank}`; + const curve = cells.get(key) ?? 
new Map(); + curve.set(round(item.resourceProfile.achievedFraction, 4), { + dispatchP50: round(row.dispatch.p50, 1), + dispatchP99: round(row.dispatch.p99, 1), + resourceClass: item.resourceProfile.resourceClass, + }); + cells.set(key, curve); + } + } + + return [...cells.entries()].flatMap(([key, curve]) => { + if (curve.size < 2) return []; + const [sku, phase, dtype, tokens] = key.split('|'); + const fractions = [...curve.keys()].toSorted((a, b) => a - b); + return [ + { + sku, + phase, + dtype, + T: Number(tokens), + n_points: fractions.length, + curve: fractions.map((fraction) => { + const point = curve.get(fraction); + return { + achieved_fraction: fraction, + dispatch_p50: point?.dispatchP50, + dispatch_p99: point?.dispatchP99, + resource_class: point?.resourceClass, + }; + }), + }, + ]; + }); +} + +export function collectiveXSummaryCards( + series: CollectiveXSeries[], + failures: CollectiveXFailure[], +): CollectiveXSummaryCard[] { + const llRows = llCrossoverRows(series); + const sensitivityRows = distributionSensitivity(series); + const cards: CollectiveXSummaryCard[] = [ + formatBestRoundtripCard( + 'Best backend · decode EP8', + bestRoundtripP99(series, (item) => item.phase === 'decode' && item.epSize === 8), + ), + formatBestRoundtripCard( + 'Best backend · prefill EP8', + bestRoundtripP99(series, (item) => item.phase === 'prefill' && item.epSize === 8), + ), + ]; + + const crossovers = llRows.filter( + (row) => + row.basis === 'measured-roundtrip' && + row.stat === 'p50' && + typeof row.normal_faster_at_T === 'number', + ); + if (crossovers.length > 0) { + const crossover = crossovers.reduce((current, candidate) => + Number(candidate.normal_faster_at_T) < Number(current.normal_faster_at_T) + ? 
candidate + : current, + ); + cards.push({ + title: 'LL -> normal crossover', + value: `T~${String(crossover.normal_faster_at_T)} tok/rank`, + sub: `${String(crossover.sku).toUpperCase()} EP${String(crossover.ep)} ${String( + crossover.dtype, + )} · normal RT p50 wins above this`, + }); + } else { + cards.push({ + title: 'LL -> normal crossover', + value: 'none in range', + sub: 'normal RT never beats LL within the measured token ladder', + }); + } + + cards.push( + formatBestRoundtripCard( + 'Resource-normalized winner', + bestRoundtripP99( + series, + (item) => + item.phase === 'decode' && item.epSize === 8 && item.suite === 'resource-constrained', + ), + ), + formatBestRoundtripCard( + 'Backend-default winner', + bestRoundtripP99( + series, + (item) => item.phase === 'decode' && item.epSize === 8 && item.suite === 'backend-default', + ), + ), + ); + + if (sensitivityRows.length > 0) { + const worst = sensitivityRows.reduce((current, candidate) => + candidate.sensitivityRatio > current.sensitivityRatio ? candidate : current, + ); + cards.push({ + title: 'Most unstable config', + value: `${worst.sku.toUpperCase()} · ${worst.backend} ${worst.phase}`, + sub: `${worst.sensitivityRatio.toFixed(2)}x p99 under ${worst.worstDistribution} vs uniform`, + warning: true, + }); + } else { + cards.push({ + title: 'Most unstable config', + value: 'n/a', + sub: 'no multi-distribution group yet', + }); + } + + cards.push({ + title: 'Invalid / diagnostic cases', + value: String(failures.length), + sub: failures.length > 0 ? 
'see Evidence failed table' : 'none - all runs publishable', + warning: failures.length > 0, + href: '#tab-evidence', + }); + return cards; +} + +export function collectiveXDecisionSummary( + series: CollectiveXSeries[], +): CollectiveXDecisionSummary { + return { + budgetsUs: ROUNDTRIP_BUDGETS_US, + maxTokensUnderBudget: maxTokensUnderBudget(series), + recommendations: decisionRecommendations(series), + llCrossover: llCrossoverRows(series), + resourcePareto: resourceParetoRows(series), + topologyPenalty: topologyPenaltyRows(series), + skewPenalty: skewPenaltyRows(series), + }; +} diff --git a/packages/app/src/components/collectivex/types.ts b/packages/app/src/components/collectivex/types.ts new file mode 100644 index 00000000..1783b70d --- /dev/null +++ b/packages/app/src/components/collectivex/types.ts @@ -0,0 +1,310 @@ +export type CollectiveXPhase = 'decode' | 'prefill'; +export type CollectiveXOperation = 'dispatch' | 'combine' | 'roundtrip' | 'isolated-sum'; +export type CollectiveXPercentile = 'p50' | 'p90' | 'p99'; +export type CollectiveXXAxis = 'tokens-per-rank' | 'global-tokens'; +export type CollectiveXYAxis = 'latency' | 'tokens-per-second' | 'payload-rate'; +export type CollectiveXScale = 'log' | 'linear'; +export type CollectiveXSuite = 'all' | 'backend-default' | 'resource-constrained'; +export type CollectiveXPublicationFilter = 'official-headline' | 'publishable' | 'official' | 'all'; +export type CollectiveXScalingKind = 'weak' | 'strong'; +export type CollectiveXHeatmapDimension = 'ep' | 'routing' | 'resource' | 'placement'; +export type CollectiveXCollectiveMetric = 'bus-bandwidth' | 'latency'; +export type CollectiveXTransferMetric = 'bandwidth' | 'latency'; +export type CollectiveXTransferFamily = 'offload' | 'copy-engine' | 'kv-cache' | 'rl-mesh'; +export type CollectiveXTransferLatencyUnit = 'us' | 'ms'; +export type CollectiveXPublicationStatus = + | 'official' + | 'comparable-experimental' + | 'legacy' + | 'diagnostic' + | 'invalid' + | 
'failed'; + +export interface CollectiveXPercentiles { + p50: number; + p90: number; + p95: number; + p99: number; +} + +export interface CollectiveXRow { + tokensPerRank: number; + globalTokens: number; + dispatch: CollectiveXPercentiles; + combine: CollectiveXPercentiles; + roundtrip: CollectiveXPercentiles; + isolatedSum: CollectiveXPercentiles; + roundtripMeasured: boolean; + dispatchLogicalBytes: number; + combineLogicalBytes: number; + fanoutMean: number | null; + recvTokensMax: number | null; + stragglerRank: number | null; + correct: boolean; + samplesPooled: number | null; + trials: number | null; +} + +export interface CollectiveXShape { + hidden: number | null; + topk: number | null; + experts: number | null; + routing: string; + routingLabel: string; + routingStep: number; + unevenTokens: string; + eplbEnabled: boolean; + dispatchDtype: string; + kernelGeneration: string; + activationProfile: string; + combineQuantMode: string; +} + +export interface CollectiveXResourceProfile { + requestedFraction: number | null; + achievedFraction: number | null; + configuredUnits: number | null; + deviceUnits: number | null; + resourceClass: string; + conformanceClass: string; + fixedKernel: boolean; + paretoEligible: boolean; +} + +export interface CollectiveXPlacement { + kind: string; + nodes: number | null; + gpusPerNode: number | null; + scaleUpDomain: number | null; +} + +export interface CollectiveXRunSource { + id: string | null; + url: string | null; + createdAt: string | null; + sha: string | null; +} + +export interface CollectiveXSeries { + id: string; + identity: string; + colorKey: string; + comparisonKey: string | null; + schemaVersion: number; + generatedAt: string; + status: string; + publicationStatus: CollectiveXPublicationStatus; + runner: string; + sku: string; + backend: string; + phase: CollectiveXPhase; + mode: string; + resourceMode: string; + suite: Exclude; + comparisonClass: string; + measurementContract: string; + topologyClass: string; + 
transport: string; + worldSize: number | null; + epSize: number | null; + label: string; + model: string; + shape: CollectiveXShape; + resourceProfile: CollectiveXResourceProfile; + placement: CollectiveXPlacement; + routingConsistent: boolean | null; + traceSignature: string | null; + workloadId: string | null; + workloadSource: string | null; + eplbImbalanceBefore: number | null; + eplbImbalanceAfter: number | null; + backendVersion: string | null; + imageDigest: string | null; + repository: string | null; + run: CollectiveXRunSource; + rows: CollectiveXRow[]; +} + +export interface CollectiveXFailure { + id: string; + identity: string; + generatedAt: string; + publicationStatus: CollectiveXPublicationStatus; + status: string; + sku: string; + backend: string; + phase: CollectiveXPhase | null; + config: string; + reason: string; + returnCode: number | null; + run: CollectiveXRunSource; +} + +export interface CollectiveXSensitivity { + id: string; + sku: string; + backend: string; + phase: CollectiveXPhase; + dispatchDtype: string; + mode: string; + contract: string; + epSize: number | null; + combineQuantMode: string; + activationProfile: string; + headlineP99RangeUs: [number, number]; + sensitivityRatio: number; + worstDistribution: string; + worstAtTokensPerRank: number; + bestCaseRatio: number | null; + eplbZipfRatio: number | null; + eplbRecoveredRatio: number | null; +} + +export interface CollectiveXScalingPoint { + id: string; + kind: CollectiveXScalingKind; + sku: string; + colorKey: string; + epSize: number; + latencyUs: number; +} + +export interface CollectiveXHeatmapCell { + id: string; + row: string; + tokensPerRank: number; + latencyUs: number; +} + +export interface CollectiveXSummaryCard { + title: string; + value: string; + sub: string; + warning?: boolean; + href?: string; +} + +export interface CollectiveXBudgetDecisionRow { + id: string; + sku: string; + backend: string; + phase: CollectiveXPhase; + dispatchDtype: string; + epSize: number | 
null; + mode: string; + budgets: Record; +} + +export interface CollectiveXRecommendationRow { + id: string; + sku: string; + phase: CollectiveXPhase; + atTokensPerRank: number; + lowestP99DispatchUs: number; + config: string; + epSize: number | null; +} + +export interface CollectiveXDecisionSummary { + budgetsUs: number[]; + maxTokensUnderBudget: CollectiveXBudgetDecisionRow[]; + recommendations: CollectiveXRecommendationRow[]; + llCrossover: Record[]; + resourcePareto: Record[]; + topologyPenalty: Record[]; + skewPenalty: Record[]; +} + +export interface CollectiveXCollectiveRow { + sizeBytes: number; + dtype: string | null; + latencyUs: number | null; + algBandwidthGbps: number | null; + busBandwidthGbps: number | null; + outOfPlaceUs: number | null; + inPlaceUs: number | null; + correct: boolean | null; +} + +export interface CollectiveXCollectiveSeries { + id: string; + identity: string; + op: string; + sku: string; + runner: string; + topologyClass: string; + transport: string; + worldSize: number | null; + nodes: number | null; + dtype: string | null; + comparisonClass: string | null; + comparisonKey: string | null; + measurementContract: string | null; + avgBusBandwidthGbps: number | null; + status: string; + valid: boolean; + colorKey: string; + label: string; + generatedAt: string; + run: CollectiveXRunSource; + rows: CollectiveXCollectiveRow[]; +} + +export interface CollectiveXTransferRow { + sizeBytes: number; + bandwidthGbps: number | null; + latency: number | null; + sizeClass?: string | null; + correct?: boolean | null; +} + +export interface CollectiveXTransferSeries { + id: string; + identity: string; + cohortIdentity: string; + family: CollectiveXTransferFamily; + sku: string; + topologyClass: string; + transport: string; + operation: string | null; + subtype: string | null; + valid: boolean; + status: string; + note: string; + peakBandwidthGbps: number | null; + latencyUnit: CollectiveXTransferLatencyUnit; + colorKey: string; + label: string; + 
generatedAt: string; + run: CollectiveXRunSource; + rows: CollectiveXTransferRow[]; +} + +export interface CollectiveXApiResponse { + snapshotVersion: number; + series: CollectiveXSeries[]; + failures: CollectiveXFailure[]; + summaryCards?: CollectiveXSummaryCard[]; + decision?: CollectiveXDecisionSummary; + nccl?: CollectiveXCollectiveSeries[]; + offload?: CollectiveXTransferSeries[]; + copyEngine?: CollectiveXTransferSeries[]; + kvCache?: CollectiveXTransferSeries[]; + rlMesh?: CollectiveXTransferSeries[]; + scannedRuns: number; + scannedArtifacts: number; + contributingRuns: number; + generatedAt: string; +} + +export interface CollectiveXChartPoint { + seriesId: string; + seriesLabel: string; + colorKey: string; + x: number; + y: number; + operation: CollectiveXOperation; + percentile: CollectiveXPercentile; + row: CollectiveXRow; + series: CollectiveXSeries; +} diff --git a/packages/app/src/components/header/header.tsx b/packages/app/src/components/header/header.tsx index 8fbf52ac..7ad0bfbe 100644 --- a/packages/app/src/components/header/header.tsx +++ b/packages/app/src/components/header/header.tsx @@ -22,6 +22,7 @@ const DASHBOARD_TABS = [ '/reliability', '/gpu-specs', '/gpu-metrics', + '/collectivex', '/submissions', '/current-inferencex-image', ]; diff --git a/packages/app/src/components/tab-nav.tsx b/packages/app/src/components/tab-nav.tsx index ce5f3257..b631e910 100644 --- a/packages/app/src/components/tab-nav.tsx +++ b/packages/app/src/components/tab-nav.tsx @@ -29,6 +29,7 @@ const VISIBLE_TABS = [ { href: '/historical', label: 'Historical Trends', testId: 'tab-trigger-historical' }, { href: '/calculator', label: 'TCO Calculator', testId: 'tab-trigger-calculator' }, { href: '/gpu-specs', label: 'GPU Specs', testId: 'tab-trigger-gpu-specs' }, + { href: '/collectivex', label: 'CollectiveX', testId: 'tab-trigger-collectivex' }, { href: '/submissions', label: 'Submissions', testId: 'tab-trigger-submissions' }, ] as const; diff --git 
a/packages/app/src/hooks/api/use-collectivex.ts b/packages/app/src/hooks/api/use-collectivex.ts new file mode 100644 index 00000000..865c4259 --- /dev/null +++ b/packages/app/src/hooks/api/use-collectivex.ts @@ -0,0 +1,11 @@ +import { useQuery } from '@tanstack/react-query'; + +import { fetchCollectiveX } from '@/lib/api'; + +export function useCollectiveX() { + return useQuery({ + queryKey: ['collectivex'], + queryFn: ({ signal }) => fetchCollectiveX(signal), + staleTime: 5 * 60 * 1000, + }); +} diff --git a/packages/app/src/lib/api.test.ts b/packages/app/src/lib/api.test.ts index a1f29006..d8d60ffa 100644 --- a/packages/app/src/lib/api.test.ts +++ b/packages/app/src/lib/api.test.ts @@ -4,6 +4,7 @@ import { fetchBenchmarks, fetchWorkflowInfo, fetchAvailability, + fetchCollectiveX, fetchReliability, fetchEvaluations, } from './api'; @@ -126,3 +127,16 @@ describe('fetchEvaluations', () => { expect(result[0].task).toBe('gsm8k'); }); }); + +describe('fetchCollectiveX', () => { + it('fetches the generated static snapshot', async () => { + mockOk({ series: [], scannedRuns: 0, contributingRuns: 0, generatedAt: '' }); + + await fetchCollectiveX(); + + expect(mockFetch).toHaveBeenCalledWith( + '/data/collectivex.json', + expect.objectContaining({ cache: 'no-store' }), + ); + }); +}); diff --git a/packages/app/src/lib/api.ts b/packages/app/src/lib/api.ts index 0dac5883..15ba197b 100644 --- a/packages/app/src/lib/api.ts +++ b/packages/app/src/lib/api.ts @@ -4,6 +4,7 @@ */ import type { WorkerPower } from '@/components/inference/types'; +import type { CollectiveXApiResponse } from '@/components/collectivex/types'; import type { SubmissionsResponse } from './submissions-types'; @@ -134,8 +135,12 @@ export interface EvalRow { run_url: string | null; } -async function fetchJson(url: string, signal?: AbortSignal): Promise { - const res = await fetch(url, { signal }); +async function fetchJson( + url: string, + signal?: AbortSignal, + options?: Omit, +): Promise { + const res = 
await fetch(url, { ...options, signal }); if (!res.ok) throw new Error(`API error: ${res.status} ${res.statusText}`); return res.json(); } @@ -293,6 +298,12 @@ export function fetchSubmissions(signal?: AbortSignal) { return fetchJson('/api/v1/submissions', signal); } +export function fetchCollectiveX(signal?: AbortSignal) { + return fetchJson('/data/collectivex.json', signal, { + cache: 'no-store', + }); +} + export interface FeedbackListRow { id: string; created_at: string; diff --git a/packages/app/src/lib/collectivex-snapshot.test.ts b/packages/app/src/lib/collectivex-snapshot.test.ts new file mode 100644 index 00000000..5b2ea164 --- /dev/null +++ b/packages/app/src/lib/collectivex-snapshot.test.ts @@ -0,0 +1,476 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const state = vi.hoisted(() => ({ + documents: [] as unknown[], + entries: [] as { entryName: string; contents: string }[], +})); + +vi.mock('@semianalysisai/inferencex-constants', () => ({ + GITHUB_API_BASE: 'https://api.github.com', + GITHUB_OWNER: 'SemiAnalysisAI', + GITHUB_REPO: 'InferenceX', +})); + +vi.mock('adm-zip', () => ({ + default: class MockAdmZip { + getEntries() { + if (state.entries.length > 0) { + return state.entries.map((entry) => ({ + entryName: entry.entryName, + getData: () => Buffer.from(entry.contents), + })); + } + return state.documents.map((document, index) => ({ + entryName: `result-${index}.json`, + getData: () => Buffer.from(JSON.stringify(document)), + })); + } + }, +})); + +import { generateCollectiveXSnapshot } from './collectivex-snapshot'; + +const originalFetch = globalThis.fetch; + +function workflowRun(id = 12345, status = 'completed', name = 'CollectiveX Sweep') { + return { + id, + name, + head_branch: 'collectivex', + head_sha: 'abc123', + created_at: '2026-06-27T08:00:00Z', + html_url: `https://github.com/SemiAnalysisAI/InferenceX/actions/runs/${id}`, + conclusion: status === 'completed' ? 
'success' : null, + status, + }; +} + +function resultDocument(overrides: Record = {}) { + return { + schema_version: 4, + family: 'moe', + generated_at: '2026-06-27T08:01:00Z', + status: 'valid', + publication_status: 'official', + runner: 'mi355x-amds_04', + backend: 'mori', + phase: 'decode', + mode: 'normal', + resource_mode: 'normalized', + comparison_class: 'standardized', + measurement_contract: 'layout-and-dispatch-v1', + topology_class: 'mi355x-xgmi', + transport: 'xgmi', + world_size: 8, + ep_size: 8, + shape: { + hidden: 7168, + topk: 8, + experts: 256, + dispatch_dtype: 'bf16', + routing: 'uniform', + activation_profile: 'normal', + quant: { combine_quant_mode: 'none' }, + }, + workload: { + workload_id: 'set:8:fixture', + trace_signature: 'trace-fixture', + }, + reproduction: { + git_run: { + run_id: '12345', + source_sha: 'abc123', + repo: 'SemiAnalysisAI/InferenceX', + }, + }, + rows: [ + { + tokens_per_rank: 1, + global_tokens: 8, + dispatch: { p50: 40, p90: 45, p95: 47, p99: 50 }, + combine: { p50: 20, p90: 25, p95: 27, p99: 30 }, + roundtrip: { p50: 55, p90: 65, p95: 70, p99: 80 }, + isolated_sum: { p50: 60, p90: 70, p95: 74, p99: 80 }, + dispatch_logical_bytes: 8000, + combine_logical_bytes: 4000, + correct: true, + }, + ], + ...overrides, + }; +} + +function ncclDocument(overrides: Record = {}) { + return { + family: 'nccl', + generated_at: '2026-06-27T08:02:00Z', + status: 'valid', + runner: 'mi355x-amds_04', + op: 'all_reduce', + topology_class: 'mi355x-xgmi', + transport: 'xgmi', + world_size: 8, + nodes: 1, + comparison_class: 'standardized', + measurement_contract: 'nccl-tests-v1', + rows: [ + { + size_bytes: 1024, + dtype: 'bf16', + busbw_gbps: 42, + out_of_place: { time_us: 12, algbw_gbps: 18, busbw_gbps: 42 }, + in_place: { time_us: 13, algbw_gbps: 16, busbw_gbps: 40 }, + correct: true, + }, + ], + ...overrides, + }; +} + +function artifactList(name = 'collectivex_mi355x_mori_decode_12345') { + return { + artifacts: [ + { + id: 1, + 
name, + archive_download_url: 'https://api.github.com/artifacts/1/zip', + }, + ], + }; +} + +function artifactDownload() { + return { + ok: true, + headers: new Headers({ 'Content-Length': '1024' }), + arrayBuffer: () => Promise.resolve(new Uint8Array([1]).buffer), + }; +} + +beforeEach(() => { + state.documents = [resultDocument()]; + state.entries = []; +}); + +afterEach(() => { + globalThis.fetch = originalFetch; + vi.restoreAllMocks(); +}); + +describe('generateCollectiveXSnapshot', () => { + it('discovers successful workflow artifacts and returns a versioned normalized snapshot', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + .mockResolvedValueOnce(artifactDownload()); + + const snapshot = await generateCollectiveXSnapshot({ token: 'test-token' }); + + expect(snapshot).toMatchObject({ + snapshotVersion: 3, + scannedRuns: 1, + scannedArtifacts: 1, + contributingRuns: 1, + generatedAt: '2026-06-27T08:01:00.000Z', + failures: [], + }); + expect(snapshot.series).toHaveLength(1); + expect(snapshot.series[0]).toMatchObject({ + sku: 'mi355x', + backend: 'mori', + workloadId: 'set:8:fixture', + run: { + id: '12345', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/12345', + }, + }); + expect(snapshot.series[0].id).toMatch(/^cx-/u); + }); + + it('keeps legacy CollectiveX Experimental workflow runs in branch discovery', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => + Promise.resolve({ + workflow_runs: [ + workflowRun(12345, 'completed', 'CollectiveX Experimental'), + workflowRun(99999, 'completed', 'Unrelated Workflow'), + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + .mockResolvedValueOnce(artifactDownload()); + + const snapshot = await 
generateCollectiveXSnapshot({ token: 'test-token' }); + + expect(snapshot.scannedRuns).toBe(1); + expect(snapshot.series[0].run.id).toBe('12345'); + }); + + it('paginates workflow discovery beyond the first 100 successful runs', async () => { + const firstPage = Array.from({ length: 100 }, (_, index) => workflowRun(index + 1)); + const secondPage = [workflowRun(101)]; + const requestedUrls: string[] = []; + + globalThis.fetch = vi.fn((input) => { + const url = String(input); + requestedUrls.push(url); + if (url.includes('/actions/runs?')) { + const page = new URL(url).searchParams.get('page'); + return Promise.resolve({ + ok: true, + json: () => Promise.resolve({ workflow_runs: page === '1' ? firstPage : secondPage }), + } as Response); + } + if (url.includes('/actions/runs/1/artifacts')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve(artifactList()), + } as Response); + } + if (url.includes('/actions/runs/') && url.includes('/artifacts')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve({ artifacts: [] }), + } as Response); + } + if (url === 'https://api.github.com/artifacts/1/zip') { + return Promise.resolve(artifactDownload() as Response); + } + throw new Error(`Unexpected URL: ${url}`); + }); + + const snapshot = await generateCollectiveXSnapshot({ + token: 'test-token', + maxDiscoveryRuns: 101, + }); + + expect(snapshot.scannedRuns).toBe(101); + expect(snapshot.scannedArtifacts).toBe(1); + expect(requestedUrls.filter((url) => url.includes('/actions/runs?'))).toHaveLength(2); + }); + + it('includes a just-finished source run before GitHub marks the workflow successful', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(workflowRun(67890, 'in_progress')), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + 
.mockResolvedValueOnce(artifactDownload()); + + const snapshot = await generateCollectiveXSnapshot({ + token: 'test-token', + sourceRunId: '67890', + }); + + expect(snapshot.scannedRuns).toBe(1); + expect(snapshot.series[0].run.id).toBe('67890'); + }); + + it('loads branch-only aggregate artifacts with NDJSON entries', async () => { + const requestedUrls: string[] = []; + state.entries = [ + { + entryName: 'collectivex_ep.ndjson', + contents: `${JSON.stringify(resultDocument())}\n${JSON.stringify(ncclDocument())}\n`, + }, + ]; + globalThis.fetch = vi.fn((input) => { + const url = String(input); + requestedUrls.push(url); + if (url.includes('/actions/runs?')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + } as Response); + } + if (url.includes('/actions/runs/12345/artifacts')) { + return Promise.resolve({ + ok: true, + json: () => Promise.resolve(artifactList('cxsweep-aggregate-deepep-v2-12345')), + } as Response); + } + if (url === 'https://api.github.com/artifacts/1/zip') { + return Promise.resolve(artifactDownload() as Response); + } + throw new Error(`Unexpected URL: ${url}`); + }); + + const snapshot = await generateCollectiveXSnapshot({ token: 'test-token' }); + + expect(requestedUrls.filter((url) => url.includes('/actions/runs?'))).toHaveLength(1); + expect(snapshot.scannedArtifacts).toBe(1); + expect(snapshot.series).toHaveLength(1); + expect(snapshot.nccl).toHaveLength(1); + }); + + it('retries a transient GitHub request failure before generating the snapshot', async () => { + globalThis.fetch = vi + .fn() + .mockRejectedValueOnce(new TypeError('fetch failed')) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + .mockResolvedValueOnce(artifactDownload()); + + const snapshot = await generateCollectiveXSnapshot({ token: 'test-token' }); + + 
expect(snapshot.series).toHaveLength(1); + expect(globalThis.fetch).toHaveBeenCalledTimes(4); + }); + + it('preserves a selected failed case when no good result exists for its configuration', async () => { + state.documents = [ + resultDocument({ + record_type: 'failed-case', + publication_status: 'failed', + status: 'failed', + rows: [], + failure: { failure_mode: 'timeout', return_code: 124 }, + }), + ]; + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + .mockResolvedValueOnce(artifactDownload()); + + const snapshot = await generateCollectiveXSnapshot({ token: 'test-token' }); + + expect(snapshot.series).toHaveLength(0); + expect(snapshot.failures).toEqual([ + expect.objectContaining({ reason: 'timeout', returnCode: 124 }), + ]); + }); + + it('carries forward previous-snapshot series whose source runs vanished', async () => { + const mockDiscovery = () => + vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList()), + }) + .mockResolvedValueOnce(artifactDownload()); + + globalThis.fetch = mockDiscovery(); + const first = await generateCollectiveXSnapshot({ token: 'test-token' }); + expect(first.series).toHaveLength(1); + + // A series whose source run no longer exists on GitHub (deleted run / expired + // artifact), plus a stale duplicate of the fresh identity that must lose selection. 
+ const vanished = { + ...first.series[0], + id: 'cx-vanished', + identity: 'vanished-identity', + sku: 'gb200', + }; + const stale = { ...first.series[0], id: 'cx-stale', generatedAt: '2026-06-20T00:00:00Z' }; + const previousSnapshot = { ...first, series: [vanished, stale] }; + + globalThis.fetch = mockDiscovery(); + const snapshot = await generateCollectiveXSnapshot({ token: 'test-token', previousSnapshot }); + + expect(snapshot.series).toHaveLength(2); + expect(snapshot.series.map((item) => item.identity)).toContain('vanished-identity'); + const fresh = snapshot.series.find((item) => item.identity === first.series[0].identity); + expect(fresh?.generatedAt).toBe(first.series[0].generatedAt); + }); + + it('rejects a source run from a different branch', async () => { + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ ...workflowRun(67890), head_branch: 'main' }), + }); + + await expect( + generateCollectiveXSnapshot({ token: 'test-token', sourceRunId: '67890' }), + ).rejects.toThrow('is not from the collectivex branch'); + }); + + it('includes non-EP CollectiveX artifacts in the generated snapshot', async () => { + state.documents = [ncclDocument()]; + globalThis.fetch = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve({ workflow_runs: [workflowRun()] }), + }) + .mockResolvedValueOnce({ + ok: true, + json: () => Promise.resolve(artifactList('collectivex_mi355x_nccl_12345')), + }) + .mockResolvedValueOnce(artifactDownload()); + + const snapshot = await generateCollectiveXSnapshot({ token: 'test-token' }); + + expect(snapshot.series).toHaveLength(0); + expect(snapshot.nccl).toEqual([ + expect.objectContaining({ + op: 'all_reduce', + sku: 'mi355x', + label: 'MI355X · mi355x-xgmi · xgmi (ws8)', + }), + ]); + }); + + it('requires a GitHub token', async () => { + await 
expect(generateCollectiveXSnapshot({ token: '' })).rejects.toThrow( + 'GitHub token not configured', + ); + }); + + it('surfaces workflow discovery failures', async () => { + globalThis.fetch = vi.fn().mockResolvedValue({ + ok: false, + status: 502, + }); + + await expect(generateCollectiveXSnapshot({ token: 'test-token' })).rejects.toThrow( + 'Failed to list CollectiveX workflow runs: 502', + ); + expect(globalThis.fetch).toHaveBeenCalledTimes(3); + }); +}); diff --git a/packages/app/src/lib/collectivex-snapshot.ts b/packages/app/src/lib/collectivex-snapshot.ts new file mode 100644 index 00000000..d05dde53 --- /dev/null +++ b/packages/app/src/lib/collectivex-snapshot.ts @@ -0,0 +1,390 @@ +import { GITHUB_API_BASE, GITHUB_OWNER, GITHUB_REPO } from '@semianalysisai/inferencex-constants'; + +import { + collectiveXDecisionSummary, + collectiveXSummaryCards, + normalizeCollectiveXAllReduceFrameworkDocument, + normalizeCollectiveXArtifactDocument, + normalizeCollectiveXNcclDocument, + normalizeCollectiveXTransferDocument, + selectLatestCollectiveXCollectiveSeries, + selectLatestCollectiveXRecords, + selectLatestCollectiveXTransferSeries, + type CollectiveXArtifactRecord, +} from '@/components/collectivex/data'; +import type { + CollectiveXApiResponse, + CollectiveXCollectiveSeries, + CollectiveXTransferSeries, +} from '@/components/collectivex/types'; + +import { extractZipEntries, type GithubArtifact, type GithubWorkflowRun } from './github-artifacts'; + +const WORKFLOW_NAMES = new Set(['CollectiveX Sweep', 'CollectiveX Experimental']); +const WORKFLOW_BRANCH = 'collectivex'; +const DEFAULT_MAX_DISCOVERY_RUNS = 500; +const RUNS_PAGE_SIZE = 100; +const DOWNLOAD_CONCURRENCY = 8; +const MAX_ARTIFACT_BYTES = 10 * 1024 * 1024; +const REQUEST_TIMEOUT_MS = 30_000; +const MAX_REQUEST_ATTEMPTS = 3; +const RETRYABLE_STATUSES = new Set([408, 429, 500, 502, 503, 504]); + +interface GithubWorkflowRunsResponse { + workflow_runs?: GithubWorkflowRun[]; +} + +interface ArtifactWithRun 
{ + artifact: GithubArtifact; + run: GithubWorkflowRun; +} + +interface NormalizedArtifactBundle { + records: CollectiveXArtifactRecord[]; + collectives: CollectiveXCollectiveSeries[]; + transfers: CollectiveXTransferSeries[]; +} + +export interface GenerateCollectiveXSnapshotOptions { + token: string; + sourceRunId?: string; + maxDiscoveryRuns?: number; + /** + * Previous committed snapshot to carry forward. Discovery only sees runs that still + * exist on GitHub (deleted runs and >90-day-expired artifacts vanish silently), so a + * from-scratch rebuild permanently drops every result not re-swept since. Seeding the + * selection pools with the previous snapshot keeps those series; fresh runs still + * supersede per identity via the same valid > publication-rank > newest comparators. + */ + previousSnapshot?: CollectiveXApiResponse | null; +} + +function previousSnapshotRecords(previous: CollectiveXApiResponse): CollectiveXArtifactRecord[] { + return [ + ...previous.series.map((item) => ({ + identity: item.identity, + generatedAt: item.generatedAt, + publicationStatus: item.publicationStatus, + isFailed: item.status === 'failed', + series: item, + failure: null, + })), + ...previous.failures.map((item) => ({ + identity: item.identity, + generatedAt: item.generatedAt, + publicationStatus: item.publicationStatus, + isFailed: true, + series: null, + failure: item, + })), + ]; +} + +const GITHUB_HEADERS = { + Accept: 'application/vnd.github.v3+json', +} as const; + +async function waitBeforeRetry(attempt: number): Promise { + const delayMs = process.env.NODE_ENV === 'test' ? 
0 : Math.min(250 * 2 ** (attempt - 1), 2000); + await new Promise((resolve) => { + setTimeout(resolve, delayMs); + }); +} + +async function fetchCollectiveXGithub(url: string, token: string): Promise { + let lastError: unknown; + + for (let attempt = 1; attempt <= MAX_REQUEST_ATTEMPTS; attempt++) { + try { + const response = await fetch(url, { + cache: 'no-store', + headers: { + ...GITHUB_HEADERS, + Authorization: `Bearer ${token}`, + }, + signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS), + }); + if ( + response.ok || + !RETRYABLE_STATUSES.has(response.status) || + attempt === MAX_REQUEST_ATTEMPTS + ) { + return response; + } + lastError = new Error(`GitHub request returned ${response.status}`); + } catch (error) { + lastError = error; + if (attempt === MAX_REQUEST_ATTEMPTS) break; + } + + await waitBeforeRetry(attempt); + } + + throw new Error(`GitHub request failed after ${MAX_REQUEST_ATTEMPTS} attempts`, { + cause: lastError, + }); +} + +async function inBatches( + items: T[], + batchSize: number, + task: (item: T) => Promise, +): Promise { + const results: R[] = []; + for (let offset = 0; offset < items.length; offset += batchSize) { + results.push(...(await Promise.all(items.slice(offset, offset + batchSize).map(task)))); + } + return results; +} + +async function fetchCompletedCollectiveXRuns( + token: string, + maxDiscoveryRuns: number, +): Promise { + const runs: GithubWorkflowRun[] = []; + + for (let page = 1; runs.length < maxDiscoveryRuns; page++) { + const params = new URLSearchParams({ + branch: WORKFLOW_BRANCH, + // "completed" (any conclusion), NOT "success": CollectiveX sweep runs routinely conclude + // "failure" while carrying full valid data — a single diagnostic case (e.g. the empty-rank + // uneven-token probe, or a flaky flashinfer config) flips the job conclusion even though + // 200+ correct results were uploaded. 
Validity is judged per record inside the artifacts + // (status/publicationStatus; compareRecords prefers valid-over-failed, then newest), so + // filtering runs by conclusion here silently drops the newest and best datasets. + status: 'completed', + per_page: String(RUNS_PAGE_SIZE), + page: String(page), + }); + const response = await fetchCollectiveXGithub( + `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs?${params}`, + token, + ); + if (!response.ok) { + throw new Error(`Failed to list CollectiveX workflow runs: ${response.status}`); + } + + const payload = (await response.json()) as GithubWorkflowRunsResponse; + const pageRuns = payload.workflow_runs ?? []; + runs.push(...pageRuns.filter((run) => WORKFLOW_NAMES.has(run.name))); + if (pageRuns.length < RUNS_PAGE_SIZE) break; + } + + return runs.slice(0, maxDiscoveryRuns); +} + +async function fetchSourceRun(runId: string, token: string): Promise { + const response = await fetchCollectiveXGithub( + `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}`, + token, + ); + if (!response.ok) { + throw new Error(`Failed to load CollectiveX source run ${runId}: ${response.status}`); + } + const run = (await response.json()) as GithubWorkflowRun; + if (run.head_branch !== WORKFLOW_BRANCH) { + throw new Error(`CollectiveX source run ${runId} is not from the ${WORKFLOW_BRANCH} branch`); + } + return run; +} + +async function discoverRuns( + token: string, + maxDiscoveryRuns: number, + sourceRunId?: string, +): Promise { + const completedRuns = await fetchCompletedCollectiveXRuns(token, maxDiscoveryRuns); + if (!sourceRunId || completedRuns.some((run) => String(run.id) === sourceRunId)) { + return completedRuns; + } + + // The source workflow dispatches this generator after artifact upload but + // before the workflow itself reaches "success", so explicitly include it. 
+ const sourceRun = await fetchSourceRun(sourceRunId, token); + return [sourceRun, ...completedRuns.filter((run) => String(run.id) !== String(sourceRun.id))]; +} + +function isCollectiveXArtifact(artifact: GithubArtifact): boolean { + return artifact.name.startsWith('collectivex_') || artifact.name.startsWith('cxsweep-aggregate-'); +} + +async function fetchRunArtifacts(runId: string, token: string): Promise { + const artifacts: GithubArtifact[] = []; + + for (let page = 1; ; page++) { + const params = new URLSearchParams({ + per_page: String(RUNS_PAGE_SIZE), + page: String(page), + }); + const response = await fetchCollectiveXGithub( + `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}/artifacts?${params}`, + token, + ); + if (!response.ok) { + throw new Error(`Failed to list CollectiveX artifacts for run ${runId}: ${response.status}`); + } + + const payload = (await response.json()) as { artifacts?: GithubArtifact[] }; + const pageArtifacts = payload.artifacts ?? []; + artifacts.push(...pageArtifacts); + if (pageArtifacts.length < RUNS_PAGE_SIZE) break; + } + + return artifacts; +} + +async function discoverArtifacts( + runs: GithubWorkflowRun[], + token: string, +): Promise { + const artifactLists = await inBatches(runs, DOWNLOAD_CONCURRENCY, async (run) => ({ + run, + artifacts: await fetchRunArtifacts(String(run.id), token), + })); + + return artifactLists.flatMap(({ run, artifacts }) => + artifacts.filter(isCollectiveXArtifact).map((artifact) => ({ artifact, run })), + ); +} + +function parseArtifactDocuments(buffer: Buffer, artifactName: string): unknown[] { + const parseError = (entryName: string, error: unknown) => { + console.warn(`Failed to parse ${entryName} from ${artifactName}:`, error); + }; + const jsonDocuments = extractZipEntries( + buffer, + '.json', + (_entryName, contents) => { + const parsed = JSON.parse(contents) as unknown; + return Array.isArray(parsed) ? 
parsed : [parsed]; + }, + parseError, + ); + const ndjsonDocuments = extractZipEntries( + buffer, + '.ndjson', + (_entryName, contents) => { + const documents: unknown[] = []; + for (const line of contents.split(/\r?\n/u)) { + const trimmed = line.trim(); + if (trimmed) documents.push(JSON.parse(trimmed) as unknown); + } + return documents; + }, + parseError, + ); + return [...jsonDocuments, ...ndjsonDocuments]; +} + +async function downloadAndNormalize( + source: ArtifactWithRun, + token: string, +): Promise { + const response = await fetchCollectiveXGithub(source.artifact.archive_download_url, token); + if (!response.ok) { + throw new Error( + `Failed to download CollectiveX artifact ${source.artifact.name}: ${response.status}`, + ); + } + + const contentLength = Number(response.headers.get('Content-Length') ?? 0); + if (contentLength > MAX_ARTIFACT_BYTES) { + console.warn(`Skipping oversized CollectiveX artifact ${source.artifact.name}`); + return { records: [], collectives: [], transfers: [] }; + } + + const buffer = Buffer.from(await response.arrayBuffer()); + if (buffer.byteLength > MAX_ARTIFACT_BYTES) { + console.warn(`Skipping oversized CollectiveX artifact ${source.artifact.name}`); + return { records: [], collectives: [], transfers: [] }; + } + + const bundle: NormalizedArtifactBundle = { records: [], collectives: [], transfers: [] }; + for (const document of parseArtifactDocuments(buffer, source.artifact.name)) { + const context = { + run: { + id: String(source.run.id), + url: source.run.html_url, + createdAt: source.run.created_at, + sha: source.run.head_sha, + }, + }; + const record = normalizeCollectiveXArtifactDocument(document, context); + if (record) bundle.records.push(record); + bundle.collectives.push( + ...normalizeCollectiveXNcclDocument(document, context), + ...normalizeCollectiveXAllReduceFrameworkDocument(document, context), + ); + bundle.transfers.push(...normalizeCollectiveXTransferDocument(document, context)); + } + return bundle; +} + 
+export async function generateCollectiveXSnapshot({ + token, + sourceRunId, + maxDiscoveryRuns = DEFAULT_MAX_DISCOVERY_RUNS, + previousSnapshot = null, +}: GenerateCollectiveXSnapshotOptions): Promise { + if (!token) throw new Error('GitHub token not configured'); + + const runs = await discoverRuns(token, maxDiscoveryRuns, sourceRunId); + const artifacts = await discoverArtifacts(runs, token); + const bundles = await inBatches(artifacts, DOWNLOAD_CONCURRENCY, (source) => + downloadAndNormalize(source, token), + ); + const { series, failures } = selectLatestCollectiveXRecords([ + ...(previousSnapshot ? previousSnapshotRecords(previousSnapshot) : []), + ...bundles.flatMap((bundle) => bundle.records), + ]); + const nccl = selectLatestCollectiveXCollectiveSeries([ + ...(previousSnapshot?.nccl ?? []), + ...bundles.flatMap((bundle) => bundle.collectives), + ]); + const transfers = selectLatestCollectiveXTransferSeries([ + ...(previousSnapshot + ? [ + ...(previousSnapshot.offload ?? []), + ...(previousSnapshot.copyEngine ?? []), + ...(previousSnapshot.kvCache ?? []), + ...(previousSnapshot.rlMesh ?? 
[]), + ] + : []), + ...bundles.flatMap((bundle) => bundle.transfers), + ]); + const offload = transfers.filter((item) => item.family === 'offload'); + const copyEngine = transfers.filter((item) => item.family === 'copy-engine'); + const kvCache = transfers.filter((item) => item.family === 'kv-cache'); + const rlMesh = transfers.filter((item) => item.family === 'rl-mesh'); + if (series.length === 0 && failures.length === 0 && nccl.length === 0 && transfers.length === 0) { + throw new Error('No valid CollectiveX results found in recent workflow runs'); + } + + const contributingRuns = new Set( + [...series, ...failures, ...nccl, ...transfers].map((item) => item.run.id).filter(Boolean), + ).size; + const newestTimestamp = Math.max( + ...[...series, ...failures, ...nccl, ...transfers] + .map((item) => Date.parse(item.generatedAt)) + .filter(Number.isFinite), + ); + + return { + snapshotVersion: 3, + series, + failures, + summaryCards: collectiveXSummaryCards(series, failures), + decision: collectiveXDecisionSummary(series), + nccl, + offload, + copyEngine, + kvCache, + rlMesh, + scannedRuns: runs.length, + scannedArtifacts: artifacts.length, + contributingRuns, + generatedAt: Number.isFinite(newestTimestamp) + ? 
new Date(newestTimestamp).toISOString() + : new Date(0).toISOString(), + }; +} diff --git a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts index 057cadf8..32e9f60c 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.test.ts @@ -46,6 +46,14 @@ describe('buildScale', () => { } }); + it('supports a custom log base without expanding the domain when nice=false', () => { + const scale = buildScale({ type: 'log', domain: [1, 128], base: 2, nice: false }, [0, 700]); + + expect('base' in scale && scale.base()).toBe(2); + expect(scale.domain()).toEqual([1, 128]); + expect('ticks' in scale && scale.ticks(8)).toEqual([1, 2, 4, 8, 16, 32, 64, 128]); + }); + it('builds a time scale', () => { const d1 = new Date('2025-01-01'); const d2 = new Date('2025-12-31'); diff --git a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts index cb8ce2e7..c865575f 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/scale-builders.ts @@ -26,7 +26,11 @@ export function buildScale(config: ScaleConfig, range: [number, number]): BuiltS } case 'log': { - const l = d3.scaleLog().domain(config.domain).range(range); + const l = d3 + .scaleLog() + .base(config.base ?? 10) + .domain(config.domain) + .range(range); return config.nice === false ? 
l : l.nice(); } diff --git a/packages/app/src/lib/d3-chart/D3Chart/types.ts b/packages/app/src/lib/d3-chart/D3Chart/types.ts index 3062784e..f716a012 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/types.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/types.ts @@ -20,7 +20,7 @@ import type { RadarConfig } from '../layers/radar'; export type ScaleConfig = | { type: 'band'; domain: string[]; padding?: number } | { type: 'linear'; domain: [number, number]; nice?: boolean } - | { type: 'log'; domain: [number, number]; nice?: boolean } + | { type: 'log'; domain: [number, number]; nice?: boolean; base?: number } | { type: 'time'; domain: [Date, Date]; nice?: boolean }; // --------------------------------------------------------------------------- @@ -126,6 +126,8 @@ export interface AxisConfig { label?: string; tickFormat?: (d: d3.AxisDomain) => string; tickCount?: number; + /** Explicit ticks or a domain-aware generator, useful for geometric and sparse log axes. */ + tickValues?: (number | Date)[] | ((scale: AnyScale) => (number | Date)[]); /** Post-render callback for custom axis label formatting (e.g., multi-line tspan). 
*/ customize?: (axisGroup: d3.Selection) => void; } diff --git a/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts b/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts index 8953d156..70896df6 100644 --- a/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts +++ b/packages/app/src/lib/d3-chart/D3Chart/useD3ChartRenderer.ts @@ -8,7 +8,7 @@ import type { ChartLayout, ContinuousScale } from '../types'; import { buildScale, isBandScale, type BuiltScale } from './scale-builders'; import { renderLayer, updateLayerOnZoom } from './layer-renderer'; -import type { D3ChartProps, RenderContext, ZoomContext } from './types'; +import type { AxisConfig, D3ChartProps, RenderContext, ZoomContext } from './types'; interface RendererDeps { svgRef: React.RefObject; @@ -51,6 +51,14 @@ interface RendererDeps { ) => void; } +function resolveTickValues( + tickValues: AxisConfig['tickValues'], + scale: AnyScale, +): (number | Date)[] | undefined { + if (!tickValues) return undefined; + return typeof tickValues === 'function' ? tickValues(scale) : tickValues; +} + /** * Core render effect for D3Chart. Builds scales, renders structure/axes/grid/layers, * wires up tooltip and zoom handlers. @@ -162,12 +170,24 @@ export function useD3ChartRenderer(props: D3ChartProps, deps: RendererDeps // ── Grid + Axes (skip when no scale configs) ── if (hasScales) { - renderGrid(layout, xScale as AnyScale, yScale as any, yAxisConfig?.tickCount ?? 5); + const xTickValues = resolveTickValues(xAxisConfig?.tickValues, xScale as AnyScale); + const yTickValues = resolveTickValues(yAxisConfig?.tickValues, yScale as AnyScale); + renderGrid( + layout, + xScale as AnyScale, + yScale as any, + yAxisConfig?.tickCount ?? 
5, + 0, + xTickValues, + yTickValues, + ); renderAxes(layout, xScale as AnyScale, yScale as any, { xTickFormat: xAxisConfig?.tickFormat, yTickFormat: yAxisConfig?.tickFormat, xTickCount: xAxisConfig?.tickCount, yTickCount: yAxisConfig?.tickCount, + xTickValues, + yTickValues, }); // Custom axis formatting callbacks @@ -408,11 +428,15 @@ export function useD3ChartRenderer(props: D3ChartProps, deps: RendererDeps } // Update axes + grid + const xTickValues = resolveTickValues(xAxisConfig?.tickValues, newXScale as AnyScale); + const yTickValues = resolveTickValues(yAxisConfig?.tickValues, newYScale as AnyScale); renderAxes(layout, newXScale as AnyScale, newYScale as any, { xTickFormat: xAxisConfig?.tickFormat, yTickFormat: yAxisConfig?.tickFormat, xTickCount: xAxisConfig?.tickCount, yTickCount: yAxisConfig?.tickCount, + xTickValues, + yTickValues, }); if (xAxisConfig?.customize) { xAxisConfig.customize(layout.xAxisGroup); @@ -425,6 +449,9 @@ export function useD3ChartRenderer(props: D3ChartProps, deps: RendererDeps newXScale as AnyScale, newYScale as any, yAxisConfig?.tickCount ?? 
5, + 0, + xTickValues, + yTickValues, ); // Update layers diff --git a/packages/app/src/lib/d3-chart/chart-update.test.ts b/packages/app/src/lib/d3-chart/chart-update.test.ts index a4a52b85..a8b3d012 100644 --- a/packages/app/src/lib/d3-chart/chart-update.test.ts +++ b/packages/app/src/lib/d3-chart/chart-update.test.ts @@ -51,6 +51,23 @@ describe('renderAxes', () => { expect(tickCount).toBeLessThanOrEqual(8); }); + it('renders only explicit x tick values within the visible domain', () => { + const layout = makeLayout(); + const xScale = d3.scaleLinear().domain([2, 10]).range([0, layout.width]); + const yScale = d3.scaleLinear().domain([0, 50]).range([layout.height, 0]); + + renderAxes(layout, xScale, yScale, { + xTickValues: [1, 2, 4, 16], + xTickFormat: String, + }); + + const labels: string[] = []; + layout.xAxisGroup.selectAll('.tick text').each(function () { + labels.push(d3.select(this).text()); + }); + expect(labels).toEqual(['2', '4']); + }); + it('respects yTickCount', () => { const layout = makeLayout(); const xScale = d3.scaleLinear().domain([0, 100]).range([0, layout.width]); @@ -130,6 +147,26 @@ describe('renderAxes', () => { }); }); + describe('with log scales', () => { + it('uses measured geometric sweep values instead of generated log subdivisions', () => { + const layout = makeLayout(); + const xScale = d3.scaleLog().base(2).domain([0.9, 70]).range([0, layout.width]); + const yScale = d3.scaleLinear().domain([0, 50]).range([layout.height, 0]); + const measuredValues = [1, 2, 4, 8, 16, 32, 64]; + + renderAxes(layout, xScale, yScale, { + xTickValues: measuredValues, + xTickFormat: String, + }); + + const labels: string[] = []; + layout.xAxisGroup.selectAll('.tick text').each(function () { + labels.push(d3.select(this).text()); + }); + expect(labels).toEqual(measuredValues.map(String)); + }); + }); + describe('with band scales', () => { it('renders band scale on x-axis', () => { const layout = makeLayout(); @@ -281,6 +318,24 @@ describe('renderGrid', 
() => { expect(vLines).toBeGreaterThan(0); }); + it('uses explicit x tick values for vertical grid lines', () => { + const layout = makeLayout(); + const xScale = d3.scaleLog().base(2).domain([1, 64]).range([0, layout.width]); + const yScale = d3.scaleLinear().domain([0, 50]).range([layout.height, 0]); + const measuredValues = [1, 4, 16, 64]; + + renderGrid(layout, xScale, yScale, 5, 0, measuredValues); + + const positions: number[] = []; + layout.gridGroup + .select('.grid-v') + .selectAll('line') + .each(function () { + positions.push(Number(d3.select(this).attr('x1'))); + }); + expect(positions).toEqual(measuredValues.map((value) => xScale(value))); + }); + it('creates horizontal grid lines matching y-scale ticks', () => { const layout = makeLayout(); const xScale = d3.scaleLinear().domain([0, 100]).range([0, layout.width]); diff --git a/packages/app/src/lib/d3-chart/chart-update.ts b/packages/app/src/lib/d3-chart/chart-update.ts index 45c458c7..fb79ac27 100644 --- a/packages/app/src/lib/d3-chart/chart-update.ts +++ b/packages/app/src/lib/d3-chart/chart-update.ts @@ -10,6 +10,8 @@ export interface AxisUpdateConfig { yTickFormat?: (d: d3.AxisDomain) => string; xTickCount?: number; yTickCount?: number; + xTickValues?: (number | Date)[]; + yTickValues?: (number | Date)[]; /** Override tick size for Y axis (default: 6, use 0 for band scales). */ yTickSize?: number; /** When set, axes animate to new positions over this duration (ms). */ @@ -23,8 +25,16 @@ export function renderAxes( yScale: ContinuousScale | d3.ScaleBand, config: AxisUpdateConfig, ): void { - const { xTickFormat, yTickFormat, xTickCount, yTickCount, yTickSize, transitionDuration } = - config; + const { + xTickFormat, + yTickFormat, + xTickCount, + yTickCount, + xTickValues, + yTickValues, + yTickSize, + transitionDuration, + } = config; const dur = transitionDuration ?? 
0; // X axis @@ -36,6 +46,9 @@ export function renderAxes( } else { const gen = d3.axisBottom(xScale as ContinuousScale).tickSize(6); if (xTickCount) gen.ticks(xTickCount); + if (xTickValues) { + gen.tickValues(visibleTickValues(xScale, xTickValues) as Iterable); + } if (xTickFormat) gen.tickFormat(xTickFormat as any); xAxisGen = gen as unknown as d3.Axis; } @@ -54,6 +67,9 @@ export function renderAxes( } else { const yAxisGen = d3.axisLeft(yScale as ContinuousScale).tickSize(yTickSize ?? 6); if (yTickCount) yAxisGen.ticks(yTickCount); + if (yTickValues) { + yAxisGen.tickValues(visibleTickValues(yScale, yTickValues) as Iterable); + } if (yTickFormat) yAxisGen.tickFormat(yTickFormat as any); const yTarget = dur > 0 ? layout.yAxisGroup.transition().duration(dur) : layout.yAxisGroup; (yTarget as any).call(yAxisGen as any); @@ -67,6 +83,8 @@ export function renderGrid( yScale: ContinuousScale | d3.ScaleBand, yTickCount?: number, transitionDuration = 0, + xTickValues?: (number | Date)[], + yTickValues?: (number | Date)[], ): void { const { width, height, gridGroup } = layout; const dur = transitionDuration; @@ -87,7 +105,9 @@ export function renderGrid( .attr('y2', height); } else { const tickScale = xScale as { ticks: (count?: number) => number[]; (v: number): number }; - const xTicks = tickScale.ticks(); + const xTicks = xTickValues + ? (visibleTickValues(xScale, xTickValues) as number[]) + : tickScale.ticks(); const vJoin = vGroup .selectAll('line') .data(xTicks) @@ -126,7 +146,9 @@ export function renderGrid( .attr('y2', (d) => (bandScale(d) || 0) + bandScale.bandwidth() / 2) .style('stroke-width', 0.5); } else { - const yTicks = yScale.ticks(yTickCount ?? 5); + const yTicks = yTickValues + ? (visibleTickValues(yScale, yTickValues) as number[]) + : yScale.ticks(yTickCount ?? 
5); const hJoin = hGroup .selectAll('line') .data(yTicks) @@ -149,3 +171,18 @@ export function renderGrid( .attr('y2', (d: number) => yScale(d)); } } + +function visibleTickValues( + scale: ContinuousScale | d3.ScaleTime, + values: (number | Date)[], +): (number | Date)[] { + const domain = scale.domain(); + const start = Number(domain[0]); + const end = Number(domain.at(-1)); + const min = Math.min(start, end); + const max = Math.max(start, end); + return values.filter((value) => { + const numeric = Number(value); + return Number.isFinite(numeric) && numeric >= min && numeric <= max; + }); +} diff --git a/packages/app/src/lib/d3-chart/layers/lines.test.ts b/packages/app/src/lib/d3-chart/layers/lines.test.ts index 6631bcc8..26216ef8 100644 --- a/packages/app/src/lib/d3-chart/layers/lines.test.ts +++ b/packages/app/src/lib/d3-chart/layers/lines.test.ts @@ -108,6 +108,28 @@ describe('renderLines', () => { } }); + it('sets a per-series stroke dash pattern when configured', () => { + const group = createMockGroup(); + const { xScale, yScale } = makeScales(); + renderLines( + group as any, + SAMPLE_LINES, + xScale, + yScale, + makeConfig({ + getStrokeDasharray: (key) => (key === 'seriesB' ? '6 4' : null), + }), + ); + + const paths = group.selectAll('.line-path'); + const dashByClass: Record = {}; + for (const el of paths.elements) { + dashByClass[el.attrs['class'] as string] = el.attrs['stroke-dasharray'] ?? 
null; + } + expect(dashByClass['line-path line-seriesA']).toBeNull(); + expect(dashByClass['line-path line-seriesB']).toBe('6 4'); + }); + it('generates valid d attribute from line generator', () => { const group = createMockGroup(); const { xScale, yScale } = makeScales(); diff --git a/packages/app/src/lib/d3-chart/layers/lines.ts b/packages/app/src/lib/d3-chart/layers/lines.ts index 79c394e9..fd17ff2e 100644 --- a/packages/app/src/lib/d3-chart/layers/lines.ts +++ b/packages/app/src/lib/d3-chart/layers/lines.ts @@ -6,6 +6,7 @@ type AnyXScale = ContinuousScale | d3.ScaleTime; export interface LineConfig { getColor: (key: string) => string; + getStrokeDasharray?: (key: string) => string | null; strokeWidth?: number; curve?: d3.CurveFactory; /** Return false to create gaps in the line (e.g., missing data points). */ @@ -58,6 +59,7 @@ export function renderLines( .merge(selection) .attr('class', (d) => `line-path line-${d.key}`) .attr('stroke', (d) => config.getColor(d.key)) + .attr('stroke-dasharray', (d) => config.getStrokeDasharray?.(d.key) ?? null) .attr('stroke-width', config.strokeWidth ?? 2) .attr('d', (d) => lineGenerator(d.points)); } diff --git a/packages/app/src/lib/tab-meta.ts b/packages/app/src/lib/tab-meta.ts index 9145532b..351388d9 100644 --- a/packages/app/src/lib/tab-meta.ts +++ b/packages/app/src/lib/tab-meta.ts @@ -15,6 +15,7 @@ export const VALID_TABS = [ 'calculator', 'reliability', 'gpu-specs', + 'collectivex', 'ai-chart', 'gpu-metrics', 'submissions', @@ -55,6 +56,11 @@ export const TAB_META: Record = description: 'Detailed GPU specifications for AI inference. Compare NVIDIA, AMD, and Intel GPUs — memory bandwidth, FLOPS, interconnects, and topology.', }, + collectivex: { + title: 'CollectiveX Communication Benchmarks', + description: + 'Experimental cross-vendor expert-parallel communication benchmarks. 
Compare MoE dispatch and combine latency across NVIDIA and AMD GPU platforms.', + }, 'ai-chart': { title: 'AI-Powered Chart Generation', description: